From 34649f72b628c7c4966c167b8208f2595ca51251 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@mellanox.com>
Date: Thu, 23 Mar 2017 11:31:19 -0400
Subject: [PATCH 0001/1958] tile: prefer <arch/intreg.h> to __need_int_reg_t

As part of some work in glibc to move away from the "__need" prefix,
this commit breaks away the definitions of __int_reg_t, __uint_reg_t,
__INT_REG_BITS, and __INT_REG_FMT to a separate <arch/intreg.h>
"microheader".  It is then included from <arch/abi.h> to preserve
the semantics of the previous header.

For now, we continue to preserve the __need_int_reg_t semantics
in <arch/abi.h> as well, but anticipate that after a few years
we can obsolete it.
---
 arch/tile/include/uapi/arch/abi.h    | 49 ++-----------------
 arch/tile/include/uapi/arch/intreg.h | 70 ++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 45 deletions(-)
 create mode 100644 arch/tile/include/uapi/arch/intreg.h

diff --git a/arch/tile/include/uapi/arch/abi.h b/arch/tile/include/uapi/arch/abi.h
index c55a3d4326444..328e62260272e 100644
--- a/arch/tile/include/uapi/arch/abi.h
+++ b/arch/tile/include/uapi/arch/abi.h
@@ -20,58 +20,17 @@
 
 #ifndef __ARCH_ABI_H__
 
-#if !defined __need_int_reg_t && !defined __DOXYGEN__
-# define __ARCH_ABI_H__
-# include <arch/chip.h>
-#endif
-
-/* Provide the basic machine types. */
-#ifndef __INT_REG_BITS
-
-/** Number of bits in a register. */
-#if defined __tilegx__
-# define __INT_REG_BITS 64
-#elif defined __tilepro__
-# define __INT_REG_BITS 32
-#elif !defined __need_int_reg_t
+#ifndef __tile__   /* support uncommon use of arch headers in non-tile builds */
 # include <arch/chip.h>
 # define __INT_REG_BITS CHIP_WORD_SIZE()
-#else
-# error Unrecognized architecture with __need_int_reg_t
-#endif
-
-#if __INT_REG_BITS == 64
-
-#ifndef __ASSEMBLER__
-/** Unsigned type that can hold a register. */
-typedef unsigned long long __uint_reg_t;
-
-/** Signed type that can hold a register. */
-typedef long long __int_reg_t;
-#endif
-
-/** String prefix to use for printf(). */
-#define __INT_REG_FMT "ll"
-
-#else
-
-#ifndef __ASSEMBLER__
-/** Unsigned type that can hold a register. */
-typedef unsigned long __uint_reg_t;
-
-/** Signed type that can hold a register. */
-typedef long __int_reg_t;
-#endif
-
-/** String prefix to use for printf(). */
-#define __INT_REG_FMT "l"
-
 #endif
-#endif /* __INT_REG_BITS */
 
+#include <arch/intreg.h>
 
+/* __need_int_reg_t is deprecated: just include <arch/intreg.h> */
 #ifndef __need_int_reg_t
 
+#define __ARCH_ABI_H__
 
 #ifndef __ASSEMBLER__
 /** Unsigned type that can hold a register. */
diff --git a/arch/tile/include/uapi/arch/intreg.h b/arch/tile/include/uapi/arch/intreg.h
new file mode 100644
index 0000000000000..1cf2fbf743065
--- /dev/null
+++ b/arch/tile/include/uapi/arch/intreg.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2017 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ *
+ * Provide types and defines for the type that can hold a register,
+ * in the implementation namespace.
+ */
+
+#ifndef __ARCH_INTREG_H__
+#define __ARCH_INTREG_H__
+
+/*
+ * Get number of bits in a register.  __INT_REG_BITS may be defined
+ * prior to including this header to force a particular bit width.
+ */
+
+#ifndef __INT_REG_BITS
+# if defined __tilegx__
+#  define __INT_REG_BITS 64
+# elif defined __tilepro__
+#  define __INT_REG_BITS 32
+# else
+#  error Unrecognized architecture
+# endif
+#endif
+
+#if __INT_REG_BITS == 64
+
+# ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef unsigned long long __uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef long long __int_reg_t;
+# endif
+
+/** String prefix to use for printf(). */
+# define __INT_REG_FMT "ll"
+
+#elif __INT_REG_BITS == 32
+
+# ifndef __ASSEMBLER__
+/** Unsigned type that can hold a register. */
+typedef unsigned long __uint_reg_t;
+
+/** Signed type that can hold a register. */
+typedef long __int_reg_t;
+# endif
+
+/** String prefix to use for printf(). */
+# define __INT_REG_FMT "l"
+
+#else
+# error Unrecognized value of __INT_REG_BITS
+#endif
+
+#endif /* !__ARCH_INTREG_H__ */
-- 
GitLab


From 0af0bc38175d9859753a08f07af3aa767601265b Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 30 Mar 2017 13:54:50 +0200
Subject: [PATCH 0002/1958] mm, tile: drop arch_{add,remove}_memory

these functions are unreachable because tile doesn't support memory
hotplug becasuse it doesn't select ARCH_ENABLE_MEMORY_HOTPLUG nor
it supports SPARSEMEM.

This code hasn't been compiled for a while obviously because nobody has
noticed that __add_pages has a different signature since 2009.

Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
---
 arch/tile/mm/init.c | 30 ------------------------------
 1 file changed, 30 deletions(-)

diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 3a97e4d7205cf..5f757e04bcd27 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -857,36 +857,6 @@ void __init mem_init(void)
 #endif
 }
 
-/*
- * this is for the non-NUMA, single node SMP system case.
- * Specifically, in the case of x86, we will always add
- * memory to the highmem for now.
- */
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-int arch_add_memory(u64 start, u64 size, bool for_device)
-{
-	struct pglist_data *pgdata = &contig_page_data;
-	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long nr_pages = size >> PAGE_SHIFT;
-
-	return __add_pages(zone, start_pfn, nr_pages);
-}
-
-int remove_memory(u64 start, u64 size)
-{
-	return -EINVAL;
-}
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
-{
-	/* TODO */
-	return -EBUSY;
-}
-#endif
-#endif
-
 struct kmem_cache *pgd_cache;
 
 void __init pgtable_cache_init(void)
-- 
GitLab


From cf9806f32ef63b745f2486e0dbb2ac21f4ca44f0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 5 May 2017 08:33:24 +0300
Subject: [PATCH 0003/1958] ipmi_ssif: unlock on allocation failure

We should unlock and re-enable IRQs if this allocation fails.

Fixes: 259307074bfc ("ipmi: Add SMBus interface driver (SSIF) ")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 0b22a9be50291..6dd6476ea5d31 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -408,6 +408,7 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags)
 	msg = ipmi_alloc_smi_msg();
 	if (!msg) {
 		ssif_info->ssif_state = SSIF_NORMAL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
 		return;
 	}
 
@@ -430,6 +431,7 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info,
 	msg = ipmi_alloc_smi_msg();
 	if (!msg) {
 		ssif_info->ssif_state = SSIF_NORMAL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
 		return;
 	}
 
-- 
GitLab


From 860f01e96981a68553f3ca49f574ff14fe955e72 Mon Sep 17 00:00:00 2001
From: Valentin Vidic <Valentin.Vidic@CARNet.hr>
Date: Fri, 5 May 2017 21:07:33 +0200
Subject: [PATCH 0004/1958] ipmi/watchdog: fix watchdog timeout set on reboot

systemd by default starts watchdog on reboot and sets the timer to
ShutdownWatchdogSec=10min.  Reboot handler in ipmi_watchdog than reduces
the timer to 120s which is not enough time to boot a Xen machine with
a lot of RAM.  As a result the machine is rebooted the second time
during the long run of (XEN) Scrubbing Free RAM.....

Fix this by setting the timer to 120s only if it was previously
set to a low value.

Signed-off-by: Valentin Vidic <Valentin.Vidic@CARNet.hr>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_watchdog.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index d165af8abe36c..4161d9961a241 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1163,10 +1163,11 @@ static int wdog_reboot_handler(struct notifier_block *this,
 			ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
 			ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
 		} else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
-			/* Set a long timer to let the reboot happens, but
-			   reboot if it hangs, but only if the watchdog
+			/* Set a long timer to let the reboot happen or
+			   reset if it hangs, but only if the watchdog
 			   timer was already running. */
-			timeout = 120;
+			if (timeout < 120)
+				timeout = 120;
 			pretimeout = 0;
 			ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
 			ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
-- 
GitLab


From c41e43c6faf68a9b70afdb0dfee45d196c27031b Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 21 Apr 2017 09:30:21 -0700
Subject: [PATCH 0005/1958] mtd: dataflash: Replace C99 types with their kernel
 counterparts

No functional change intended.

Cc: cphealy@gmail.com
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Cc: linux-kernel@vger.kernel.org
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mtd_dataflash.c | 40 ++++++++++++++---------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index f9e9bd1cfaa03..a566231a9c796 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -84,7 +84,7 @@
 
 
 struct dataflash {
-	uint8_t			command[4];
+	u8			command[4];
 	char			name[24];
 
 	unsigned short		page_offset;	/* offset in flash address */
@@ -153,8 +153,8 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 	struct spi_transfer	x = { };
 	struct spi_message	msg;
 	unsigned		blocksize = priv->page_size << 3;
-	uint8_t			*command;
-	uint32_t		rem;
+	u8			*command;
+	u32			rem;
 
 	pr_debug("%s: erase addr=0x%llx len 0x%llx\n",
 	      dev_name(&spi->dev), (long long)instr->addr,
@@ -187,8 +187,8 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 		pageaddr = pageaddr << priv->page_offset;
 
 		command[0] = do_block ? OP_ERASE_BLOCK : OP_ERASE_PAGE;
-		command[1] = (uint8_t)(pageaddr >> 16);
-		command[2] = (uint8_t)(pageaddr >> 8);
+		command[1] = (u8)(pageaddr >> 16);
+		command[2] = (u8)(pageaddr >> 8);
 		command[3] = 0;
 
 		pr_debug("ERASE %s: (%x) %x %x %x [%i]\n",
@@ -239,7 +239,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct spi_transfer	x[2] = { };
 	struct spi_message	msg;
 	unsigned int		addr;
-	uint8_t			*command;
+	u8			*command;
 	int			status;
 
 	pr_debug("%s: read 0x%x..0x%x\n", dev_name(&priv->spi->dev),
@@ -271,9 +271,9 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 	 * fewer "don't care" bytes.  Both buffers stay unchanged.
 	 */
 	command[0] = OP_READ_CONTINUOUS;
-	command[1] = (uint8_t)(addr >> 16);
-	command[2] = (uint8_t)(addr >> 8);
-	command[3] = (uint8_t)(addr >> 0);
+	command[1] = (u8)(addr >> 16);
+	command[2] = (u8)(addr >> 8);
+	command[3] = (u8)(addr >> 0);
 	/* plus 4 "don't care" bytes */
 
 	status = spi_sync(priv->spi, &msg);
@@ -308,7 +308,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 	size_t			remaining = len;
 	u_char			*writebuf = (u_char *) buf;
 	int			status = -EINVAL;
-	uint8_t			*command;
+	u8			*command;
 
 	pr_debug("%s: write 0x%x..0x%x\n",
 		dev_name(&spi->dev), (unsigned)to, (unsigned)(to + len));
@@ -455,11 +455,11 @@ static int dataflash_get_otp_info(struct mtd_info *mtd, size_t len,
 }
 
 static ssize_t otp_read(struct spi_device *spi, unsigned base,
-		uint8_t *buf, loff_t off, size_t len)
+		u8 *buf, loff_t off, size_t len)
 {
 	struct spi_message	m;
 	size_t			l;
-	uint8_t			*scratch;
+	u8			*scratch;
 	struct spi_transfer	t;
 	int			status;
 
@@ -538,7 +538,7 @@ static int dataflash_write_user_otp(struct mtd_info *mtd,
 {
 	struct spi_message	m;
 	const size_t		l = 4 + 64;
-	uint8_t			*scratch;
+	u8			*scratch;
 	struct spi_transfer	t;
 	struct dataflash	*priv = mtd->priv;
 	int			status;
@@ -689,14 +689,14 @@ struct flash_info {
 	/* JEDEC id has a high byte of zero plus three data bytes:
 	 * the manufacturer id, then a two byte device id.
 	 */
-	uint32_t	jedec_id;
+	u32		jedec_id;
 
 	/* The size listed here is what works with OP_ERASE_PAGE. */
 	unsigned	nr_pages;
-	uint16_t	pagesize;
-	uint16_t	pageoffset;
+	u16		pagesize;
+	u16		pageoffset;
 
-	uint16_t	flags;
+	u16		flags;
 #define SUP_POW2PS	0x0002		/* supports 2^N byte pages */
 #define IS_POW2PS	0x0001		/* uses 2^N byte pages */
 };
@@ -739,9 +739,9 @@ static struct flash_info dataflash_data[] = {
 static struct flash_info *jedec_probe(struct spi_device *spi)
 {
 	int			tmp;
-	uint8_t			code = OP_READ_ID;
-	uint8_t			id[3];
-	uint32_t		jedec;
+	u8			code = OP_READ_ID;
+	u8			id[3];
+	u32			jedec;
 	struct flash_info	*info;
 	int status;
 
-- 
GitLab


From 41c9c6621afa22c86fe74cf07536fd21c7719ca6 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 21 Apr 2017 09:30:22 -0700
Subject: [PATCH 0006/1958] mtd: dataflash: Improve coding style in
 jedec_probe()

Change the following:

   - Replace indentation between type and name of local variable from
     tabs to spaces

   - Replace magic number 0x1F with CFI_MFR_ATMEL macro

   - Replace variable 'tmp' with 'ret' and 'i' where appropriate

   - Reformat multi-line comments and add newlines where appropriate

No functional change intended.

Cc: cphealy@gmail.com
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: linux-kernel@vger.kernel.org
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mtd_dataflash.c | 31 ++++++++++++++++-------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index a566231a9c796..5b7a8c36e4f7f 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -82,6 +82,7 @@
 #define OP_WRITE_SECURITY_REVC	0x9A
 #define OP_WRITE_SECURITY	0x9B	/* revision D */
 
+#define CFI_MFR_ATMEL		0x1F
 
 struct dataflash {
 	u8			command[4];
@@ -738,14 +739,15 @@ static struct flash_info dataflash_data[] = {
 
 static struct flash_info *jedec_probe(struct spi_device *spi)
 {
-	int			tmp;
-	u8			code = OP_READ_ID;
-	u8			id[3];
-	u32			jedec;
-	struct flash_info	*info;
+	int ret, i;
+	u8 code = OP_READ_ID;
+	u8 id[3];
+	u32 jedec;
+	struct flash_info *info;
 	int status;
 
-	/* JEDEC also defines an optional "extended device information"
+	/*
+	 * JEDEC also defines an optional "extended device information"
 	 * string for after vendor-specific data, after the three bytes
 	 * we use here.  Supporting some chips might require using it.
 	 *
@@ -753,13 +755,14 @@ static struct flash_info *jedec_probe(struct spi_device *spi)
 	 * That's not an error; only rev C and newer chips handle it, and
 	 * only Atmel sells these chips.
 	 */
-	tmp = spi_write_then_read(spi, &code, 1, id, 3);
-	if (tmp < 0) {
+	ret = spi_write_then_read(spi, &code, 1, id, 3);
+	if (ret < 0) {
 		pr_debug("%s: error %d reading JEDEC ID\n",
-			dev_name(&spi->dev), tmp);
-		return ERR_PTR(tmp);
+			dev_name(&spi->dev), ret);
+		return ERR_PTR(ret);
 	}
-	if (id[0] != 0x1f)
+
+	if (id[0] != CFI_MFR_ATMEL)
 		return NULL;
 
 	jedec = id[0];
@@ -768,9 +771,9 @@ static struct flash_info *jedec_probe(struct spi_device *spi)
 	jedec = jedec << 8;
 	jedec |= id[2];
 
-	for (tmp = 0, info = dataflash_data;
-			tmp < ARRAY_SIZE(dataflash_data);
-			tmp++, info++) {
+	for (i = 0, info = dataflash_data;
+			i < ARRAY_SIZE(dataflash_data);
+			i++, info++) {
 		if (info->jedec_id == jedec) {
 			pr_debug("%s: OTP, sector protect%s\n",
 				dev_name(&spi->dev),
-- 
GitLab


From 02f62864f6cebbbbff6bb611fddf78c1d05a9747 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 21 Apr 2017 09:30:23 -0700
Subject: [PATCH 0007/1958] mtd: dataflash: Replace pr_debug, printk with dev_*
 functions

Lion's share of calls to pr_debug in this driver follow the pattern of
pr_debug("%s <message>", dev_name(<dev>), <arguments>), which should
be semantically identical to dev_dbg(<dev>, "<message>", <arguments>),
so replace such occurencies to simplify the code.

Convert the small minority of pr_debug that do not follow pattern from
above to use dev_dbg as well, for the sake of consistency.

Convert similar patter of printk(KERN_ERR, "%s: ...", dev_name(...),
...) to use dev_err instead.

No functional change intended.

Cc: cphealy@gmail.com
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mtd_dataflash.c | 74 +++++++++++++----------------
 1 file changed, 33 insertions(+), 41 deletions(-)

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 5b7a8c36e4f7f..ccd1e024d343b 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -130,8 +130,7 @@ static int dataflash_waitready(struct spi_device *spi)
 	for (;;) {
 		status = dataflash_status(spi);
 		if (status < 0) {
-			pr_debug("%s: status %d?\n",
-					dev_name(&spi->dev), status);
+			dev_dbg(&spi->dev, "status %d?\n", status);
 			status = 0;
 		}
 
@@ -157,9 +156,8 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 	u8			*command;
 	u32			rem;
 
-	pr_debug("%s: erase addr=0x%llx len 0x%llx\n",
-	      dev_name(&spi->dev), (long long)instr->addr,
-	      (long long)instr->len);
+	dev_dbg(&spi->dev, "erase addr=0x%llx len 0x%llx\n",
+		(long long)instr->addr, (long long)instr->len);
 
 	div_u64_rem(instr->len, priv->page_size, &rem);
 	if (rem)
@@ -192,7 +190,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 		command[2] = (u8)(pageaddr >> 8);
 		command[3] = 0;
 
-		pr_debug("ERASE %s: (%x) %x %x %x [%i]\n",
+		dev_dbg(&spi->dev, "ERASE %s: (%x) %x %x %x [%i]\n",
 			do_block ? "block" : "page",
 			command[0], command[1], command[2], command[3],
 			pageaddr);
@@ -201,8 +199,8 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 		(void) dataflash_waitready(spi);
 
 		if (status < 0) {
-			printk(KERN_ERR "%s: erase %x, err %d\n",
-				dev_name(&spi->dev), pageaddr, status);
+			dev_err(&spi->dev, "erase %x, err %d\n",
+				pageaddr, status);
 			/* REVISIT:  can retry instr->retries times; or
 			 * giveup and instr->fail_addr = instr->addr;
 			 */
@@ -243,8 +241,8 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 	u8			*command;
 	int			status;
 
-	pr_debug("%s: read 0x%x..0x%x\n", dev_name(&priv->spi->dev),
-			(unsigned)from, (unsigned)(from + len));
+	dev_dbg(&priv->spi->dev, "read 0x%x..0x%x\n",
+		  (unsigned int)from, (unsigned int)(from + len));
 
 	/* Calculate flash page/byte address */
 	addr = (((unsigned)from / priv->page_size) << priv->page_offset)
@@ -252,7 +250,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 
 	command = priv->command;
 
-	pr_debug("READ: (%x) %x %x %x\n",
+	dev_dbg(&priv->spi->dev, "READ: (%x) %x %x %x\n",
 		command[0], command[1], command[2], command[3]);
 
 	spi_message_init(&msg);
@@ -284,8 +282,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 		*retlen = msg.actual_length - 8;
 		status = 0;
 	} else
-		pr_debug("%s: read %x..%x --> %d\n",
-			dev_name(&priv->spi->dev),
+		dev_dbg(&priv->spi->dev, "read %x..%x --> %d\n",
 			(unsigned)from, (unsigned)(from + len),
 			status);
 	return status;
@@ -311,8 +308,8 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 	int			status = -EINVAL;
 	u8			*command;
 
-	pr_debug("%s: write 0x%x..0x%x\n",
-		dev_name(&spi->dev), (unsigned)to, (unsigned)(to + len));
+	dev_dbg(&spi->dev, "write 0x%x..0x%x\n",
+		(unsigned int)to, (unsigned int)(to + len));
 
 	spi_message_init(&msg);
 
@@ -329,7 +326,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 	mutex_lock(&priv->lock);
 	while (remaining > 0) {
-		pr_debug("write @ %i:%i len=%i\n",
+		dev_dbg(&spi->dev, "write @ %i:%i len=%i\n",
 			pageaddr, offset, writelen);
 
 		/* REVISIT:
@@ -357,13 +354,13 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 			command[2] = (addr & 0x0000FF00) >> 8;
 			command[3] = 0;
 
-			pr_debug("TRANSFER: (%x) %x %x %x\n",
+			dev_dbg(&spi->dev, "TRANSFER: (%x) %x %x %x\n",
 				command[0], command[1], command[2], command[3]);
 
 			status = spi_sync(spi, &msg);
 			if (status < 0)
-				pr_debug("%s: xfer %u -> %d\n",
-					dev_name(&spi->dev), addr, status);
+				dev_dbg(&spi->dev, "xfer %u -> %d\n",
+					addr, status);
 
 			(void) dataflash_waitready(priv->spi);
 		}
@@ -375,7 +372,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		command[2] = (addr & 0x0000FF00) >> 8;
 		command[3] = (addr & 0x000000FF);
 
-		pr_debug("PROGRAM: (%x) %x %x %x\n",
+		dev_dbg(&spi->dev, "PROGRAM: (%x) %x %x %x\n",
 			command[0], command[1], command[2], command[3]);
 
 		x[1].tx_buf = writebuf;
@@ -384,8 +381,8 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		status = spi_sync(spi, &msg);
 		spi_transfer_del(x + 1);
 		if (status < 0)
-			pr_debug("%s: pgm %u/%u -> %d\n",
-				dev_name(&spi->dev), addr, writelen, status);
+			dev_dbg(&spi->dev, "pgm %u/%u -> %d\n",
+				addr, writelen, status);
 
 		(void) dataflash_waitready(priv->spi);
 
@@ -399,20 +396,20 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		command[2] = (addr & 0x0000FF00) >> 8;
 		command[3] = 0;
 
-		pr_debug("COMPARE: (%x) %x %x %x\n",
+		dev_dbg(&spi->dev, "COMPARE: (%x) %x %x %x\n",
 			command[0], command[1], command[2], command[3]);
 
 		status = spi_sync(spi, &msg);
 		if (status < 0)
-			pr_debug("%s: compare %u -> %d\n",
-				dev_name(&spi->dev), addr, status);
+			dev_dbg(&spi->dev, "compare %u -> %d\n",
+				addr, status);
 
 		status = dataflash_waitready(priv->spi);
 
 		/* Check result of the compare operation */
 		if (status & (1 << 6)) {
-			printk(KERN_ERR "%s: compare page %u, err %d\n",
-				dev_name(&spi->dev), pageaddr, status);
+			dev_err(&spi->dev, "compare page %u, err %d\n",
+				pageaddr, status);
 			remaining = 0;
 			status = -EIO;
 			break;
@@ -757,8 +754,7 @@ static struct flash_info *jedec_probe(struct spi_device *spi)
 	 */
 	ret = spi_write_then_read(spi, &code, 1, id, 3);
 	if (ret < 0) {
-		pr_debug("%s: error %d reading JEDEC ID\n",
-			dev_name(&spi->dev), ret);
+		dev_dbg(&spi->dev, "error %d reading JEDEC ID\n", ret);
 		return ERR_PTR(ret);
 	}
 
@@ -775,16 +771,14 @@ static struct flash_info *jedec_probe(struct spi_device *spi)
 			i < ARRAY_SIZE(dataflash_data);
 			i++, info++) {
 		if (info->jedec_id == jedec) {
-			pr_debug("%s: OTP, sector protect%s\n",
-				dev_name(&spi->dev),
-				(info->flags & SUP_POW2PS)
-					? ", binary pagesize" : ""
-				);
+			dev_dbg(&spi->dev, "OTP, sector protect%s\n",
+				(info->flags & SUP_POW2PS) ?
+				", binary pagesize" : "");
 			if (info->flags & SUP_POW2PS) {
 				status = dataflash_status(spi);
 				if (status < 0) {
-					pr_debug("%s: status error %d\n",
-						dev_name(&spi->dev), status);
+					dev_dbg(&spi->dev, "status error %d\n",
+						status);
 					return ERR_PTR(status);
 				}
 				if (status & 0x1) {
@@ -848,8 +842,7 @@ static int dataflash_probe(struct spi_device *spi)
 	 */
 	status = dataflash_status(spi);
 	if (status <= 0 || status == 0xff) {
-		pr_debug("%s: status error %d\n",
-				dev_name(&spi->dev), status);
+		dev_dbg(&spi->dev, "status error %d\n", status);
 		if (status == 0 || status == 0xff)
 			status = -ENODEV;
 		return status;
@@ -890,8 +883,7 @@ static int dataflash_probe(struct spi_device *spi)
 	}
 
 	if (status < 0)
-		pr_debug("%s: add_dataflash --> %d\n", dev_name(&spi->dev),
-				status);
+		dev_dbg(&spi->dev, "add_dataflash --> %d\n", status);
 
 	return status;
 }
@@ -901,7 +893,7 @@ static int dataflash_remove(struct spi_device *spi)
 	struct dataflash	*flash = spi_get_drvdata(spi);
 	int			status;
 
-	pr_debug("%s: remove\n", dev_name(&spi->dev));
+	dev_dbg(&spi->dev, "remove\n");
 
 	status = mtd_device_unregister(&flash->mtd);
 	if (status == 0)
-- 
GitLab


From a296a1bc3eb54382d2a61d47529e71c9d3bc615e Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 21 Apr 2017 09:30:24 -0700
Subject: [PATCH 0008/1958] mtd: dataflash: Get rid of loop counter in
 jedec_probe()

"For" loop in jedec_probe can be simplified to not need counter
'i'. Convert the code and get rid of the variable.

Cc: cphealy@gmail.com
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mtd_dataflash.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index ccd1e024d343b..2d3e4034b5914 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -736,7 +736,7 @@ static struct flash_info dataflash_data[] = {
 
 static struct flash_info *jedec_probe(struct spi_device *spi)
 {
-	int ret, i;
+	int ret;
 	u8 code = OP_READ_ID;
 	u8 id[3];
 	u32 jedec;
@@ -767,9 +767,9 @@ static struct flash_info *jedec_probe(struct spi_device *spi)
 	jedec = jedec << 8;
 	jedec |= id[2];
 
-	for (i = 0, info = dataflash_data;
-			i < ARRAY_SIZE(dataflash_data);
-			i++, info++) {
+	for (info = dataflash_data;
+	     info < dataflash_data + ARRAY_SIZE(dataflash_data);
+	     info++) {
 		if (info->jedec_id == jedec) {
 			dev_dbg(&spi->dev, "OTP, sector protect%s\n",
 				(info->flags & SUP_POW2PS) ?
-- 
GitLab


From 1da8869a428317a6d3cd8d47184cf87feb34a98b Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 21 Apr 2017 09:30:25 -0700
Subject: [PATCH 0009/1958] mtd: dataflash: Make use of "extened device
 information"

In anticipation of supporting chips that need it, extend the size of
struct flash_info's 'jedec_id' field to make room 2 byte of extended
device information as well as add code to fetch this data during
jedec_probe().

Cc: cphealy@gmail.com
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Cc: linux-kernel@vger.kernel.org
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mtd_dataflash.c | 88 +++++++++++++++++++----------
 1 file changed, 57 insertions(+), 31 deletions(-)

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 2d3e4034b5914..3f1a0fbe5a583 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -84,6 +84,9 @@
 
 #define CFI_MFR_ATMEL		0x1F
 
+#define DATAFLASH_SHIFT_EXTID	24
+#define DATAFLASH_SHIFT_ID	40
+
 struct dataflash {
 	u8			command[4];
 	char			name[24];
@@ -687,7 +690,7 @@ struct flash_info {
 	/* JEDEC id has a high byte of zero plus three data bytes:
 	 * the manufacturer id, then a two byte device id.
 	 */
-	u32		jedec_id;
+	u64		jedec_id;
 
 	/* The size listed here is what works with OP_ERASE_PAGE. */
 	unsigned	nr_pages;
@@ -695,6 +698,7 @@ struct flash_info {
 	u16		pageoffset;
 
 	u16		flags;
+#define SUP_EXTID	0x0004		/* supports extended ID data */
 #define SUP_POW2PS	0x0002		/* supports 2^N byte pages */
 #define IS_POW2PS	0x0001		/* uses 2^N byte pages */
 };
@@ -734,42 +738,18 @@ static struct flash_info dataflash_data[] = {
 	{ "at45db642d",  0x1f2800, 8192, 1024, 10, SUP_POW2PS | IS_POW2PS},
 };
 
-static struct flash_info *jedec_probe(struct spi_device *spi)
+static struct flash_info *jedec_lookup(struct spi_device *spi,
+				       u64 jedec, bool use_extid)
 {
-	int ret;
-	u8 code = OP_READ_ID;
-	u8 id[3];
-	u32 jedec;
 	struct flash_info *info;
 	int status;
 
-	/*
-	 * JEDEC also defines an optional "extended device information"
-	 * string for after vendor-specific data, after the three bytes
-	 * we use here.  Supporting some chips might require using it.
-	 *
-	 * If the vendor ID isn't Atmel's (0x1f), assume this call failed.
-	 * That's not an error; only rev C and newer chips handle it, and
-	 * only Atmel sells these chips.
-	 */
-	ret = spi_write_then_read(spi, &code, 1, id, 3);
-	if (ret < 0) {
-		dev_dbg(&spi->dev, "error %d reading JEDEC ID\n", ret);
-		return ERR_PTR(ret);
-	}
-
-	if (id[0] != CFI_MFR_ATMEL)
-		return NULL;
-
-	jedec = id[0];
-	jedec = jedec << 8;
-	jedec |= id[1];
-	jedec = jedec << 8;
-	jedec |= id[2];
-
 	for (info = dataflash_data;
 	     info < dataflash_data + ARRAY_SIZE(dataflash_data);
 	     info++) {
+		if (use_extid && !(info->flags & SUP_EXTID))
+			continue;
+
 		if (info->jedec_id == jedec) {
 			dev_dbg(&spi->dev, "OTP, sector protect%s\n",
 				(info->flags & SUP_POW2PS) ?
@@ -793,12 +773,58 @@ static struct flash_info *jedec_probe(struct spi_device *spi)
 		}
 	}
 
+	return ERR_PTR(-ENODEV);
+}
+
+static struct flash_info *jedec_probe(struct spi_device *spi)
+{
+	int ret;
+	u8 code = OP_READ_ID;
+	u64 jedec;
+	u8 id[sizeof(jedec)] = {0};
+	const unsigned int id_size = 5;
+	struct flash_info *info;
+
+	/*
+	 * JEDEC also defines an optional "extended device information"
+	 * string for after vendor-specific data, after the three bytes
+	 * we use here.  Supporting some chips might require using it.
+	 *
+	 * If the vendor ID isn't Atmel's (0x1f), assume this call failed.
+	 * That's not an error; only rev C and newer chips handle it, and
+	 * only Atmel sells these chips.
+	 */
+	ret = spi_write_then_read(spi, &code, 1, id, id_size);
+	if (ret < 0) {
+		dev_dbg(&spi->dev, "error %d reading JEDEC ID\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	if (id[0] != CFI_MFR_ATMEL)
+		return NULL;
+
+	jedec = be64_to_cpup((__be64 *)id);
+
+	/*
+	 * First, try to match device using extended device
+	 * information
+	 */
+	info = jedec_lookup(spi, jedec >> DATAFLASH_SHIFT_EXTID, true);
+	if (!IS_ERR(info))
+		return info;
+	/*
+	 * If that fails, make another pass using regular ID
+	 * information
+	 */
+	info = jedec_lookup(spi, jedec >> DATAFLASH_SHIFT_ID, false);
+	if (!IS_ERR(info))
+		return info;
 	/*
 	 * Treat other chips as errors ... we won't know the right page
 	 * size (it might be binary) even when we can tell which density
 	 * class is involved (legacy chip id scheme).
 	 */
-	dev_warn(&spi->dev, "JEDEC id %06x not handled\n", jedec);
+	dev_warn(&spi->dev, "JEDEC id %016llx not handled\n", jedec);
 	return ERR_PTR(-ENODEV);
 }
 
-- 
GitLab


From 67e4145ebf2c161d7404770461b8404f00a6d3bf Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 21 Apr 2017 09:30:26 -0700
Subject: [PATCH 0010/1958] mtd: dataflash: Add flash_info for AT45DB641E

Cc: cphealy@gmail.com
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Cc: linux-kernel@vger.kernel.org
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mtd_dataflash.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 3f1a0fbe5a583..5dc8bd042cc54 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -736,6 +736,9 @@ static struct flash_info dataflash_data[] = {
 
 	{ "AT45DB642x",  0x1f2800, 8192, 1056, 11, SUP_POW2PS},
 	{ "at45db642d",  0x1f2800, 8192, 1024, 10, SUP_POW2PS | IS_POW2PS},
+
+	{ "AT45DB641E",  0x1f28000100, 32768, 264, 9, SUP_EXTID | SUP_POW2PS},
+	{ "at45db641e",  0x1f28000100, 32768, 256, 8, SUP_EXTID | SUP_POW2PS | IS_POW2PS},
 };
 
 static struct flash_info *jedec_lookup(struct spi_device *spi,
-- 
GitLab


From 5dc17fa6fb7063c6bc8ea9f3183ec19ca68bbfd6 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Apr 2017 17:34:31 +0200
Subject: [PATCH 0011/1958] mtd: mchp23k256: Add driver for this SPI SRAM
 device

The Microchip 23k256 is a 32K Byte SRAM connected via SPI.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
[Brian: fixed copyright to be in this millenium]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/Kconfig      |  10 ++
 drivers/mtd/devices/Makefile     |   1 +
 drivers/mtd/devices/mchp23k256.c | 182 +++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+)
 create mode 100644 drivers/mtd/devices/mchp23k256.c

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 58329d2dacd1f..6def5445e03e1 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -95,6 +95,16 @@ config MTD_M25P80
 	  if you want to specify device partitioning or to use a device which
 	  doesn't support the JEDEC ID instruction.
 
+config MTD_MCHP23K256
+	tristate "Microchip 23K256 SRAM"
+	depends on SPI_MASTER
+	help
+	  This enables access to Microchip 23K256 SRAM chips, using SPI.
+
+	  Set up your spi devices with the right board-specific
+	  platform data, or a device tree description if you want to
+	  specify device partitioning
+
 config MTD_SPEAR_SMI
 	tristate "SPEAR MTD NOR Support through SMI controller"
 	depends on PLAT_SPEAR
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 7912d3a0ee343..f0f767624cc68 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_MTD_LART)		+= lart.o
 obj-$(CONFIG_MTD_BLOCK2MTD)	+= block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)	+= mtd_dataflash.o
 obj-$(CONFIG_MTD_M25P80)	+= m25p80.o
+obj-$(CONFIG_MTD_MCHP23K256)	+= mchp23k256.o
 obj-$(CONFIG_MTD_SPEAR_SMI)	+= spear_smi.o
 obj-$(CONFIG_MTD_SST25L)	+= sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)	+= bcm47xxsflash.o
diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
new file mode 100644
index 0000000000000..e237db9f1bdbd
--- /dev/null
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -0,0 +1,182 @@
+/*
+ * mchp23k256.c
+ *
+ * Driver for Microchip 23k256 SPI RAM chips
+ *
+ * Copyright © 2016 Andrew Lunn <andrew@lunn.ch>
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/spi/flash.h>
+#include <linux/spi/spi.h>
+
+struct mchp23k256_flash {
+	struct spi_device	*spi;
+	struct mutex		lock;
+	struct mtd_info		mtd;
+};
+
+#define MCHP23K256_CMD_WRITE_STATUS	0x01
+#define MCHP23K256_CMD_WRITE		0x02
+#define MCHP23K256_CMD_READ		0x03
+#define MCHP23K256_MODE_SEQ		BIT(6)
+
+#define to_mchp23k256_flash(x) container_of(x, struct mchp23k256_flash, mtd)
+
+static int mchp23k256_write(struct mtd_info *mtd, loff_t to, size_t len,
+			    size_t *retlen, const unsigned char *buf)
+{
+	struct mchp23k256_flash *flash = to_mchp23k256_flash(mtd);
+	struct spi_transfer transfer[2] = {};
+	struct spi_message message;
+	unsigned char command[3];
+
+	spi_message_init(&message);
+
+	command[0] = MCHP23K256_CMD_WRITE;
+	command[1] = to >> 8;
+	command[2] = to;
+
+	transfer[0].tx_buf = command;
+	transfer[0].len = sizeof(command);
+	spi_message_add_tail(&transfer[0], &message);
+
+	transfer[1].tx_buf = buf;
+	transfer[1].len = len;
+	spi_message_add_tail(&transfer[1], &message);
+
+	mutex_lock(&flash->lock);
+
+	spi_sync(flash->spi, &message);
+
+	if (retlen && message.actual_length > sizeof(command))
+		*retlen += message.actual_length - sizeof(command);
+
+	mutex_unlock(&flash->lock);
+	return 0;
+}
+
+static int mchp23k256_read(struct mtd_info *mtd, loff_t from, size_t len,
+			   size_t *retlen, unsigned char *buf)
+{
+	struct mchp23k256_flash *flash = to_mchp23k256_flash(mtd);
+	struct spi_transfer transfer[2] = {};
+	struct spi_message message;
+	unsigned char command[3];
+
+	spi_message_init(&message);
+
+	memset(&transfer, 0, sizeof(transfer));
+	command[0] = MCHP23K256_CMD_READ;
+	command[1] = from >> 8;
+	command[2] = from;
+
+	transfer[0].tx_buf = command;
+	transfer[0].len = sizeof(command);
+	spi_message_add_tail(&transfer[0], &message);
+
+	transfer[1].rx_buf = buf;
+	transfer[1].len = len;
+	spi_message_add_tail(&transfer[1], &message);
+
+	mutex_lock(&flash->lock);
+
+	spi_sync(flash->spi, &message);
+
+	if (retlen && message.actual_length > sizeof(command))
+		*retlen += message.actual_length - sizeof(command);
+
+	mutex_unlock(&flash->lock);
+	return 0;
+}
+
+/*
+ * Set the device into sequential mode. This allows read/writes to the
+ * entire SRAM in a single operation
+ */
+static int mchp23k256_set_mode(struct spi_device *spi)
+{
+	struct spi_transfer transfer = {};
+	struct spi_message message;
+	unsigned char command[2];
+
+	spi_message_init(&message);
+
+	command[0] = MCHP23K256_CMD_WRITE_STATUS;
+	command[1] = MCHP23K256_MODE_SEQ;
+
+	transfer.tx_buf = command;
+	transfer.len = sizeof(command);
+	spi_message_add_tail(&transfer, &message);
+
+	return spi_sync(spi, &message);
+}
+
+static int mchp23k256_probe(struct spi_device *spi)
+{
+	struct mchp23k256_flash *flash;
+	struct flash_platform_data *data;
+	int err;
+
+	flash = devm_kzalloc(&spi->dev, sizeof(*flash), GFP_KERNEL);
+	if (!flash)
+		return -ENOMEM;
+
+	flash->spi = spi;
+	mutex_init(&flash->lock);
+	spi_set_drvdata(spi, flash);
+
+	err = mchp23k256_set_mode(spi);
+	if (err)
+		return err;
+
+	data = dev_get_platdata(&spi->dev);
+
+	flash->mtd.dev.parent	= &spi->dev;
+	flash->mtd.type		= MTD_RAM;
+	flash->mtd.flags	= MTD_CAP_RAM;
+	flash->mtd.writesize	= 1;
+	flash->mtd.size		= SZ_32K;
+	flash->mtd._read	= mchp23k256_read;
+	flash->mtd._write	= mchp23k256_write;
+
+	err = mtd_device_parse_register(&flash->mtd, NULL, NULL,
+					data ? data->parts : NULL,
+					data ? data->nr_parts : 0);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int mchp23k256_remove(struct spi_device *spi)
+{
+	struct mchp23k256_flash *flash = spi_get_drvdata(spi);
+
+	return mtd_device_unregister(&flash->mtd);
+}
+
+static struct spi_driver mchp23k256_driver = {
+	.driver = {
+		.name	= "mchp23k256",
+	},
+	.probe		= mchp23k256_probe,
+	.remove		= mchp23k256_remove,
+};
+
+module_spi_driver(mchp23k256_driver);
+
+MODULE_DESCRIPTION("MTD SPI driver for MCHP23K256 RAM chips");
+MODULE_AUTHOR("Andrew Lunn <andre@lunn.ch>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("spi:mchp23k256");
-- 
GitLab


From a63be500c6663a98cedda15c1bb380e9a34349e8 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Thu, 4 May 2017 22:10:46 +0200
Subject: [PATCH 0012/1958] mtd: cfi_cmdset_0020: Drop unnecessary static

Drop static on a local variable, when the variable is initialized before
any use on every possible execution path through the function.  The static
has no benefit, and dropping it reduces the code size.

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@bad exists@
position p;
identifier x;
type T;
@@

static T x@p;
...
x = <+...x...+>

@@
identifier x;
expression e;
type T;
position p != bad.p;
@@

-static
 T x@p;
 ... when != x
     when strict
?x = e;
// </smpl>

The change in code size is indicates by the following output from the size
command.

before:
   text    data     bss     dec     hex filename
  16671      48      16   16735    415f drivers/mtd/chips/cfi_cmdset_0020.o

after:
   text    data     bss     dec     hex filename
  16639      48       8   16695    4137 drivers/mtd/chips/cfi_cmdset_0020.o

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/chips/cfi_cmdset_0020.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 94d3eb42c4d5f..7d342965f3922 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -666,7 +666,7 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	size_t	 totlen = 0, thislen;
 	int	 ret = 0;
 	size_t	 buflen = 0;
-	static char *buffer;
+	char *buffer;
 
 	if (!ECCBUF_SIZE) {
 		/* We should fall back to a general writev implementation.
-- 
GitLab


From 6c51a52eeb58befd2e9be2ed7ee2c4c04139b336 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Thu, 4 May 2017 22:10:47 +0200
Subject: [PATCH 0013/1958] mtd: physmap_of: Drop unnecessary static

Drop static on a local variable, when the variable is initialized before
any use on every possible execution path through the function.  The static
has no benefit, and dropping it reduces the code size.

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@bad exists@
position p;
identifier x;
type T;
@@

static T x@p;
...
x = <+...x...+>

@@
identifier x;
expression e;
type T;
position p != bad.p;
@@

-static
 T x@p;
 ... when != x
     when strict
?x = e;
// </smpl>

The change in code size is indicates by the following output from the size
command.

before:
   text    data     bss     dec     hex filename
    835      80       8     923     39b drivers/mtd/maps/physmap_of_gemini.o

after:
   text    data     bss     dec     hex filename
    823      80       0     903     387 drivers/mtd/maps/physmap_of_gemini.o

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/maps/physmap_of_gemini.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/maps/physmap_of_gemini.c b/drivers/mtd/maps/physmap_of_gemini.c
index 9d371cd728ea1..05b286b5289f5 100644
--- a/drivers/mtd/maps/physmap_of_gemini.c
+++ b/drivers/mtd/maps/physmap_of_gemini.c
@@ -59,7 +59,7 @@ int of_flash_probe_gemini(struct platform_device *pdev,
 			  struct device_node *np,
 			  struct map_info *map)
 {
-	static struct regmap *rmap;
+	struct regmap *rmap;
 	struct device *dev = &pdev->dev;
 	u32 val;
 	int ret;
-- 
GitLab


From 7699f1e358ec60958a153e365901cb1e7ca62e93 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Date: Tue, 2 May 2017 16:49:51 -0300
Subject: [PATCH 0014/1958] mtd: nand: Add Hisilicon machine dependency

The Hisilicon NAND driver is only needed for a specific
platform, so avoid cluttering the configuration.

Signed-off-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index c3029528063b8..effe767fc3471 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -542,6 +542,7 @@ config MTD_NAND_SUNXI
 
 config MTD_NAND_HISI504
 	tristate "Support for NAND controller on Hisilicon SoC Hip04"
+	depends on ARCH_HISI || COMPILE_TEST
 	depends on HAS_DMA
 	help
 	  Enables support for NAND controller on Hisilicon SoC Hip04.
-- 
GitLab


From 6f9ad5f360e865cba263109cb186cb2e16dbdc8d Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Date: Tue, 2 May 2017 16:49:52 -0300
Subject: [PATCH 0015/1958] mtd: nand: Add Mediatek machine dependency

The Mediatek NAND driver is only needed for a specific
platform, so avoid cluttering the configuration.

Signed-off-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index effe767fc3471..0bd2319d3035c 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -556,6 +556,7 @@ config MTD_NAND_QCOM
 
 config MTD_NAND_MTK
 	tristate "Support for NAND controller on MTK SoCs"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
 	depends on HAS_DMA
 	help
 	  Enables support for NAND controller on MTK SoCs.
-- 
GitLab


From 0b2f93dc0099e3b8a739b8918eeb995e12520940 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 2 May 2017 12:29:13 +0200
Subject: [PATCH 0016/1958] mtd: nand: jz4780: Use mtd_set_ooblayout() to set
 the ooblayout

The mtd_set_ooblayout() accesor has been added to hide internals of
mtd_info and ease future refactoring. Call mtd_set_ooblayout() instead of
directly accessing mtd->ooblayout.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Harvey Hunt <harveyhuntnexus@gmail.com>
---
 drivers/mtd/nand/jz4780_nand.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/jz4780_nand.c b/drivers/mtd/nand/jz4780_nand.c
index a39bb70175eea..8bc835f71b266 100644
--- a/drivers/mtd/nand/jz4780_nand.c
+++ b/drivers/mtd/nand/jz4780_nand.c
@@ -205,7 +205,7 @@ static int jz4780_nand_init_ecc(struct jz4780_nand_chip *nand, struct device *de
 		return -EINVAL;
 	}
 
-	mtd->ooblayout = &nand_ooblayout_lp_ops;
+	mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
 
 	return 0;
 }
-- 
GitLab


From 19d8ccc42b148d75284a3809f1eb1eba13a81677 Mon Sep 17 00:00:00 2001
From: Alexander Couzens <lynxis@fe80.eu>
Date: Tue, 2 May 2017 11:47:36 +0200
Subject: [PATCH 0017/1958] mtd: nand: davinci: set ECC algorithm explicitly
 for HW based ECC

If ECC strength is 4bits/512bytes the algorithm of the ECC engine is
BCH, otherwise (1bit/512bytes) Hamming is used.

Signed-off-by: Alexander Couzens <lynxis@fe80.eu>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/davinci_nand.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 531c51991e574..7b26e53b95b11 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -771,11 +771,14 @@ static int nand_davinci_probe(struct platform_device *pdev)
 			info->chip.ecc.hwctl = nand_davinci_hwctl_4bit;
 			info->chip.ecc.bytes = 10;
 			info->chip.ecc.options = NAND_ECC_GENERIC_ERASED_CHECK;
+			info->chip.ecc.algo = NAND_ECC_BCH;
 		} else {
+			/* 1bit ecc hamming */
 			info->chip.ecc.calculate = nand_davinci_calculate_1bit;
 			info->chip.ecc.correct = nand_davinci_correct_1bit;
 			info->chip.ecc.hwctl = nand_davinci_hwctl_1bit;
 			info->chip.ecc.bytes = 3;
+			info->chip.ecc.algo = NAND_ECC_HAMMING;
 		}
 		info->chip.ecc.size = 512;
 		info->chip.ecc.strength = pdata->ecc_bits;
-- 
GitLab


From 6335b509b2b6fbe4cbeaef739434b40b8018df82 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 10:52:34 +0200
Subject: [PATCH 0018/1958] mtd: nand: fsmc: reduce number of arguments of
 fsmc_nand_setup()

In preparation for the introduction of support for using SDR timings
exposed by the NAND flash instead of hard-coded timings, this commit
reworks the fsmc_nand_setup() function to take a "struct fsmc_nand_data"
as argument, which already contains the I/O registers base address, bank
and bus width information.

The timings is also currently contained in the "struct fsmc_nand_data",
but we still pass it as a separate argument because the support for
using SDR timings will pass a different value.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index cea50d2f218c1..43108ddd7bdd0 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -302,11 +302,13 @@ static void fsmc_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
  * This routine initializes timing parameters related to NAND memory access in
  * FSMC registers
  */
-static void fsmc_nand_setup(void __iomem *regs, uint32_t bank,
-			   uint32_t busw, struct fsmc_nand_timings *timings)
+static void fsmc_nand_setup(struct fsmc_nand_data *host,
+			    struct fsmc_nand_timings *timings)
 {
 	uint32_t value = FSMC_DEVTYPE_NAND | FSMC_ENABLE | FSMC_WAITON;
 	uint32_t tclr, tar, thiz, thold, twait, tset;
+	unsigned int bank = host->bank;
+	void __iomem *regs = host->regs_va;
 	struct fsmc_nand_timings *tims;
 	struct fsmc_nand_timings default_timings = {
 		.tclr	= FSMC_TCLR_1,
@@ -318,7 +320,7 @@ static void fsmc_nand_setup(void __iomem *regs, uint32_t bank,
 	};
 
 	if (timings)
-		tims = timings;
+		tims = host->dev_timings;
 	else
 		tims = &default_timings;
 
@@ -329,7 +331,7 @@ static void fsmc_nand_setup(void __iomem *regs, uint32_t bank,
 	twait = (tims->twait & FSMC_TWAIT_MASK) << FSMC_TWAIT_SHIFT;
 	tset = (tims->tset & FSMC_TSET_MASK) << FSMC_TSET_SHIFT;
 
-	if (busw)
+	if (host->nand.options & NAND_BUSWIDTH_16)
 		writel_relaxed(value | FSMC_DEVWID_16,
 				FSMC_NAND_REG(regs, bank, PC));
 	else
@@ -933,9 +935,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		break;
 	}
 
-	fsmc_nand_setup(host->regs_va, host->bank,
-			nand->options & NAND_BUSWIDTH_16,
-			host->dev_timings);
+	fsmc_nand_setup(host, host->dev_timings);
 
 	if (AMBA_REV_BITS(host->pid) >= 8) {
 		nand->ecc.read_page = fsmc_read_page_hwecc;
@@ -1073,9 +1073,7 @@ static int fsmc_nand_resume(struct device *dev)
 	struct fsmc_nand_data *host = dev_get_drvdata(dev);
 	if (host) {
 		clk_prepare_enable(host->clk);
-		fsmc_nand_setup(host->regs_va, host->bank,
-				host->nand.options & NAND_BUSWIDTH_16,
-				host->dev_timings);
+		fsmc_nand_setup(host, host->dev_timings);
 	}
 	return 0;
 }
-- 
GitLab


From d9fb0795718333e36f7e472d7d81b7b8efe347c8 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 10:52:35 +0200
Subject: [PATCH 0019/1958] mtd: nand: fsmc: add support for SDR timings

Until now, the fsmc_nand driver was either using controller timings
specified in the Device Tree (through FSMC specific DT properties) or
alternatively default/fallback timings.

This commit implements support to use the timings advertised by the NAND
chip itself, by implementing the ->setup_data_interface() hook. To
preserve backward compatibility, if timings are specified in the Device
Tree, we use the timings from the Device Tree (and don't implement
->setup_data_interface).

Many thanks to Boris Brezillon for coming up with the logic to convert
the NAND chip timings into the timings expected by the FSMC controller.

Also, since the timings are now not only coming from the DT, the message
warning that default timings will be used is removed.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 94 ++++++++++++++++++++++++++++++++++--
 1 file changed, 89 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 43108ddd7bdd0..442e4dff05caa 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -346,6 +346,88 @@ static void fsmc_nand_setup(struct fsmc_nand_data *host,
 			FSMC_NAND_REG(regs, bank, ATTRIB));
 }
 
+static int fsmc_calc_timings(struct fsmc_nand_data *host,
+			     const struct nand_sdr_timings *sdrt,
+			     struct fsmc_nand_timings *tims)
+{
+	unsigned long hclk = clk_get_rate(host->clk);
+	unsigned long hclkn = NSEC_PER_SEC / hclk;
+	uint32_t thiz, thold, twait, tset;
+
+	if (sdrt->tRC_min < 30000)
+		return -EOPNOTSUPP;
+
+	tims->tar = DIV_ROUND_UP(sdrt->tAR_min / 1000, hclkn) - 1;
+	if (tims->tar > FSMC_TAR_MASK)
+		tims->tar = FSMC_TAR_MASK;
+	tims->tclr = DIV_ROUND_UP(sdrt->tCLR_min / 1000, hclkn) - 1;
+	if (tims->tclr > FSMC_TCLR_MASK)
+		tims->tclr = FSMC_TCLR_MASK;
+
+	thiz = sdrt->tCS_min - sdrt->tWP_min;
+	tims->thiz = DIV_ROUND_UP(thiz / 1000, hclkn);
+
+	thold = sdrt->tDH_min;
+	if (thold < sdrt->tCH_min)
+		thold = sdrt->tCH_min;
+	if (thold < sdrt->tCLH_min)
+		thold = sdrt->tCLH_min;
+	if (thold < sdrt->tWH_min)
+		thold = sdrt->tWH_min;
+	if (thold < sdrt->tALH_min)
+		thold = sdrt->tALH_min;
+	if (thold < sdrt->tREH_min)
+		thold = sdrt->tREH_min;
+	tims->thold = DIV_ROUND_UP(thold / 1000, hclkn);
+	if (tims->thold == 0)
+		tims->thold = 1;
+	else if (tims->thold > FSMC_THOLD_MASK)
+		tims->thold = FSMC_THOLD_MASK;
+
+	twait = max(sdrt->tRP_min, sdrt->tWP_min);
+	tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1;
+	if (tims->twait == 0)
+		tims->twait = 1;
+	else if (tims->twait > FSMC_TWAIT_MASK)
+		tims->twait = FSMC_TWAIT_MASK;
+
+	tset = max(sdrt->tCS_min - sdrt->tWP_min,
+		   sdrt->tCEA_max - sdrt->tREA_max);
+	tims->tset = DIV_ROUND_UP(tset / 1000, hclkn) - 1;
+	if (tims->tset == 0)
+		tims->tset = 1;
+	else if (tims->tset > FSMC_TSET_MASK)
+		tims->tset = FSMC_TSET_MASK;
+
+	return 0;
+}
+
+static int fsmc_setup_data_interface(struct mtd_info *mtd,
+				     const struct nand_data_interface *conf,
+				     bool check_only)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct fsmc_nand_data *host = nand_get_controller_data(nand);
+	struct fsmc_nand_timings tims;
+	const struct nand_sdr_timings *sdrt;
+	int ret;
+
+	sdrt = nand_get_sdr_timings(conf);
+	if (IS_ERR(sdrt))
+		return PTR_ERR(sdrt);
+
+	ret = fsmc_calc_timings(host, sdrt, &tims);
+	if (ret)
+		return ret;
+
+	if (check_only)
+		return 0;
+
+	fsmc_nand_setup(host, &tims);
+
+	return 0;
+}
+
 /*
  * fsmc_enable_hwecc - Enables Hardware ECC through FSMC registers
  */
@@ -798,10 +880,8 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
 		return -ENOMEM;
 	ret = of_property_read_u8_array(np, "timings", (u8 *)host->dev_timings,
 						sizeof(*host->dev_timings));
-	if (ret) {
-		dev_info(&pdev->dev, "No timings in dts specified, using default timings!\n");
+	if (ret)
 		host->dev_timings = NULL;
-	}
 
 	/* Set default NAND bank to 0 */
 	host->bank = 0;
@@ -935,7 +1015,10 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		break;
 	}
 
-	fsmc_nand_setup(host, host->dev_timings);
+	if (host->dev_timings)
+		fsmc_nand_setup(host, host->dev_timings);
+	else
+		nand->setup_data_interface = fsmc_setup_data_interface;
 
 	if (AMBA_REV_BITS(host->pid) >= 8) {
 		nand->ecc.read_page = fsmc_read_page_hwecc;
@@ -1073,7 +1156,8 @@ static int fsmc_nand_resume(struct device *dev)
 	struct fsmc_nand_data *host = dev_get_drvdata(dev);
 	if (host) {
 		clk_prepare_enable(host->clk);
-		fsmc_nand_setup(host, host->dev_timings);
+		if (host->dev_timings)
+			fsmc_nand_setup(host, host->dev_timings);
 	}
 	return 0;
 }
-- 
GitLab


From 1debdb96643a3344e7c231d49e89d97078bc2c45 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 10:52:36 +0200
Subject: [PATCH 0020/1958] mtd: nand: fsmc: remove default timings

When timings are no longer provided by the Device Tree, we now use the
SDR timings specified by the NAND flash, and such SDR timings are always
provided. Therefore, it is no longer necessary to keep "default" timings
in the fmsc driver.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 442e4dff05caa..f58c912fdf3bd 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -303,26 +303,12 @@ static void fsmc_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
  * FSMC registers
  */
 static void fsmc_nand_setup(struct fsmc_nand_data *host,
-			    struct fsmc_nand_timings *timings)
+			    struct fsmc_nand_timings *tims)
 {
 	uint32_t value = FSMC_DEVTYPE_NAND | FSMC_ENABLE | FSMC_WAITON;
 	uint32_t tclr, tar, thiz, thold, twait, tset;
 	unsigned int bank = host->bank;
 	void __iomem *regs = host->regs_va;
-	struct fsmc_nand_timings *tims;
-	struct fsmc_nand_timings default_timings = {
-		.tclr	= FSMC_TCLR_1,
-		.tar	= FSMC_TAR_1,
-		.thiz	= FSMC_THIZ_1,
-		.thold	= FSMC_THOLD_4,
-		.twait	= FSMC_TWAIT_6,
-		.tset	= FSMC_TSET_0,
-	};
-
-	if (timings)
-		tims = host->dev_timings;
-	else
-		tims = &default_timings;
 
 	tclr = (tims->tclr & FSMC_TCLR_MASK) << FSMC_TCLR_SHIFT;
 	tar = (tims->tar & FSMC_TAR_MASK) << FSMC_TAR_SHIFT;
-- 
GitLab


From 85f94b5ef0eede10e7071f9e7e5b864ffad96d72 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:42 +0200
Subject: [PATCH 0021/1958] dt-bindings: mtd: document new "on-die"
 nand-ecc-mode

A number of NAND chips support a feature called on-die ECC, where the
NAND chip itself is capable of doing error detection and correction. The
new "on-die" value for nand-ecc-mode indicates that we want this
functionality to be used.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 Documentation/devicetree/bindings/mtd/nand.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
index b05601600083d..133f3813719c2 100644
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -21,7 +21,7 @@ Optional NAND chip properties:
 
 - nand-ecc-mode : String, operation mode of the NAND ecc mode.
 		  Supported values are: "none", "soft", "hw", "hw_syndrome",
-		  "hw_oob_first".
+		  "hw_oob_first", "on-die".
 		  Deprecated values:
 		  "soft_bch": use "soft" and nand-ecc-algo instead
 - nand-ecc-algo: string, algorithm of NAND ECC.
-- 
GitLab


From 785818fa8385fe55dab253e42a4c6728fca61333 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:43 +0200
Subject: [PATCH 0022/1958] mtd: nand: add core support for on-die ECC

A number of NAND flashes have a capability called "on-die ECC" where the
NAND chip itself is capable of detecting and correcting errors.

Linux already has support for using the ECC implementation of the NAND
controller, or a software based ECC implementation, but not for using
the ECC implementation of the NAND controller. However, such an
implementation is sometimes useful in situations where the NAND
controller provides ECC algorithms that are not strong enough for the
NAND chip used on the system. A typical case is a NAND chip that
requires a 4-bit ECC, while the NAND controller only provides a 1-bit
ECC algorithm.

This commit introduces the support for the NAND_ECC_ON_DIE ECC mode:

 - Parsing of the "on-die" value for the "nand-ecc-mode" Device Tree
   property

 - Handling NAND_ECC_ON_DIE case in nand_scan_tail(). The idea is that
   the vendor specific code for the NAND chip must implement
   ->read_page() and ->write_page(). It may optionally provide its own
   ->read_page_raw() and ->write_page_raw() as well. For OOB operation,
   we assume the standard operations are good enough, but they can be
   overridden by the vendor specific code if needed.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 13 +++++++++++++
 include/linux/mtd/nand.h     |  1 +
 2 files changed, 14 insertions(+)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d474378ed810b..f49aecd3f1de2 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4177,6 +4177,7 @@ static const char * const nand_ecc_modes[] = {
 	[NAND_ECC_HW]		= "hw",
 	[NAND_ECC_HW_SYNDROME]	= "hw_syndrome",
 	[NAND_ECC_HW_OOB_FIRST]	= "hw_oob_first",
+	[NAND_ECC_ON_DIE]	= "on-die",
 };
 
 static int of_get_nand_ecc_mode(struct device_node *np)
@@ -4717,6 +4718,18 @@ int nand_scan_tail(struct mtd_info *mtd)
 		}
 		break;
 
+	case NAND_ECC_ON_DIE:
+		if (!ecc->read_page || !ecc->write_page) {
+			WARN(1, "No ECC functions supplied; on-die ECC not possible\n");
+			ret = -EINVAL;
+			goto err_free;
+		}
+		if (!ecc->read_oob)
+			ecc->read_oob = nand_read_oob_std;
+		if (!ecc->write_oob)
+			ecc->write_oob = nand_write_oob_std;
+		break;
+
 	case NAND_ECC_NONE:
 		pr_warn("NAND_ECC_NONE selected by board driver. This is not recommended!\n");
 		ecc->read_page = nand_read_page_raw;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 8f67b15816836..603522097ec9b 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -116,6 +116,7 @@ typedef enum {
 	NAND_ECC_HW,
 	NAND_ECC_HW_SYNDROME,
 	NAND_ECC_HW_OOB_FIRST,
+	NAND_ECC_ON_DIE,
 } nand_ecc_modes_t;
 
 enum nand_ecc_algo {
-- 
GitLab


From cc0f51ec111266f5d255e753bf3254ad411d5c12 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:44 +0200
Subject: [PATCH 0023/1958] mtd: nand: export nand_{read,write}_page_raw()

The nand_read_page_raw() and nand_write_page_raw() functions might be
re-used by vendor-specific implementations of the read_page/write_page
functions. Instead of having vendor-specific code duplicate this code,
it is much better to export those functions and allow them to be
re-used.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 10 ++++++----
 include/linux/mtd/nand.h     |  8 ++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f49aecd3f1de2..0b3b1a88091a7 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1524,14 +1524,15 @@ EXPORT_SYMBOL(nand_check_erased_ecc_chunk);
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-static int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int oob_required, int page)
+int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		       uint8_t *buf, int oob_required, int page)
 {
 	chip->read_buf(mtd, buf, mtd->writesize);
 	if (oob_required)
 		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
 	return 0;
 }
+EXPORT_SYMBOL(nand_read_page_raw);
 
 /**
  * nand_read_page_raw_syndrome - [INTERN] read raw page data without ecc
@@ -2469,8 +2470,8 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			       const uint8_t *buf, int oob_required, int page)
+int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+			const uint8_t *buf, int oob_required, int page)
 {
 	chip->write_buf(mtd, buf, mtd->writesize);
 	if (oob_required)
@@ -2478,6 +2479,7 @@ static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 
 	return 0;
 }
+EXPORT_SYMBOL(nand_write_page_raw);
 
 /**
  * nand_write_page_raw_syndrome - [INTERN] raw page write function
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 603522097ec9b..7a01d2eb74430 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1259,6 +1259,14 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page);
 
+/* Default read_page_raw implementation */
+int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		       uint8_t *buf, int oob_required, int page);
+
+/* Default write_page_raw implementation */
+int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+			const uint8_t *buf, int oob_required, int page);
+
 /* Reset and initialize a NAND device */
 int nand_reset(struct nand_chip *chip, int chipnr);
 
-- 
GitLab


From c512f36b581862df20acef050c3e1a875166bd6f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:31:19 +0200
Subject: [PATCH 0024/1958] sunrpc: properly type argument to kxdreproc_t

Pass struct rpc_request as the first argument instead of an untyped blob,
and mark the data object as const.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 include/linux/sunrpc/xdr.h | 5 ++++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 054c8cde18f33..290f189de2009 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -17,6 +17,8 @@
 #include <asm/unaligned.h>
 #include <linux/scatterlist.h>
 
+struct rpc_rqst;
+
 /*
  * Buffer adjustment
  */
@@ -222,7 +224,8 @@ struct xdr_stream {
 /*
  * These are the xdr_stream style generic XDR encode and decode functions.
  */
-typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj);
 typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b5cb921775a0b..9fee20dc0c809 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2476,7 +2476,8 @@ rpc_verify_header(struct rpc_task *task)
 	goto out_garbage;
 }
 
-static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj)
 {
 }
 
-- 
GitLab


From 7d55f7febab6af865630ccb3a0fac9a144c459df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:59:45 +0200
Subject: [PATCH 0025/1958] sunrpc: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/rpcb_clnt.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 5b30603596d0c..d0269a39afdfa 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -843,8 +843,9 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
  */
 
 static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct rpcbind_args *rpcb)
+			     const void *data)
 {
+	const struct rpcbind_args *rpcb = data;
 	__be32 *p;
 
 	dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
@@ -917,8 +918,9 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
 }
 
 static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct rpcbind_args *rpcb)
+			     const void *data)
 {
+	const struct rpcbind_args *rpcb = data;
 	__be32 *p;
 
 	dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
@@ -992,7 +994,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_encode	= rpcb_enc_mapping,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1002,7 +1004,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_encode	= rpcb_enc_mapping,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1012,7 +1014,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_encode	= rpcb_enc_mapping,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
@@ -1025,7 +1027,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1035,7 +1037,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1045,7 +1047,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
@@ -1058,7 +1060,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1068,7 +1070,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1078,7 +1080,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
-- 
GitLab


From bf96391e7b59a994c8aa72f5c425e3897ee799fa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:34:04 +0200
Subject: [PATCH 0026/1958] lockd: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c | 22 +++++++++++++++-------
 fs/lockd/clntxdr.c  | 22 +++++++++++++++-------
 fs/lockd/mon.c      |  8 ++++----
 3 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index d3e40db289302..3cbad662120a5 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -381,8 +381,9 @@ static void encode_nlm4_lock(struct xdr_stream *xdr,
  */
 static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -402,8 +403,9 @@ static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -424,8 +426,9 @@ static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -442,8 +445,9 @@ static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nlm_args *args)
+				    const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -458,8 +462,10 @@ static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_res(struct rpc_rqst *req,
 			     struct xdr_stream *xdr,
-			     const struct nlm_res *result)
+			     const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm4_stat(xdr, result->status);
 }
@@ -479,8 +485,10 @@ static void nlm4_xdr_enc_res(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_res *result)
+				 const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm4_stat(xdr, result->status);
 	if (result->status == nlm_lck_denied)
@@ -566,7 +574,7 @@ static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 #define PROC(proc, argtype, restype)					\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
+	.p_encode    = nlm4_xdr_enc_##argtype,				\
 	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 3e9f7874b9755..825c0fde8c80b 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -374,8 +374,9 @@ static void encode_nlm_lock(struct xdr_stream *xdr,
  */
 static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -395,8 +396,9 @@ static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -417,8 +419,9 @@ static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -435,8 +438,9 @@ static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nlm_args *args)
+				   const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -451,8 +455,10 @@ static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_res(struct rpc_rqst *req,
 			    struct xdr_stream *xdr,
-			    const struct nlm_res *result)
+			    const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm_stat(xdr, result->status);
 }
@@ -479,8 +485,10 @@ static void encode_nlm_testrply(struct xdr_stream *xdr,
 
 static void nlm_xdr_enc_testres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				const struct nlm_res *result)
+				const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm_stat(xdr, result->status);
 	encode_nlm_testrply(xdr, result);
@@ -564,7 +572,7 @@ static int nlm_xdr_dec_res(struct rpc_rqst *req,
 #define PROC(proc, argtype, restype)	\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
+	.p_encode    = nlm_xdr_enc_##argtype,		\
 	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 19166d4a8d313..8043fd4b8a5c2 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -476,14 +476,14 @@ static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
 }
 
 static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    const struct nsm_args *argp)
+			    const void *argp)
 {
 	encode_mon_id(xdr, argp);
 	encode_priv(xdr, argp);
 }
 
 static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      const struct nsm_args *argp)
+			      const void *argp)
 {
 	encode_mon_id(xdr, argp);
 }
@@ -532,7 +532,7 @@ static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
-		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
+		.p_encode	= nsm_xdr_enc_mon,
 		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
@@ -541,7 +541,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
-		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
+		.p_encode	= nsm_xdr_enc_unmon,
 		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
-- 
GitLab


From 0096d39b9632706ca65a759a7024aef6b7833c6d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 10:01:49 +0200
Subject: [PATCH 0027/1958] nfs: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/mount_clnt.c |  10 +--
 fs/nfs/nfs2xdr.c    |  49 +++++++++---
 fs/nfs/nfs3xdr.c    |  83 +++++++++++++------
 fs/nfs/nfs42xdr.c   |  34 ++++----
 fs/nfs/nfs4xdr.c    | 188 ++++++++++++++++++++++++++++----------------
 5 files changed, 242 insertions(+), 122 deletions(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 09b190015df41..f435d640d5527 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -304,7 +304,7 @@ static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
 }
 
 static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const char *dirpath)
+				const void *dirpath)
 {
 	encode_mntdirpath(xdr, dirpath);
 }
@@ -467,7 +467,7 @@ static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
@@ -476,7 +476,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 	[MOUNTPROC_UMNT] = {
 		.p_proc		= MOUNTPROC_UMNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_name		= "UMOUNT",
@@ -486,7 +486,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
@@ -495,7 +495,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 	[MOUNTPROC3_UMNT] = {
 		.p_proc		= MOUNTPROC3_UMNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_name		= "UMOUNT",
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index b4e03ed8599de..8ecd58597228d 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -568,8 +568,10 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
 
 static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nfs_fh *fh)
+				 const void *data)
 {
+	const struct nfs_fh *fh = data;
+
 	encode_fhandle(xdr, fh);
 }
 
@@ -583,23 +585,29 @@ static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_sattrargs *args)
+				   const void *data)
 {
+	const struct nfs_sattrargs *args = data;
+
 	encode_fhandle(xdr, args->fh);
 	encode_sattr(xdr, args->sattr);
 }
 
 static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_diropargs *args)
+				   const void *data)
 {
+	const struct nfs_diropargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name, args->len);
 }
 
 static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs_readlinkargs *args)
+				      const void *data)
 {
+	const struct nfs_readlinkargs *args = data;
+
 	encode_fhandle(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS_readlinkres_sz);
@@ -632,8 +640,10 @@ static void encode_readargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_pgio_args *args)
+				  const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS_readres_sz);
@@ -672,8 +682,10 @@ static void encode_writeargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_pgio_args *args)
+				   const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_writeargs(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
@@ -688,16 +700,20 @@ static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_createargs *args)
+				    const void *data)
 {
+	const struct nfs_createargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name, args->len);
 	encode_sattr(xdr, args->sattr);
 }
 
 static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_removeargs *args)
+				    const void *data)
 {
+	const struct nfs_removeargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
 }
 
@@ -711,8 +727,9 @@ static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_renameargs *args)
+				    const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
 
@@ -730,8 +747,10 @@ static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_linkargs *args)
+				  const void *data)
 {
+	const struct nfs_linkargs *args = data;
+
 	encode_fhandle(xdr, args->fromfh);
 	encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
 }
@@ -747,8 +766,10 @@ static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_symlinkargs *args)
+				     const void *data)
 {
+	const struct nfs_symlinkargs *args = data;
+
 	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_path(xdr, args->pages, args->pathlen);
 	encode_sattr(xdr, args->sattr);
@@ -777,8 +798,10 @@ static void encode_readdirargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_readdirargs *args)
+				     const void *data)
 {
+	const struct nfs_readdirargs *args = data;
+
 	encode_readdirargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 					args->count, NFS_readdirres_sz);
@@ -1118,7 +1141,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
-	.p_encode   =  (kxdreproc_t)nfs2_xdr_enc_##argtype,		\
+	.p_encode   =  nfs2_xdr_enc_##argtype,				\
 	.p_decode   =  (kxdrdproc_t)nfs2_xdr_dec_##restype,		\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 267126d32ec0f..773150678633f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -846,8 +846,10 @@ static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
  */
 static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs_fh *fh)
+				      const void *data)
 {
+	const struct nfs_fh *fh = data;
+
 	encode_nfs_fh3(xdr, fh);
 }
 
@@ -884,8 +886,9 @@ static void encode_sattrguard3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_sattrargs *args)
+				      const void *data)
 {
+	const struct nfs3_sattrargs *args = data;
 	encode_nfs_fh3(xdr, args->fh);
 	encode_sattr3(xdr, args->sattr);
 	encode_sattrguard3(xdr, args);
@@ -900,8 +903,10 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_diropargs *args)
+				     const void *data)
 {
+	const struct nfs3_diropargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 }
 
@@ -922,8 +927,10 @@ static void encode_access3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_accessargs *args)
+				     const void *data)
 {
+	const struct nfs3_accessargs *args = data;
+
 	encode_access3args(xdr, args);
 }
 
@@ -936,8 +943,10 @@ static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
 				       struct xdr_stream *xdr,
-				       const struct nfs3_readlinkargs *args)
+				       const void *data)
 {
+	const struct nfs3_readlinkargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS3_readlinkres_sz);
@@ -966,8 +975,10 @@ static void encode_read3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_pgio_args *args)
+				   const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS3_readres_sz);
@@ -1008,8 +1019,10 @@ static void encode_write3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_pgio_args *args)
+				    const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_write3args(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
@@ -1055,8 +1068,10 @@ static void encode_createhow3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_createargs *args)
+				     const void *data)
 {
+	const struct nfs3_createargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_createhow3(xdr, args);
 }
@@ -1071,8 +1086,10 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs3_mkdirargs *args)
+				    const void *data)
 {
+	const struct nfs3_mkdirargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_sattr3(xdr, args->sattr);
 }
@@ -1091,16 +1108,20 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_symlinkdata3(struct xdr_stream *xdr,
-				const struct nfs3_symlinkargs *args)
+				const void *data)
 {
+	const struct nfs3_symlinkargs *args = data;
+
 	encode_sattr3(xdr, args->sattr);
 	encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
 static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_symlinkargs *args)
+				      const void *data)
 {
+	const struct nfs3_symlinkargs *args = data;
+
 	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_symlinkdata3(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
@@ -1160,8 +1181,10 @@ static void encode_mknoddata3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs3_mknodargs *args)
+				    const void *data)
 {
+	const struct nfs3_mknodargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_mknoddata3(xdr, args);
 }
@@ -1175,8 +1198,10 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_removeargs *args)
+				     const void *data)
 {
+	const struct nfs_removeargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
 }
 
@@ -1190,8 +1215,9 @@ static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_renameargs *args)
+				     const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
 
@@ -1209,8 +1235,10 @@ static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs3_linkargs *args)
+				   const void *data)
 {
+	const struct nfs3_linkargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fromfh);
 	encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
 }
@@ -1240,8 +1268,10 @@ static void encode_readdir3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_readdirargs *args)
+				      const void *data)
 {
+	const struct nfs3_readdirargs *args = data;
+
 	encode_readdir3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
@@ -1280,8 +1310,10 @@ static void encode_readdirplus3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
 					  struct xdr_stream *xdr,
-					  const struct nfs3_readdirargs *args)
+					  const void *data)
 {
+	const struct nfs3_readdirargs *args = data;
+
 	encode_readdirplus3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
@@ -1310,8 +1342,10 @@ static void encode_commit3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_commitargs *args)
+				     const void *data)
 {
+	const struct nfs_commitargs *args = data;
+
 	encode_commit3args(xdr, args);
 }
 
@@ -1319,8 +1353,10 @@ static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 
 static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_getaclargs *args)
+				     const void *data)
 {
+	const struct nfs3_getaclargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fh);
 	encode_uint32(xdr, args->mask);
 	if (args->mask & (NFS_ACL | NFS_DFACL))
@@ -1331,8 +1367,9 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 
 static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_setaclargs *args)
+				     const void *data)
 {
+	const struct nfs3_setaclargs *args = data;
 	unsigned int base;
 	int error;
 
@@ -2495,7 +2532,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args,	\
+	.p_encode    = nfs3_xdr_enc_##argtype##3args,			\
 	.p_decode    = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res,	\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
@@ -2538,7 +2575,7 @@ const struct rpc_version nfs_version3 = {
 static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
-		.p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
+		.p_encode = nfs3_xdr_enc_getacl3args,
 		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
@@ -2547,7 +2584,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
-		.p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
+		.p_encode = nfs3_xdr_enc_setacl3args,
 		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 528362f69cc1b..0a1bd60a1f8e8 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -112,7 +112,7 @@
 					 decode_getattr_maxsz)
 
 static void encode_fallocate(struct xdr_stream *xdr,
-			     struct nfs42_falloc_args *args)
+			     const struct nfs42_falloc_args *args)
 {
 	encode_nfs4_stateid(xdr, &args->falloc_stateid);
 	encode_uint64(xdr, args->falloc_offset);
@@ -120,7 +120,7 @@ static void encode_fallocate(struct xdr_stream *xdr,
 }
 
 static void encode_allocate(struct xdr_stream *xdr,
-			    struct nfs42_falloc_args *args,
+			    const struct nfs42_falloc_args *args,
 			    struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr);
@@ -128,7 +128,7 @@ static void encode_allocate(struct xdr_stream *xdr,
 }
 
 static void encode_copy(struct xdr_stream *xdr,
-			struct nfs42_copy_args *args,
+			const struct nfs42_copy_args *args,
 			struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr);
@@ -145,7 +145,7 @@ static void encode_copy(struct xdr_stream *xdr,
 }
 
 static void encode_deallocate(struct xdr_stream *xdr,
-			      struct nfs42_falloc_args *args,
+			      const struct nfs42_falloc_args *args,
 			      struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr);
@@ -153,7 +153,7 @@ static void encode_deallocate(struct xdr_stream *xdr,
 }
 
 static void encode_seek(struct xdr_stream *xdr,
-			struct nfs42_seek_args *args,
+			const struct nfs42_seek_args *args,
 			struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_SEEK, decode_seek_maxsz, hdr);
@@ -163,7 +163,7 @@ static void encode_seek(struct xdr_stream *xdr,
 }
 
 static void encode_layoutstats(struct xdr_stream *xdr,
-			       struct nfs42_layoutstat_args *args,
+			       const struct nfs42_layoutstat_args *args,
 			       struct nfs42_layoutstat_devinfo *devinfo,
 			       struct compound_hdr *hdr)
 {
@@ -191,7 +191,7 @@ static void encode_layoutstats(struct xdr_stream *xdr,
 }
 
 static void encode_clone(struct xdr_stream *xdr,
-			 struct nfs42_clone_args *args,
+			 const struct nfs42_clone_args *args,
 			 struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -210,8 +210,9 @@ static void encode_clone(struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  struct nfs42_falloc_args *args)
+				  const void *data)
 {
+	const struct nfs42_falloc_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -225,7 +226,7 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 }
 
 static void encode_copy_commit(struct xdr_stream *xdr,
-			  struct nfs42_copy_args *args,
+			  const struct nfs42_copy_args *args,
 			  struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -241,8 +242,9 @@ static void encode_copy_commit(struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 			      struct xdr_stream *xdr,
-			      struct nfs42_copy_args *args)
+			      const void *data)
 {
+	const struct nfs42_copy_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -262,8 +264,9 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs42_falloc_args *args)
+				    const void *data)
 {
+	const struct nfs42_falloc_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -281,8 +284,9 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
 			      struct xdr_stream *xdr,
-			      struct nfs42_seek_args *args)
+			      const void *data)
 {
+	const struct nfs42_seek_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -299,8 +303,9 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs42_layoutstat_args *args)
+				     const void *data)
 {
+	const struct nfs42_layoutstat_args *args = data;
 	int i;
 
 	struct compound_hdr hdr = {
@@ -321,8 +326,9 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
 			       struct xdr_stream *xdr,
-			       struct nfs42_clone_args *args)
+			       const void *data)
 {
+	const struct nfs42_clone_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3aebfdc82b303..c5036ef770f9a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1651,7 +1651,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 }
 
 static void
-encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
+encode_setacl(struct xdr_stream *xdr, const struct nfs_setaclargs *arg,
+		struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -1735,7 +1736,7 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
 #if defined(CONFIG_NFS_V4_1)
 /* NFSv4.1 operations */
 static void encode_bind_conn_to_session(struct xdr_stream *xdr,
-				   struct nfs41_bind_conn_to_session_args *args,
+				   const struct nfs41_bind_conn_to_session_args *args,
 				   struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1748,7 +1749,7 @@ static void encode_bind_conn_to_session(struct xdr_stream *xdr,
 	*p = (args->use_conn_in_rdma_mode) ? cpu_to_be32(1) : cpu_to_be32(0);
 }
 
-static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
+static void encode_op_map(struct xdr_stream *xdr, const struct nfs4_op_map *op_map)
 {
 	unsigned int i;
 	encode_uint32(xdr, NFS4_OP_MAP_NUM_WORDS);
@@ -1757,7 +1758,7 @@ static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
 }
 
 static void encode_exchange_id(struct xdr_stream *xdr,
-			       struct nfs41_exchange_id_args *args,
+			       const struct nfs41_exchange_id_args *args,
 			       struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1809,7 +1810,7 @@ static void encode_exchange_id(struct xdr_stream *xdr,
 }
 
 static void encode_create_session(struct xdr_stream *xdr,
-				  struct nfs41_create_session_args *args,
+				  const struct nfs41_create_session_args *args,
 				  struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1862,7 +1863,7 @@ static void encode_create_session(struct xdr_stream *xdr,
 }
 
 static void encode_destroy_session(struct xdr_stream *xdr,
-				   struct nfs4_session *session,
+				   const struct nfs4_session *session,
 				   struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr);
@@ -1878,7 +1879,7 @@ static void encode_destroy_clientid(struct xdr_stream *xdr,
 }
 
 static void encode_reclaim_complete(struct xdr_stream *xdr,
-				    struct nfs41_reclaim_complete_args *args,
+				    const struct nfs41_reclaim_complete_args *args,
 				    struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
@@ -1974,7 +1975,7 @@ encode_layoutget(struct xdr_stream *xdr,
 static int
 encode_layoutcommit(struct xdr_stream *xdr,
 		    struct inode *inode,
-		    struct nfs4_layoutcommit_args *args,
+		    const struct nfs4_layoutcommit_args *args,
 		    struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -2044,7 +2045,7 @@ encode_secinfo_no_name(struct xdr_stream *xdr,
 }
 
 static void encode_test_stateid(struct xdr_stream *xdr,
-				struct nfs41_test_stateid_args *args,
+				const struct nfs41_test_stateid_args *args,
 				struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
@@ -2053,7 +2054,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
 }
 
 static void encode_free_stateid(struct xdr_stream *xdr,
-				struct nfs41_free_stateid_args *args,
+				const struct nfs41_free_stateid_args *args,
 				struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
@@ -2086,8 +2087,9 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
  * Encode an ACCESS request
  */
 static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_accessargs *args)
+				const void *data)
 {
+	const struct nfs4_accessargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2104,8 +2106,9 @@ static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode LOOKUP request
  */
 static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_lookup_arg *args)
+				const void *data)
 {
+	const struct nfs4_lookup_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2124,8 +2127,9 @@ static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs4_lookup_root_arg *args)
+				     const void *data)
 {
+	const struct nfs4_lookup_root_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2142,8 +2146,9 @@ static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
  * Encode REMOVE request
  */
 static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs_removeargs *args)
+				const void *data)
 {
+	const struct nfs_removeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2159,8 +2164,9 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode RENAME request
  */
 static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs_renameargs *args)
+				const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2178,8 +2184,9 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode LINK request
  */
 static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct nfs4_link_arg *args)
+			      const void *data)
 {
+	const struct nfs4_link_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2199,8 +2206,9 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode CREATE request
  */
 static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_create_arg *args)
+				const void *data)
 {
+	const struct nfs4_create_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2218,8 +2226,10 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode SYMLINK request
  */
 static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_create_arg *args)
+				 const void *data)
 {
+	const struct nfs4_create_arg *args = data;
+
 	nfs4_xdr_enc_create(req, xdr, args);
 }
 
@@ -2227,8 +2237,9 @@ static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode GETATTR request
  */
 static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_getattr_arg *args)
+				 const void *data)
 {
+	const struct nfs4_getattr_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2244,8 +2255,9 @@ static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a CLOSE request
  */
 static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_closeargs *args)
+			       const void *data)
 {
+	const struct nfs_closeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2265,8 +2277,9 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode an OPEN request
  */
 static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_openargs *args)
+			      const void *data)
 {
+	const struct nfs_openargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2287,8 +2300,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs_open_confirmargs *args)
+				      const void *data)
 {
+	const struct nfs_open_confirmargs *args = data;
 	struct compound_hdr hdr = {
 		.nops   = 0,
 	};
@@ -2304,8 +2318,9 @@ static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_openargs *args)
+				     const void *data)
 {
+	const struct nfs_openargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2325,8 +2340,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs_closeargs *args)
+					const void *data)
 {
+	const struct nfs_closeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2344,8 +2360,9 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
  * Encode a LOCK request
  */
 static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_lock_args *args)
+			      const void *data)
 {
+	const struct nfs_lock_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2361,8 +2378,9 @@ static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a LOCKT request
  */
 static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_lockt_args *args)
+			       const void *data)
 {
+	const struct nfs_lockt_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2378,8 +2396,9 @@ static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a LOCKU request
  */
 static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_locku_args *args)
+			       const void *data)
 {
+	const struct nfs_locku_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2393,8 +2412,9 @@ static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
 					   struct xdr_stream *xdr,
-					struct nfs_release_lockowner_args *args)
+					   const void *data)
 {
+	const struct nfs_release_lockowner_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = 0,
 	};
@@ -2408,8 +2428,9 @@ static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
  * Encode a READLINK request
  */
 static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  const struct nfs4_readlink *args)
+				  const void *data)
 {
+	const struct nfs4_readlink *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2428,8 +2449,9 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READDIR request
  */
 static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_readdir_arg *args)
+				 const void *data)
 {
+	const struct nfs4_readdir_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2451,8 +2473,9 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READ request
  */
 static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_pgio_args *args)
+			      const void *data)
 {
+	const struct nfs_pgio_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2472,8 +2495,9 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode an SETATTR request
  */
 static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_setattrargs *args)
+				 const void *data)
 {
+	const struct nfs_setattrargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2490,8 +2514,9 @@ static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a GETACL request
  */
 static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_getaclargs *args)
+				const void *data)
 {
+	const struct nfs_getaclargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2513,8 +2538,9 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a WRITE request
  */
 static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_pgio_args *args)
+			       const void *data)
 {
+	const struct nfs_pgio_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2533,8 +2559,9 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
  *  a COMMIT request
  */
 static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_commitargs *args)
+				const void *data)
 {
+	const struct nfs_commitargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2550,8 +2577,9 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
  * FSINFO request
  */
 static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs4_fsinfo_arg *args)
+				const void *data)
 {
+	const struct nfs4_fsinfo_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2567,8 +2595,9 @@ static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
  * a PATHCONF request
  */
 static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  const struct nfs4_pathconf_arg *args)
+				  const void *data)
 {
+	const struct nfs4_pathconf_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2585,8 +2614,9 @@ static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
  * a STATFS request
  */
 static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_statfs_arg *args)
+				const void *data)
 {
+	const struct nfs4_statfs_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2604,8 +2634,9 @@ static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_server_caps_arg *args)
+				     const void *data)
 {
+	const struct nfs4_server_caps_arg *args = data;
 	const u32 *bitmask = args->bitmask;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2622,8 +2653,10 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
  * a RENEW request
  */
 static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_client *clp)
+			       const void *data)
+
 {
+	const struct nfs_client *clp = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2638,8 +2671,9 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_setclientid *sc)
+				     const void *data)
 {
+	const struct nfs4_setclientid *sc = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2654,8 +2688,9 @@ static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
 					     struct xdr_stream *xdr,
-					     struct nfs4_setclientid_res *arg)
+					     const void *data)
 {
+	const struct nfs4_setclientid_res *arg = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2670,8 +2705,9 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs4_delegreturnargs *args)
+				     const void *data)
 {
+	const struct nfs4_delegreturnargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2692,8 +2728,9 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_fs_locations_arg *args)
+				      const void *data)
 {
+	const struct nfs4_fs_locations_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2715,8 +2752,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 	}
 
 	/* Set up reply kvec to capture returned fs_locations array. */
-	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
-			0, PAGE_SIZE);
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
+			 (struct page **)&args->page, 0, PAGE_SIZE);
 	encode_nops(&hdr);
 }
 
@@ -2725,8 +2762,9 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nfs4_secinfo_arg *args)
+				const void *data)
 {
+	const struct nfs4_secinfo_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2743,8 +2781,9 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_fsid_present_arg *args)
+				      const void *data)
 {
+	const struct nfs4_fsid_present_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2764,8 +2803,9 @@ static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nfs41_bind_conn_to_session_args *args)
+				const void *data)
 {
+	const struct nfs41_bind_conn_to_session_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2780,8 +2820,9 @@ static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs41_exchange_id_args *args)
+				     const void *data)
 {
+	const struct nfs41_exchange_id_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2796,8 +2837,9 @@ static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs41_create_session_args *args)
+					const void *data)
 {
+	const struct nfs41_create_session_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2812,8 +2854,9 @@ static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
 					 struct xdr_stream *xdr,
-					 struct nfs4_session *session)
+					 const void *data)
 {
+	const struct nfs4_session *session = data;
 	struct compound_hdr hdr = {
 		.minorversion = session->clp->cl_mvops->minor_version,
 	};
@@ -2828,8 +2871,9 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
 					 struct xdr_stream *xdr,
-					 struct nfs_client *clp)
+					 const void *data)
 {
+	const struct nfs_client *clp = data;
 	struct compound_hdr hdr = {
 		.minorversion = clp->cl_mvops->minor_version,
 	};
@@ -2843,8 +2887,9 @@ static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
  * a SEQUENCE request
  */
 static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs4_sequence_args *args)
+				  const void *data)
 {
+	const struct nfs4_sequence_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(args),
 	};
@@ -2859,8 +2904,9 @@ static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs4_get_lease_time_args *args)
+					const void *data)
 {
+	const struct nfs4_get_lease_time_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
 	};
@@ -2878,8 +2924,9 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
 					  struct xdr_stream *xdr,
-				struct nfs41_reclaim_complete_args *args)
+					  const void *data)
 {
+	const struct nfs41_reclaim_complete_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args)
 	};
@@ -2895,8 +2942,9 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
 				       struct xdr_stream *xdr,
-				       struct nfs4_getdeviceinfo_args *args)
+				       const void *data)
 {
+	const struct nfs4_getdeviceinfo_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2919,8 +2967,9 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs4_layoutget_args *args)
+				   const void *data)
 {
+	const struct nfs4_layoutget_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2941,8 +2990,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_layoutcommit_args *args)
+				      const void *priv)
 {
+	const struct nfs4_layoutcommit_args *args = priv;
 	struct nfs4_layoutcommit_data *data =
 		container_of(args, struct nfs4_layoutcommit_data, args);
 	struct compound_hdr hdr = {
@@ -2962,8 +3012,9 @@ static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_layoutreturn_args *args)
+				      const void *data)
 {
+	const struct nfs4_layoutreturn_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2978,10 +3029,11 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
 /*
  * Encode SECINFO_NO_NAME request
  */
-static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
+static void nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs41_secinfo_no_name_args *args)
+					const void *data)
 {
+	const struct nfs41_secinfo_no_name_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2991,7 +3043,6 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
 	encode_putrootfh(xdr, &hdr);
 	encode_secinfo_no_name(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
@@ -2999,8 +3050,9 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs41_test_stateid_args *args)
+				      const void *data)
 {
+	const struct nfs41_test_stateid_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -3016,8 +3068,9 @@ static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs41_free_stateid_args *args)
+				     const void *data)
 {
+	const struct nfs41_free_stateid_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -6364,8 +6417,9 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Encode an SETACL request
  */
 static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_setaclargs *args)
+				const void *data)
 {
+	const struct nfs_setaclargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -7484,7 +7538,7 @@ nfs4_stat_to_errno(int stat)
 #define PROC(proc, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
-	.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
+	.p_encode = nfs4_xdr_##argtype,				\
 	.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
-- 
GitLab


From 1502c81b443d70f2e3009aef96428ffa94e2e29d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:47:53 +0200
Subject: [PATCH 0028/1958] nfsd: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfsd/nfs4callback.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 0274db6e65d0d..bede418237ee7 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -468,7 +468,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
  * NB: Without this zero space reservation, callbacks over krb5p fail
  */
 static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 void *__unused)
+				 const void *__unused)
 {
 	xdr_reserve_space(xdr, 0);
 }
@@ -477,8 +477,9 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
  * 20.2. Operation 4: CB_RECALL - Recall a Delegation
  */
 static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
-				   const struct nfsd4_callback *cb)
+				   const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfs4_delegation *dp = cb_to_delegation(cb);
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = cb->cb_clp->cl_cb_ident,
@@ -585,8 +586,9 @@ static void encode_cb_layout4args(struct xdr_stream *xdr,
 
 static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfsd4_callback *cb)
+				   const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfs4_layout_stateid *ls =
 		container_of(cb, struct nfs4_layout_stateid, ls_recall);
 	struct nfs4_cb_compound_hdr hdr = {
@@ -631,8 +633,9 @@ static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so
 
 static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					const struct nfsd4_callback *cb)
+					const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfsd4_blocked_lock *nbl =
 		container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
 	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
@@ -682,7 +685,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 #define PROC(proc, call, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
-	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
+	.p_encode  = nfs4_xdr_enc_##argtype,		\
 	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
-- 
GitLab


From 89daf3602310b46ddf1bea4873be737d8781fc39 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:54:06 +0200
Subject: [PATCH 0029/1958] sunrpc/auth_gss: nfsd: fix encoder callback
 prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  2 +-
 net/sunrpc/auth_gss/gss_rpc_xdr.c    | 11 ++++++-----
 net/sunrpc/auth_gss/gss_rpc_xdr.h    |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index f0c6a8c78a567..45ab924da726c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -55,7 +55,7 @@ enum {
 #define PROC(proc, name)				\
 [GSSX_##proc] = {					\
 	.p_proc   = GSSX_##proc,			\
-	.p_encode = (kxdreproc_t)gssx_enc_##name,	\
+	.p_encode = gssx_enc_##name,	\
 	.p_decode = (kxdrdproc_t)gssx_dec_##name,	\
 	.p_arglen = GSSX_ARG_##name##_sz,		\
 	.p_replen = GSSX_RES_##name##_sz, 		\
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 25d9a9cf7b66b..5e54f47430921 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -44,7 +44,7 @@ static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
 }
 
 static int gssx_enc_buffer(struct xdr_stream *xdr,
-			   gssx_buffer *buf)
+			   const gssx_buffer *buf)
 {
 	__be32 *p;
 
@@ -56,7 +56,7 @@ static int gssx_enc_buffer(struct xdr_stream *xdr,
 }
 
 static int gssx_enc_in_token(struct xdr_stream *xdr,
-			     struct gssp_in_token *in)
+			     const struct gssp_in_token *in)
 {
 	__be32 *p;
 
@@ -130,7 +130,7 @@ static int gssx_dec_option(struct xdr_stream *xdr,
 }
 
 static int dummy_enc_opt_array(struct xdr_stream *xdr,
-				struct gssx_option_array *oa)
+				const struct gssx_option_array *oa)
 {
 	__be32 *p;
 
@@ -348,7 +348,7 @@ static int gssx_dec_status(struct xdr_stream *xdr,
 }
 
 static int gssx_enc_call_ctx(struct xdr_stream *xdr,
-			     struct gssx_call_ctx *ctx)
+			     const struct gssx_call_ctx *ctx)
 {
 	struct gssx_option opt;
 	__be32 *p;
@@ -733,8 +733,9 @@ static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
 
 void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct gssx_arg_accept_sec_context *arg)
+				 const void *data)
 {
+	const struct gssx_arg_accept_sec_context *arg = data;
 	int err;
 
 	err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 9d88c6239f014..87cd719ca0ad7 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -179,7 +179,7 @@ struct gssx_res_accept_sec_context {
 #define gssx_dec_init_sec_context NULL
 void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct gssx_arg_accept_sec_context *args);
+				 const void *data);
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
 				struct gssx_res_accept_sec_context *res);
-- 
GitLab


From 73c8dc133afb0cbe72a9234894ea72c2a0e71a73 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:58:11 +0200
Subject: [PATCH 0030/1958] sunrpc: properly type argument to kxdrdproc_t

Pass struct rpc_request as the first argument instead of an untyped blob.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/xdr.h | 3 ++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 290f189de2009..ed0fbf0d8d0f5 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -226,7 +226,8 @@ struct xdr_stream {
  */
 typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		const void *obj);
-typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int	(*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9fee20dc0c809..964d5c4a1b609 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2481,7 +2481,8 @@ static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 {
 }
 
-static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj)
 {
 	return 0;
 }
-- 
GitLab


From 605d712fa89f16ed5266897ecda01dd1a74b8aa8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:00:29 +0200
Subject: [PATCH 0031/1958] sunrpc: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 net/sunrpc/rpcb_clnt.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index d0269a39afdfa..f67b9e2897b49 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -861,8 +861,9 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    struct rpcbind_args *rpcb)
+			    void *data)
 {
+	struct rpcbind_args *rpcb = data;
 	unsigned long port;
 	__be32 *p;
 
@@ -883,8 +884,9 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
-			unsigned int *boolp)
+			void *data)
 {
+	unsigned int *boolp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4);
@@ -939,8 +941,9 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    struct rpcbind_args *rpcb)
+			    void *data)
 {
+	struct rpcbind_args *rpcb = data;
 	struct sockaddr_storage address;
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	__be32 *p;
@@ -995,7 +998,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1005,7 +1008,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1015,7 +1018,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
 		.p_encode	= rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
+		.p_decode	= rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
 		.p_statidx	= RPCBPROC_GETPORT,
@@ -1028,7 +1031,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1038,7 +1041,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1048,7 +1051,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
+		.p_decode	= rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -1061,7 +1064,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1071,7 +1074,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1081,7 +1084,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
+		.p_decode	= rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
-- 
GitLab


From 305c62417f2e5ff9e784ff7b52e493368373e2d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:03:02 +0200
Subject: [PATCH 0032/1958] sunrpc/auth_gss: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +-
 net/sunrpc/auth_gss/gss_rpc_xdr.c    | 3 ++-
 net/sunrpc/auth_gss/gss_rpc_xdr.h    | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 45ab924da726c..a80b8e6074781 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -56,7 +56,7 @@ enum {
 [GSSX_##proc] = {					\
 	.p_proc   = GSSX_##proc,			\
 	.p_encode = gssx_enc_##name,	\
-	.p_decode = (kxdrdproc_t)gssx_dec_##name,	\
+	.p_decode = gssx_dec_##name,	\
 	.p_arglen = GSSX_ARG_##name##_sz,		\
 	.p_replen = GSSX_RES_##name##_sz, 		\
 	.p_statidx = GSSX_##proc,			\
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 5e54f47430921..c4778cae58ef1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -790,8 +790,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct gssx_res_accept_sec_context *res)
+				void *data)
 {
+	struct gssx_res_accept_sec_context *res = data;
 	u32 value_follows;
 	int err;
 	struct page *scratch;
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 87cd719ca0ad7..146c310329173 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -182,7 +182,7 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 const void *data);
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct gssx_res_accept_sec_context *res);
+				void *data);
 #define gssx_enc_release_handle NULL
 #define gssx_dec_release_handle NULL
 #define gssx_enc_get_mic NULL
-- 
GitLab


From d39916c48771597a3213758b6dc508b6b82083e4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:04:45 +0200
Subject: [PATCH 0033/1958] nfsd: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfsd/nfs4callback.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index bede418237ee7..a2bedbd05b2bf 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -513,8 +513,9 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfsd4_callback *cb)
+				  void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -604,8 +605,9 @@ static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
 
 static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfsd4_callback *cb)
+				  void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -662,8 +664,9 @@ static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
 
 static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct nfsd4_callback *cb)
+					void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -686,7 +689,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
 	.p_encode  = nfs4_xdr_enc_##argtype,		\
-	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
+	.p_decode  = nfs4_xdr_dec_##restype,				\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
 	.p_statidx = NFSPROC4_CB_##call,				\
-- 
GitLab


From 1fa2339123f596d0c7608670d9052805a90e92ca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:06:20 +0200
Subject: [PATCH 0034/1958] lockd: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c |  8 +++++---
 fs/lockd/clntxdr.c  |  8 +++++---
 fs/lockd/mon.c      | 10 ++++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 3cbad662120a5..f0ab7a99dd236 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -533,8 +533,9 @@ static int decode_nlm4_testrply(struct xdr_stream *xdr,
 
 static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nlm_res *result)
+				void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -553,8 +554,9 @@ static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
  */
 static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 			    struct xdr_stream *xdr,
-			    struct nlm_res *result)
+			    void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -575,7 +577,7 @@ static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = nlm4_xdr_enc_##argtype,				\
-	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
+	.p_decode    = nlm4_xdr_dec_##restype,				\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 825c0fde8c80b..17e7f08b3a22f 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -531,8 +531,9 @@ static int decode_nlm_testrply(struct xdr_stream *xdr,
 
 static int nlm_xdr_dec_testres(struct rpc_rqst *req,
 			       struct xdr_stream *xdr,
-			       struct nlm_res *result)
+			       void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -551,8 +552,9 @@ static int nlm_xdr_dec_testres(struct rpc_rqst *req,
  */
 static int nlm_xdr_dec_res(struct rpc_rqst *req,
 			   struct xdr_stream *xdr,
-			   struct nlm_res *result)
+			   void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -573,7 +575,7 @@ static int nlm_xdr_dec_res(struct rpc_rqst *req,
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = nlm_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
+	.p_decode    = nlm_xdr_dec_##restype,				\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 8043fd4b8a5c2..80630f0347e18 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -490,8 +490,9 @@ static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nsm_res *resp)
+				void *data)
 {
+	struct nsm_res *resp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4 + 4);
@@ -507,8 +508,9 @@ static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
 
 static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 			    struct xdr_stream *xdr,
-			    struct nsm_res *resp)
+			    void *data)
 {
+	struct nsm_res *resp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4);
@@ -533,7 +535,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= nsm_xdr_enc_mon,
-		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
+		.p_decode	= nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
 		.p_statidx	= NSMPROC_MON,
@@ -542,7 +544,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
 		.p_encode	= nsm_xdr_enc_unmon,
-		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
+		.p_decode	= nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
 		.p_statidx	= NSMPROC_UNMON,
-- 
GitLab


From 18d9cff40010cb13cc57c36c982b273140b6b73e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:09:02 +0200
Subject: [PATCH 0035/1958] nfs: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/mount_clnt.c |  10 ++--
 fs/nfs/nfs2xdr.c    |  15 +++--
 fs/nfs/nfs3xdr.c    |  55 +++++++++++-------
 fs/nfs/nfs42xdr.c   |  18 ++++--
 fs/nfs/nfs4xdr.c    | 137 +++++++++++++++++++++++++++++---------------
 5 files changed, 151 insertions(+), 84 deletions(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index f435d640d5527..806657d650745 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -357,8 +357,9 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 
 static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct mountres *res)
+				void *data)
 {
+	struct mountres *res = data;
 	int status;
 
 	status = decode_status(xdr, res);
@@ -449,8 +450,9 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 
 static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct mountres *res)
+				 void *data)
 {
+	struct mountres *res = data;
 	int status;
 
 	status = decode_fhs_status(xdr, res);
@@ -468,7 +470,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
+		.p_decode	= mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_statidx	= MOUNTPROC_MNT,
@@ -487,7 +489,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
+		.p_decode	= mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 8ecd58597228d..a299648ea3215 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -832,13 +832,13 @@ static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_fattr *result)
+				 void *result)
 {
 	return decode_attrstat(xdr, result, NULL);
 }
 
 static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_diropok *result)
+				 void *result)
 {
 	return decode_diropres(xdr, result);
 }
@@ -883,8 +883,9 @@ static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
  *	};
  */
 static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_pgio_res *result)
+				void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -905,8 +906,10 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_pgio_res *result)
+				 void *data)
 {
+	struct nfs_pgio_res *result = data;
+
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
 	return decode_attrstat(xdr, result->fattr, &result->op_status);
@@ -1057,7 +1060,7 @@ static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
 }
 
 static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs2_fsstat *result)
+				  void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1142,7 +1145,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
 	.p_encode   =  nfs2_xdr_enc_##argtype,				\
-	.p_decode   =  (kxdrdproc_t)nfs2_xdr_dec_##restype,		\
+	.p_decode   =  nfs2_xdr_dec_##restype,				\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
 	.p_timer    =  timer,						\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 773150678633f..cc272eb8be3e0 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1419,7 +1419,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs_fattr *result)
+				    void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1456,7 +1456,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs_fattr *result)
+				    void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1497,8 +1497,9 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_diropres *result)
+				   void *data)
 {
+	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1544,8 +1545,9 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_accessres *result)
+				   void *data)
 {
+	struct nfs3_accessres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1585,7 +1587,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_fattr *result)
+				     void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1663,8 +1665,9 @@ static int decode_read3resok(struct xdr_stream *xdr,
 }
 
 static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_pgio_res *result)
+				 void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1736,8 +1739,9 @@ static int decode_write3resok(struct xdr_stream *xdr,
 }
 
 static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs_pgio_res *result)
+				  void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1801,8 +1805,9 @@ static int decode_create3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_diropres *result)
+				   void *data)
 {
+	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1841,8 +1846,9 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_removeres *result)
+				   void *data)
 {
+	struct nfs_removeres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1882,8 +1888,9 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_renameres *result)
+				   void *data)
 {
+	struct nfs_renameres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1925,8 +1932,9 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
  *	};
  */
 static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs3_linkres *result)
+				 void *data)
 {
+	struct nfs3_linkres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2109,8 +2117,9 @@ static int decode_readdir3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs3_readdirres *result)
+				    void *data)
 {
+	struct nfs3_readdirres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2177,8 +2186,9 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fsstat *result)
+				   void *data)
 {
+	struct nfs_fsstat *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2253,8 +2263,9 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fsinfo *result)
+				   void *data)
 {
+	struct nfs_fsinfo *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2316,8 +2327,9 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_pathconf *result)
+				     void *data)
 {
+	struct nfs_pathconf *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2357,8 +2369,9 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_commitres *result)
+				   void *data)
 {
+	struct nfs_commitres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2426,7 +2439,7 @@ static inline int decode_getacl3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_getaclres *result)
+				   void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -2445,7 +2458,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 
 static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fattr *result)
+				   void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -2533,7 +2546,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
 	.p_encode    = nfs3_xdr_enc_##argtype##3args,			\
-	.p_decode    = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res,	\
+	.p_decode    = nfs3_xdr_dec_##restype##3res,			\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
 	.p_timer     = timer,						\
@@ -2576,7 +2589,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = nfs3_xdr_enc_getacl3args,
-		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
+		.p_decode = nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
 		.p_timer = 1,
@@ -2585,7 +2598,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
 		.p_encode = nfs3_xdr_enc_setacl3args,
-		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
+		.p_decode = nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
 		.p_timer = 0,
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0a1bd60a1f8e8..5ee1b0f0d9044 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -454,8 +454,9 @@ static int decode_clone(struct xdr_stream *xdr)
  */
 static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs42_falloc_res *res)
+				 void *data)
 {
+	struct nfs42_falloc_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -481,8 +482,9 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
 			     struct xdr_stream *xdr,
-			     struct nfs42_copy_res *res)
+			     void *data)
 {
+	struct nfs42_copy_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -514,8 +516,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
 				   struct xdr_stream *xdr,
-				   struct nfs42_falloc_res *res)
+				   void *data)
 {
+	struct nfs42_falloc_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -541,8 +544,9 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
 			     struct xdr_stream *xdr,
-			     struct nfs42_seek_res *res)
+			     void *data)
 {
+	struct nfs42_seek_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -565,8 +569,9 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs42_layoutstat_res *res)
+				    void *data)
 {
+	struct nfs42_layoutstat_res *res = data;
 	struct compound_hdr hdr;
 	int status, i;
 
@@ -595,8 +600,9 @@ static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
 			      struct xdr_stream *xdr,
-			      struct nfs42_clone_res *res)
+			      void *data)
 {
+	struct nfs42_clone_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c5036ef770f9a..797f3ce752861 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6154,8 +6154,9 @@ int decode_layoutreturn(struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs_closeres *res)
+				       void *data)
 {
+	struct nfs_closeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6183,8 +6184,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
  * Decode ACCESS response
  */
 static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_accessres *res)
+			       void *data)
 {
+	struct nfs4_accessres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6209,8 +6211,9 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOOKUP response
  */
 static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_lookup_res *res)
+			       void *data)
 {
+	struct nfs4_lookup_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6239,8 +6242,9 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs4_lookup_res *res)
+				    void *data)
 {
+	struct nfs4_lookup_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6265,8 +6269,9 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
  * Decode REMOVE response
  */
 static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_removeres *res)
+			       void *data)
 {
+	struct nfs_removeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6288,8 +6293,9 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode RENAME response
  */
 static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_renameres *res)
+			       void *data)
 {
+	struct nfs_renameres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6317,8 +6323,9 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LINK response
  */
 static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs4_link_res *res)
+			     void *data)
 {
+	struct nfs4_link_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6356,8 +6363,9 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode CREATE response
  */
 static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_create_res *res)
+			       void *data)
 {
+	struct nfs4_create_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6385,7 +6393,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode SYMLINK response
  */
 static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_create_res *res)
+				void *res)
 {
 	return nfs4_xdr_dec_create(rqstp, xdr, res);
 }
@@ -6394,8 +6402,9 @@ static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode GETATTR response
  */
 static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_getattr_res *res)
+				void *data)
 {
+	struct nfs4_getattr_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6436,8 +6445,9 @@ static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int
 nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-		    struct nfs_setaclres *res)
+		    void *data)
 {
+	struct nfs_setaclres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6460,8 +6470,9 @@ nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int
 nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-		    struct nfs_getaclres *res)
+		    void *data)
 {
+	struct nfs_getaclres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6488,8 +6499,9 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode CLOSE response
  */
 static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_closeres *res)
+			      void *data)
 {
+	struct nfs_closeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6522,8 +6534,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode OPEN response
  */
 static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_openres *res)
+			     void *data)
 {
+	struct nfs_openres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6554,8 +6567,9 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs_open_confirmres *res)
+				     void *data)
 {
+	struct nfs_open_confirmres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6575,8 +6589,9 @@ static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs_openres *res)
+				    void *data)
 {
+	struct nfs_openres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6604,8 +6619,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nfs_setattrres *res)
+				void *data)
 {
+	struct nfs_setattrres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6630,8 +6646,9 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
  * Decode LOCK response
  */
 static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_lock_res *res)
+			     void *data)
 {
+	struct nfs_lock_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6653,8 +6670,9 @@ static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOCKT response
  */
 static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_lockt_res *res)
+			      void *data)
 {
+	struct nfs_lockt_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6676,8 +6694,9 @@ static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOCKU response
  */
 static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_locku_res *res)
+			      void *data)
 {
+	struct nfs_locku_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6712,8 +6731,9 @@ static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs4_readlink_res *res)
+				 void *data)
 {
+	struct nfs4_readlink_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6735,8 +6755,9 @@ static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
  * Decode READDIR response
  */
 static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_readdir_res *res)
+				void *data)
 {
+	struct nfs4_readdir_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6758,8 +6779,9 @@ static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode Read response
  */
 static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_pgio_res *res)
+			     void *data)
 {
+	struct nfs_pgio_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6784,8 +6806,9 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode WRITE response
  */
 static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_pgio_res *res)
+			      void *data)
 {
+	struct nfs_pgio_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6814,8 +6837,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode COMMIT response
  */
 static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_commitres *res)
+			       void *data)
 {
+	struct nfs_commitres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6838,8 +6862,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode FSINFO response
  */
 static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs4_fsinfo_res *res)
+			       void *data)
 {
+	struct nfs4_fsinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6857,8 +6882,9 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode PATHCONF response
  */
 static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs4_pathconf_res *res)
+				 void *data)
 {
+	struct nfs4_pathconf_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6876,8 +6902,9 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode STATFS response
  */
 static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs4_statfs_res *res)
+			       void *data)
 {
+	struct nfs4_statfs_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6896,8 +6923,9 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs4_server_caps_res *res)
+				    void *data)
 {
+	struct nfs4_server_caps_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6935,8 +6963,9 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs4_setclientid_res *res)
+				    void *data)
 {
+	struct nfs4_setclientid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6950,7 +6979,8 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
  * Decode SETCLIENTID_CONFIRM response
  */
 static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
-					    struct xdr_stream *xdr)
+					    struct xdr_stream *xdr,
+					    void *data)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -6966,8 +6996,9 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
  */
 static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs4_delegreturnres *res)
+				    void *data)
 {
+	struct nfs4_delegreturnres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7001,8 +7032,9 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_fs_locations_res *res)
+				     void *data)
 {
+	struct nfs4_fs_locations_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7044,8 +7076,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
  */
 static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nfs4_secinfo_res *res)
+				void *data)
 {
+	struct nfs4_secinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7068,8 +7101,9 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_fsid_present_res *res)
+				     void *data)
 {
+	struct nfs4_fsid_present_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7129,7 +7163,7 @@ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs41_create_session_res *res)
+				       void *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -7177,7 +7211,7 @@ static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs4_sequence_res *res)
+				 void *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -7193,8 +7227,9 @@ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs4_get_lease_time_res *res)
+				       void *data)
 {
+	struct nfs4_get_lease_time_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7213,8 +7248,9 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
 					 struct xdr_stream *xdr,
-					 struct nfs41_reclaim_complete_res *res)
+					 void *data)
 {
+	struct nfs41_reclaim_complete_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7231,8 +7267,9 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
 				      struct xdr_stream *xdr,
-				      struct nfs4_getdeviceinfo_res *res)
+				      void *data)
 {
+	struct nfs4_getdeviceinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7252,8 +7289,9 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfs4_layoutget_res *res)
+				  void *data)
 {
+	struct nfs4_layoutget_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7276,8 +7314,9 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_layoutreturn_res *res)
+				     void *data)
 {
+	struct nfs4_layoutreturn_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7300,8 +7339,9 @@ static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_layoutcommit_res *res)
+				     void *data)
 {
+	struct nfs4_layoutcommit_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7327,8 +7367,9 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct nfs4_secinfo_res *res)
+					void *data)
 {
+	struct nfs4_secinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7351,8 +7392,9 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs41_test_stateid_res *res)
+				     void *data)
 {
+	struct nfs41_test_stateid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7372,8 +7414,9 @@ static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs41_free_stateid_res *res)
+				     void *data)
 {
+	struct nfs41_free_stateid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7539,7 +7582,7 @@ nfs4_stat_to_errno(int stat)
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
 	.p_encode = nfs4_xdr_##argtype,				\
-	.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
+	.p_decode = nfs4_xdr_##restype,				\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
 	.p_statidx = NFSPROC4_CLNT_##proc,			\
-- 
GitLab


From f4dac4ade5ba4e84b1fb28ff52265f13aa833838 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 11 May 2017 09:22:18 +0200
Subject: [PATCH 0036/1958] nfs: don't cast callback decode/proc/encode
 routines

Instead declare all functions with the proper methods signature.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback.h      | 27 +++++--------
 fs/nfs/callback_proc.c | 33 ++++++++++------
 fs/nfs/callback_xdr.c  | 86 ++++++++++++++++++++++--------------------
 3 files changed, 77 insertions(+), 69 deletions(-)

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index c701c308fac52..3dc54d7cb19c0 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -114,8 +114,7 @@ struct cb_sequenceres {
 	uint32_t			csr_target_highestslotid;
 };
 
-extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-				       struct cb_sequenceres *res,
+extern __be32 nfs4_callback_sequence(void *argp, void *resp,
 				       struct cb_process_state *cps);
 
 #define RCA4_TYPE_MASK_RDATA_DLG	0
@@ -134,15 +133,13 @@ struct cb_recallanyargs {
 	uint32_t	craa_type_mask;
 };
 
-extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
-					void *dummy,
+extern __be32 nfs4_callback_recallany(void *argp, void *resp,
 					struct cb_process_state *cps);
 
 struct cb_recallslotargs {
 	uint32_t	crsa_target_highest_slotid;
 };
-extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
-					 void *dummy,
+extern __be32 nfs4_callback_recallslot(void *argp, void *resp,
 					 struct cb_process_state *cps);
 
 struct cb_layoutrecallargs {
@@ -159,9 +156,8 @@ struct cb_layoutrecallargs {
 	};
 };
 
-extern __be32 nfs4_callback_layoutrecall(
-	struct cb_layoutrecallargs *args,
-	void *dummy, struct cb_process_state *cps);
+extern __be32 nfs4_callback_layoutrecall(void *argp, void *resp,
+		struct cb_process_state *cps);
 
 struct cb_devicenotifyitem {
 	uint32_t		cbd_notify_type;
@@ -175,9 +171,8 @@ struct cb_devicenotifyargs {
 	struct cb_devicenotifyitem	 *devs;
 };
 
-extern __be32 nfs4_callback_devicenotify(
-	struct cb_devicenotifyargs *args,
-	void *dummy, struct cb_process_state *cps);
+extern __be32 nfs4_callback_devicenotify(void *argp, void *resp,
+		struct cb_process_state *cps);
 
 struct cb_notify_lock_args {
 	struct nfs_fh			cbnl_fh;
@@ -185,15 +180,13 @@ struct cb_notify_lock_args {
 	bool				cbnl_valid;
 };
 
-extern __be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args,
-					 void *dummy,
+extern __be32 nfs4_callback_notify_lock(void *argp, void *resp,
 					 struct cb_process_state *cps);
 #endif /* CONFIG_NFS_V4_1 */
 extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
-				    struct cb_getattrres *res,
+extern __be32 nfs4_callback_getattr(void *argp, void *resp,
 				    struct cb_process_state *cps);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+extern __be32 nfs4_callback_recall(void *argp, void *resp,
 				   struct cb_process_state *cps);
 #if IS_ENABLED(CONFIG_NFS_V4)
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 52479f180ea14..5427cdf04c5a1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -19,10 +19,11 @@
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
-			     struct cb_getattrres *res,
+__be32 nfs4_callback_getattr(void *argp, void *resp,
 			     struct cb_process_state *cps)
 {
+	struct cb_getattrargs *args = argp;
+	struct cb_getattrres *res = resp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
@@ -68,9 +69,10 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
 	return res->status;
 }
 
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+__be32 nfs4_callback_recall(void *argp, void *resp,
 			    struct cb_process_state *cps)
 {
+	struct cb_recallargs *args = argp;
 	struct inode *inode;
 	__be32 res;
 	
@@ -324,9 +326,10 @@ static u32 do_callback_layoutrecall(struct nfs_client *clp,
 	return initiate_bulk_draining(clp, args);
 }
 
-__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
-				  void *dummy, struct cb_process_state *cps)
+__be32 nfs4_callback_layoutrecall(void *argp, void *resp,
+				  struct cb_process_state *cps)
 {
+	struct cb_layoutrecallargs *args = argp;
 	u32 res = NFS4ERR_OP_NOT_IN_SESSION;
 
 	if (cps->clp)
@@ -345,9 +348,10 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp)
 	do_callback_layoutrecall(clp, &args);
 }
 
-__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
-				  void *dummy, struct cb_process_state *cps)
+__be32 nfs4_callback_devicenotify(void *argp, void *resp,
+				  struct cb_process_state *cps)
 {
+	struct cb_devicenotifyargs *args = argp;
 	int i;
 	__be32 res = 0;
 	struct nfs_client *clp = cps->clp;
@@ -469,10 +473,11 @@ static bool referring_call_exists(struct nfs_client *clp,
 	return status;
 }
 
-__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-			      struct cb_sequenceres *res,
+__be32 nfs4_callback_sequence(void *argp, void *resp,
 			      struct cb_process_state *cps)
 {
+	struct cb_sequenceargs *args = argp;
+	struct cb_sequenceres *res = resp;
 	struct nfs4_slot_table *tbl;
 	struct nfs4_slot *slot;
 	struct nfs_client *clp;
@@ -571,9 +576,10 @@ validate_bitmap_values(unsigned long mask)
 	return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
 }
 
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+__be32 nfs4_callback_recallany(void *argp, void *resp,
 			       struct cb_process_state *cps)
 {
+	struct cb_recallanyargs *args = argp;
 	__be32 status;
 	fmode_t flags = 0;
 
@@ -606,9 +612,10 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
 }
 
 /* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+__be32 nfs4_callback_recallslot(void *argp, void *resp,
 				struct cb_process_state *cps)
 {
+	struct cb_recallslotargs *args = argp;
 	struct nfs4_slot_table *fc_tbl;
 	__be32 status;
 
@@ -631,9 +638,11 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
 	return status;
 }
 
-__be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, void *dummy,
+__be32 nfs4_callback_notify_lock(void *argp, void *resp,
 				 struct cb_process_state *cps)
 {
+	struct cb_notify_lock_args *args = argp;
+
 	if (!cps->clp) /* set in cb_sequence */
 		return htonl(NFS4ERR_OP_NOT_IN_SESSION);
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c14758e08d738..287c02202b258 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -43,16 +43,11 @@
 /* Internal error code */
 #define NFS4ERR_RESOURCE_HDR	11050
 
-typedef __be32 (*callback_process_op_t)(void *, void *,
-					struct cb_process_state *);
-typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
-typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
-
-
 struct callback_op {
-	callback_process_op_t process_op;
-	callback_decode_arg_t decode_args;
-	callback_encode_res_t encode_res;
+	__be32 (*process_op)(void *, void *, struct cb_process_state *);
+	__be32 (*decode_args)(struct svc_rqst *, struct xdr_stream *, void *);
+	__be32 (*encode_res)(struct svc_rqst *, struct xdr_stream *,
+			const void *);
 	long res_maxsize;
 };
 
@@ -184,8 +179,10 @@ static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
 	return 0;
 }
 
-static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+static __be32 decode_getattr_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_getattrargs *args = argp;
 	__be32 status;
 
 	status = decode_fh(xdr, &args->fh);
@@ -194,8 +191,10 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
 	return decode_bitmap(xdr, args->bitmap);
 }
 
-static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+static __be32 decode_recall_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_recallargs *args = argp;
 	__be32 *p;
 	__be32 status;
 
@@ -217,9 +216,9 @@ static __be32 decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *statei
 }
 
 static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
-				       struct xdr_stream *xdr,
-				       struct cb_layoutrecallargs *args)
+				       struct xdr_stream *xdr, void *argp)
 {
+	struct cb_layoutrecallargs *args = argp;
 	__be32 *p;
 	__be32 status = 0;
 	uint32_t iomode;
@@ -262,8 +261,9 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 static
 __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct cb_devicenotifyargs *args)
+				void *argp)
 {
+	struct cb_devicenotifyargs *args = argp;
 	__be32 *p;
 	__be32 status = 0;
 	u32 tmp;
@@ -403,8 +403,9 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
 
 static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct cb_sequenceargs *args)
+					void *argp)
 {
+	struct cb_sequenceargs *args = argp;
 	__be32 *p;
 	int i;
 	__be32 status;
@@ -450,8 +451,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 
 static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr,
-				      struct cb_recallanyargs *args)
+				      void *argp)
 {
+	struct cb_recallanyargs *args = argp;
 	uint32_t bitmap[2];
 	__be32 *p, status;
 
@@ -469,8 +471,9 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 
 static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct cb_recallslotargs *args)
+					void *argp)
 {
+	struct cb_recallslotargs *args = argp;
 	__be32 *p;
 
 	p = read_buf(xdr, 4);
@@ -510,8 +513,10 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg
 	return 0;
 }
 
-static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_notify_lock_args *args)
+static __be32 decode_notify_lock_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_notify_lock_args *args = argp;
 	__be32 status;
 
 	status = decode_fh(xdr, &args->cbnl_fh);
@@ -641,8 +646,10 @@ static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res)
 	return 0;
 }
 
-static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *resp)
 {
+	const struct cb_getattrres *res = resp;
 	__be32 *savep = NULL;
 	__be32 status = res->status;
 	
@@ -683,8 +690,9 @@ static __be32 encode_sessionid(struct xdr_stream *xdr,
 
 static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       const struct cb_sequenceres *res)
+				       const void *resp)
 {
+	const struct cb_sequenceres *res = resp;
 	__be32 *p;
 	__be32 status = res->csr_status;
 
@@ -938,48 +946,46 @@ static struct callback_op callback_ops[] = {
 		.res_maxsize = CB_OP_HDR_RES_MAXSZ,
 	},
 	[OP_CB_GETATTR] = {
-		.process_op = (callback_process_op_t)nfs4_callback_getattr,
-		.decode_args = (callback_decode_arg_t)decode_getattr_args,
-		.encode_res = (callback_encode_res_t)encode_getattr_res,
+		.process_op = nfs4_callback_getattr,
+		.decode_args = decode_getattr_args,
+		.encode_res = encode_getattr_res,
 		.res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
 	},
 	[OP_CB_RECALL] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recall,
-		.decode_args = (callback_decode_arg_t)decode_recall_args,
+		.process_op = nfs4_callback_recall,
+		.decode_args = decode_recall_args,
 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
 	},
 #if defined(CONFIG_NFS_V4_1)
 	[OP_CB_LAYOUTRECALL] = {
-		.process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
-		.decode_args =
-			(callback_decode_arg_t)decode_layoutrecall_args,
+		.process_op = nfs4_callback_layoutrecall,
+		.decode_args = decode_layoutrecall_args,
 		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
 	},
 	[OP_CB_NOTIFY_DEVICEID] = {
-		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
-		.decode_args =
-			(callback_decode_arg_t)decode_devicenotify_args,
+		.process_op = nfs4_callback_devicenotify,
+		.decode_args = decode_devicenotify_args,
 		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
 	},
 	[OP_CB_SEQUENCE] = {
-		.process_op = (callback_process_op_t)nfs4_callback_sequence,
-		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
-		.encode_res = (callback_encode_res_t)encode_cb_sequence_res,
+		.process_op = nfs4_callback_sequence,
+		.decode_args = decode_cb_sequence_args,
+		.encode_res = encode_cb_sequence_res,
 		.res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ,
 	},
 	[OP_CB_RECALL_ANY] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recallany,
-		.decode_args = (callback_decode_arg_t)decode_recallany_args,
+		.process_op = nfs4_callback_recallany,
+		.decode_args = decode_recallany_args,
 		.res_maxsize = CB_OP_RECALLANY_RES_MAXSZ,
 	},
 	[OP_CB_RECALL_SLOT] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recallslot,
-		.decode_args = (callback_decode_arg_t)decode_recallslot_args,
+		.process_op = nfs4_callback_recallslot,
+		.decode_args = decode_recallslot_args,
 		.res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ,
 	},
 	[OP_CB_NOTIFY_LOCK] = {
-		.process_op = (callback_process_op_t)nfs4_callback_notify_lock,
-		.decode_args = (callback_decode_arg_t)decode_notify_lock_args,
+		.process_op = nfs4_callback_notify_lock,
+		.decode_args = decode_notify_lock_args,
 		.res_maxsize = CB_OP_NOTIFY_LOCK_RES_MAXSZ,
 	},
 #endif /* CONFIG_NFS_V4_1 */
-- 
GitLab


From cdfa31e93fd228e197cc5bb31f0156117cea2156 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:32:18 +0200
Subject: [PATCH 0037/1958] lockd: fix some weird indentation

Remove double indentation of a few struct rpc_version and
struct rpc_program instance.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clntxdr.c | 22 +++++++++++-----------
 fs/lockd/mon.c     | 16 ++++++++--------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 17e7f08b3a22f..bd8a976785aea 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -601,15 +601,15 @@ static struct rpc_procinfo	nlm_procedures[] = {
 };
 
 static const struct rpc_version	nlm_version1 = {
-		.number		= 1,
-		.nrprocs	= ARRAY_SIZE(nlm_procedures),
-		.procs		= nlm_procedures,
+	.number		= 1,
+	.nrprocs	= ARRAY_SIZE(nlm_procedures),
+	.procs		= nlm_procedures,
 };
 
 static const struct rpc_version	nlm_version3 = {
-		.number		= 3,
-		.nrprocs	= ARRAY_SIZE(nlm_procedures),
-		.procs		= nlm_procedures,
+	.number		= 3,
+	.nrprocs	= ARRAY_SIZE(nlm_procedures),
+	.procs		= nlm_procedures,
 };
 
 static const struct rpc_version	*nlm_versions[] = {
@@ -623,9 +623,9 @@ static const struct rpc_version	*nlm_versions[] = {
 static struct rpc_stat		nlm_rpc_stats;
 
 const struct rpc_program	nlm_program = {
-		.name		= "lockd",
-		.number		= NLM_PROGRAM,
-		.nrvers		= ARRAY_SIZE(nlm_versions),
-		.version	= nlm_versions,
-		.stats		= &nlm_rpc_stats,
+	.name		= "lockd",
+	.number		= NLM_PROGRAM,
+	.nrvers		= ARRAY_SIZE(nlm_versions),
+	.version	= nlm_versions,
+	.stats		= &nlm_rpc_stats,
 };
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 80630f0347e18..62424e929a7fd 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -553,9 +553,9 @@ static struct rpc_procinfo	nsm_procedures[] = {
 };
 
 static const struct rpc_version nsm_version1 = {
-		.number		= 1,
-		.nrprocs	= ARRAY_SIZE(nsm_procedures),
-		.procs		= nsm_procedures
+	.number		= 1,
+	.nrprocs	= ARRAY_SIZE(nsm_procedures),
+	.procs		= nsm_procedures
 };
 
 static const struct rpc_version *nsm_version[] = {
@@ -565,9 +565,9 @@ static const struct rpc_version *nsm_version[] = {
 static struct rpc_stat		nsm_stats;
 
 static const struct rpc_program nsm_program = {
-		.name		= "statd",
-		.number		= NSM_PROGRAM,
-		.nrvers		= ARRAY_SIZE(nsm_version),
-		.version	= nsm_version,
-		.stats		= &nsm_stats
+	.name		= "statd",
+	.number		= NSM_PROGRAM,
+	.nrvers		= ARRAY_SIZE(nsm_version),
+	.version	= nsm_version,
+	.stats		= &nsm_stats
 };
-- 
GitLab


From 1c5876ddbdb401f814ef717394826e7dfb6704d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:27:10 +0200
Subject: [PATCH 0038/1958] sunrpc: move p_count out of struct rpc_procinfo

p_count is the only writeable memeber of struct rpc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct rpc_procinfo, and into a
separate writable array that is pointed to by struct rpc_version and
indexed by p_statidx.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/clnt4xdr.c                  |  2 ++
 fs/lockd/clntxdr.c                   |  4 ++++
 fs/lockd/mon.c                       |  4 +++-
 fs/nfs/mount_clnt.c                  |  5 ++++-
 fs/nfs/nfs2xdr.c                     |  4 +++-
 fs/nfs/nfs3xdr.c                     |  6 +++++-
 fs/nfs/nfs4xdr.c                     |  4 +++-
 fs/nfsd/nfs4callback.c               |  4 +++-
 include/linux/sunrpc/clnt.h          |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  3 ++-
 net/sunrpc/clnt.c                    |  6 ++++--
 net/sunrpc/rpcb_clnt.c               | 12 +++++++++---
 net/sunrpc/stats.c                   |  3 +--
 13 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f0ab7a99dd236..7c255d1d7c644 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -602,8 +602,10 @@ static struct rpc_procinfo	nlm4_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version4_counts[ARRAY_SIZE(nlm4_procedures)];
 const struct rpc_version nlm_version4 = {
 	.number		= 4,
 	.nrprocs	= ARRAY_SIZE(nlm4_procedures),
 	.procs		= nlm4_procedures,
+	.counts		= nlm_version4_counts,
 };
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index bd8a976785aea..39500c5743a56 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -600,16 +600,20 @@ static struct rpc_procinfo	nlm_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version1_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version1_counts,
 };
 
+static unsigned int nlm_version3_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version3_counts,
 };
 
 static const struct rpc_version	*nlm_versions[] = {
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 62424e929a7fd..fe4ec82764fe1 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -552,10 +552,12 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 };
 
+static unsigned int nsm_version1_counts[ARRAY_SIZE(nsm_procedures)];
 static const struct rpc_version nsm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nsm_procedures),
-	.procs		= nsm_procedures
+	.procs		= nsm_procedures,
+	.counts		= nsm_version1_counts,
 };
 
 static const struct rpc_version *nsm_version[] = {
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 806657d650745..d25914aa8bf91 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -504,17 +504,20 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 };
 
-
+static unsigned int mnt_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(mnt_procedures),
 	.procs		= mnt_procedures,
+	.counts		= mnt_counts,
 };
 
+static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
 	.procs		= mnt3_procedures,
+	.counts		= mnt3_counts,
 };
 
 static const struct rpc_version *mnt_version[] = {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a299648ea3215..16b4526299c11 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1170,8 +1170,10 @@ struct rpc_procinfo	nfs_procedures[] = {
 	PROC(STATFS,	fhandle,	statfsres,	0),
 };
 
+static unsigned int nfs_version2_counts[ARRAY_SIZE(nfs_procedures)];
 const struct rpc_version nfs_version2 = {
 	.number			= 2,
 	.nrprocs		= ARRAY_SIZE(nfs_procedures),
-	.procs			= nfs_procedures
+	.procs			= nfs_procedures,
+	.counts			= nfs_version2_counts,
 };
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index cc272eb8be3e0..a017ec5c7a9d6 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2578,10 +2578,12 @@ struct rpc_procinfo	nfs3_procedures[] = {
 	PROC(COMMIT,		commit,		commit,		5),
 };
 
+static unsigned int nfs_version3_counts[ARRAY_SIZE(nfs3_procedures)];
 const struct rpc_version nfs_version3 = {
 	.number			= 3,
 	.nrprocs		= ARRAY_SIZE(nfs3_procedures),
-	.procs			= nfs3_procedures
+	.procs			= nfs3_procedures,
+	.counts			= nfs_version3_counts,
 };
 
 #ifdef CONFIG_NFS_V3_ACL
@@ -2606,10 +2608,12 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 };
 
+static unsigned int nfs3_acl_counts[ARRAY_SIZE(nfs3_acl_procedures)];
 const struct rpc_version nfsacl_version3 = {
 	.number			= 3,
 	.nrprocs		= sizeof(nfs3_acl_procedures)/
 				  sizeof(nfs3_acl_procedures[0]),
 	.procs			= nfs3_acl_procedures,
+	.counts			= nfs3_acl_counts,
 };
 #endif  /* CONFIG_NFS_V3_ACL */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 797f3ce752861..40cf5529e65ff 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7661,10 +7661,12 @@ struct rpc_procinfo	nfs4_procedures[] = {
 #endif /* CONFIG_NFS_V4_2 */
 };
 
+static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
 const struct rpc_version nfs_version4 = {
 	.number			= 4,
 	.nrprocs		= ARRAY_SIZE(nfs4_procedures),
-	.procs			= nfs4_procedures
+	.procs			= nfs4_procedures,
+	.counts			= nfs_version4_counts,
 };
 
 /*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a2bedbd05b2bf..afa961fe073c3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -705,6 +705,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 };
 
+static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
 static struct rpc_version nfs_cb_version4 = {
 /*
  * Note on the callback rpc program version number: despite language in rfc
@@ -715,7 +716,8 @@ static struct rpc_version nfs_cb_version4 = {
  */
 	.number			= 1,
 	.nrprocs		= ARRAY_SIZE(nfs4_cb_procedures),
-	.procs			= nfs4_cb_procedures
+	.procs			= nfs4_cb_procedures,
+	.counts			= nfs4_cb_counts,
 };
 
 static const struct rpc_version *nfs_cb_version[] = {
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6095ecba0ddee..c75ba37151fee 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -88,6 +88,7 @@ struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
 	struct rpc_procinfo *	procs;		/* procedure array */
+	unsigned int		*counts;	/* call counts */
 };
 
 /*
@@ -99,7 +100,6 @@ struct rpc_procinfo {
 	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
-	unsigned int		p_count;	/* call count */
 	unsigned int		p_timer;	/* Which RTT timer to use */
 	u32			p_statidx;	/* Which procedure to account */
 	const char *		p_name;		/* name of procedure */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index a80b8e6074781..f8729b6476052 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -364,11 +364,12 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data)
 /*
  * Initialization stuff
  */
-
+static unsigned int gssp_version1_counts[ARRAY_SIZE(gssp_procedures)];
 static const struct rpc_version gssp_version1 = {
 	.number		= GSSPROXY_VERS_1,
 	.nrprocs	= ARRAY_SIZE(gssp_procedures),
 	.procs		= gssp_procedures,
+	.counts		= gssp_version1_counts,
 };
 
 static const struct rpc_version *gssp_version[] = {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 964d5c4a1b609..f2d1f971247b1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1517,14 +1517,16 @@ static void
 call_start(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	int idx = task->tk_msg.rpc_proc->p_statidx;
 
 	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
 			clnt->cl_program->name, clnt->cl_vers,
 			rpc_proc_name(task),
 			(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
-	/* Increment call count */
-	task->tk_msg.rpc_proc->p_count++;
+	/* Increment call count (version might not be valid for ping) */
+	if (clnt->cl_program->version[clnt->cl_vers])
+		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index f67b9e2897b49..9d47b9d3bbeed 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -1117,22 +1117,28 @@ static const struct rpcb_info rpcb_next_version6[] = {
 	},
 };
 
+static unsigned int rpcb_version2_counts[ARRAY_SIZE(rpcb_procedures2)];
 static const struct rpc_version rpcb_version2 = {
 	.number		= RPCBVERS_2,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures2),
-	.procs		= rpcb_procedures2
+	.procs		= rpcb_procedures2,
+	.counts		= rpcb_version2_counts,
 };
 
+static unsigned int rpcb_version3_counts[ARRAY_SIZE(rpcb_procedures3)];
 static const struct rpc_version rpcb_version3 = {
 	.number		= RPCBVERS_3,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures3),
-	.procs		= rpcb_procedures3
+	.procs		= rpcb_procedures3,
+	.counts		= rpcb_version3_counts,
 };
 
+static unsigned int rpcb_version4_counts[ARRAY_SIZE(rpcb_procedures4)];
 static const struct rpc_version rpcb_version4 = {
 	.number		= RPCBVERS_4,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures4),
-	.procs		= rpcb_procedures4
+	.procs		= rpcb_procedures4,
+	.counts		= rpcb_version4_counts,
 };
 
 static const struct rpc_version *rpcb_version[] = {
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index caeb01ad2b5ae..91c84d18bf9a5 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -55,8 +55,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
 		seq_printf(seq, "proc%u %u",
 					vers->number, vers->nrprocs);
 		for (j = 0; j < vers->nrprocs; j++)
-			seq_printf(seq, " %u",
-					vers->procs[j].p_count);
+			seq_printf(seq, " %u", vers->counts[j]);
 		seq_putc(seq, '\n');
 	}
 	return 0;
-- 
GitLab


From f700c72dd2f1f886f56788436b540aab95903c3f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:51:24 +0200
Subject: [PATCH 0039/1958] nfs: use ARRAY_SIZE() in the nfsacl_version3
 declaration

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfs/nfs3xdr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a017ec5c7a9d6..85ff1187e637f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2611,8 +2611,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 static unsigned int nfs3_acl_counts[ARRAY_SIZE(nfs3_acl_procedures)];
 const struct rpc_version nfsacl_version3 = {
 	.number			= 3,
-	.nrprocs		= sizeof(nfs3_acl_procedures)/
-				  sizeof(nfs3_acl_procedures[0]),
+	.nrprocs		= ARRAY_SIZE(nfs3_acl_procedures),
 	.procs			= nfs3_acl_procedures,
 	.counts			= nfs3_acl_counts,
 };
-- 
GitLab


From 499b4988109e91b76f231fb1b4f1e53ec3260686 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:36:49 +0200
Subject: [PATCH 0040/1958] sunrpc: mark all struct rpc_procinfo instances as
 const

struct rpc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c                  |  2 +-
 fs/lockd/clntxdr.c                   |  2 +-
 fs/lockd/mon.c                       |  2 +-
 fs/nfs/internal.h                    |  6 +++---
 fs/nfs/mount_clnt.c                  |  4 ++--
 fs/nfs/nfs2xdr.c                     |  2 +-
 fs/nfs/nfs3xdr.c                     |  4 ++--
 fs/nfs/nfs4_fs.h                     |  2 +-
 fs/nfs/nfs4xdr.c                     |  2 +-
 fs/nfsd/nfs4callback.c               |  2 +-
 include/linux/sunrpc/clnt.h          |  4 ++--
 include/linux/sunrpc/sched.h         |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  2 +-
 net/sunrpc/clnt.c                    |  4 ++--
 net/sunrpc/rpcb_clnt.c               | 19 ++++++++++---------
 net/sunrpc/stats.c                   |  2 +-
 16 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 7c255d1d7c644..c349fc0f9b804 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -584,7 +584,7 @@ static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm4_procedures[] = {
+static const struct rpc_procinfo nlm4_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 39500c5743a56..3b4724a6c4eeb 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -582,7 +582,7 @@ static int nlm_xdr_dec_res(struct rpc_rqst *req,
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm_procedures[] = {
+static const struct rpc_procinfo nlm_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index fe4ec82764fe1..9d8166c39c549 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -531,7 +531,7 @@ static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 #define SM_monres_sz	2
 #define SM_unmonres_sz	1
 
-static struct rpc_procinfo	nsm_procedures[] = {
+static const struct rpc_procinfo nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= nsm_xdr_enc_mon,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e9b4c3320e371..c212549243890 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -270,12 +270,12 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 }
 
 /* nfs2xdr.c */
-extern struct rpc_procinfo nfs_procedures[];
+extern const struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
 /* nfs3xdr.c */
-extern struct rpc_procinfo nfs3_procedures[];
+extern const struct rpc_procinfo nfs3_procedures[];
 extern int nfs3_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
@@ -292,7 +292,7 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
 
 /* nfs4proc.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 #endif
 
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d25914aa8bf91..3efe946672beb 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -466,7 +466,7 @@ static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 	return decode_auth_flavors(xdr, res);
 }
 
-static struct rpc_procinfo mnt_procedures[] = {
+static const struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
@@ -485,7 +485,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 };
 
-static struct rpc_procinfo mnt3_procedures[] = {
+static const struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 16b4526299c11..c8a7e98c1371a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1152,7 +1152,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 	.p_statidx  =  NFSPROC_##proc,					\
 	.p_name     =  #proc,						\
 	}
-struct rpc_procinfo	nfs_procedures[] = {
+const struct rpc_procinfo nfs_procedures[] = {
 	PROC(GETATTR,	fhandle,	attrstat,	1),
 	PROC(SETATTR,	sattrargs,	attrstat,	0),
 	PROC(LOOKUP,	diropargs,	diropres,	2),
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 85ff1187e637f..670eddb3ae369 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2554,7 +2554,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 	.p_name      = #proc,						\
 	}
 
-struct rpc_procinfo	nfs3_procedures[] = {
+const struct rpc_procinfo nfs3_procedures[] = {
 	PROC(GETATTR,		getattr,	getattr,	1),
 	PROC(SETATTR,		setattr,	setattr,	0),
 	PROC(LOOKUP,		lookup,		lookup,		2),
@@ -2587,7 +2587,7 @@ const struct rpc_version nfs_version3 = {
 };
 
 #ifdef CONFIG_NFS_V3_ACL
-static struct rpc_procinfo	nfs3_acl_procedures[] = {
+static const struct rpc_procinfo nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = nfs3_xdr_enc_getacl3args,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index af285cc27ccf8..9b0cf3872722b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -493,7 +493,7 @@ static inline void nfs4_unregister_sysctl(void)
 #endif
 
 /* nfs4xdr.c */
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 40cf5529e65ff..0f1f290c97cd0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7594,7 +7594,7 @@ nfs4_stat_to_errno(int stat)
 	.p_name = #proc,	\
 }
 
-struct rpc_procinfo	nfs4_procedures[] = {
+const struct rpc_procinfo nfs4_procedures[] = {
 	PROC(READ,		enc_read,		dec_read),
 	PROC(WRITE,		enc_write,		dec_write),
 	PROC(COMMIT,		enc_commit,		dec_commit),
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index afa961fe073c3..ac10f78c0fb36 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -696,7 +696,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 	.p_name    = #proc,						\
 }
 
-static struct rpc_procinfo nfs4_cb_procedures[] = {
+static const struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
 	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
 #ifdef CONFIG_NFSD_PNFS
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c75ba37151fee..55ef67bea06b9 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -39,7 +39,7 @@ struct rpc_clnt {
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
-	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
+	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
 				cl_vers,	/* RPC version number */
 				cl_maxproc;	/* max procedure number */
@@ -87,7 +87,7 @@ struct rpc_program {
 struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
-	struct rpc_procinfo *	procs;		/* procedure array */
+	const struct rpc_procinfo *procs;	/* procedure array */
 	unsigned int		*counts;	/* call counts */
 };
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c797ec4..ed60253abd0a3 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -22,7 +22,7 @@
  */
 struct rpc_procinfo;
 struct rpc_message {
-	struct rpc_procinfo *	rpc_proc;	/* Procedure information */
+	const struct rpc_procinfo *rpc_proc;	/* Procedure information */
 	void *			rpc_argp;	/* Arguments */
 	void *			rpc_resp;	/* Result */
 	struct rpc_cred *	rpc_cred;	/* Credentials */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index f8729b6476052..46b295e4f2b82 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -63,7 +63,7 @@ enum {
 	.p_name   = #proc,				\
 }
 
-static struct rpc_procinfo gssp_procedures[] = {
+static const struct rpc_procinfo gssp_procedures[] = {
 	PROC(INDICATE_MECHS, indicate_mechs),
         PROC(GET_CALL_CONTEXT, get_call_context),
         PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f2d1f971247b1..2e49d1f892b79 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1674,7 +1674,7 @@ call_allocate(struct rpc_task *task)
 	unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
-	struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
 	int status;
 
 	dprint_status(task);
@@ -2489,7 +2489,7 @@ static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	return 0;
 }
 
-static struct rpc_procinfo rpcproc_null = {
+static const struct rpc_procinfo rpcproc_null = {
 	.p_encode = rpcproc_encode_null,
 	.p_decode = rpcproc_decode_null,
 };
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 9d47b9d3bbeed..ea0676f199c85 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -128,13 +128,13 @@ struct rpcbind_args {
 	int			r_status;
 };
 
-static struct rpc_procinfo rpcb_procedures2[];
-static struct rpc_procinfo rpcb_procedures3[];
-static struct rpc_procinfo rpcb_procedures4[];
+static const struct rpc_procinfo rpcb_procedures2[];
+static const struct rpc_procinfo rpcb_procedures3[];
+static const struct rpc_procinfo rpcb_procedures4[];
 
 struct rpcb_info {
 	u32			rpc_vers;
-	struct rpc_procinfo *	rpc_proc;
+	const struct rpc_procinfo *rpc_proc;
 };
 
 static const struct rpcb_info rpcb_next_version[];
@@ -620,7 +620,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version,
 	return -EAFNOSUPPORT;
 }
 
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt,
+		struct rpcbind_args *map, const struct rpc_procinfo *proc)
 {
 	struct rpc_message msg = {
 		.rpc_proc = proc,
@@ -671,7 +672,7 @@ static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt;
-	struct rpc_procinfo *proc;
+	const struct rpc_procinfo *proc;
 	u32 bind_version;
 	struct rpc_xprt *xprt;
 	struct rpc_clnt	*rpcb_clnt;
@@ -994,7 +995,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * since the Linux kernel RPC code requires only these.
  */
 
-static struct rpc_procinfo rpcb_procedures2[] = {
+static const struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_mapping,
@@ -1027,7 +1028,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures3[] = {
+static const struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
@@ -1060,7 +1061,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures4[] = {
+static const struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 91c84d18bf9a5..8b6c35ae1d571 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -191,7 +191,7 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
 EXPORT_SYMBOL_GPL(rpc_count_iostats);
 
 static void _print_name(struct seq_file *seq, unsigned int op,
-			struct rpc_procinfo *procs)
+			const struct rpc_procinfo *procs)
 {
 	if (procs[op].p_name)
 		seq_printf(seq, "\t%12s: ", procs[op].p_name);
-- 
GitLab


From 02be49f6b7afe3c4acb1bfdc76c0abe3f3a40f90 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:58:06 +0200
Subject: [PATCH 0041/1958] nfsd4: const-ify nfs_cb_version4

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4callback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ac10f78c0fb36..b45083c0f9ae8 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -706,7 +706,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
 };
 
 static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
-static struct rpc_version nfs_cb_version4 = {
+static const struct rpc_version nfs_cb_version4 = {
 /*
  * Note on the callback rpc program version number: despite language in rfc
  * 5661 section 18.36.3 requiring servers to use 4 in this field, the
-- 
GitLab


From f7235b6bc56168302e1de8101a04b3d3061a0c82 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 17:59:13 +0200
Subject: [PATCH 0042/1958] nfsd: use named initializers in PROC()

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs2acl.c  | 22 +++++++++++-----------
 fs/nfsd/nfs3acl.c  | 22 +++++++++++-----------
 fs/nfsd/nfs4proc.c | 10 +++++-----
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 838f90f3f890a..12933d07204c4 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -345,17 +345,17 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
 #define nfsd3_voidres		nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsacld_proc_##name,		\
-   (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
-   (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
-   (kxdrproc_t) nfsaclsvc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
+#define PROC(name, argt, rest, relt, cache, respsize)			\
+{									\
+	.pc_func	= (svc_procfunc) nfsacld_proc_##name,		\
+	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
+	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
+	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
+	.pc_cachetype	= cache,					\
+	.pc_xdrressize	= respsize,					\
+}
 
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index dcb5f79076c0c..db988a229b3a3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -237,17 +237,17 @@ static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
 #define nfsd3_voidres			nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd3_proc_##name,		\
-   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
-   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-   (kxdrproc_t) nfs3svc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
+#define PROC(name, argt, rest, relt, cache, respsize)			\
+{									\
+	.pc_func	= (svc_procfunc) nfsd3_proc_##name,		\
+	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
+	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
+	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
+	.pc_cachetype	= cache,					\
+	.pc_xdrressize	= respsize,					\
+}
 
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c453a1998e003..695e17605b67c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1673,10 +1673,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
  * COMPOUND call.
  */
 static __be32
-nfsd4_proc_compound(struct svc_rqst *rqstp,
-		    struct nfsd4_compoundargs *args,
-		    struct nfsd4_compoundres *resp)
+nfsd4_proc_compound(struct svc_rqst *rqstp, void *arg, void *res)
 {
+	struct nfsd4_compoundargs *args = arg;
+	struct nfsd4_compoundres *resp = res;
 	struct nfsd4_op	*op;
 	struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
@@ -2518,7 +2518,7 @@ struct nfsd4_voidargs { int dummy; };
 
 static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
-		.pc_func = (svc_procfunc) nfsd4_proc_null,
+		.pc_func = nfsd4_proc_null,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd4_voidargs),
 		.pc_ressize = sizeof(struct nfsd4_voidres),
@@ -2526,7 +2526,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 		.pc_xdrressize = 1,
 	},
 	[NFSPROC4_COMPOUND] = {
-		.pc_func = (svc_procfunc) nfsd4_proc_compound,
+		.pc_func = nfsd4_proc_compound,
 		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
-- 
GitLab


From 9482c9c15c29deb5e49ff475710f94dea0842328 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 18:03:06 +0200
Subject: [PATCH 0043/1958] nfsd: remove the unused PROC() macro in nfs3proc.c

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs3proc.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 045c9081eabeb..24e0351907707 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -647,18 +647,6 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
 #define nfsd3_voidres			nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd3_proc_##name,		\
-   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
-   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-   (kxdrproc_t) nfs3svc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
-
 #define ST 1		/* status*/
 #define FH 17		/* filehandle with length */
 #define AT 21		/* attributes */
-- 
GitLab


From a6beb73272b4c0108e41bc7c7b5a447ae6c92863 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 17:35:49 +0200
Subject: [PATCH 0044/1958] sunrpc: properly type pc_func callbacks

Drop the argp and resp arguments as they can trivially be derived from
the rqstp argument.  With that all functions now have the same prototype,
and we can remove the unsafe casting to svc_procfunc as well as the
svc_procfunc typedef itself.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        | 118 ++++++++++++++++++-----------
 fs/lockd/svcproc.c         | 118 ++++++++++++++++++-----------
 fs/nfs/callback_xdr.c      |   7 +-
 fs/nfsd/nfs2acl.c          |  25 +++---
 fs/nfsd/nfs3acl.c          |  15 ++--
 fs/nfsd/nfs3proc.c         | 151 +++++++++++++++++++++----------------
 fs/nfsd/nfs4proc.c         |   9 ++-
 fs/nfsd/nfsproc.c          | 104 +++++++++++++------------
 fs/nfsd/nfssvc.c           |   2 +-
 include/linux/sunrpc/svc.h |   4 +-
 net/sunrpc/svc.c           |   2 +-
 11 files changed, 328 insertions(+), 227 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 09c576f26c7b7..3e4cba029d3d6 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -62,7 +62,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NULL: Test for presence of service
  */
 static __be32
-nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlm4svc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -72,9 +72,9 @@ nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -99,9 +99,15 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlm4svc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlm4svc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -141,9 +147,15 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlm4svc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -170,13 +182,19 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -203,14 +221,21 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -219,6 +244,12 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -243,9 +274,10 @@ static const struct rpc_call_ops nlm4svc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *,  struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -261,7 +293,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -273,48 +305,44 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, __nlm4svc_proc_test);
 }
 
-static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, __nlm4svc_proc_lock);
 }
 
-static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, __nlm4svc_proc_cancel);
 }
 
-static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlm4svc_proc_unlock);
 }
 
-static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, __nlm4svc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlm4svc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -345,9 +373,10 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -378,22 +407,23 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlm4svc_proc_lock(rqstp, argp, resp);
+	return nlm4svc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlm4svc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -409,9 +439,10 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlm4svc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -429,9 +460,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlm4svc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
         if (!nlmsvc_ops)
                 return rpc_success;
 
@@ -463,7 +495,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlm4svc_proc_##name,	\
+ { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index fb26b9f522e74..3add50661fab6 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -92,7 +92,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NULL: Test for presence of service
  */
 static __be32
-nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlmsvc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -102,9 +102,9 @@ nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -130,9 +130,15 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlmsvc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlmsvc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -172,9 +178,15 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlmsvc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -202,13 +214,19 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -236,14 +254,21 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -252,6 +277,12 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -284,9 +315,10 @@ static const struct rpc_call_ops nlmsvc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *, struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -302,7 +334,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -314,50 +346,46 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, __nlmsvc_proc_test);
 }
 
-static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, __nlmsvc_proc_lock);
 }
 
-static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, __nlmsvc_proc_cancel);
 }
 
 static __be32
-nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlmsvc_proc_unlock);
 }
 
 static __be32
-nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, __nlmsvc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlmsvc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -388,9 +416,10 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -421,22 +450,23 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlmsvc_proc_lock(rqstp, argp, resp);
+	return nlmsvc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlmsvc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -452,9 +482,10 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlmsvc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -472,9 +503,10 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
 	if (!nlmsvc_ops)
 		return rpc_success;
 
@@ -505,7 +537,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlmsvc_proc_##name,	\
+ { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 287c02202b258..5a14bdaa5986d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -53,7 +53,7 @@ struct callback_op {
 
 static struct callback_op callback_ops[];
 
-static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 {
 	return htonl(NFS4_OK);
 }
@@ -880,7 +880,7 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
 /*
  * Decode, process and encode a COMPOUND
  */
-static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 {
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
@@ -916,7 +916,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	while (status == 0 && nops != hdr_arg.nops) {
 		status = process_op(nops, rqstp, &xdr_in,
-				    argp, &xdr_out, resp, &cps);
+				    rqstp->rq_argp, &xdr_out, rqstp->rq_resp,
+				    &cps);
 		nops++;
 	}
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12933d07204c4..4b7f84fa1fa50 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -19,7 +19,7 @@
  * NULL call.
  */
 static __be32
-nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsacld_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -27,9 +27,10 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -87,10 +88,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -141,9 +142,10 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 /*
  * Check file attributes
  */
-static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
-		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -158,9 +160,10 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 /*
  * Check file access
  */
-static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-		struct nfsd3_accessres *resp)
+static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
@@ -347,7 +350,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsacld_proc_##name,		\
+	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index db988a229b3a3..5e42004035e0a 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -18,7 +18,7 @@
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -26,9 +26,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -80,10 +81,10 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd3_attrstat *resp)
+static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -239,7 +240,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsd3_proc_##name,		\
+	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 24e0351907707..4a2bae07cfbff 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -31,7 +31,7 @@ static int	nfs3_ftypes[] = {
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -40,9 +40,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * Get a file's attributes
  */
 static __be32
-nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					   struct nfsd3_attrstat *resp)
+nfsd3_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
@@ -63,9 +64,10 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * Set a file's attributes
  */
 static __be32
-nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
-					   struct nfsd3_attrstat  *resp)
+nfsd3_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd3_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SETATTR(3)  %s\n",
@@ -81,9 +83,10 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
  * Look up a path name component
  */
 static __be32
-nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_diropres  *resp)
+nfsd3_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP(3)   %s %.*s\n",
@@ -105,9 +108,10 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Check file access
  */
 static __be32
-nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-					  struct nfsd3_accessres *resp)
+nfsd3_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: ACCESS(3)   %s 0x%x\n",
@@ -124,9 +128,10 @@ nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
-					   struct nfsd3_readlinkres *resp)
+nfsd3_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
@@ -142,9 +147,10 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
  * Read a portion of a file.
  */
 static __be32
-nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
-				        struct nfsd3_readres  *resp)
+nfsd3_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readargs *argp = rqstp->rq_argp;
+	struct nfsd3_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 	unsigned long cnt = min(argp->count, max_blocksize);
@@ -179,9 +185,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
  * Write data to a file
  */
 static __be32
-nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
-					 struct nfsd3_writeres  *resp)
+nfsd3_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd3_writeargs *argp = rqstp->rq_argp;
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -206,9 +213,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
  * first reports about SunOS compatibility problems start to pour in...
  */
 static __be32
-nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					  struct nfsd3_diropres   *resp)
+nfsd3_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp, *newfhp = NULL;
 	struct iattr	*attr;
 	__be32		nfserr;
@@ -243,9 +251,10 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
  * Make directory. This operation is not idempotent.
  */
 static __be32
-nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					 struct nfsd3_diropres   *resp)
+nfsd3_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
@@ -263,9 +272,10 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 }
 
 static __be32
-nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
-					   struct nfsd3_diropres    *resp)
+nfsd3_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
@@ -284,9 +294,10 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
  * Make socket/fifo/device.
  */
 static __be32
-nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
-					 struct nfsd3_diropres  *resp)
+nfsd3_proc_mknod(struct svc_rqst *rqstp)
 {
+	struct nfsd3_mknodargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int type;
 	dev_t	rdev = 0;
@@ -321,9 +332,10 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
  * Remove file/fifo/socket etc.
  */
 static __be32
-nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_attrstat  *resp)
+nfsd3_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE(3)   %s %.*s\n",
@@ -342,9 +354,10 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					 struct nfsd3_attrstat  *resp)
+nfsd3_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR(3)    %s %.*s\n",
@@ -359,9 +372,10 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 }
 
 static __be32
-nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
-					  struct nfsd3_renameres  *resp)
+nfsd3_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd3_renameargs *argp = rqstp->rq_argp;
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME(3)   %s %.*s ->\n",
@@ -381,9 +395,10 @@ nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 }
 
 static __be32
-nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
-					struct nfsd3_linkres  *resp)
+nfsd3_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd3_linkargs *argp = rqstp->rq_argp;
+	struct nfsd3_linkres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK(3)     %s ->\n",
@@ -404,9 +419,10 @@ nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					   struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32		nfserr;
 	int		count;
 
@@ -440,9 +456,10 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * For now, we choose to ignore the dircount parameter.
  */
 static __be32
-nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					       struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
@@ -507,9 +524,10 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * Get file system stats
  */
 static __be32
-nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsstatres *resp)
+nfsd3_proc_fsstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: FSSTAT(3)   %s\n",
@@ -524,9 +542,10 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get file system info
  */
 static __be32
-nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsinfores *resp)
+nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 
@@ -567,9 +586,10 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get pathconf info for the specified file
  */
 static __be32
-nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
-					     struct nfsd3_pathconfres *resp)
+nfsd3_proc_pathconf(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: PATHCONF(3) %s\n",
@@ -610,9 +630,10 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
  * Commit a file (range) to stable storage.
  */
 static __be32
-nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
-					   struct nfsd3_commitres  *resp)
+nfsd3_proc_commit(struct svc_rqst *rqstp)
 {
+	struct nfsd3_commitargs *argp = rqstp->rq_argp;
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
@@ -655,7 +676,7 @@ struct nfsd3_voidargs { int dummy; };
 
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_null,
+		.pc_func = nfsd3_proc_null,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
@@ -663,7 +684,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST,
 	},
 	[NFS3PROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_getattr,
+		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -673,7 +694,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFS3PROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_setattr,
+		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -683,7 +704,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_lookup,
+		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -693,7 +714,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+FH+pAT+pAT,
 	},
 	[NFS3PROC_ACCESS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_access,
+		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -703,7 +724,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1,
 	},
 	[NFS3PROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readlink,
+		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -713,7 +734,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
 	},
 	[NFS3PROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_read,
+		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -723,7 +744,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
 	},
 	[NFS3PROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_write,
+		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -733,7 +754,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+4,
 	},
 	[NFS3PROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_create,
+		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -743,7 +764,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mkdir,
+		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -753,7 +774,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_symlink,
+		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -763,7 +784,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKNOD] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mknod,
+		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -773,7 +794,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_remove,
+		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -783,7 +804,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rmdir,
+		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -793,7 +814,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rename,
+		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -803,7 +824,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+WC,
 	},
 	[NFS3PROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_link,
+		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -813,7 +834,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+WC,
 	},
 	[NFS3PROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdir,
+		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -822,7 +843,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_READDIRPLUS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
+		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -831,7 +852,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_FSSTAT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsstat,
+		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -840,7 +861,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+2*6+1,
 	},
 	[NFS3PROC_FSINFO] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
+		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -849,7 +870,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+12,
 	},
 	[NFS3PROC_PATHCONF] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_pathconf,
+		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -858,7 +879,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+6,
 	},
 	[NFS3PROC_COMMIT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_commit,
+		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 695e17605b67c..7a0112bc3531a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1510,7 +1510,7 @@ nfsd4_layoutreturn(struct svc_rqst *rqstp,
  * NULL call.
  */
 static __be32
-nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd4_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -1524,6 +1524,7 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
+
 typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
 typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
 
@@ -1673,10 +1674,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
  * COMPOUND call.
  */
 static __be32
-nfsd4_proc_compound(struct svc_rqst *rqstp, void *arg, void *res)
+nfsd4_proc_compound(struct svc_rqst *rqstp)
 {
-	struct nfsd4_compoundargs *args = arg;
-	struct nfsd4_compoundres *resp = res;
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_op	*op;
 	struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 03a7e9da4da02..448505b939db0 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -17,7 +17,7 @@ typedef struct svc_buf	svc_buf;
 
 
 static __be32
-nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -39,9 +39,10 @@ nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					  struct nfsd_attrstat *resp)
+nfsd_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -56,9 +57,10 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
-					  struct nfsd_attrstat  *resp)
+nfsd_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct iattr *iap = &argp->attrs;
 	struct svc_fh *fhp;
 	__be32 nfserr;
@@ -122,9 +124,10 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 struct nfsd_diropres  *resp)
+nfsd_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP   %s %.*s\n",
@@ -142,9 +145,10 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
-					   struct nfsd_readlinkres *resp)
+nfsd_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
@@ -162,9 +166,10 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
-				       struct nfsd_readres  *resp)
+nfsd_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd_readargs *argp = rqstp->rq_argp;
+	struct nfsd_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READ    %s %d bytes at %d\n",
@@ -200,9 +205,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
-					struct nfsd_attrstat  *resp)
+nfsd_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd_writeargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -222,9 +228,10 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
  * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
  */
 static __be32
-nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					 struct nfsd_diropres   *resp)
+nfsd_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp = &argp->fh;
 	svc_fh		*newfhp = &resp->fh;
 	struct iattr	*attr = &argp->attrs;
@@ -377,9 +384,9 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 }
 
 static __be32
-nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 void		       *resp)
+nfsd_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE   %s %.*s\n", SVCFH_fmt(&argp->fh),
@@ -392,9 +399,9 @@ nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 }
 
 static __be32
-nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
-				  	 void		        *resp)
+nfsd_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd_renameargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME   %s %.*s -> \n",
@@ -410,9 +417,9 @@ nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 }
 
 static __be32
-nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
-				void			    *resp)
+nfsd_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd_linkargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK     %s ->\n",
@@ -430,9 +437,9 @@ nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 }
 
 static __be32
-nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
-				          void			  *resp)
+nfsd_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_symlinkargs *argp = rqstp->rq_argp;
 	struct svc_fh	newfh;
 	__be32		nfserr;
 
@@ -460,9 +467,10 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					struct nfsd_diropres   *resp)
+nfsd_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -484,9 +492,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-				 	void		      *resp)
+nfsd_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -500,9 +508,10 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
-					  struct nfsd_readdirres  *resp)
+nfsd_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
 	int		count;
 	__be32		nfserr;
 	loff_t		offset;
@@ -540,9 +549,10 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
  * Get file system info
  */
 static __be32
-nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
-					  struct nfsd_statfsres *resp)
+nfsd_proc_statfs(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: STATFS   %s\n", SVCFH_fmt(&argp->fh));
@@ -565,7 +575,7 @@ struct nfsd_void { int dummy; };
 
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd_proc_null,
+		.pc_func = nfsd_proc_null,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
@@ -574,7 +584,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_getattr,
+		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -584,7 +594,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_setattr,
+		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -602,7 +612,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd_proc_lookup,
+		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -612,7 +622,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readlink,
+		.pc_func = nfsd_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
@@ -621,7 +631,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
 	},
 	[NFSPROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd_proc_read,
+		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -639,7 +649,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_write,
+		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -649,7 +659,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_create,
+		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -659,7 +669,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_remove,
+		.pc_func = nfsd_proc_remove,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -668,7 +678,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rename,
+		.pc_func = nfsd_proc_rename,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
@@ -677,7 +687,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_link,
+		.pc_func = nfsd_proc_link,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
@@ -686,7 +696,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_symlink,
+		.pc_func = nfsd_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
@@ -695,7 +705,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_mkdir,
+		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -705,7 +715,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rmdir,
+		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -714,7 +724,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readdir,
+		.pc_func = nfsd_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
@@ -722,7 +732,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFSPROC_STATFS] = {
-		.pc_func = (svc_procfunc) nfsd_proc_statfs,
+		.pc_func = nfsd_proc_statfs,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 59979f0bbd4bf..d64895fd8d25b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -827,7 +827,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
 	/* Now call the procedure handler, and encode NFS status. */
-	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+	nfserr = proc->pc_func(rqstp);
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
 	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 		dprintk("nfsd: Dropping request; may be revisited later\n");
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 94631026f79c5..5c222af2db416 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -418,9 +418,9 @@ struct svc_version {
 /*
  * RPC procedure info
  */
-typedef __be32	(*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
 struct svc_procedure {
-	svc_procfunc		pc_func;	/* process the request */
+	/* process the request: */
+	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	kxdrproc_t		pc_release;	/* XDR free result */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc0f5a0ecbdce..95335455ad385 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1281,7 +1281,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
 			goto err_garbage;
 
-		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+		*statp = procp->pc_func(rqstp);
 
 		/* Encode reply */
 		if (*statp == rpc_drop_reply ||
-- 
GitLab


From 8537488b5a2f33980e33f654b0a515304de2b267 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 18:48:24 +0200
Subject: [PATCH 0045/1958] sunrpc: properly type pc_release callbacks

Drop the p and resp arguments as they are always NULL or can trivially
be derived from the rqstp argument.  With that all functions now have the
same prototype, and we can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs2acl.c          | 22 +++++++++++-----------
 fs/nfsd/nfs3acl.c          |  8 ++++----
 fs/nfsd/nfs3proc.c         | 36 ++++++++++++++++++------------------
 fs/nfsd/nfs3xdr.c          | 16 ++++++++--------
 fs/nfsd/nfs4xdr.c          |  4 +---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfsxdr.c           |  8 ++++----
 fs/nfsd/xdr.h              |  2 +-
 fs/nfsd/xdr3.h             |  6 ++----
 fs/nfsd/xdr4.h             |  2 +-
 include/linux/sunrpc/svc.h |  3 ++-
 net/sunrpc/svc.c           |  8 ++++----
 12 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4b7f84fa1fa50..302441027f508 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -318,27 +318,27 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
-static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
-               struct nfsd3_accessres *resp)
+static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
 {
-       fh_put(&resp->fh);
-       return 1;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+	fh_put(&resp->fh);
 }
 
 #define nfsaclsvc_decode_voidargs	NULL
@@ -353,7 +353,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
+	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5e42004035e0a..56cdff4e954c5 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -223,13 +223,13 @@ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
 #define nfs3svc_decode_voidargs		NULL
@@ -243,7 +243,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
+	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 4a2bae07cfbff..f0cccc0768ceb 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -687,7 +687,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
 		.pc_cachetype = RC_NOCACHE,
@@ -727,7 +727,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -737,7 +737,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -747,7 +747,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -757,7 +757,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -767,7 +767,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -777,7 +777,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -787,7 +787,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -797,7 +797,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -807,7 +807,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -817,7 +817,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -827,7 +827,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -837,7 +837,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -846,7 +846,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -882,7 +882,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 12feac6ee2fd4..7fd361a752873 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1114,19 +1114,19 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-int
-nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+void
+nfs3svc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-int
-nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fhandle_pair *resp)
+void
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp)
 {
+	struct nfsd3_fhandle_pair *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh1);
 	fh_put(&resp->fh2);
-	return 1;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 26780d53a6f94..5aa847bdfc630 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4543,9 +4543,8 @@ nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
         return xdr_ressize_check(rqstp, p);
 }
 
-int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
+void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 {
-	struct svc_rqst *rqstp = rq;
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
 	if (args->ops != args->iops) {
@@ -4559,7 +4558,6 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
 		args->to_free = tb->next;
 		kfree(tb);
 	}
-	return 1;
 }
 
 int
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 448505b939db0..dc32e0f8480d6 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -587,7 +587,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
@@ -597,7 +597,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -615,7 +615,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -634,7 +634,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -652,7 +652,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -662,7 +662,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -708,7 +708,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6a4947a3f4fa8..de7b07ee489ec 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -550,10 +550,10 @@ nfssvc_encode_entry(void *ccdv, const char *name,
 /*
  * XDR release functions
  */
-int
-nfssvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_fhandle *resp)
+void
+nfssvc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 4f0481d638048..2c21fa843fbf6 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -164,7 +164,7 @@ int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
 
-int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+void nfssvc_release_fhandle(struct svc_rqst *);
 
 /* Helper functions for NFSv2 ACL code */
 __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 335e04aaf7db1..23fe456a223be 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -330,10 +330,8 @@ int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
 int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
 				struct nfsd3_commitres *);
 
-int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
-				struct nfsd3_fhandle_pair *);
+void nfs3svc_release_fhandle(struct svc_rqst *);
+void nfs3svc_release_fhandle2(struct svc_rqst *);
 int nfs3svc_encode_entry(void *, const char *name,
 				int namlen, loff_t offset, u64 ino,
 				unsigned int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 8fda4abdf3b12..a158579d55a2b 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -743,7 +743,7 @@ extern __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *,
 		struct nfsd4_release_lockowner *rlockowner);
-extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp);
+extern void nfsd4_release_compoundargs(struct svc_rqst *rqstp);
 extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
 extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5c222af2db416..1381e13436406 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -423,7 +423,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
-	kxdrproc_t		pc_release;	/* XDR free result */
+	/* XDR free result: */
+	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
 	unsigned int		pc_count;	/* call count */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 95335455ad385..4611cb7adc041 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1287,12 +1287,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (*statp == rpc_drop_reply ||
 		    test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 		if (*statp == rpc_autherr_badcred) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
 		if (*statp == rpc_success &&
@@ -1307,7 +1307,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (!versp->vs_dispatch(rqstp, statp)) {
 			/* Release reply info */
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 	}
@@ -1318,7 +1318,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Release reply info */
 	if (procp->pc_release)
-		procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+		procp->pc_release(rqstp);
 
 	if (procp->pc_encode == NULL)
 		goto dropit;
-- 
GitLab


From 026fec7e7c4723b5f26a753bbcad69f68c8299d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:01:48 +0200
Subject: [PATCH 0046/1958] sunrpc: properly type pc_decode callbacks

Drop the argp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 29 ++++++++++-----
 fs/lockd/xdr4.c            | 29 ++++++++++-----
 fs/nfs/callback_xdr.c      |  4 +--
 fs/nfsd/nfs2acl.c          | 21 ++++++-----
 fs/nfsd/nfs3acl.c          | 11 +++---
 fs/nfsd/nfs3proc.c         | 42 +++++++++++-----------
 fs/nfsd/nfs3xdr.c          | 74 ++++++++++++++++++++++----------------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfs4xdr.c          |  4 ++-
 fs/nfsd/nfsproc.c          | 36 +++++++++----------
 fs/nfsd/nfssvc.c           |  5 ++-
 fs/nfsd/nfsxdr.c           | 53 ++++++++++++++++-----------
 fs/nfsd/xdr.h              | 34 +++++++-----------
 fs/nfsd/xdr3.h             | 47 +++++++++---------------
 fs/nfsd/xdr4.h             |  3 +-
 include/linux/lockd/xdr.h  | 18 +++++-----
 include/linux/lockd/xdr4.h | 18 +++++-----
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  9 +++--
 21 files changed, 237 insertions(+), 209 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 3e4cba029d3d6..804744f7528c0 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -496,7 +496,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
-   .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
+   .pc_decode	= nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3add50661fab6..204a698f7d410 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -538,7 +538,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
-   .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
+   .pc_decode	= nlmsvc_decode_##xargt,		\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5b651daad5183..b57af63fba56b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -182,8 +182,9 @@ nlm_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -207,8 +208,9 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -227,8 +229,9 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -243,8 +246,10 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm_decode_lock(p, &argp->lock)))
 		return 0;
@@ -253,8 +258,9 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -293,8 +299,9 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -305,8 +312,10 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -316,8 +325,10 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 }
 
 int
-nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -325,7 +336,7 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index dfa4789cd4605..46e18598a15c4 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -179,8 +179,9 @@ nlm4_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -204,8 +205,9 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -224,8 +226,9 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -240,8 +243,10 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm4_decode_lock(p, &argp->lock)))
 		return 0;
@@ -250,8 +255,9 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -290,8 +296,9 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -302,8 +309,10 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -313,8 +322,10 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp
 }
 
 int
-nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -322,7 +333,7 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 5a14bdaa5986d..23ecbf7a40c17 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -58,7 +58,7 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 	return htonl(NFS4_OK);
 }
 
-static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
@@ -998,7 +998,7 @@ static struct callback_op callback_ops[] = {
 static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
-		.pc_decode = (kxdrproc_t)nfs4_decode_void,
+		.pc_decode = nfs4_decode_void,
 		.pc_encode = (kxdrproc_t)nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 302441027f508..bcfdaa83ee6c3 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -182,9 +182,10 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 /*
  * XDR decode functions
  */
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *argp)
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -194,9 +195,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *argp)
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -220,18 +221,20 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_fhandle *argp)
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessargs *argp)
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -351,7 +354,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 56cdff4e954c5..4e68d6b5f4095 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -124,9 +124,10 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 /*
  * XDR decode functions
  */
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *args)
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *args = rqstp->rq_argp;
+
 	p = nfs3svc_decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -136,9 +137,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *args)
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -241,7 +242,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+	.pc_decode	= nfs3svc_decode_##argt##args,			\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f0cccc0768ceb..ed83e8a9e7b47 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -685,7 +685,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -695,7 +695,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
+		.pc_decode = nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
@@ -705,7 +705,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -715,7 +715,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
+		.pc_decode = nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
+		.pc_decode = nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
@@ -735,7 +735,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
+		.pc_decode = nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
@@ -745,7 +745,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
+		.pc_decode = nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
@@ -755,7 +755,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
+		.pc_decode = nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
@@ -765,7 +765,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
+		.pc_decode = nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
@@ -775,7 +775,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
+		.pc_decode = nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
@@ -785,7 +785,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
+		.pc_decode = nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
@@ -795,7 +795,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -805,7 +805,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -815,7 +815,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
+		.pc_decode = nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
@@ -825,7 +825,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
+		.pc_decode = nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
@@ -835,7 +835,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
+		.pc_decode = nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
@@ -844,7 +844,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
+		.pc_decode = nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
@@ -853,7 +853,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
@@ -862,7 +862,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
@@ -871,7 +871,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
@@ -880,7 +880,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
+		.pc_decode = nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7fd361a752873..be8bf8af9917a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -273,8 +273,10 @@ void fill_post_wcc(struct svc_fh *fhp)
  * XDR decode functions
  */
 int
-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -282,9 +284,10 @@ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *a
 }
 
 int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_sattrargs *args)
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -300,9 +303,10 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropargs *args)
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -311,9 +315,10 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessargs *args)
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -323,9 +328,9 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readargs *args)
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
@@ -356,9 +361,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeargs *args)
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	unsigned int len, v, hdr, dlen;
 	u32 max_blocksize = svc_max_payload(rqstp);
 	struct kvec *head = rqstp->rq_arg.head;
@@ -416,9 +421,10 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -438,10 +444,12 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 
 	return xdr_argsize_check(rqstp, p);
 }
+
 int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh)) ||
 	    !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -451,9 +459,9 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_symlinkargs *args)
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
 	unsigned int len, avail;
 	char *old, *new;
 	struct kvec *vec;
@@ -503,9 +511,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_mknodargs *args)
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_mknodargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -525,9 +534,10 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameargs *args)
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -538,9 +548,10 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkargs *args)
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -552,9 +563,10 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkargs *args)
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -564,9 +576,9 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -585,9 +597,9 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
@@ -613,9 +625,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitargs *args)
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7a0112bc3531a..6ff434b77a9e1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2528,7 +2528,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
-		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
+		.pc_decode = nfs4svc_decode_compoundargs,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5aa847bdfc630..3a7e117bd11eb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4561,8 +4561,10 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 }
 
 int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
 	if (rqstp->rq_arg.head[0].iov_len % 4) {
 		/* client is nuts */
 		dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index dc32e0f8480d6..d351d0ef6d348 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -576,7 +576,7 @@ struct nfsd_void { int dummy; };
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -585,7 +585,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
@@ -595,7 +595,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
+		.pc_decode = nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
@@ -604,7 +604,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_ROOT] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -613,7 +613,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -623,7 +623,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
+		.pc_decode = nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
@@ -632,7 +632,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
+		.pc_decode = nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
@@ -641,7 +641,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
 	},
 	[NFSPROC_WRITECACHE] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -650,7 +650,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
+		.pc_decode = nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
@@ -660,7 +660,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -670,7 +670,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -679,7 +679,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
+		.pc_decode = nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -688,7 +688,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
+		.pc_decode = nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
+		.pc_decode = nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
+		.pc_decode = nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
@@ -733,7 +733,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d64895fd8d25b..3e00499d7ad7e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -801,9 +801,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
 	/* Decode arguments */
-	xdr = proc->pc_decode;
-	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
-			rqstp->rq_argp)) {
+	if (proc->pc_decode &&
+	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
 		dprintk("nfsd: failed to decode arguments!\n");
 		*statp = rpc_garbage_args;
 		return 1;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index de7b07ee489ec..0957ceebe1aa4 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -206,14 +206,16 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
  * XDR decode functions
  */
 int
-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -221,9 +223,10 @@ nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *ar
 }
 
 int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_sattrargs *args)
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -233,9 +236,10 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropargs *args)
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -244,9 +248,9 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readargs *args)
+nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	p = decode_fh(p, &args->fh);
@@ -279,9 +283,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_writeargs *args)
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_writeargs *args = rqstp->rq_argp;
 	unsigned int len, hdr, dlen;
 	struct kvec *head = rqstp->rq_arg.head;
 	int v;
@@ -335,9 +339,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_createargs *args)
+nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_createargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->fh))
 	    || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -347,9 +352,10 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_renameargs *args)
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -360,8 +366,10 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -373,9 +381,10 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 }
 
 int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_linkargs *args)
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -385,9 +394,10 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_symlinkargs *args)
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_symlinkargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->ffh))
 	    || !(p = decode_filename(p, &args->fname, &args->flen))
 	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
@@ -398,9 +408,10 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirargs *args)
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 2c21fa843fbf6..8eeb752cf6f8a 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -131,28 +131,18 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd_sattrargs *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd_diropargs *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readargs *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd_writeargs *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd_createargs *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd_renameargs *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readlinkargs *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_linkargs *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_symlinkargs *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readdirargs *);
+int nfssvc_decode_void(struct svc_rqst *, __be32 *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
 int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
 int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 23fe456a223be..f79be4c42e4a5 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -269,37 +269,22 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_sattrargs *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropargs *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessargs *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readargs *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_writeargs *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_mknodargs *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameargs *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkargs *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkargs *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_symlinkargs *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitargs *);
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
 int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
 int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
 				struct nfsd3_attrstat *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index a158579d55a2b..2a53c12338842 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -683,8 +683,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
 int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundargs *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
 int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
 		struct nfsd4_compoundres *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d39ed1cc5fbf4..0416600844ceb 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -95,19 +95,19 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index e58c88b52ce13..951bbe31fdb83 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -23,19 +23,19 @@
 
 
-int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 1381e13436406..047f04411dd4c 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -421,7 +421,8 @@ struct svc_version {
 struct svc_procedure {
 	/* process the request: */
 	__be32			(*pc_func)(struct svc_rqst *);
-	kxdrproc_t		pc_decode;	/* XDR decode args */
+	/* XDR decode args: */
+	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4611cb7adc041..18024c1b9b7b3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1276,9 +1276,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Call the function that processes the request. */
 	if (!versp->vs_dispatch) {
-		/* Decode arguments */
-		xdr = procp->pc_decode;
-		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
+		/*
+		 * Decode arguments
+		 * XXX: why do we ignore the return value?
+		 */
+		if (procp->pc_decode &&
+		    !procp->pc_decode(rqstp, argv->iov_base))
 			goto err_garbage;
 
 		*statp = procp->pc_func(rqstp);
-- 
GitLab


From 63f8de37951a64cc24479eafd33085537e088075 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:42:02 +0200
Subject: [PATCH 0047/1958] sunrpc: properly type pc_encode callbacks

Drop the resp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 14 +++++---
 fs/lockd/xdr4.c            | 14 +++++---
 fs/nfs/callback_xdr.c      |  6 ++--
 fs/nfsd/nfs2acl.c          | 18 +++++-----
 fs/nfsd/nfs3acl.c          | 11 +++---
 fs/nfsd/nfs3proc.c         | 44 +++++++++++------------
 fs/nfsd/nfs3xdr.c          | 74 ++++++++++++++++++++++----------------
 fs/nfsd/nfs4proc.c         |  4 +--
 fs/nfsd/nfs4xdr.c          |  5 +--
 fs/nfsd/nfsproc.c          | 36 +++++++++----------
 fs/nfsd/nfssvc.c           |  5 +--
 fs/nfsd/nfsxdr.c           | 31 +++++++++-------
 fs/nfsd/xdr.h              | 14 ++++----
 fs/nfsd/xdr3.h             | 45 +++++++++--------------
 fs/nfsd/xdr4.h             |  5 ++-
 include/linux/lockd/xdr.h  |  8 ++---
 include/linux/lockd/xdr4.h |  8 ++---
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  6 ++--
 21 files changed, 185 insertions(+), 170 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 804744f7528c0..fed0161557917 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -497,7 +497,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= nlm4svc_decode_##xargt,	\
-   .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
+   .pc_encode	= nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 204a698f7d410..14648b051eba0 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -539,7 +539,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= nlmsvc_decode_##xargt,		\
-   .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
+   .pc_encode	= nlmsvc_encode_##xrest,		\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b57af63fba56b..442bbd0b0b293 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -200,8 +200,10 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -280,8 +282,10 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -290,8 +294,10 @@ nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -342,7 +348,7 @@ nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 46e18598a15c4..2a0cd5679c49d 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -197,8 +197,10 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -277,8 +279,10 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -287,8 +291,10 @@ nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -339,7 +345,7 @@ nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23ecbf7a40c17..acf75dc63e14e 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -63,7 +63,7 @@ static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
@@ -999,12 +999,12 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
 	[CB_COMPOUND] = {
 		.pc_func = nfs4_callback_compound,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_argsize = 256,
 		.pc_ressize = 256,
 		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index bcfdaa83ee6c3..fc6b179c8fff7 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -251,15 +251,15 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
  * There must be an encoding function for void results so svc_process
  * will work properly.
  */
-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode;
 	struct kvec *head = rqstp->rq_res.head;
@@ -302,17 +302,19 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessres *resp)
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->access);
 	return xdr_ressize_check(rqstp, p);
@@ -355,7 +357,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
-	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+	.pc_encode	= nfsaclsvc_encode_##rest##res,			\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 4e68d6b5f4095..9437b758cbfdb 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -168,9 +168,9 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
  */
 
 /* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
@@ -213,9 +213,10 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 /* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_attrstat *resp)
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
 
 	return xdr_ressize_check(rqstp, p);
@@ -243,7 +244,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= nfs3svc_decode_##argt##args,			\
-	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+	.pc_encode	= nfs3svc_encode_##rest##res,			\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index ed83e8a9e7b47..17c90c41a3a68 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -677,7 +677,7 @@ struct nfsd3_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
+		.pc_encode = nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -686,7 +686,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
+		.pc_encode = nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
@@ -696,7 +696,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = nfs3svc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
+		.pc_encode = nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = nfs3svc_decode_accessargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
+		.pc_encode = nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = nfs3svc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
+		.pc_encode = nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
@@ -736,7 +736,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = nfs3svc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
+		.pc_encode = nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
@@ -746,7 +746,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = nfs3svc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
+		.pc_encode = nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
@@ -756,7 +756,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = nfs3svc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -766,7 +766,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = nfs3svc_decode_mkdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -776,7 +776,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = nfs3svc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -786,7 +786,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = nfs3svc_decode_mknodargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -796,7 +796,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -806,7 +806,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -816,7 +816,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = nfs3svc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
+		.pc_encode = nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
@@ -826,7 +826,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = nfs3svc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
+		.pc_encode = nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
@@ -836,7 +836,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = nfs3svc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -845,7 +845,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = nfs3svc_decode_readdirplusargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -854,7 +854,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
+		.pc_encode = nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -863,7 +863,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
+		.pc_encode = nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
 		.pc_cachetype = RC_NOCACHE,
@@ -872,7 +872,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
+		.pc_encode = nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
 		.pc_cachetype = RC_NOCACHE,
@@ -881,7 +881,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = nfs3svc_decode_commitargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
+		.pc_encode = nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index be8bf8af9917a..349e355edc737 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -645,16 +645,17 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
  * will work properly.
  */
 int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETATTR */
 int
-nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		lease_get_mtime(d_inode(resp->fh.fh_dentry),
 				&resp->stat.mtime);
@@ -665,18 +666,20 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
 
 /* SETATTR, REMOVE, RMDIR */
 int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* LOOKUP */
 int
-nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		p = encode_fh(p, &resp->fh);
 		p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -687,9 +690,10 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
 
 /* ACCESS */
 int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessres *resp)
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0)
 		*p++ = htonl(resp->access);
@@ -698,9 +702,10 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READLINK */
 int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkres *resp)
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->len);
@@ -719,9 +724,10 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READ */
 int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readres *resp)
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
@@ -743,9 +749,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 
 /* WRITE */
 int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeres *resp)
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -760,9 +766,10 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
 int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		*p++ = xdr_one;
 		p = encode_fh(p, &resp->fh);
@@ -774,9 +781,10 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
 
 /* RENAME */
 int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameres *resp)
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->ffh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -784,9 +792,10 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
 
 /* LINK */
 int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkres *resp)
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -794,9 +803,10 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READDIR */
 int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirres *resp)
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 
 	if (resp->status == 0) {
@@ -1044,9 +1054,9 @@ nfs3svc_encode_entry_plus(void *cd, const char *name,
 
 /* FSSTAT */
 int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsstatres *resp)
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	struct kstatfs	*s = &resp->stats;
 	u64		bs = s->f_bsize;
 
@@ -1066,9 +1076,10 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
 
 /* FSINFO */
 int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsinfores *resp)
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1090,9 +1101,10 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
 
 /* PATHCONF */
 int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_pathconfres *resp)
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1109,9 +1121,9 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
 
 /* COMMIT */
 int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitres *resp)
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6ff434b77a9e1..ad0622efae4ed 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2520,7 +2520,7 @@ struct nfsd4_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
+		.pc_encode = nfs4svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd4_voidargs),
 		.pc_ressize = sizeof(struct nfsd4_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -2529,7 +2529,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
 		.pc_decode = nfs4svc_decode_compoundargs,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
+		.pc_encode = nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
 		.pc_release = nfsd4_release_compoundargs,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3a7e117bd11eb..54e212e3541eb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4538,7 +4538,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 }
 
 int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
         return xdr_ressize_check(rqstp, p);
 }
@@ -4584,11 +4584,12 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp)
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
 {
 	/*
 	 * All that remains is to write the tag and operation count...
 	 */
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_buf *buf = resp->xdr.buf;
 
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index d351d0ef6d348..0ef88d0e67d9d 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -577,7 +577,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -586,7 +586,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -596,7 +596,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = nfssvc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -605,7 +605,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_ROOT] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -614,7 +614,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -624,7 +624,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
 		.pc_decode = nfssvc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
+		.pc_encode = nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -633,7 +633,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = nfssvc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
+		.pc_encode = nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
@@ -642,7 +642,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITECACHE] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -651,7 +651,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = nfssvc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -661,7 +661,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -671,7 +671,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -680,7 +680,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
 		.pc_decode = nfssvc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -689,7 +689,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
 		.pc_decode = nfssvc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -698,7 +698,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
 		.pc_decode = nfssvc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
 		.pc_decode = nfssvc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
+		.pc_encode = nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -734,7 +734,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
+		.pc_encode = nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 3e00499d7ad7e..5552336641246 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -782,7 +782,6 @@ int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	struct svc_procedure	*proc;
-	kxdrproc_t		xdr;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
@@ -841,9 +840,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 * For NFSv2, additional info is never returned in case of an error.
 	 */
 	if (!(nfserr && rqstp->rq_vers == 2)) {
-		xdr = proc->pc_encode;
-		if (xdr && !xdr(rqstp, nfserrp,
-				rqstp->rq_resp)) {
+		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
 			/* Failed to encode result. Release cache entry */
 			dprintk("nfsd: failed to encode result!\n");
 			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 0957ceebe1aa4..bb1998c5ae610 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -429,32 +429,35 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
  * XDR encode functions
  */
 int
-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_attrstat *resp)
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropres *resp)
+nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropres *resp = rqstp->rq_resp;
+
 	p = encode_fh(p, &resp->fh);
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readlinkres *resp)
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
+
 	*p++ = htonl(resp->len);
 	xdr_ressize_check(rqstp, p);
 	rqstp->rq_res.page_len = resp->len;
@@ -468,9 +471,10 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readres *resp)
+nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readres *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->count);
 	xdr_ressize_check(rqstp, p);
@@ -487,9 +491,10 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirres *resp)
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
+
 	xdr_ressize_check(rqstp, p);
 	p = resp->buffer;
 	*p++ = 0;			/* no more entries */
@@ -500,9 +505,9 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_statfsres *resp)
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	struct kstatfs	*stat = &resp->stats;
 
 	*p++ = htonl(NFSSVC_MAXBLKSIZE_V2);	/* max transfer size */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 8eeb752cf6f8a..457ce45e5084f 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -143,13 +143,13 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
 
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index f79be4c42e4a5..80d7da620e912 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -285,35 +285,22 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessres *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkres *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameres *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkres *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirres *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsstatres *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsinfores *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
-				struct nfsd3_pathconfres *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitres *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
 
 void nfs3svc_release_fhandle(struct svc_rqst *);
 void nfs3svc_release_fhandle2(struct svc_rqst *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2a53c12338842..eb7f9239304ff 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -682,10 +682,9 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
 int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundres *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 0416600844ceb..7acbecc21a401 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -96,16 +96,16 @@ struct nlm_reboot {
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
 int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 951bbe31fdb83..bf16456092254 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -24,16 +24,16 @@
 
 
 int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 047f04411dd4c..6cfe41db7f31b 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -423,7 +423,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	/* XDR decode args: */
 	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
-	kxdrproc_t		pc_encode;	/* XDR encode result */
+	/* XDR encode result: */
+	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 18024c1b9b7b3..aa643a29fdc6f 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1154,7 +1154,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	struct svc_version	*versp = NULL;	/* compiler food */
 	struct svc_procedure	*procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
-	kxdrproc_t		xdr;
 	__be32			*statp;
 	u32			prog, vers, proc;
 	__be32			auth_stat, rpc_stat;
@@ -1298,9 +1297,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
-		if (*statp == rpc_success &&
-		    (xdr = procp->pc_encode) &&
-		    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+		if (*statp == rpc_success && procp->pc_encode &&
+		    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
 			dprintk("svc: failed to encode reply\n");
 			/* serv->sv_stats->rpcsystemerr++; */
 			*statp = rpc_system_err;
-- 
GitLab


From 35f297e5370bd511a171f7de0c5a31ee661f2e7e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:56:10 +0200
Subject: [PATCH 0048/1958] sunrpc: remove kxdrproc_t

Remove the now unused typedef.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/sunrpc/xdr.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index ed0fbf0d8d0f5..261b48a2701d2 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -34,13 +34,6 @@ struct xdr_netobj {
 	u8 *			data;
 };
 
-/*
- * This is the legacy generic XDR function. rqstp is either a rpc_rqst
- * (client side) or svc_rqst pointer (server side).
- * Encode functions always assume there's enough room in the buffer.
- */
-typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
-
 /*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
-- 
GitLab


From b60e985980948f65d9833e5be7adccb6df321368 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:03:15 +0200
Subject: [PATCH 0049/1958] nfsd4: properly type op_set_currentstateid
 callbacks

Given the args union in struct nfsd4_op a name, and pass it to the
op_set_currentstateid callbacks instead of using unsafe function
pointer casts.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/current_stateid.h | 12 ++++++++----
 fs/nfsd/nfs4proc.c        | 12 ++++++------
 fs/nfsd/nfs4state.c       | 20 ++++++++++++--------
 fs/nfsd/xdr4.h            |  2 +-
 4 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h
index 4123551208d88..3c80a7b5302da 100644
--- a/fs/nfsd/current_stateid.h
+++ b/fs/nfsd/current_stateid.h
@@ -8,10 +8,14 @@ extern void clear_current_stateid(struct nfsd4_compound_state *cstate);
 /*
  * functions to set current state id
  */
-extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
-extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *);
-extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *);
-extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
+extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_openstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_closestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
 
 /*
  * functions to consume current state id
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ad0622efae4ed..fa2c46f67f273 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1525,7 +1525,6 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
 
-typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
 typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
 
 enum nfsd4_op_flags {
@@ -1565,7 +1564,8 @@ struct nfsd4_operation {
 	/* Try to get response size before operation */
 	nfsd4op_rsize op_rsize_bop;
 	stateid_getter op_get_currentstateid;
-	stateid_setter op_set_currentstateid;
+	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 };
 
 static struct nfsd4_operation nfsd4_ops[];
@@ -2105,7 +2105,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_CLOSE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid,
+		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
 	[OP_COMMIT] = {
 		.op_func = (nfsd4op_func)nfsd4_commit,
@@ -2149,7 +2149,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCK",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid,
+		.op_set_currentstateid = nfsd4_set_lockstateid,
 	},
 	[OP_LOCKT] = {
 		.op_func = (nfsd4op_func)nfsd4_lockt,
@@ -2185,7 +2185,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid,
+		.op_set_currentstateid = nfsd4_set_openstateid,
 	},
 	[OP_OPEN_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_open_confirm,
@@ -2199,7 +2199,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_OPEN_DOWNGRADE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid,
+		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
 	[OP_PUTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putfh,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 22002fb75a182..f9552357923e8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7103,27 +7103,31 @@ clear_current_stateid(struct nfsd4_compound_state *cstate)
  * functions to set current state id
  */
 void
-nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &odp->od_stateid);
+	put_stateid(cstate, &u->open_downgrade.od_stateid);
 }
 
 void
-nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+nfsd4_set_openstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &open->op_stateid);
+	put_stateid(cstate, &u->open.op_stateid);
 }
 
 void
-nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+nfsd4_set_closestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &close->cl_stateid);
+	put_stateid(cstate, &u->close.cl_stateid);
 }
 
 void
-nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
+nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &lock->lk_resp_stateid);
+	put_stateid(cstate, &u->lock.lk_resp_stateid);
 }
 
 /*
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index eb7f9239304ff..b625f4aa1061e 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -539,7 +539,7 @@ struct nfsd4_seek {
 struct nfsd4_op {
 	int					opnum;
 	__be32					status;
-	union {
+	union nfsd4_op_u {
 		struct nfsd4_access		access;
 		struct nfsd4_close		close;
 		struct nfsd4_commit		commit;
-- 
GitLab


From 57832e7bd85a450e64dba19933d83b6577632066 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:37:33 +0200
Subject: [PATCH 0050/1958] nfsd4: properly type op_get_currentstateid
 callbacks

Pass union nfsd4_op_u to the op_set_currentstateid callbacks instead of
using unsafe function pointer casts.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/current_stateid.h | 24 +++++++++++++++--------
 fs/nfsd/nfs4proc.c        | 21 ++++++++++----------
 fs/nfsd/nfs4state.c       | 40 +++++++++++++++++++++++----------------
 3 files changed, 50 insertions(+), 35 deletions(-)

diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h
index 3c80a7b5302da..34075cee573a0 100644
--- a/fs/nfsd/current_stateid.h
+++ b/fs/nfsd/current_stateid.h
@@ -20,13 +20,21 @@ extern void nfsd4_set_closestateid(struct nfsd4_compound_state *,
 /*
  * functions to consume current state id
  */
-extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
-extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *);
-extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *);
-extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *);
-extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
-extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *);
-extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *);
-extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *);
+extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_freestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_closestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_readstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_writestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
 
 #endif   /* _NFSD4_CURRENT_STATE_H */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index fa2c46f67f273..4d724d3284b00 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1525,8 +1525,6 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
 
-typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
-
 enum nfsd4_op_flags {
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
 	ALLOWED_ON_ABSENT_FS = 1 << 1,	/* ops processed on absent fs */
@@ -1563,7 +1561,8 @@ struct nfsd4_operation {
 	char *op_name;
 	/* Try to get response size before operation */
 	nfsd4op_rsize op_rsize_bop;
-	stateid_getter op_get_currentstateid;
+	void (*op_get_currentstateid)(struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
 			union nfsd4_op_u *);
 };
@@ -2104,7 +2103,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CLOSE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid,
+		.op_get_currentstateid = nfsd4_get_closestateid,
 		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
 	[OP_COMMIT] = {
@@ -2124,7 +2123,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DELEGRETURN",
 		.op_rsize_bop = nfsd4_only_status_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid,
+		.op_get_currentstateid = nfsd4_get_delegreturnstateid,
 	},
 	[OP_GETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_getattr,
@@ -2161,7 +2160,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCKU",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid,
+		.op_get_currentstateid = nfsd4_get_lockustateid,
 	},
 	[OP_LOOKUP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookup,
@@ -2198,7 +2197,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_DOWNGRADE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid,
+		.op_get_currentstateid = nfsd4_get_opendowngradestateid,
 		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
 	[OP_PUTFH] = {
@@ -2226,7 +2225,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
 		.op_name = "OP_READ",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
+		.op_get_currentstateid = nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
 		.op_func = (nfsd4op_func)nfsd4_readdir,
@@ -2282,7 +2281,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_SETATTR",
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid,
+		.op_get_currentstateid = nfsd4_get_setattrstateid,
 	},
 	[OP_SETCLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid,
@@ -2308,7 +2307,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_WRITE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid,
+		.op_get_currentstateid = nfsd4_get_writestateid,
 	},
 	[OP_RELEASE_LOCKOWNER] = {
 		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
@@ -2388,7 +2387,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
+		.op_get_currentstateid = nfsd4_get_freestateid,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
 #ifdef CONFIG_NFSD_PNFS
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f9552357923e8..01ab21f8d34db 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7135,49 +7135,57 @@ nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate,
  */
 
 void
-nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &odp->od_stateid);
+	get_stateid(cstate, &u->open_downgrade.od_stateid);
 }
 
 void
-nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
+nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &drp->dr_stateid);
+	get_stateid(cstate, &u->delegreturn.dr_stateid);
 }
 
 void
-nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
+nfsd4_get_freestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &fsp->fr_stateid);
+	get_stateid(cstate, &u->free_stateid.fr_stateid);
 }
 
 void
-nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
+nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &setattr->sa_stateid);
+	get_stateid(cstate, &u->setattr.sa_stateid);
 }
 
 void
-nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+nfsd4_get_closestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &close->cl_stateid);
+	get_stateid(cstate, &u->close.cl_stateid);
 }
 
 void
-nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
+nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &locku->lu_stateid);
+	get_stateid(cstate, &u->locku.lu_stateid);
 }
 
 void
-nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
+nfsd4_get_readstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &read->rd_stateid);
+	get_stateid(cstate, &u->read.rd_stateid);
 }
 
 void
-nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
+nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &write->wr_stateid);
+	get_stateid(cstate, &u->write.wr_stateid);
 }
-- 
GitLab


From 1c1226385b44c1f66dafb3c56b68455ee565dbbf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:42:10 +0200
Subject: [PATCH 0051/1958] nfsd4: remove nfsd4op_rsize

Except for a lot of unnecessary casts this typedef only has one user,
so remove the casts and expand it in struct nfsd4_operation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4proc.c | 109 ++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 55 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4d724d3284b00..0bd4b750262f2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1523,7 +1523,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 
 typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
-typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
 
 enum nfsd4_op_flags {
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
@@ -1560,7 +1559,7 @@ struct nfsd4_operation {
 	u32 op_flags;
 	char *op_name;
 	/* Try to get response size before operation */
-	nfsd4op_rsize op_rsize_bop;
+	u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
 	void (*op_get_currentstateid)(struct nfsd4_compound_state *,
 			union nfsd4_op_u *);
 	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
@@ -2096,13 +2095,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
 		.op_func = (nfsd4op_func)nfsd4_access,
 		.op_name = "OP_ACCESS",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
+		.op_rsize_bop = nfsd4_access_rsize,
 	},
 	[OP_CLOSE] = {
 		.op_func = (nfsd4op_func)nfsd4_close,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CLOSE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_closestateid,
 		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
@@ -2110,13 +2109,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_commit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_COMMIT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize,
+		.op_rsize_bop = nfsd4_commit_rsize,
 	},
 	[OP_CREATE] = {
 		.op_func = (nfsd4op_func)nfsd4_create,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID,
 		.op_name = "OP_CREATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
+		.op_rsize_bop = nfsd4_create_rsize,
 	},
 	[OP_DELEGRETURN] = {
 		.op_func = (nfsd4op_func)nfsd4_delegreturn,
@@ -2134,69 +2133,69 @@ static struct nfsd4_operation nfsd4_ops[] = {
 	[OP_GETFH] = {
 		.op_func = (nfsd4op_func)nfsd4_getfh,
 		.op_name = "OP_GETFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
+		.op_rsize_bop = nfsd4_getfh_rsize,
 	},
 	[OP_LINK] = {
 		.op_func = (nfsd4op_func)nfsd4_link,
 		.op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING
 				| OP_CACHEME,
 		.op_name = "OP_LINK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize,
+		.op_rsize_bop = nfsd4_link_rsize,
 	},
 	[OP_LOCK] = {
 		.op_func = (nfsd4op_func)nfsd4_lock,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
+		.op_rsize_bop = nfsd4_lock_rsize,
 		.op_set_currentstateid = nfsd4_set_lockstateid,
 	},
 	[OP_LOCKT] = {
 		.op_func = (nfsd4op_func)nfsd4_lockt,
 		.op_name = "OP_LOCKT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
+		.op_rsize_bop = nfsd4_lock_rsize,
 	},
 	[OP_LOCKU] = {
 		.op_func = (nfsd4op_func)nfsd4_locku,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCKU",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_lockustateid,
 	},
 	[OP_LOOKUP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookup,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUP",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_LOOKUPP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookupp,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUPP",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_NVERIFY] = {
 		.op_func = (nfsd4op_func)nfsd4_nverify,
 		.op_name = "OP_NVERIFY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_OPEN] = {
 		.op_func = (nfsd4op_func)nfsd4_open,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
+		.op_rsize_bop = nfsd4_open_rsize,
 		.op_set_currentstateid = nfsd4_set_openstateid,
 	},
 	[OP_OPEN_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_open_confirm,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_CONFIRM",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 	},
 	[OP_OPEN_DOWNGRADE] = {
 		.op_func = (nfsd4op_func)nfsd4_open_downgrade,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_DOWNGRADE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_opendowngradestateid,
 		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
@@ -2205,56 +2204,56 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTPUBFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTPUBFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTROOTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTROOTFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_READ] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
 		.op_name = "OP_READ",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
+		.op_rsize_bop = nfsd4_read_rsize,
 		.op_get_currentstateid = nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
 		.op_func = (nfsd4op_func)nfsd4_readdir,
 		.op_name = "OP_READDIR",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
+		.op_rsize_bop = nfsd4_readdir_rsize,
 	},
 	[OP_READLINK] = {
 		.op_func = (nfsd4op_func)nfsd4_readlink,
 		.op_name = "OP_READLINK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
+		.op_rsize_bop = nfsd4_readlink_rsize,
 	},
 	[OP_REMOVE] = {
 		.op_func = (nfsd4op_func)nfsd4_remove,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_REMOVE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize,
+		.op_rsize_bop = nfsd4_remove_rsize,
 	},
 	[OP_RENAME] = {
 		.op_func = (nfsd4op_func)nfsd4_rename,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_RENAME",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize,
+		.op_rsize_bop = nfsd4_rename_rsize,
 	},
 	[OP_RENEW] = {
 		.op_func = (nfsd4op_func)nfsd4_renew,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RENEW",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 
 	},
 	[OP_RESTOREFH] = {
@@ -2262,25 +2261,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RESTOREFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SAVEFH] = {
 		.op_func = (nfsd4op_func)nfsd4_savefh,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_SAVEFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO] = {
 		.op_func = (nfsd4op_func)nfsd4_secinfo,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
+		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_SETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_setattr,
 		.op_name = "OP_SETATTR",
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
+		.op_rsize_bop = nfsd4_setattr_rsize,
 		.op_get_currentstateid = nfsd4_get_setattrstateid,
 	},
 	[OP_SETCLIENTID] = {
@@ -2288,25 +2287,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize,
+		.op_rsize_bop = nfsd4_setclientid_rsize,
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID_CONFIRM",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_VERIFY] = {
 		.op_func = (nfsd4op_func)nfsd4_verify,
 		.op_name = "OP_VERIFY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_WRITE] = {
 		.op_func = (nfsd4op_func)nfsd4_write,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_WRITE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+		.op_rsize_bop = nfsd4_write_rsize,
 		.op_get_currentstateid = nfsd4_get_writestateid,
 	},
 	[OP_RELEASE_LOCKOWNER] = {
@@ -2314,7 +2313,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RELEASE_LOCKOWNER",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 
 	/* NFSv4.1 operations */
@@ -2323,97 +2322,97 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_EXCHANGE_ID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
+		.op_rsize_bop = nfsd4_exchange_id_rsize,
 	},
 	[OP_BACKCHANNEL_CTL] = {
 		.op_func = (nfsd4op_func)nfsd4_backchannel_ctl,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BACKCHANNEL_CTL",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_BIND_CONN_TO_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BIND_CONN_TO_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize,
+		.op_rsize_bop = nfsd4_bind_conn_to_session_rsize,
 	},
 	[OP_CREATE_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_create_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CREATE_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize,
+		.op_rsize_bop = nfsd4_create_session_rsize,
 	},
 	[OP_DESTROY_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_destroy_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SEQUENCE] = {
 		.op_func = (nfsd4op_func)nfsd4_sequence,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
 		.op_name = "OP_SEQUENCE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize,
+		.op_rsize_bop = nfsd4_sequence_rsize,
 	},
 	[OP_DESTROY_CLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_destroy_clientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_CLIENTID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_RECLAIM_COMPLETE] = {
 		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RECLAIM_COMPLETE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO_NO_NAME] = {
 		.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO_NO_NAME",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
+		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_TEST_STATEID] = {
 		.op_func = (nfsd4op_func)nfsd4_test_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_TEST_STATEID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
+		.op_rsize_bop = nfsd4_test_stateid_rsize,
 	},
 	[OP_FREE_STATEID] = {
 		.op_func = (nfsd4op_func)nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
 		.op_get_currentstateid = nfsd4_get_freestateid,
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 #ifdef CONFIG_NFSD_PNFS
 	[OP_GETDEVICEINFO] = {
 		.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_GETDEVICEINFO",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
+		.op_rsize_bop = nfsd4_getdeviceinfo_rsize,
 	},
 	[OP_LAYOUTGET] = {
 		.op_func = (nfsd4op_func)nfsd4_layoutget,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTGET",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
+		.op_rsize_bop = nfsd4_layoutget_rsize,
 	},
 	[OP_LAYOUTCOMMIT] = {
 		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTCOMMIT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
+		.op_rsize_bop = nfsd4_layoutcommit_rsize,
 	},
 	[OP_LAYOUTRETURN] = {
 		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTRETURN",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
+		.op_rsize_bop = nfsd4_layoutreturn_rsize,
 	},
 #endif /* CONFIG_NFSD_PNFS */
 
@@ -2422,30 +2421,30 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_allocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_ALLOCATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_DEALLOCATE] = {
 		.op_func = (nfsd4op_func)nfsd4_deallocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_DEALLOCATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_CLONE] = {
 		.op_func = (nfsd4op_func)nfsd4_clone,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_CLONE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_COPY] = {
 		.op_func = (nfsd4op_func)nfsd4_copy,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_COPY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
+		.op_rsize_bop = nfsd4_copy_rsize,
 	},
 	[OP_SEEK] = {
 		.op_func = (nfsd4op_func)nfsd4_seek,
 		.op_name = "OP_SEEK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
+		.op_rsize_bop = nfsd4_seek_rsize,
 	},
 };
 
-- 
GitLab


From eb69853da9459280d89876cfc3da11292e59f7af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:58:35 +0200
Subject: [PATCH 0052/1958] nfsd4: properly type op_func callbacks

Pass union nfsd4_op_u to the op_func callbacks instead of using unsafe
function pointer casts.

It also adds two missing structures to struct nfsd4_op.u to facilitate
this.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4proc.c  | 234 ++++++++++++++++++++++++--------------------
 fs/nfsd/nfs4state.c |  82 ++++++++++------
 fs/nfsd/xdr4.h      |  66 ++++++-------
 3 files changed, 209 insertions(+), 173 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0bd4b750262f2..a947dcef5e4e6 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -344,8 +344,9 @@ copy_clientid(clientid_t *clid, struct nfsd4_session *session)
 
 static __be32
 nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_open *open)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_open *open = &u->open;
 	__be32 status;
 	struct svc_fh *resfh = NULL;
 	struct net *net = SVC_NET(rqstp);
@@ -467,14 +468,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  */
 static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op)
 {
-	struct nfsd4_open *open = (struct nfsd4_open *)&op->u;
+	struct nfsd4_open *open = &op->u.open;
 
 	if (!seqid_mutating_err(ntohl(op->status)))
 		return op->status;
 	if (nfsd4_has_session(cstate))
 		return op->status;
 	open->op_xdr_error = op->status;
-	return nfsd4_open(rqstp, cstate, open);
+	return nfsd4_open(rqstp, cstate, &op->u);
 }
 
 /*
@@ -482,19 +483,21 @@ static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_stat
  */
 static __be32
 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct svc_fh **getfh)
+	    union nfsd4_op_u *u)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
 
-	*getfh = &cstate->current_fh;
+	u->getfh = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_putfh *putfh)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_putfh *putfh = &u->putfh;
+
 	fh_put(&cstate->current_fh);
 	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
 	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
@@ -504,7 +507,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		void *arg)
+		union nfsd4_op_u *u)
 {
 	__be32 status;
 
@@ -515,7 +518,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		void *arg)
+		union nfsd4_op_u *u)
 {
 	if (!cstate->save_fh.fh_dentry)
 		return nfserr_restorefh;
@@ -530,7 +533,7 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     void *arg)
+	     union nfsd4_op_u *u)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
@@ -548,8 +551,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  */
 static __be32
 nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_access *access)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_access *access = &u->access;
+
 	if (access->ac_req_access & ~NFS3_ACCESS_FULL)
 		return nfserr_inval;
 
@@ -574,8 +579,10 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
 static __be32
 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_commit *commit)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_commit *commit = &u->commit;
+
 	gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
 	return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
 			     commit->co_count);
@@ -583,8 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_create *create)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_create *create = &u->create;
 	struct svc_fh resfh;
 	__be32 status;
 	dev_t rdev;
@@ -670,8 +678,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_getattr *getattr)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_getattr *getattr = &u->getattr;
 	__be32 status;
 
 	status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
@@ -691,8 +700,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_link *link)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_link *link = &u->link;
 	__be32 status = nfserr_nofilehandle;
 
 	if (!cstate->save_fh.fh_dentry)
@@ -723,24 +733,25 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 
 static __be32
 nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      void *arg)
+	      union nfsd4_op_u *u)
 {
 	return nfsd4_do_lookupp(rqstp, &cstate->current_fh);
 }
 
 static __be32
 nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_lookup *lookup)
+	     union nfsd4_op_u *u)
 {
 	return nfsd_lookup(rqstp, &cstate->current_fh,
-			   lookup->lo_name, lookup->lo_len,
+			   u->lookup.lo_name, u->lookup.lo_len,
 			   &cstate->current_fh);
 }
 
 static __be32
 nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_read *read)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_read *read = &u->read;
 	__be32 status;
 
 	read->rd_filp = NULL;
@@ -775,8 +786,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_readdir *readdir)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_readdir *readdir = &u->readdir;
 	u64 cookie = readdir->rd_cookie;
 	static const nfs4_verifier zeroverf;
 
@@ -800,17 +812,18 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	       struct nfsd4_readlink *readlink)
+	       union nfsd4_op_u *u)
 {
-	readlink->rl_rqstp = rqstp;
-	readlink->rl_fhp = &cstate->current_fh;
+	u->readlink.rl_rqstp = rqstp;
+	u->readlink.rl_fhp = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_remove *remove)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_remove *remove = &u->remove;
 	__be32 status;
 
 	if (opens_in_grace(SVC_NET(rqstp)))
@@ -826,8 +839,9 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_rename *rename)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_rename *rename = &u->rename;
 	__be32 status = nfserr_nofilehandle;
 
 	if (!cstate->save_fh.fh_dentry)
@@ -847,8 +861,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_secinfo *secinfo)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_secinfo *secinfo = &u->secinfo;
 	struct svc_export *exp;
 	struct dentry *dentry;
 	__be32 err;
@@ -876,11 +891,11 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_secinfo_no_name *sin)
+		union nfsd4_op_u *u)
 {
 	__be32 err;
 
-	switch (sin->sin_style) {
+	switch (u->secinfo_no_name.sin_style) {
 	case NFS4_SECINFO_STYLE4_CURRENT_FH:
 		break;
 	case NFS4_SECINFO_STYLE4_PARENT:
@@ -892,15 +907,16 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
 		return nfserr_inval;
 	}
 
-	sin->sin_exp = exp_get(cstate->current_fh.fh_export);
+	u->secinfo_no_name.sin_exp = exp_get(cstate->current_fh.fh_export);
 	fh_put(&cstate->current_fh);
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_setattr *setattr)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_setattr *setattr = &u->setattr;
 	__be32 status = nfs_ok;
 	int err;
 
@@ -960,8 +976,9 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_write *write)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_write *write = &u->write;
 	stateid_t *stateid = &write->wr_stateid;
 	struct file *filp = NULL;
 	__be32 status = nfs_ok;
@@ -1034,8 +1051,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_clone *clone)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_clone *clone = &u->clone;
 	struct file *src, *dst;
 	__be32 status;
 
@@ -1055,8 +1073,9 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_copy *copy)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_copy *copy = &u->copy;
 	struct file *src, *dst;
 	__be32 status;
 	ssize_t bytes;
@@ -1111,23 +1130,24 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	       struct nfsd4_fallocate *fallocate)
+	       union nfsd4_op_u *u)
 {
-	return nfsd4_fallocate(rqstp, cstate, fallocate, 0);
+	return nfsd4_fallocate(rqstp, cstate, &u->allocate, 0);
 }
 
 static __be32
 nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		 struct nfsd4_fallocate *fallocate)
+		 union nfsd4_op_u *u)
 {
-	return nfsd4_fallocate(rqstp, cstate, fallocate,
+	return nfsd4_fallocate(rqstp, cstate, &u->deallocate,
 			       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
 }
 
 static __be32
 nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_seek *seek)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_seek *seek = &u->seek;
 	int whence;
 	__be32 status;
 	struct file *file;
@@ -1232,21 +1252,21 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_nverify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_verify *verify)
+	      union nfsd4_op_u *u)
 {
 	__be32 status;
 
-	status = _nfsd4_verify(rqstp, cstate, verify);
+	status = _nfsd4_verify(rqstp, cstate, &u->verify);
 	return status == nfserr_not_same ? nfs_ok : status;
 }
 
 static __be32
 nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_verify *verify)
+	     union nfsd4_op_u *u)
 {
 	__be32 status;
 
-	status = _nfsd4_verify(rqstp, cstate, verify);
+	status = _nfsd4_verify(rqstp, cstate, &u->nverify);
 	return status == nfserr_same ? nfs_ok : status;
 }
 
@@ -1271,9 +1291,9 @@ nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
 
 static __be32
 nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_getdeviceinfo *gdp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_getdeviceinfo *gdp = &u->getdeviceinfo;
 	const struct nfsd4_layout_ops *ops;
 	struct nfsd4_deviceid_map *map;
 	struct svc_export *exp;
@@ -1317,9 +1337,9 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutget(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutget *lgp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutget *lgp = &u->layoutget;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	const struct nfsd4_layout_ops *ops;
 	struct nfs4_layout_stateid *ls;
@@ -1397,9 +1417,9 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutcommit(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutcommit *lcp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
 	const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	const struct nfsd4_layout_ops *ops;
@@ -1461,9 +1481,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutreturn *lrp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	__be32 nfserr;
 
@@ -1521,9 +1541,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 		nfsdstats.nfs4_opcount[opnum]++;
 }
 
-typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
-			      void *);
-
 enum nfsd4_op_flags {
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
 	ALLOWED_ON_ABSENT_FS = 1 << 1,	/* ops processed on absent fs */
@@ -1555,7 +1572,8 @@ enum nfsd4_op_flags {
 };
 
 struct nfsd4_operation {
-	nfsd4op_func op_func;
+	__be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 	u32 op_flags;
 	char *op_name;
 	/* Try to get response size before operation */
@@ -2093,12 +2111,12 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 
 static struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
-		.op_func = (nfsd4op_func)nfsd4_access,
+		.op_func = nfsd4_access,
 		.op_name = "OP_ACCESS",
 		.op_rsize_bop = nfsd4_access_rsize,
 	},
 	[OP_CLOSE] = {
-		.op_func = (nfsd4op_func)nfsd4_close,
+		.op_func = nfsd4_close,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CLOSE",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
@@ -2106,93 +2124,93 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
 	[OP_COMMIT] = {
-		.op_func = (nfsd4op_func)nfsd4_commit,
+		.op_func = nfsd4_commit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_COMMIT",
 		.op_rsize_bop = nfsd4_commit_rsize,
 	},
 	[OP_CREATE] = {
-		.op_func = (nfsd4op_func)nfsd4_create,
+		.op_func = nfsd4_create,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID,
 		.op_name = "OP_CREATE",
 		.op_rsize_bop = nfsd4_create_rsize,
 	},
 	[OP_DELEGRETURN] = {
-		.op_func = (nfsd4op_func)nfsd4_delegreturn,
+		.op_func = nfsd4_delegreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DELEGRETURN",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 		.op_get_currentstateid = nfsd4_get_delegreturnstateid,
 	},
 	[OP_GETATTR] = {
-		.op_func = (nfsd4op_func)nfsd4_getattr,
+		.op_func = nfsd4_getattr,
 		.op_flags = ALLOWED_ON_ABSENT_FS,
 		.op_rsize_bop = nfsd4_getattr_rsize,
 		.op_name = "OP_GETATTR",
 	},
 	[OP_GETFH] = {
-		.op_func = (nfsd4op_func)nfsd4_getfh,
+		.op_func = nfsd4_getfh,
 		.op_name = "OP_GETFH",
 		.op_rsize_bop = nfsd4_getfh_rsize,
 	},
 	[OP_LINK] = {
-		.op_func = (nfsd4op_func)nfsd4_link,
+		.op_func = nfsd4_link,
 		.op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING
 				| OP_CACHEME,
 		.op_name = "OP_LINK",
 		.op_rsize_bop = nfsd4_link_rsize,
 	},
 	[OP_LOCK] = {
-		.op_func = (nfsd4op_func)nfsd4_lock,
+		.op_func = nfsd4_lock,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCK",
 		.op_rsize_bop = nfsd4_lock_rsize,
 		.op_set_currentstateid = nfsd4_set_lockstateid,
 	},
 	[OP_LOCKT] = {
-		.op_func = (nfsd4op_func)nfsd4_lockt,
+		.op_func = nfsd4_lockt,
 		.op_name = "OP_LOCKT",
 		.op_rsize_bop = nfsd4_lock_rsize,
 	},
 	[OP_LOCKU] = {
-		.op_func = (nfsd4op_func)nfsd4_locku,
+		.op_func = nfsd4_locku,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCKU",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_lockustateid,
 	},
 	[OP_LOOKUP] = {
-		.op_func = (nfsd4op_func)nfsd4_lookup,
+		.op_func = nfsd4_lookup,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUP",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_LOOKUPP] = {
-		.op_func = (nfsd4op_func)nfsd4_lookupp,
+		.op_func = nfsd4_lookupp,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUPP",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_NVERIFY] = {
-		.op_func = (nfsd4op_func)nfsd4_nverify,
+		.op_func = nfsd4_nverify,
 		.op_name = "OP_NVERIFY",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_OPEN] = {
-		.op_func = (nfsd4op_func)nfsd4_open,
+		.op_func = nfsd4_open,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN",
 		.op_rsize_bop = nfsd4_open_rsize,
 		.op_set_currentstateid = nfsd4_set_openstateid,
 	},
 	[OP_OPEN_CONFIRM] = {
-		.op_func = (nfsd4op_func)nfsd4_open_confirm,
+		.op_func = nfsd4_open_confirm,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_CONFIRM",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
 	},
 	[OP_OPEN_DOWNGRADE] = {
-		.op_func = (nfsd4op_func)nfsd4_open_downgrade,
+		.op_func = nfsd4_open_downgrade,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_DOWNGRADE",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
@@ -2200,56 +2218,56 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
 	[OP_PUTFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putfh,
+		.op_func = nfsd4_putfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTPUBFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+		.op_func = nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTPUBFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTROOTFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+		.op_func = nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTROOTFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_READ] = {
-		.op_func = (nfsd4op_func)nfsd4_read,
+		.op_func = nfsd4_read,
 		.op_name = "OP_READ",
 		.op_rsize_bop = nfsd4_read_rsize,
 		.op_get_currentstateid = nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
-		.op_func = (nfsd4op_func)nfsd4_readdir,
+		.op_func = nfsd4_readdir,
 		.op_name = "OP_READDIR",
 		.op_rsize_bop = nfsd4_readdir_rsize,
 	},
 	[OP_READLINK] = {
-		.op_func = (nfsd4op_func)nfsd4_readlink,
+		.op_func = nfsd4_readlink,
 		.op_name = "OP_READLINK",
 		.op_rsize_bop = nfsd4_readlink_rsize,
 	},
 	[OP_REMOVE] = {
-		.op_func = (nfsd4op_func)nfsd4_remove,
+		.op_func = nfsd4_remove,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_REMOVE",
 		.op_rsize_bop = nfsd4_remove_rsize,
 	},
 	[OP_RENAME] = {
-		.op_func = (nfsd4op_func)nfsd4_rename,
+		.op_func = nfsd4_rename,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_RENAME",
 		.op_rsize_bop = nfsd4_rename_rsize,
 	},
 	[OP_RENEW] = {
-		.op_func = (nfsd4op_func)nfsd4_renew,
+		.op_func = nfsd4_renew,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RENEW",
@@ -2257,59 +2275,59 @@ static struct nfsd4_operation nfsd4_ops[] = {
 
 	},
 	[OP_RESTOREFH] = {
-		.op_func = (nfsd4op_func)nfsd4_restorefh,
+		.op_func = nfsd4_restorefh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RESTOREFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SAVEFH] = {
-		.op_func = (nfsd4op_func)nfsd4_savefh,
+		.op_func = nfsd4_savefh,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_SAVEFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO] = {
-		.op_func = (nfsd4op_func)nfsd4_secinfo,
+		.op_func = nfsd4_secinfo,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO",
 		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_SETATTR] = {
-		.op_func = (nfsd4op_func)nfsd4_setattr,
+		.op_func = nfsd4_setattr,
 		.op_name = "OP_SETATTR",
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_rsize_bop = nfsd4_setattr_rsize,
 		.op_get_currentstateid = nfsd4_get_setattrstateid,
 	},
 	[OP_SETCLIENTID] = {
-		.op_func = (nfsd4op_func)nfsd4_setclientid,
+		.op_func = nfsd4_setclientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID",
 		.op_rsize_bop = nfsd4_setclientid_rsize,
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
-		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
+		.op_func = nfsd4_setclientid_confirm,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID_CONFIRM",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_VERIFY] = {
-		.op_func = (nfsd4op_func)nfsd4_verify,
+		.op_func = nfsd4_verify,
 		.op_name = "OP_VERIFY",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_WRITE] = {
-		.op_func = (nfsd4op_func)nfsd4_write,
+		.op_func = nfsd4_write,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_WRITE",
 		.op_rsize_bop = nfsd4_write_rsize,
 		.op_get_currentstateid = nfsd4_get_writestateid,
 	},
 	[OP_RELEASE_LOCKOWNER] = {
-		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
+		.op_func = nfsd4_release_lockowner,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RELEASE_LOCKOWNER",
@@ -2318,72 +2336,72 @@ static struct nfsd4_operation nfsd4_ops[] = {
 
 	/* NFSv4.1 operations */
 	[OP_EXCHANGE_ID] = {
-		.op_func = (nfsd4op_func)nfsd4_exchange_id,
+		.op_func = nfsd4_exchange_id,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_EXCHANGE_ID",
 		.op_rsize_bop = nfsd4_exchange_id_rsize,
 	},
 	[OP_BACKCHANNEL_CTL] = {
-		.op_func = (nfsd4op_func)nfsd4_backchannel_ctl,
+		.op_func = nfsd4_backchannel_ctl,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BACKCHANNEL_CTL",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_BIND_CONN_TO_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
+		.op_func = nfsd4_bind_conn_to_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BIND_CONN_TO_SESSION",
 		.op_rsize_bop = nfsd4_bind_conn_to_session_rsize,
 	},
 	[OP_CREATE_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_create_session,
+		.op_func = nfsd4_create_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CREATE_SESSION",
 		.op_rsize_bop = nfsd4_create_session_rsize,
 	},
 	[OP_DESTROY_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_destroy_session,
+		.op_func = nfsd4_destroy_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_SESSION",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SEQUENCE] = {
-		.op_func = (nfsd4op_func)nfsd4_sequence,
+		.op_func = nfsd4_sequence,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
 		.op_name = "OP_SEQUENCE",
 		.op_rsize_bop = nfsd4_sequence_rsize,
 	},
 	[OP_DESTROY_CLIENTID] = {
-		.op_func = (nfsd4op_func)nfsd4_destroy_clientid,
+		.op_func = nfsd4_destroy_clientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_CLIENTID",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_RECLAIM_COMPLETE] = {
-		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
+		.op_func = nfsd4_reclaim_complete,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RECLAIM_COMPLETE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO_NO_NAME] = {
-		.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
+		.op_func = nfsd4_secinfo_no_name,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO_NO_NAME",
 		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_TEST_STATEID] = {
-		.op_func = (nfsd4op_func)nfsd4_test_stateid,
+		.op_func = nfsd4_test_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_TEST_STATEID",
 		.op_rsize_bop = nfsd4_test_stateid_rsize,
 	},
 	[OP_FREE_STATEID] = {
-		.op_func = (nfsd4op_func)nfsd4_free_stateid,
+		.op_func = nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
 		.op_get_currentstateid = nfsd4_get_freestateid,
@@ -2391,25 +2409,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
 	},
 #ifdef CONFIG_NFSD_PNFS
 	[OP_GETDEVICEINFO] = {
-		.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
+		.op_func = nfsd4_getdeviceinfo,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_GETDEVICEINFO",
 		.op_rsize_bop = nfsd4_getdeviceinfo_rsize,
 	},
 	[OP_LAYOUTGET] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutget,
+		.op_func = nfsd4_layoutget,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTGET",
 		.op_rsize_bop = nfsd4_layoutget_rsize,
 	},
 	[OP_LAYOUTCOMMIT] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
+		.op_func = nfsd4_layoutcommit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTCOMMIT",
 		.op_rsize_bop = nfsd4_layoutcommit_rsize,
 	},
 	[OP_LAYOUTRETURN] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
+		.op_func = nfsd4_layoutreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTRETURN",
 		.op_rsize_bop = nfsd4_layoutreturn_rsize,
@@ -2418,31 +2436,31 @@ static struct nfsd4_operation nfsd4_ops[] = {
 
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE] = {
-		.op_func = (nfsd4op_func)nfsd4_allocate,
+		.op_func = nfsd4_allocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_ALLOCATE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_DEALLOCATE] = {
-		.op_func = (nfsd4op_func)nfsd4_deallocate,
+		.op_func = nfsd4_deallocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_DEALLOCATE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_CLONE] = {
-		.op_func = (nfsd4op_func)nfsd4_clone,
+		.op_func = nfsd4_clone,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_CLONE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_COPY] = {
-		.op_func = (nfsd4op_func)nfsd4_copy,
+		.op_func = nfsd4_copy,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_COPY",
 		.op_rsize_bop = nfsd4_copy_rsize,
 	},
 	[OP_SEEK] = {
-		.op_func = (nfsd4op_func)nfsd4_seek,
+		.op_func = nfsd4_seek,
 		.op_name = "OP_SEEK",
 		.op_rsize_bop = nfsd4_seek_rsize,
 	},
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 01ab21f8d34db..0c04f81aa63b2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2402,10 +2402,10 @@ static bool client_has_state(struct nfs4_client *clp)
 }
 
 __be32
-nfsd4_exchange_id(struct svc_rqst *rqstp,
-		  struct nfsd4_compound_state *cstate,
-		  struct nfsd4_exchange_id *exid)
+nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_exchange_id *exid = &u->exchange_id;
 	struct nfs4_client *conf, *new;
 	struct nfs4_client *unconf = NULL;
 	__be32 status;
@@ -2698,9 +2698,9 @@ static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
 
 __be32
 nfsd4_create_session(struct svc_rqst *rqstp,
-		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_create_session *cr_ses)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_create_session *cr_ses = &u->create_session;
 	struct sockaddr *sa = svc_addr(rqstp);
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *old = NULL;
@@ -2824,8 +2824,11 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
 	return nfserr_inval;
 }
 
-__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
+__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
 	struct nfsd4_session *session = cstate->session;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	__be32 status;
@@ -2845,8 +2848,9 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state
 
 __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_bind_conn_to_session *bcts)
+		     union nfsd4_op_u *u)
 {
+	struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
 	__be32 status;
 	struct nfsd4_conn *conn;
 	struct nfsd4_session *session;
@@ -2886,10 +2890,10 @@ static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4
 }
 
 __be32
-nfsd4_destroy_session(struct svc_rqst *r,
-		      struct nfsd4_compound_state *cstate,
-		      struct nfsd4_destroy_session *sessionid)
+nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_destroy_session *sessionid = &u->destroy_session;
 	struct nfsd4_session *ses;
 	__be32 status;
 	int ref_held_by_me = 0;
@@ -2983,10 +2987,10 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
 }
 
 __be32
-nfsd4_sequence(struct svc_rqst *rqstp,
-	       struct nfsd4_compound_state *cstate,
-	       struct nfsd4_sequence *seq)
+nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_sequence *seq = &u->sequence;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_stream *xdr = &resp->xdr;
 	struct nfsd4_session *session;
@@ -3120,8 +3124,11 @@ nfsd4_sequence_done(struct nfsd4_compoundres *resp)
 }
 
 __be32
-nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
+nfsd4_destroy_clientid(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *clp = NULL;
 	__be32 status = 0;
@@ -3161,8 +3168,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 }
 
 __be32
-nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
+nfsd4_reclaim_complete(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
 	__be32 status = 0;
 
 	if (rc->rca_one_fs) {
@@ -3199,8 +3208,9 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 
 __be32
 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		  struct nfsd4_setclientid *setclid)
+		  union nfsd4_op_u *u)
 {
+	struct nfsd4_setclientid *setclid = &u->setclientid;
 	struct xdr_netobj 	clname = setclid->se_name;
 	nfs4_verifier		clverifier = setclid->se_verf;
 	struct nfs4_client	*conf, *new;
@@ -3257,9 +3267,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-			 struct nfsd4_compound_state *cstate,
-			 struct nfsd4_setclientid_confirm *setclientid_confirm)
+			struct nfsd4_compound_state *cstate,
+			union nfsd4_op_u *u)
 {
+	struct nfsd4_setclientid_confirm *setclientid_confirm =
+			&u->setclientid_confirm;
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *old = NULL;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
@@ -4506,8 +4518,9 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    clientid_t *clid)
+	    union nfsd4_op_u *u)
 {
+	clientid_t *clid = &u->renew;
 	struct nfs4_client *clp;
 	__be32 status;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -4993,8 +5006,9 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
  */
 __be32
 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_test_stateid *test_stateid)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
 	struct nfsd4_test_stateid_id *stateid;
 	struct nfs4_client *cl = cstate->session->se_client;
 
@@ -5033,8 +5047,9 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
 
 __be32
 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_free_stateid *free_stateid)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
 	stateid_t *stateid = &free_stateid->fr_stateid;
 	struct nfs4_stid *s;
 	struct nfs4_delegation *dp;
@@ -5162,8 +5177,9 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
 
 __be32
 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_open_confirm *oc)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_open_confirm *oc = &u->open_confirm;
 	__be32 status;
 	struct nfs4_openowner *oo;
 	struct nfs4_ol_stateid *stp;
@@ -5230,9 +5246,9 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
 
 __be32
 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_open_downgrade *od)
+		     struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_open_downgrade *od = &u->open_downgrade;
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -5300,8 +5316,9 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
  */
 __be32
 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_close *close)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_close *close = &u->close;
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
 	struct net *net = SVC_NET(rqstp);
@@ -5330,8 +5347,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		  struct nfsd4_delegreturn *dr)
+		  union nfsd4_op_u *u)
 {
+	struct nfsd4_delegreturn *dr = &u->delegreturn;
 	struct nfs4_delegation *dp;
 	stateid_t *stateid = &dr->dr_stateid;
 	struct nfs4_stid *s;
@@ -5706,8 +5724,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
  */
 __be32
 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_lock *lock)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_lock *lock = &u->lock;
 	struct nfs4_openowner *open_sop = NULL;
 	struct nfs4_lockowner *lock_sop = NULL;
 	struct nfs4_ol_stateid *lock_stp = NULL;
@@ -5939,8 +5958,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
  */
 __be32
 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_lockt *lockt)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_lockt *lockt = &u->lockt;
 	struct file_lock *file_lock = NULL;
 	struct nfs4_lockowner *lo = NULL;
 	__be32 status;
@@ -6012,8 +6032,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_locku *locku)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_locku *locku = &u->locku;
 	struct nfs4_ol_stateid *stp;
 	struct file *filp = NULL;
 	struct file_lock *file_lock = NULL;
@@ -6119,8 +6140,9 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 			struct nfsd4_compound_state *cstate,
-			struct nfsd4_release_lockowner *rlockowner)
+			union nfsd4_op_u *u)
 {
+	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
 	clientid_t *clid = &rlockowner->rl_clientid;
 	struct nfs4_stateowner *sop;
 	struct nfs4_lockowner *lo = NULL;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index b625f4aa1061e..72c6ad136107c 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -577,6 +577,7 @@ struct nfsd4_op {
 		struct nfsd4_bind_conn_to_session bind_conn_to_session;
 		struct nfsd4_create_session	create_session;
 		struct nfsd4_destroy_session	destroy_session;
+		struct nfsd4_destroy_clientid	destroy_clientid;
 		struct nfsd4_sequence		sequence;
 		struct nfsd4_reclaim_complete	reclaim_complete;
 		struct nfsd4_test_stateid	test_stateid;
@@ -585,6 +586,7 @@ struct nfsd4_op {
 		struct nfsd4_layoutget		layoutget;
 		struct nfsd4_layoutcommit	layoutcommit;
 		struct nfsd4_layoutreturn	layoutreturn;
+		struct nfsd4_secinfo_no_name	secinfo_no_name;
 
 		/* NFSv4.2 */
 		struct nfsd4_fallocate		allocate;
@@ -693,27 +695,26 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
 		struct dentry *dentry,
 		u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
 extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid *setclid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid_confirm *setclientid_confirm);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
-extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
-extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *,
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *,
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_create_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_create_session *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_sequence(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_sequence *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp);
 extern __be32 nfsd4_destroy_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_destroy_session *);
-extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *);
-__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
+__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
 		struct nfsd4_open *open, struct nfsd_net *nn);
 extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
@@ -722,34 +723,29 @@ extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
 extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
 		struct nfsd4_open *open);
 extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
-extern __be32 nfsd4_close(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_close *close);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_open_downgrade *od);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
-		struct nfsd4_lock *lock);
-extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_lockt *lockt);
-extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_locku *locku);
+		union nfsd4_op_u *u);
+extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
+extern __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_release_lockowner *rlockowner);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern void nfsd4_release_compoundargs(struct svc_rqst *rqstp);
 extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
-extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
-			  struct nfsd4_compound_state *, clientid_t *clid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *);
 extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *);
 extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
 
 #endif
-- 
GitLab


From 7fd38af9cae6aef1dfd28a7d1bd214eb5ddb7d53 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:40:27 +0200
Subject: [PATCH 0053/1958] sunrpc: move pc_count out of struct svc_procinfo

pc_count is the only writeable memeber of struct svc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct svc_procinfo, and into a
separate writable array that is pointed to by struct svc_version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc.c             |  6 ++++++
 fs/nfs/callback_xdr.c      |  4 ++++
 fs/nfsd/nfs2acl.c          |  2 ++
 fs/nfsd/nfs3acl.c          |  2 ++
 fs/nfsd/nfs3proc.c         |  2 ++
 fs/nfsd/nfs4proc.c         |  2 ++
 fs/nfsd/nfsproc.c          |  2 ++
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/stats.c         | 11 ++++++-----
 net/sunrpc/svc.c           |  2 +-
 10 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5d481e8a1b5d0..cc6abe6280bce 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -739,23 +739,29 @@ module_exit(exit_nlm);
 /*
  * Define NLM program and procedures
  */
+static unsigned int nlmsvc_version1_count[17];
 static struct svc_version	nlmsvc_version1 = {
 		.vs_vers	= 1,
 		.vs_nproc	= 17,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version1_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
+static unsigned int nlmsvc_version3_count[24];
 static struct svc_version	nlmsvc_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version3_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
+static unsigned int nlmsvc_version4_count[24];
 static struct svc_version	nlmsvc_version4 = {
 		.vs_vers	= 4,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures4,
+		.vs_count	= nlmsvc_version4_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index acf75dc63e14e..ecd46b8c09851 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1011,20 +1011,24 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	}
 };
 
+static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
 
+static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count4,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fc6b179c8fff7..026edfe73fd5b 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -378,10 +378,12 @@ static struct svc_procedure		nfsd_acl_procedures2[] = {
   PROC(access,	access,		access,		access,   RC_NOCACHE, ST+AT+1),
 };
 
+static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
 struct svc_version	nfsd_acl_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 5,
 		.vs_proc	= nfsd_acl_procedures2,
+		.vs_count	= nfsd_acl_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9437b758cbfdb..73c0970ccefb4 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -263,10 +263,12 @@ static struct svc_procedure		nfsd_acl_procedures3[] = {
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
 };
 
+static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
 struct svc_version	nfsd_acl_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 3,
 		.vs_proc	= nfsd_acl_procedures3,
+		.vs_count	= nfsd_acl_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 17c90c41a3a68..b5823802e2785 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -890,10 +890,12 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
 struct svc_version	nfsd_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 22,
 		.vs_proc	= nfsd_procedures3,
+		.vs_count	= nfsd_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a947dcef5e4e6..bad5fec0ebc72 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2554,10 +2554,12 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
 struct svc_version	nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
+	.vs_count		= nfsd_count3,
 	.vs_dispatch		= nfsd_dispatch,
 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
 	.vs_rpcb_optnl		= true,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0ef88d0e67d9d..44b1575537337 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -743,10 +743,12 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 };
 
 
+static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
 struct svc_version	nfsd_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 18,
 		.vs_proc	= nfsd_procedures2,
+		.vs_count	= nfsd_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6cfe41db7f31b..9f00384153f4e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -397,6 +397,7 @@ struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
 	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
 	/* Don't register with rpcbind */
@@ -429,7 +430,6 @@ struct svc_procedure {
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
-	unsigned int		pc_count;	/* call count */
 	unsigned int		pc_cachetype;	/* cache info (NFS) */
 	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
 };
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 8b6c35ae1d571..1e671333c3d5b 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -77,9 +77,9 @@ static const struct file_operations rpc_proc_fops = {
 /*
  * Get RPC server stats
  */
-void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
+void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
+{
 	const struct svc_program *prog = statp->program;
-	const struct svc_procedure *proc;
 	const struct svc_version *vers;
 	unsigned int i, j;
 
@@ -98,11 +98,12 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
 			statp->rpcbadclnt);
 
 	for (i = 0; i < prog->pg_nvers; i++) {
-		if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
+		vers = prog->pg_vers[i];
+		if (!vers)
 			continue;
 		seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
-		for (j = 0; j < vers->vs_nproc; j++, proc++)
-			seq_printf(seq, " %u", proc->pc_count);
+		for (j = 0; j < vers->vs_nproc; j++)
+			seq_printf(seq, " %u", vers->vs_count[j]);
 		seq_putc(seq, '\n');
 	}
 }
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa643a29fdc6f..6452592194ac0 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1261,7 +1261,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	svc_putnl(resv, RPC_SUCCESS);
 
 	/* Bump per-procedure stats counter */
-	procp->pc_count++;
+	versp->vs_count[proc]++;
 
 	/* Initialize storage for argp and resp */
 	memset(rqstp->rq_argp, 0, procp->pc_argsize);
-- 
GitLab


From 860bda29b99afdc072a7a796fe81185f7ae85deb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:11:49 +0200
Subject: [PATCH 0054/1958] sunrpc: mark all struct svc_procinfo instances as
 const

struct svc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c         | 2 +-
 fs/lockd/svcproc.c          | 2 +-
 fs/nfs/callback_xdr.c       | 2 +-
 fs/nfsd/nfs2acl.c           | 2 +-
 fs/nfsd/nfs3acl.c           | 2 +-
 fs/nfsd/nfs3proc.c          | 2 +-
 fs/nfsd/nfs4proc.c          | 2 +-
 fs/nfsd/nfsproc.c           | 2 +-
 fs/nfsd/nfssvc.c            | 4 ++--
 include/linux/lockd/lockd.h | 4 ++--
 include/linux/sunrpc/svc.h  | 4 ++--
 net/sunrpc/svc.c            | 2 +-
 12 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index fed0161557917..82925f17ec45f 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -507,7 +507,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)				/* netobj */
 #define	St	1					/* status */
 #define	Rg	4					/* range (offset + length) */
-struct svc_procedure		nlmsvc_procedures4[] = {
+const struct svc_procedure nlmsvc_procedures4[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 14648b051eba0..07915162581d6 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -551,7 +551,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)			/* Net Obj */
 #define	Rg	2				/* range - offset + size */
 
-struct svc_procedure		nlmsvc_procedures[] = {
+const struct svc_procedure nlmsvc_procedures[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ecd46b8c09851..ae249f27297f9 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -995,7 +995,7 @@ static struct callback_op callback_ops[] = {
 /*
  * Define NFS4 callback procedures
  */
-static struct svc_procedure nfs4_callback_procedures1[] = {
+static const struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 026edfe73fd5b..c3f6b8a6b659c 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -370,7 +370,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures2[] = {
+static const struct svc_procedure nfsd_acl_procedures2[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		attrstat,	attrstat, RC_NOCACHE, ST+AT),
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 73c0970ccefb4..1a482ac9d4e95 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -257,7 +257,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures3[] = {
+static const struct svc_procedure nfsd_acl_procedures3[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b5823802e2785..96e0e6a2af510 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -674,7 +674,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define WC (7+pAT)	/* WCC attributes */
 
-static struct svc_procedure		nfsd_procedures3[22] = {
+static const struct svc_procedure nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
 		.pc_encode = nfs3svc_encode_voidres,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bad5fec0ebc72..a4d8aa3abc635 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2533,7 +2533,7 @@ static const char *nfsd4_op_name(unsigned opnum)
 #define nfsd4_voidres			nfsd4_voidargs
 struct nfsd4_voidargs { int dummy; };
 
-static struct svc_procedure		nfsd_procedures4[2] = {
+static const struct svc_procedure nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
 		.pc_encode = nfs4svc_encode_voidres,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 44b1575537337..a68b686fda121 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -573,7 +573,7 @@ struct nfsd_void { int dummy; };
 #define FH 8		/* filehandle */
 #define	AT 18		/* attributes */
 
-static struct svc_procedure		nfsd_procedures2[18] = {
+static const struct svc_procedure nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 5552336641246..379b310c445d3 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -756,7 +756,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
  * problem, we enforce these assumptions here:
  */
 static bool nfs_request_too_big(struct svc_rqst *rqstp,
-				struct svc_procedure *proc)
+				const struct svc_procedure *proc)
 {
 	/*
 	 * The ACL code has more careful bounds-checking and is not
@@ -781,7 +781,7 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
 int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
-	struct svc_procedure	*proc;
+	const struct svc_procedure *proc;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 41f7b6a04d691..3eca677283666 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -192,9 +192,9 @@ struct nlm_block {
  * Global variables
  */
 extern const struct rpc_program	nlm_program;
-extern struct svc_procedure	nlmsvc_procedures[];
+extern const struct svc_procedure nlmsvc_procedures[];
 #ifdef CONFIG_LOCKD_V4
-extern struct svc_procedure	nlmsvc_procedures4[];
+extern const struct svc_procedure nlmsvc_procedures4[];
 #endif
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 9f00384153f4e..984e6b9c30435 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -237,7 +237,7 @@ struct svc_rqst {
 
 	struct svc_serv *	rq_server;	/* RPC service definition */
 	struct svc_pool *	rq_pool;	/* thread pool */
-	struct svc_procedure *	rq_procinfo;	/* procedure info */
+	const struct svc_procedure *rq_procinfo;/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
@@ -396,7 +396,7 @@ struct svc_program {
 struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
-	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	const struct svc_procedure *vs_proc;	/* per-procedure info */
 	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6452592194ac0..049963d676a79 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1152,7 +1152,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
 	struct svc_version	*versp = NULL;	/* compiler food */
-	struct svc_procedure	*procp = NULL;
+	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
 	u32			prog, vers, proc;
-- 
GitLab


From e9679189e34b25a1b9aa77fe37d331559d1544af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:21:37 +0200
Subject: [PATCH 0055/1958] sunrpc: mark all struct svc_version instances as
 const

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc.c             | 38 +++++++++++++++++++-------------------
 fs/nfs/callback.c          |  2 +-
 fs/nfs/callback_xdr.c      |  4 ++--
 fs/nfs/internal.h          |  4 ++--
 fs/nfs/nfs4_fs.h           |  4 ++--
 fs/nfsd/nfs2acl.c          | 14 +++++++-------
 fs/nfsd/nfs3acl.c          | 14 +++++++-------
 fs/nfsd/nfs3proc.c         | 14 +++++++-------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfsd.h             |  6 +++---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfssvc.c           |  8 ++++----
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/svc.c           |  4 ++--
 14 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index cc6abe6280bce..726b6cecf430d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -740,32 +740,32 @@ module_exit(exit_nlm);
  * Define NLM program and procedures
  */
 static unsigned int nlmsvc_version1_count[17];
-static struct svc_version	nlmsvc_version1 = {
-		.vs_vers	= 1,
-		.vs_nproc	= 17,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version1_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version1 = {
+	.vs_vers	= 1,
+	.vs_nproc	= 17,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version1_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 static unsigned int nlmsvc_version3_count[24];
-static struct svc_version	nlmsvc_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version3_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version3_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
 static unsigned int nlmsvc_version4_count[24];
-static struct svc_version	nlmsvc_version4 = {
-		.vs_vers	= 4,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures4,
-		.vs_count	= nlmsvc_version4_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version4 = {
+	.vs_vers	= 4,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures4,
+	.vs_count	= nlmsvc_version4_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
-static struct svc_version *	nlmsvc_version[] = {
+static const struct svc_version *nlmsvc_version[] = {
 	[1] = &nlmsvc_version1,
 	[3] = &nlmsvc_version3,
 #ifdef CONFIG_LOCKD_V4
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 73a1f928226c0..34323877ec132 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -439,7 +439,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 /*
  * Define NFS4 callback program
  */
-static struct svc_version *nfs4_callback_version[] = {
+static const struct svc_version *nfs4_callback_version[] = {
 	[1] = &nfs4_callback_version1,
 	[4] = &nfs4_callback_version4,
 };
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ae249f27297f9..01a430e51daad 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1012,7 +1012,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
 };
 
 static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version1 = {
+const struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
@@ -1024,7 +1024,7 @@ struct svc_version nfs4_callback_version1 = {
 };
 
 static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version4 = {
+const struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c212549243890..9976d8498cf30 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -225,8 +225,8 @@ static inline void nfs_fs_proc_exit(void)
 #endif
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 struct nfs_pageio_descriptor;
 /* pagelist.c */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9b0cf3872722b..40bd05f05e743 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -498,8 +498,8 @@ extern const struct rpc_procinfo nfs4_procedures[];
 struct nfs4_mount_data;
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
 {
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index c3f6b8a6b659c..6276ec8608b06 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -379,11 +379,11 @@ static const struct svc_procedure nfsd_acl_procedures2[] = {
 };
 
 static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
-struct svc_version	nfsd_acl_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 5,
-		.vs_proc	= nfsd_acl_procedures2,
-		.vs_count	= nfsd_acl_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 5,
+	.vs_proc	= nfsd_acl_procedures2,
+	.vs_count	= nfsd_acl_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 1a482ac9d4e95..01976529f0424 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -264,12 +264,12 @@ static const struct svc_procedure nfsd_acl_procedures3[] = {
 };
 
 static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
-struct svc_version	nfsd_acl_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 3,
-		.vs_proc	= nfsd_acl_procedures3,
-		.vs_count	= nfsd_acl_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 3,
+	.vs_proc	= nfsd_acl_procedures3,
+	.vs_count	= nfsd_acl_count3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
 
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 96e0e6a2af510..2cb56a0d6625f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -891,11 +891,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
-struct svc_version	nfsd_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 22,
-		.vs_proc	= nfsd_procedures3,
-		.vs_count	= nfsd_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 22,
+	.vs_proc	= nfsd_procedures3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_count	= nfsd_count3,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a4d8aa3abc635..e814c1946f6e7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2555,7 +2555,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
-struct svc_version	nfsd_version4 = {
+const struct svc_version nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d96606801d47a..b9c538ab7a59b 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -60,7 +60,7 @@ struct readdir_cd {
 
 
 extern struct svc_program	nfsd_program;
-extern struct svc_version	nfsd_version2, nfsd_version3,
+extern const struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
 extern struct mutex		nfsd_mutex;
 extern spinlock_t		nfsd_drc_lock;
@@ -86,12 +86,12 @@ void		nfsd_destroy(struct net *net);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
-extern struct svc_version nfsd_acl_version2;
+extern const struct svc_version nfsd_acl_version2;
 #else
 #define nfsd_acl_version2 NULL
 #endif
 #ifdef CONFIG_NFSD_V3_ACL
-extern struct svc_version nfsd_acl_version3;
+extern const struct svc_version nfsd_acl_version3;
 #else
 #define nfsd_acl_version3 NULL
 #endif
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a68b686fda121..5076ae2b82585 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -744,13 +744,13 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 
 
 static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
-struct svc_version	nfsd_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 18,
-		.vs_proc	= nfsd_procedures2,
-		.vs_count	= nfsd_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
+const struct svc_version nfsd_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 18,
+	.vs_proc	= nfsd_procedures2,
+	.vs_count	= nfsd_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 379b310c445d3..063ae7de2c122 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -68,14 +68,14 @@ unsigned long	nfsd_drc_mem_used;
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
-static struct svc_version *	nfsd_acl_version[] = {
+static const struct svc_version *nfsd_acl_version[] = {
 	[2] = &nfsd_acl_version2,
 	[3] = &nfsd_acl_version3,
 };
 
 #define NFSD_ACL_MINVERS            2
 #define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)
-static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
+static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
 
 static struct svc_program	nfsd_acl_program = {
 	.pg_prog		= NFS_ACL_PROGRAM,
@@ -92,7 +92,7 @@ static struct svc_stat	nfsd_acl_svcstats = {
 };
 #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
 
-static struct svc_version *	nfsd_version[] = {
+static const struct svc_version *nfsd_version[] = {
 	[2] = &nfsd_version2,
 #if defined(CONFIG_NFSD_V3)
 	[3] = &nfsd_version3,
@@ -104,7 +104,7 @@ static struct svc_version *	nfsd_version[] = {
 
 #define NFSD_MINVERS    	2
 #define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
-static struct svc_version *nfsd_versions[NFSD_NRVERS];
+static const struct svc_version *nfsd_versions[NFSD_NRVERS];
 
 struct svc_program		nfsd_program = {
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 984e6b9c30435..e85267899753f 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -383,7 +383,7 @@ struct svc_program {
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* highest version */
 	unsigned int		pg_nvers;	/* number of versions */
-	struct svc_version **	pg_vers;	/* version array */
+	const struct svc_version **pg_vers;	/* version array */
 	char *			pg_name;	/* service name */
 	char *			pg_class;	/* class name: services sharing authentication */
 	struct svc_stat *	pg_stats;	/* rpc statistics */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 049963d676a79..45b4f2d2e3bdf 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1008,7 +1008,7 @@ int svc_register(const struct svc_serv *serv, struct net *net,
 		 const unsigned short port)
 {
 	struct svc_program	*progp;
-	struct svc_version	*vers;
+	const struct svc_version *vers;
 	unsigned int		i;
 	int			error = 0;
 
@@ -1151,7 +1151,7 @@ static int
 svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
-	struct svc_version	*versp = NULL;	/* compiler food */
+	const struct svc_version *versp = NULL;	/* compiler food */
 	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
-- 
GitLab


From bb2a8b0cd116219777b99cb71fe9e24b31d3f521 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:46:47 +0200
Subject: [PATCH 0056/1958] nfsd4: const-ify nfsd4_ops

nfsd4_ops contains function pointers, and marking it as constant avoids
it being able to be used as an attach vector for code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4proc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e814c1946f6e7..fe6cb5b6d31cf 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1584,7 +1584,7 @@ struct nfsd4_operation {
 			union nfsd4_op_u *);
 };
 
-static struct nfsd4_operation nfsd4_ops[];
+static const struct nfsd4_operation nfsd4_ops[];
 
 static const char *nfsd4_op_name(unsigned opnum);
 
@@ -1621,7 +1621,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
 	return nfs_ok;
 }
 
-static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
+static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
 {
 	return &nfsd4_ops[op->opnum];
 }
@@ -1639,10 +1639,9 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
 	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
 	struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
 	struct nfsd4_op *next = &argp->ops[resp->opcnt];
-	struct nfsd4_operation *thisd;
-	struct nfsd4_operation *nextd;
+	const struct nfsd4_operation *thisd = OPDESC(this);
+	const struct nfsd4_operation *nextd;
 
-	thisd = OPDESC(this);
 	/*
 	 * Most ops check wronsec on our own; only the putfh-like ops
 	 * have special rules.
@@ -1695,7 +1694,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_op	*op;
-	struct nfsd4_operation *opdesc;
+	const struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct svc_fh *save_fh = &cstate->save_fh;
@@ -2109,7 +2108,7 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 	return (op_encode_hdr_size + 3) * sizeof(__be32);
 }
 
-static struct nfsd4_operation nfsd4_ops[] = {
+static const struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
 		.op_func = nfsd4_access,
 		.op_name = "OP_ACCESS",
-- 
GitLab


From cfc5604c488ccd17936b69008af0c9ae050f4a08 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Tue, 25 Apr 2017 22:08:46 +0200
Subject: [PATCH 0057/1958] mtd: spi-nor: introduce SPI 1-2-2 and SPI 1-4-4
 protocols

This patch changes the prototype of spi_nor_scan(): its 3rd parameter
is replaced by a 'struct spi_nor_hwcaps' pointer, which tells the spi-nor
framework about the actual hardware capabilities supported by the SPI
controller and its driver.

Besides, this patch also introduces a new 'struct spi_nor_flash_parameter'
telling the spi-nor framework about the hardware capabilities supported by
the SPI flash memory and the associated settings required to use those
hardware caps.

Then, to improve the readability of spi_nor_scan(), the discovery of the
memory settings and the memory initialization are now split into two
dedicated functions.

1 - spi_nor_init_params()

The spi_nor_init_params() function is responsible for initializing the
'struct spi_nor_flash_parameter'. Currently this structure is filled with
legacy values but further patches will allow to override some parameter
values dynamically, for instance by reading the JESD216 Serial Flash
Discoverable Parameter (SFDP) tables from the SPI memory.
The spi_nor_init_params() function only deals with the hardware
capabilities of the SPI flash memory: especially it doesn't care about
the hardware capabilities supported by the SPI controller.

2 - spi_nor_setup()

The second function is called once the 'struct spi_nor_flash_parameter'
has been initialized by spi_nor_init_params().
With both 'struct spi_nor_flash_parameter' and 'struct spi_nor_hwcaps',
the new argument of spi_nor_scan(), spi_nor_setup() computes the best
match between hardware caps supported by both the (Q)SPI memory and
controller hence selecting the relevant settings for (Fast) Read and Page
Program operations.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/devices/m25p80.c          |  21 +-
 drivers/mtd/spi-nor/aspeed-smc.c      |  23 +-
 drivers/mtd/spi-nor/atmel-quadspi.c   |  83 +++--
 drivers/mtd/spi-nor/cadence-quadspi.c |  18 +-
 drivers/mtd/spi-nor/fsl-quadspi.c     |   6 +-
 drivers/mtd/spi-nor/hisi-sfc.c        |  31 +-
 drivers/mtd/spi-nor/intel-spi.c       |   7 +-
 drivers/mtd/spi-nor/mtk-quadspi.c     |  15 +-
 drivers/mtd/spi-nor/nxp-spifi.c       |  22 +-
 drivers/mtd/spi-nor/spi-nor.c         | 440 ++++++++++++++++++++------
 drivers/mtd/spi-nor/stm32-quadspi.c   |  27 +-
 include/linux/mtd/spi-nor.h           | 119 ++++++-
 12 files changed, 613 insertions(+), 199 deletions(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index c4df3b1bded0b..07073f4ce0bd0 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -111,14 +111,7 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 
 static inline unsigned int m25p80_rx_nbits(struct spi_nor *nor)
 {
-	switch (nor->flash_read) {
-	case SPI_NOR_DUAL:
-		return 2;
-	case SPI_NOR_QUAD:
-		return 4;
-	default:
-		return 0;
-	}
+	return spi_nor_get_protocol_data_nbits(nor->read_proto);
 }
 
 /*
@@ -196,7 +189,11 @@ static int m25p_probe(struct spi_device *spi)
 	struct flash_platform_data	*data;
 	struct m25p *flash;
 	struct spi_nor *nor;
-	enum read_mode mode = SPI_NOR_NORMAL;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	char *flash_name;
 	int ret;
 
@@ -222,9 +219,9 @@ static int m25p_probe(struct spi_device *spi)
 	flash->spi = spi;
 
 	if (spi->mode & SPI_RX_QUAD)
-		mode = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	else if (spi->mode & SPI_RX_DUAL)
-		mode = SPI_NOR_DUAL;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
 
 	if (data && data->name)
 		nor->mtd.name = data->name;
@@ -241,7 +238,7 @@ static int m25p_probe(struct spi_device *spi)
 	else
 		flash_name = spi->modalias;
 
-	ret = spi_nor_scan(nor, flash_name, mode);
+	ret = spi_nor_scan(nor, flash_name, &hwcaps);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index 56051d30f0008..3f875c8d63398 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c
@@ -585,14 +585,12 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 	 * TODO: Adjust clocks if fast read is supported and interpret
 	 * SPI-NOR flags to adjust controller settings.
 	 */
-	switch (chip->nor.flash_read) {
-	case SPI_NOR_NORMAL:
-		cmd = CONTROL_COMMAND_MODE_NORMAL;
-		break;
-	case SPI_NOR_FAST:
-		cmd = CONTROL_COMMAND_MODE_FREAD;
-		break;
-	default:
+	if (chip->nor.read_proto == SNOR_PROTO_1_1_1) {
+		if (chip->nor.read_dummy == 0)
+			cmd = CONTROL_COMMAND_MODE_NORMAL;
+		else
+			cmd = CONTROL_COMMAND_MODE_FREAD;
+	} else {
 		dev_err(chip->nor.dev, "unsupported SPI read mode\n");
 		return -EINVAL;
 	}
@@ -608,6 +606,11 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
 				  struct device_node *np, struct resource *r)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	const struct aspeed_smc_info *info = controller->info;
 	struct device *dev = controller->dev;
 	struct device_node *child;
@@ -671,11 +674,11 @@ static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
 			break;
 
 		/*
-		 * TODO: Add support for SPI_NOR_QUAD and SPI_NOR_DUAL
+		 * TODO: Add support for Dual and Quad SPI protocols
 		 * attach when board support is present as determined
 		 * by of property.
 		 */
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_NORMAL);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			break;
 
diff --git a/drivers/mtd/spi-nor/atmel-quadspi.c b/drivers/mtd/spi-nor/atmel-quadspi.c
index 47937d9beec6b..ba76fa8f2031b 100644
--- a/drivers/mtd/spi-nor/atmel-quadspi.c
+++ b/drivers/mtd/spi-nor/atmel-quadspi.c
@@ -275,14 +275,48 @@ static void atmel_qspi_debug_command(struct atmel_qspi *aq,
 
 static int atmel_qspi_run_command(struct atmel_qspi *aq,
 				  const struct atmel_qspi_command *cmd,
-				  u32 ifr_tfrtyp, u32 ifr_width)
+				  u32 ifr_tfrtyp, enum spi_nor_protocol proto)
 {
 	u32 iar, icr, ifr, sr;
 	int err = 0;
 
 	iar = 0;
 	icr = 0;
-	ifr = ifr_tfrtyp | ifr_width;
+	ifr = ifr_tfrtyp;
+
+	/* Set the SPI protocol */
+	switch (proto) {
+	case SNOR_PROTO_1_1_1:
+		ifr |= QSPI_IFR_WIDTH_SINGLE_BIT_SPI;
+		break;
+
+	case SNOR_PROTO_1_1_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_OUTPUT;
+		break;
+
+	case SNOR_PROTO_1_1_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_OUTPUT;
+		break;
+
+	case SNOR_PROTO_1_2_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_IO;
+		break;
+
+	case SNOR_PROTO_1_4_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_IO;
+		break;
+
+	case SNOR_PROTO_2_2_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_CMD;
+		break;
+
+	case SNOR_PROTO_4_4_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_CMD;
+		break;
+
+	default:
+		return -EINVAL;
+	}
 
 	/* Compute instruction parameters */
 	if (cmd->enable.bits.instruction) {
@@ -434,7 +468,7 @@ static int atmel_qspi_read_reg(struct spi_nor *nor, u8 opcode,
 	cmd.rx_buf = buf;
 	cmd.buf_len = len;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static int atmel_qspi_write_reg(struct spi_nor *nor, u8 opcode,
@@ -450,7 +484,7 @@ static int atmel_qspi_write_reg(struct spi_nor *nor, u8 opcode,
 	cmd.tx_buf = buf;
 	cmd.buf_len = len;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static ssize_t atmel_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
@@ -469,7 +503,7 @@ static ssize_t atmel_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
 	cmd.tx_buf = write_buf;
 	cmd.buf_len = len;
 	ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE_MEM,
-				     QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				     nor->write_proto);
 	return (ret < 0) ? ret : len;
 }
 
@@ -484,7 +518,7 @@ static int atmel_qspi_erase(struct spi_nor *nor, loff_t offs)
 	cmd.instruction = nor->erase_opcode;
 	cmd.address = (u32)offs;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
@@ -493,27 +527,8 @@ static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	struct atmel_qspi *aq = nor->priv;
 	struct atmel_qspi_command cmd;
 	u8 num_mode_cycles, num_dummy_cycles;
-	u32 ifr_width;
 	ssize_t ret;
 
-	switch (nor->flash_read) {
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
-		ifr_width = QSPI_IFR_WIDTH_SINGLE_BIT_SPI;
-		break;
-
-	case SPI_NOR_DUAL:
-		ifr_width = QSPI_IFR_WIDTH_DUAL_OUTPUT;
-		break;
-
-	case SPI_NOR_QUAD:
-		ifr_width = QSPI_IFR_WIDTH_QUAD_OUTPUT;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
 	if (nor->read_dummy >= 2) {
 		num_mode_cycles = 2;
 		num_dummy_cycles = nor->read_dummy - 2;
@@ -536,7 +551,7 @@ static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	cmd.rx_buf = read_buf;
 	cmd.buf_len = len;
 	ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ_MEM,
-				     ifr_width);
+				     nor->read_proto);
 	return (ret < 0) ? ret : len;
 }
 
@@ -590,6 +605,20 @@ static irqreturn_t atmel_qspi_interrupt(int irq, void *dev_id)
 
 static int atmel_qspi_probe(struct platform_device *pdev)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_2_2 |
+			SNOR_HWCAPS_READ_2_2_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_READ_1_4_4 |
+			SNOR_HWCAPS_READ_4_4_4 |
+			SNOR_HWCAPS_PP |
+			SNOR_HWCAPS_PP_1_1_4 |
+			SNOR_HWCAPS_PP_1_4_4 |
+			SNOR_HWCAPS_PP_4_4_4,
+	};
 	struct device_node *child, *np = pdev->dev.of_node;
 	struct atmel_qspi *aq;
 	struct resource *res;
@@ -679,7 +708,7 @@ static int atmel_qspi_probe(struct platform_device *pdev)
 	if (err)
 		goto disable_clk;
 
-	err = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+	err = spi_nor_scan(nor, NULL, &hwcaps);
 	if (err)
 		goto disable_clk;
 
diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 9f8102de1b16a..40096d73536cf 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -855,15 +855,14 @@ static int cqspi_set_protocol(struct spi_nor *nor, const int read)
 	f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
 
 	if (read) {
-		switch (nor->flash_read) {
-		case SPI_NOR_NORMAL:
-		case SPI_NOR_FAST:
+		switch (nor->read_proto) {
+		case SNOR_PROTO_1_1_1:
 			f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
 			break;
-		case SPI_NOR_DUAL:
+		case SNOR_PROTO_1_1_2:
 			f_pdata->data_width = CQSPI_INST_TYPE_DUAL;
 			break;
-		case SPI_NOR_QUAD:
+		case SNOR_PROTO_1_1_4:
 			f_pdata->data_width = CQSPI_INST_TYPE_QUAD;
 			break;
 		default:
@@ -1069,6 +1068,13 @@ static void cqspi_controller_init(struct cqspi_st *cqspi)
 
 static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct platform_device *pdev = cqspi->pdev;
 	struct device *dev = &pdev->dev;
 	struct cqspi_flash_pdata *f_pdata;
@@ -1123,7 +1129,7 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
 			goto err;
 		}
 
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			goto err;
 
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 1476135e0d501..f17d22435bfcf 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -957,6 +957,10 @@ static void fsl_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
 
 static int fsl_qspi_probe(struct platform_device *pdev)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct device_node *np = pdev->dev.of_node;
 	struct device *dev = &pdev->dev;
 	struct fsl_qspi *q;
@@ -1065,7 +1069,7 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 		/* set the chip address for READID */
 		fsl_qspi_set_base_addr(q, nor);
 
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			goto mutex_failed;
 
diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c
index a286350627a66..d1106832b9d5e 100644
--- a/drivers/mtd/spi-nor/hisi-sfc.c
+++ b/drivers/mtd/spi-nor/hisi-sfc.c
@@ -120,19 +120,24 @@ static inline int wait_op_finish(struct hifmc_host *host)
 		(reg & FMC_INT_OP_DONE), 0, FMC_WAIT_TIMEOUT);
 }
 
-static int get_if_type(enum read_mode flash_read)
+static int get_if_type(enum spi_nor_protocol proto)
 {
 	enum hifmc_iftype if_type;
 
-	switch (flash_read) {
-	case SPI_NOR_DUAL:
+	switch (proto) {
+	case SNOR_PROTO_1_1_2:
 		if_type = IF_TYPE_DUAL;
 		break;
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_2_2:
+		if_type = IF_TYPE_DIO;
+		break;
+	case SNOR_PROTO_1_1_4:
 		if_type = IF_TYPE_QUAD;
 		break;
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
+	case SNOR_PROTO_1_4_4:
+		if_type = IF_TYPE_QIO;
+		break;
+	case SNOR_PROTO_1_1_1:
 	default:
 		if_type = IF_TYPE_STD;
 		break;
@@ -253,7 +258,10 @@ static int hisi_spi_nor_dma_transfer(struct spi_nor *nor, loff_t start_off,
 	writel(FMC_DMA_LEN_SET(len), host->regbase + FMC_DMA_LEN);
 
 	reg = OP_CFG_FM_CS(priv->chipselect);
-	if_type = get_if_type(nor->flash_read);
+	if (op_type == FMC_OP_READ)
+		if_type = get_if_type(nor->read_proto);
+	else
+		if_type = get_if_type(nor->write_proto);
 	reg |= OP_CFG_MEM_IF_TYPE(if_type);
 	if (op_type == FMC_OP_READ)
 		reg |= OP_CFG_DUMMY_NUM(nor->read_dummy >> 3);
@@ -321,6 +329,13 @@ static ssize_t hisi_spi_nor_write(struct spi_nor *nor, loff_t to,
 static int hisi_spi_nor_register(struct device_node *np,
 				struct hifmc_host *host)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct device *dev = host->dev;
 	struct spi_nor *nor;
 	struct hifmc_priv *priv;
@@ -362,7 +377,7 @@ static int hisi_spi_nor_register(struct device_node *np,
 	nor->read = hisi_spi_nor_read;
 	nor->write = hisi_spi_nor_write;
 	nor->erase = NULL;
-	ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+	ret = spi_nor_scan(nor, NULL, &hwcaps);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c
index 986a3d020a3a1..8a596bfeddff6 100644
--- a/drivers/mtd/spi-nor/intel-spi.c
+++ b/drivers/mtd/spi-nor/intel-spi.c
@@ -715,6 +715,11 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
 struct intel_spi *intel_spi_probe(struct device *dev,
 	struct resource *mem, const struct intel_spi_boardinfo *info)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	struct mtd_partition part;
 	struct intel_spi *ispi;
 	int ret;
@@ -746,7 +751,7 @@ struct intel_spi *intel_spi_probe(struct device *dev,
 	ispi->nor.write = intel_spi_write;
 	ispi->nor.erase = intel_spi_erase;
 
-	ret = spi_nor_scan(&ispi->nor, NULL, SPI_NOR_NORMAL);
+	ret = spi_nor_scan(&ispi->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_info(dev, "failed to locate the chip\n");
 		return ERR_PTR(ret);
diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index b6377707ce321..8a20ec4991c87 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c
@@ -123,20 +123,20 @@ static void mt8173_nor_set_read_mode(struct mt8173_nor *mt8173_nor)
 {
 	struct spi_nor *nor = &mt8173_nor->nor;
 
-	switch (nor->flash_read) {
-	case SPI_NOR_FAST:
+	switch (nor->read_proto) {
+	case SNOR_PROTO_1_1_1:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA3_REG);
 		writeb(MTK_NOR_FAST_READ, mt8173_nor->base +
 		       MTK_NOR_CFG1_REG);
 		break;
-	case SPI_NOR_DUAL:
+	case SNOR_PROTO_1_1_2:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA3_REG);
 		writeb(MTK_NOR_DUAL_READ_EN, mt8173_nor->base +
 		       MTK_NOR_DUAL_REG);
 		break;
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_1_4:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA4_REG);
 		writeb(MTK_NOR_QUAD_READ_EN, mt8173_nor->base +
@@ -408,6 +408,11 @@ static int mt8173_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
 static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
 			struct device_node *flash_node)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_PP,
+	};
 	int ret;
 	struct spi_nor *nor;
 
@@ -426,7 +431,7 @@ static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
 	nor->write_reg = mt8173_nor_write_reg;
 	nor->mtd.name = "mtk_nor";
 	/* initialized with NULL */
-	ret = spi_nor_scan(nor, NULL, SPI_NOR_DUAL);
+	ret = spi_nor_scan(nor, NULL, &hwcaps);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c
index 73a14f40928be..15374216d4d90 100644
--- a/drivers/mtd/spi-nor/nxp-spifi.c
+++ b/drivers/mtd/spi-nor/nxp-spifi.c
@@ -240,13 +240,12 @@ static int nxp_spifi_erase(struct spi_nor *nor, loff_t offs)
 
 static int nxp_spifi_setup_memory_cmd(struct nxp_spifi *spifi)
 {
-	switch (spifi->nor.flash_read) {
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
+	switch (spifi->nor.read_proto) {
+	case SNOR_PROTO_1_1_1:
 		spifi->mcmd = SPIFI_CMD_FIELDFORM_ALL_SERIAL;
 		break;
-	case SPI_NOR_DUAL:
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_1_2:
+	case SNOR_PROTO_1_1_4:
 		spifi->mcmd = SPIFI_CMD_FIELDFORM_QUAD_DUAL_DATA;
 		break;
 	default:
@@ -274,7 +273,11 @@ static void nxp_spifi_dummy_id_read(struct spi_nor *nor)
 static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 				 struct device_node *np)
 {
-	enum read_mode flash_read;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	u32 ctrl, property;
 	u16 mode = 0;
 	int ret;
@@ -308,13 +311,12 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 
 	if (mode & SPI_RX_DUAL) {
 		ctrl |= SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_DUAL;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
 	} else if (mode & SPI_RX_QUAD) {
 		ctrl &= ~SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	} else {
 		ctrl |= SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_NORMAL;
 	}
 
 	switch (mode & (SPI_CPHA | SPI_CPOL)) {
@@ -351,7 +353,7 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 	 */
 	nxp_spifi_dummy_id_read(&spifi->nor);
 
-	ret = spi_nor_scan(&spifi->nor, NULL, flash_read);
+	ret = spi_nor_scan(&spifi->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_err(spifi->dev, "device scan failed\n");
 		return ret;
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index dea8c9cbadf00..e653806070a15 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -149,24 +149,6 @@ static int read_cr(struct spi_nor *nor)
 	return val;
 }
 
-/*
- * Dummy Cycle calculation for different type of read.
- * It can be used to support more commands with
- * different dummy cycle requirements.
- */
-static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
-{
-	switch (nor->flash_read) {
-	case SPI_NOR_FAST:
-	case SPI_NOR_DUAL:
-	case SPI_NOR_QUAD:
-		return 8;
-	case SPI_NOR_NORMAL:
-		return 0;
-	}
-	return 0;
-}
-
 /*
  * Write status register 1 byte
  * Returns negative if error occurred.
@@ -1460,30 +1442,6 @@ static int spansion_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
-static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
-{
-	int status;
-
-	switch (JEDEC_MFR(info)) {
-	case SNOR_MFR_MACRONIX:
-		status = macronix_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Macronix quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	case SNOR_MFR_MICRON:
-		return 0;
-	default:
-		status = spansion_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Spansion quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	}
-}
-
 static int spi_nor_check(struct spi_nor *nor)
 {
 	if (!nor->dev || !nor->read || !nor->write ||
@@ -1536,8 +1494,323 @@ static int s3an_nor_scan(const struct flash_info *info, struct spi_nor *nor)
 	return 0;
 }
 
-int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
+struct spi_nor_read_command {
+	u8			num_mode_clocks;
+	u8			num_wait_states;
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+struct spi_nor_pp_command {
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+enum spi_nor_read_command_index {
+	SNOR_CMD_READ,
+	SNOR_CMD_READ_FAST,
+
+	/* Dual SPI */
+	SNOR_CMD_READ_1_1_2,
+	SNOR_CMD_READ_1_2_2,
+	SNOR_CMD_READ_2_2_2,
+
+	/* Quad SPI */
+	SNOR_CMD_READ_1_1_4,
+	SNOR_CMD_READ_1_4_4,
+	SNOR_CMD_READ_4_4_4,
+
+	SNOR_CMD_READ_MAX
+};
+
+enum spi_nor_pp_command_index {
+	SNOR_CMD_PP,
+
+	/* Quad SPI */
+	SNOR_CMD_PP_1_1_4,
+	SNOR_CMD_PP_1_4_4,
+	SNOR_CMD_PP_4_4_4,
+
+	SNOR_CMD_PP_MAX
+};
+
+struct spi_nor_flash_parameter {
+	u64				size;
+	u32				page_size;
+
+	struct spi_nor_hwcaps		hwcaps;
+	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
+	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
+
+	int (*quad_enable)(struct spi_nor *nor);
+};
+
+static void
+spi_nor_set_read_settings(struct spi_nor_read_command *read,
+			  u8 num_mode_clocks,
+			  u8 num_wait_states,
+			  u8 opcode,
+			  enum spi_nor_protocol proto)
+{
+	read->num_mode_clocks = num_mode_clocks;
+	read->num_wait_states = num_wait_states;
+	read->opcode = opcode;
+	read->proto = proto;
+}
+
+static void
+spi_nor_set_pp_settings(struct spi_nor_pp_command *pp,
+			u8 opcode,
+			enum spi_nor_protocol proto)
+{
+	pp->opcode = opcode;
+	pp->proto = proto;
+}
+
+static int spi_nor_init_params(struct spi_nor *nor,
+			       const struct flash_info *info,
+			       struct spi_nor_flash_parameter *params)
+{
+	/* Set legacy flash parameters as default. */
+	memset(params, 0, sizeof(*params));
+
+	/* Set SPI NOR sizes. */
+	params->size = info->sector_size * info->n_sectors;
+	params->page_size = info->page_size;
+
+	/* (Fast) Read settings. */
+	params->hwcaps.mask |= SNOR_HWCAPS_READ;
+	spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ],
+				  0, 0, SPINOR_OP_READ,
+				  SNOR_PROTO_1_1_1);
+
+	if (!(info->flags & SPI_NOR_NO_FR)) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_FAST],
+					  0, 8, SPINOR_OP_READ_FAST,
+					  SNOR_PROTO_1_1_1);
+	}
+
+	if (info->flags & SPI_NOR_DUAL_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_2],
+					  0, 8, SPINOR_OP_READ_1_1_2,
+					  SNOR_PROTO_1_1_2);
+	}
+
+	if (info->flags & SPI_NOR_QUAD_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_4],
+					  0, 8, SPINOR_OP_READ_1_1_4,
+					  SNOR_PROTO_1_1_4);
+	}
+
+	/* Page Program settings. */
+	params->hwcaps.mask |= SNOR_HWCAPS_PP;
+	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
+				SPINOR_OP_PP, SNOR_PROTO_1_1_1);
+
+	/* Select the procedure to set the Quad Enable bit. */
+	if (params->hwcaps.mask & (SNOR_HWCAPS_READ_QUAD |
+				   SNOR_HWCAPS_PP_QUAD)) {
+		switch (JEDEC_MFR(info)) {
+		case SNOR_MFR_MACRONIX:
+			params->quad_enable = macronix_quad_enable;
+			break;
+
+		case SNOR_MFR_MICRON:
+			break;
+
+		default:
+			params->quad_enable = spansion_quad_enable;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int spi_nor_hwcaps2cmd(u32 hwcaps, const int table[][2], size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; i++)
+		if (table[i][0] == (int)hwcaps)
+			return table[i][1];
+
+	return -EINVAL;
+}
+
+static int spi_nor_hwcaps_read2cmd(u32 hwcaps)
+{
+	static const int hwcaps_read2cmd[][2] = {
+		{ SNOR_HWCAPS_READ,		SNOR_CMD_READ },
+		{ SNOR_HWCAPS_READ_FAST,	SNOR_CMD_READ_FAST },
+		{ SNOR_HWCAPS_READ_1_1_2,	SNOR_CMD_READ_1_1_2 },
+		{ SNOR_HWCAPS_READ_1_2_2,	SNOR_CMD_READ_1_2_2 },
+		{ SNOR_HWCAPS_READ_2_2_2,	SNOR_CMD_READ_2_2_2 },
+		{ SNOR_HWCAPS_READ_1_1_4,	SNOR_CMD_READ_1_1_4 },
+		{ SNOR_HWCAPS_READ_1_4_4,	SNOR_CMD_READ_1_4_4 },
+		{ SNOR_HWCAPS_READ_4_4_4,	SNOR_CMD_READ_4_4_4 },
+	};
+
+	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd,
+				  ARRAY_SIZE(hwcaps_read2cmd));
+}
+
+static int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
+{
+	static const int hwcaps_pp2cmd[][2] = {
+		{ SNOR_HWCAPS_PP,		SNOR_CMD_PP },
+		{ SNOR_HWCAPS_PP_1_1_4,		SNOR_CMD_PP_1_1_4 },
+		{ SNOR_HWCAPS_PP_1_4_4,		SNOR_CMD_PP_1_4_4 },
+		{ SNOR_HWCAPS_PP_4_4_4,		SNOR_CMD_PP_4_4_4 },
+	};
+
+	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_pp2cmd,
+				  ARRAY_SIZE(hwcaps_pp2cmd));
+}
+
+static int spi_nor_select_read(struct spi_nor *nor,
+			       const struct spi_nor_flash_parameter *params,
+			       u32 shared_hwcaps)
+{
+	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_READ_MASK) - 1;
+	const struct spi_nor_read_command *read;
+
+	if (best_match < 0)
+		return -EINVAL;
+
+	cmd = spi_nor_hwcaps_read2cmd(BIT(best_match));
+	if (cmd < 0)
+		return -EINVAL;
+
+	read = &params->reads[cmd];
+	nor->read_opcode = read->opcode;
+	nor->read_proto = read->proto;
+
+	/*
+	 * In the spi-nor framework, we don't need to make the difference
+	 * between mode clock cycles and wait state clock cycles.
+	 * Indeed, the value of the mode clock cycles is used by a QSPI
+	 * flash memory to know whether it should enter or leave its 0-4-4
+	 * (Continuous Read / XIP) mode.
+	 * eXecution In Place is out of the scope of the mtd sub-system.
+	 * Hence we choose to merge both mode and wait state clock cycles
+	 * into the so called dummy clock cycles.
+	 */
+	nor->read_dummy = read->num_mode_clocks + read->num_wait_states;
+	return 0;
+}
+
+static int spi_nor_select_pp(struct spi_nor *nor,
+			     const struct spi_nor_flash_parameter *params,
+			     u32 shared_hwcaps)
+{
+	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_PP_MASK) - 1;
+	const struct spi_nor_pp_command *pp;
+
+	if (best_match < 0)
+		return -EINVAL;
+
+	cmd = spi_nor_hwcaps_pp2cmd(BIT(best_match));
+	if (cmd < 0)
+		return -EINVAL;
+
+	pp = &params->page_programs[cmd];
+	nor->program_opcode = pp->opcode;
+	nor->write_proto = pp->proto;
+	return 0;
+}
+
+static int spi_nor_select_erase(struct spi_nor *nor,
+				const struct flash_info *info)
+{
+	struct mtd_info *mtd = &nor->mtd;
+
+#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
+	/* prefer "small sector" erase if possible */
+	if (info->flags & SECT_4K) {
+		nor->erase_opcode = SPINOR_OP_BE_4K;
+		mtd->erasesize = 4096;
+	} else if (info->flags & SECT_4K_PMC) {
+		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
+		mtd->erasesize = 4096;
+	} else
+#endif
+	{
+		nor->erase_opcode = SPINOR_OP_SE;
+		mtd->erasesize = info->sector_size;
+	}
+	return 0;
+}
+
+static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
+			 const struct spi_nor_flash_parameter *params,
+			 const struct spi_nor_hwcaps *hwcaps)
+{
+	u32 ignored_mask, shared_mask;
+	bool enable_quad_io;
+	int err;
+
+	/*
+	 * Keep only the hardware capabilities supported by both the SPI
+	 * controller and the SPI flash memory.
+	 */
+	shared_mask = hwcaps->mask & params->hwcaps.mask;
+
+	/* SPI n-n-n protocols are not supported yet. */
+	ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
+			SNOR_HWCAPS_READ_4_4_4 |
+			SNOR_HWCAPS_PP_4_4_4);
+	if (shared_mask & ignored_mask) {
+		dev_dbg(nor->dev,
+			"SPI n-n-n protocols are not supported yet.\n");
+		shared_mask &= ~ignored_mask;
+	}
+
+	/* Select the (Fast) Read command. */
+	err = spi_nor_select_read(nor, params, shared_mask);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select read settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Select the Page Program command. */
+	err = spi_nor_select_pp(nor, params, shared_mask);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select write settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Select the Sector Erase command. */
+	err = spi_nor_select_erase(nor, info);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select erase settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Enable Quad I/O if needed. */
+	enable_quad_io = (spi_nor_get_protocol_width(nor->read_proto) == 4 ||
+			  spi_nor_get_protocol_width(nor->write_proto) == 4);
+	if (enable_quad_io && params->quad_enable) {
+		err = params->quad_enable(nor);
+		if (err) {
+			dev_err(nor->dev, "quad mode not supported\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+		 const struct spi_nor_hwcaps *hwcaps)
 {
+	struct spi_nor_flash_parameter params;
 	const struct flash_info *info = NULL;
 	struct device *dev = nor->dev;
 	struct mtd_info *mtd = &nor->mtd;
@@ -1549,6 +1822,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (ret)
 		return ret;
 
+	/* Reset SPI protocol for all commands. */
+	nor->reg_proto = SNOR_PROTO_1_1_1;
+	nor->read_proto = SNOR_PROTO_1_1_1;
+	nor->write_proto = SNOR_PROTO_1_1_1;
+
 	if (name)
 		info = spi_nor_match_id(name);
 	/* Try to auto-detect if chip name wasn't specified or not found */
@@ -1591,6 +1869,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (info->flags & SPI_S3AN)
 		nor->flags |=  SNOR_F_READY_XSR_RDY;
 
+	/* Parse the Serial Flash Discoverable Parameters table. */
+	ret = spi_nor_init_params(nor, info, &params);
+	if (ret)
+		return ret;
+
 	/*
 	 * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
 	 * with the software protection bits set
@@ -1611,7 +1894,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	mtd->type = MTD_NORFLASH;
 	mtd->writesize = 1;
 	mtd->flags = MTD_CAP_NORFLASH;
-	mtd->size = info->sector_size * info->n_sectors;
+	mtd->size = params.size;
 	mtd->_erase = spi_nor_erase;
 	mtd->_read = spi_nor_read;
 
@@ -1642,75 +1925,38 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (info->flags & NO_CHIP_ERASE)
 		nor->flags |= SNOR_F_NO_OP_CHIP_ERASE;
 
-#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
-	/* prefer "small sector" erase if possible */
-	if (info->flags & SECT_4K) {
-		nor->erase_opcode = SPINOR_OP_BE_4K;
-		mtd->erasesize = 4096;
-	} else if (info->flags & SECT_4K_PMC) {
-		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
-		mtd->erasesize = 4096;
-	} else
-#endif
-	{
-		nor->erase_opcode = SPINOR_OP_SE;
-		mtd->erasesize = info->sector_size;
-	}
-
 	if (info->flags & SPI_NOR_NO_ERASE)
 		mtd->flags |= MTD_NO_ERASE;
 
 	mtd->dev.parent = dev;
-	nor->page_size = info->page_size;
+	nor->page_size = params.page_size;
 	mtd->writebufsize = nor->page_size;
 
 	if (np) {
 		/* If we were instantiated by DT, use it */
 		if (of_property_read_bool(np, "m25p,fast-read"))
-			nor->flash_read = SPI_NOR_FAST;
+			params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 		else
-			nor->flash_read = SPI_NOR_NORMAL;
+			params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
 	} else {
 		/* If we weren't instantiated by DT, default to fast-read */
-		nor->flash_read = SPI_NOR_FAST;
+		params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 	}
 
 	/* Some devices cannot do fast-read, no matter what DT tells us */
 	if (info->flags & SPI_NOR_NO_FR)
-		nor->flash_read = SPI_NOR_NORMAL;
-
-	/* Quad/Dual-read mode takes precedence over fast/normal */
-	if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) {
-		ret = set_quad_mode(nor, info);
-		if (ret) {
-			dev_err(dev, "quad mode not supported\n");
-			return ret;
-		}
-		nor->flash_read = SPI_NOR_QUAD;
-	} else if (mode == SPI_NOR_DUAL && info->flags & SPI_NOR_DUAL_READ) {
-		nor->flash_read = SPI_NOR_DUAL;
-	}
-
-	/* Default commands */
-	switch (nor->flash_read) {
-	case SPI_NOR_QUAD:
-		nor->read_opcode = SPINOR_OP_READ_1_1_4;
-		break;
-	case SPI_NOR_DUAL:
-		nor->read_opcode = SPINOR_OP_READ_1_1_2;
-		break;
-	case SPI_NOR_FAST:
-		nor->read_opcode = SPINOR_OP_READ_FAST;
-		break;
-	case SPI_NOR_NORMAL:
-		nor->read_opcode = SPINOR_OP_READ;
-		break;
-	default:
-		dev_err(dev, "No Read opcode defined\n");
-		return -EINVAL;
-	}
+		params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
 
-	nor->program_opcode = SPINOR_OP_PP;
+	/*
+	 * Configure the SPI memory:
+	 * - select op codes for (Fast) Read, Page Program and Sector Erase.
+	 * - set the number of dummy cycles (mode cycles + wait states).
+	 * - set the SPI protocols for register and memory accesses.
+	 * - set the Quad Enable bit if needed (required by SPI x-y-4 protos).
+	 */
+	ret = spi_nor_setup(nor, info, &params, hwcaps);
+	if (ret)
+		return ret;
 
 	if (info->addr_width)
 		nor->addr_width = info->addr_width;
@@ -1732,8 +1978,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 		return -EINVAL;
 	}
 
-	nor->read_dummy = spi_nor_read_dummy_cycles(nor);
-
 	if (info->flags & SPI_S3AN) {
 		ret = s3an_nor_scan(info, nor);
 		if (ret)
diff --git a/drivers/mtd/spi-nor/stm32-quadspi.c b/drivers/mtd/spi-nor/stm32-quadspi.c
index ae45f81b8cd33..1056e7408d2a1 100644
--- a/drivers/mtd/spi-nor/stm32-quadspi.c
+++ b/drivers/mtd/spi-nor/stm32-quadspi.c
@@ -192,15 +192,15 @@ static void stm32_qspi_set_framemode(struct spi_nor *nor,
 	cmd->framemode = CCR_IMODE_1;
 
 	if (read) {
-		switch (nor->flash_read) {
-		case SPI_NOR_NORMAL:
-		case SPI_NOR_FAST:
+		switch (nor->read_proto) {
+		default:
+		case SNOR_PROTO_1_1_1:
 			dmode = CCR_DMODE_1;
 			break;
-		case SPI_NOR_DUAL:
+		case SNOR_PROTO_1_1_2:
 			dmode = CCR_DMODE_2;
 			break;
-		case SPI_NOR_QUAD:
+		case SNOR_PROTO_1_1_4:
 			dmode = CCR_DMODE_4;
 			break;
 		}
@@ -480,7 +480,12 @@ static void stm32_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
 static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 				  struct device_node *np)
 {
-	u32 width, flash_read, presc, cs_num, max_rate = 0;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
+	u32 width, presc, cs_num, max_rate = 0;
 	struct stm32_qspi_flash *flash;
 	struct mtd_info *mtd;
 	int ret;
@@ -499,12 +504,10 @@ static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 		width = 1;
 
 	if (width == 4)
-		flash_read = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	else if (width == 2)
-		flash_read = SPI_NOR_DUAL;
-	else if (width == 1)
-		flash_read = SPI_NOR_NORMAL;
-	else
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+	else if (width != 1)
 		return -EINVAL;
 
 	flash = &qspi->flash[cs_num];
@@ -539,7 +542,7 @@ static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 	 */
 	flash->fsize = FSIZE_VAL(SZ_1K);
 
-	ret = spi_nor_scan(&flash->nor, NULL, flash_read);
+	ret = spi_nor_scan(&flash->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_err(qspi->dev, "device scan failed\n");
 		return ret;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index f2a718030476f..60db1585f94c8 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -119,13 +119,63 @@
 /* Configuration Register bits. */
 #define CR_QUAD_EN_SPAN		BIT(1)	/* Spansion Quad I/O */
 
-enum read_mode {
-	SPI_NOR_NORMAL = 0,
-	SPI_NOR_FAST,
-	SPI_NOR_DUAL,
-	SPI_NOR_QUAD,
+/* Supported SPI protocols */
+#define SNOR_PROTO_INST_MASK	GENMASK(23, 16)
+#define SNOR_PROTO_INST_SHIFT	16
+#define SNOR_PROTO_INST(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_INST_SHIFT) & \
+	 SNOR_PROTO_INST_MASK)
+
+#define SNOR_PROTO_ADDR_MASK	GENMASK(15, 8)
+#define SNOR_PROTO_ADDR_SHIFT	8
+#define SNOR_PROTO_ADDR(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_ADDR_SHIFT) & \
+	 SNOR_PROTO_ADDR_MASK)
+
+#define SNOR_PROTO_DATA_MASK	GENMASK(7, 0)
+#define SNOR_PROTO_DATA_SHIFT	0
+#define SNOR_PROTO_DATA(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \
+	 SNOR_PROTO_DATA_MASK)
+
+#define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits)	\
+	(SNOR_PROTO_INST(_inst_nbits) |				\
+	 SNOR_PROTO_ADDR(_addr_nbits) |				\
+	 SNOR_PROTO_DATA(_data_nbits))
+
+enum spi_nor_protocol {
+	SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
+	SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2),
+	SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4),
+	SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2),
+	SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
+	SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
+	SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
 };
 
+static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >>
+		SNOR_PROTO_INST_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_addr_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_ADDR_MASK)) >>
+		SNOR_PROTO_ADDR_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_data_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_DATA_MASK)) >>
+		SNOR_PROTO_DATA_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto)
+{
+	return spi_nor_get_protocol_data_nbits(proto);
+}
+
 #define SPI_NOR_MAX_CMD_SIZE	8
 enum spi_nor_ops {
 	SPI_NOR_OPS_READ = 0,
@@ -154,9 +204,11 @@ enum spi_nor_option_flags {
  * @read_opcode:	the read opcode
  * @read_dummy:		the dummy needed by the read operation
  * @program_opcode:	the program opcode
- * @flash_read:		the mode of the read
  * @sst_write_second:	used by the SST write operation
  * @flags:		flag options for the current SPI-NOR (SNOR_F_*)
+ * @read_proto:		the SPI protocol for read operations
+ * @write_proto:	the SPI protocol for write operations
+ * @reg_proto		the SPI protocol for read_reg/write_reg/erase operations
  * @cmd_buf:		used by the write_reg
  * @prepare:		[OPTIONAL] do some preparations for the
  *			read/write/erase/lock/unlock operations
@@ -185,7 +237,9 @@ struct spi_nor {
 	u8			read_opcode;
 	u8			read_dummy;
 	u8			program_opcode;
-	enum read_mode		flash_read;
+	enum spi_nor_protocol	read_proto;
+	enum spi_nor_protocol	write_proto;
+	enum spi_nor_protocol	reg_proto;
 	bool			sst_write_second;
 	u32			flags;
 	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
@@ -219,11 +273,57 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
 	return mtd_get_of_node(&nor->mtd);
 }
 
+/**
+ * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
+ * supported by the SPI controller (bus master).
+ * @mask:		the bitmask listing all the supported hw capabilies
+ */
+struct spi_nor_hwcaps {
+	u32	mask;
+};
+
+/*
+ *(Fast) Read capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * As a matter of performances, it is relevant to use Quad SPI protocols first,
+ * then Dual SPI protocols before Fast Read and lastly (Slow) Read.
+ */
+#define SNOR_HWCAPS_READ_MASK		GENMASK(7, 0)
+#define SNOR_HWCAPS_READ		BIT(0)
+#define SNOR_HWCAPS_READ_FAST		BIT(1)
+
+#define SNOR_HWCAPS_READ_DUAL		GENMASK(4, 2)
+#define SNOR_HWCAPS_READ_1_1_2		BIT(2)
+#define SNOR_HWCAPS_READ_1_2_2		BIT(3)
+#define SNOR_HWCAPS_READ_2_2_2		BIT(4)
+
+#define SNOR_HWCAPS_READ_QUAD		GENMASK(7, 5)
+#define SNOR_HWCAPS_READ_1_1_4		BIT(5)
+#define SNOR_HWCAPS_READ_1_4_4		BIT(6)
+#define SNOR_HWCAPS_READ_4_4_4		BIT(7)
+
+/*
+ * Page Program capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the
+ * legacy SPI 1-1-1 protocol.
+ * Note that Dual Page Programs are not supported because there is no existing
+ * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
+ * implements such commands.
+ */
+#define SNOR_HWCAPS_PP_MASK	GENMASK(19, 16)
+#define SNOR_HWCAPS_PP		BIT(16)
+
+#define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
+#define SNOR_HWCAPS_PP_1_1_4	BIT(17)
+#define SNOR_HWCAPS_PP_1_4_4	BIT(18)
+#define SNOR_HWCAPS_PP_4_4_4	BIT(19)
+
 /**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
  * @name:	the chip type name
- * @mode:	the read mode supported by the driver
+ * @hwcaps:	the hardware capabilities supported by the controller driver
  *
  * The drivers can use this fuction to scan the SPI NOR.
  * In the scanning, it will try to get all the necessary information to
@@ -233,6 +333,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
  *
  * Return: 0 for success, others for failure.
  */
-int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode);
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+		 const struct spi_nor_hwcaps *hwcaps);
 
 #endif
-- 
GitLab


From 138f5dc61b63c3e89dc133c4faed6b6d0a91b7fd Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Tue, 25 Apr 2017 22:08:47 +0200
Subject: [PATCH 0058/1958] mtd: m25p80: add support of SPI 1-2-2 and 1-4-4
 protocols

Before this patch, m25p80_read() supported few SPI protocols:
- regular SPI 1-1-1
- SPI Dual Output 1-1-2
- SPI Quad Output 1-1-4
On the other hand, m25p80_write() only supported SPI 1-1-1.

This patch updates both m25p80_read() and m25p80_write() functions to let
them support SPI 1-2-2 and SPI 1-4-4 protocols for Fast Read and Page
Program SPI commands.

It adopts a conservative approach to avoid regressions. Hence the new
implementations try to be as close as possible to the old implementations,
so the main differences are:
- the tx_nbits values now being set properly for the spi_transfer
  structures carrying the (op code + address/dummy) bytes
- and the spi_transfer structure being split into 2 spi_transfer
  structures when the numbers of I/O lines are different for op code and
  for address/dummy byte transfers on the SPI bus.

Besides, the current spi-nor framework supports neither the SPI 2-2-2 nor
the SPI 4-4-4 protocols. So, for now, we don't need to update the
m25p80_{read|write}_reg() functions as SPI 1-1-1 is the only one possible
protocol.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/devices/m25p80.c | 102 +++++++++++++++++++++++++++--------
 1 file changed, 79 insertions(+), 23 deletions(-)

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 07073f4ce0bd0..00eea6fd379cc 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -78,11 +78,17 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 {
 	struct m25p *flash = nor->priv;
 	struct spi_device *spi = flash->spi;
-	struct spi_transfer t[2] = {};
+	unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
+	struct spi_transfer t[3] = {};
 	struct spi_message m;
 	int cmd_sz = m25p_cmdsz(nor);
 	ssize_t ret;
 
+	/* get transfer protocols. */
+	inst_nbits = spi_nor_get_protocol_inst_nbits(nor->write_proto);
+	addr_nbits = spi_nor_get_protocol_addr_nbits(nor->write_proto);
+	data_nbits = spi_nor_get_protocol_data_nbits(nor->write_proto);
+
 	spi_message_init(&m);
 
 	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
@@ -92,12 +98,27 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 	m25p_addr2cmd(nor, to, flash->command);
 
 	t[0].tx_buf = flash->command;
+	t[0].tx_nbits = inst_nbits;
 	t[0].len = cmd_sz;
 	spi_message_add_tail(&t[0], &m);
 
-	t[1].tx_buf = buf;
-	t[1].len = len;
-	spi_message_add_tail(&t[1], &m);
+	/* split the op code and address bytes into two transfers if needed. */
+	data_idx = 1;
+	if (addr_nbits != inst_nbits) {
+		t[0].len = 1;
+
+		t[1].tx_buf = &flash->command[1];
+		t[1].tx_nbits = addr_nbits;
+		t[1].len = cmd_sz - 1;
+		spi_message_add_tail(&t[1], &m);
+
+		data_idx = 2;
+	}
+
+	t[data_idx].tx_buf = buf;
+	t[data_idx].tx_nbits = data_nbits;
+	t[data_idx].len = len;
+	spi_message_add_tail(&t[data_idx], &m);
 
 	ret = spi_sync(spi, &m);
 	if (ret)
@@ -109,11 +130,6 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 	return ret;
 }
 
-static inline unsigned int m25p80_rx_nbits(struct spi_nor *nor)
-{
-	return spi_nor_get_protocol_data_nbits(nor->read_proto);
-}
-
 /*
  * Read an address range from the nor chip.  The address range
  * may be any size provided it is within the physical boundaries.
@@ -123,13 +139,20 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 {
 	struct m25p *flash = nor->priv;
 	struct spi_device *spi = flash->spi;
-	struct spi_transfer t[2];
+	unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
+	struct spi_transfer t[3];
 	struct spi_message m;
 	unsigned int dummy = nor->read_dummy;
 	ssize_t ret;
+	int cmd_sz;
+
+	/* get transfer protocols. */
+	inst_nbits = spi_nor_get_protocol_inst_nbits(nor->read_proto);
+	addr_nbits = spi_nor_get_protocol_addr_nbits(nor->read_proto);
+	data_nbits = spi_nor_get_protocol_data_nbits(nor->read_proto);
 
 	/* convert the dummy cycles to the number of bytes */
-	dummy /= 8;
+	dummy = (dummy * addr_nbits) / 8;
 
 	if (spi_flash_read_supported(spi)) {
 		struct spi_flash_read_message msg;
@@ -142,10 +165,9 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 		msg.read_opcode = nor->read_opcode;
 		msg.addr_width = nor->addr_width;
 		msg.dummy_bytes = dummy;
-		/* TODO: Support other combinations */
-		msg.opcode_nbits = SPI_NBITS_SINGLE;
-		msg.addr_nbits = SPI_NBITS_SINGLE;
-		msg.data_nbits = m25p80_rx_nbits(nor);
+		msg.opcode_nbits = inst_nbits;
+		msg.addr_nbits = addr_nbits;
+		msg.data_nbits = data_nbits;
 
 		ret = spi_flash_read(spi, &msg);
 		if (ret < 0)
@@ -160,20 +182,45 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 	m25p_addr2cmd(nor, from, flash->command);
 
 	t[0].tx_buf = flash->command;
+	t[0].tx_nbits = inst_nbits;
 	t[0].len = m25p_cmdsz(nor) + dummy;
 	spi_message_add_tail(&t[0], &m);
 
-	t[1].rx_buf = buf;
-	t[1].rx_nbits = m25p80_rx_nbits(nor);
-	t[1].len = min3(len, spi_max_transfer_size(spi),
-			spi_max_message_size(spi) - t[0].len);
-	spi_message_add_tail(&t[1], &m);
+	/*
+	 * Set all dummy/mode cycle bits to avoid sending some manufacturer
+	 * specific pattern, which might make the memory enter its Continuous
+	 * Read mode by mistake.
+	 * Based on the different mode cycle bit patterns listed and described
+	 * in the JESD216B specification, the 0xff value works for all memories
+	 * and all manufacturers.
+	 */
+	cmd_sz = t[0].len;
+	memset(flash->command + cmd_sz - dummy, 0xff, dummy);
+
+	/* split the op code and address bytes into two transfers if needed. */
+	data_idx = 1;
+	if (addr_nbits != inst_nbits) {
+		t[0].len = 1;
+
+		t[1].tx_buf = &flash->command[1];
+		t[1].tx_nbits = addr_nbits;
+		t[1].len = cmd_sz - 1;
+		spi_message_add_tail(&t[1], &m);
+
+		data_idx = 2;
+	}
+
+	t[data_idx].rx_buf = buf;
+	t[data_idx].rx_nbits = data_nbits;
+	t[data_idx].len = min3(len, spi_max_transfer_size(spi),
+			       spi_max_message_size(spi) - cmd_sz);
+	spi_message_add_tail(&t[data_idx], &m);
 
 	ret = spi_sync(spi, &m);
 	if (ret)
 		return ret;
 
-	ret = m.actual_length - m25p_cmdsz(nor) - dummy;
+	ret = m.actual_length - cmd_sz;
 	if (ret < 0)
 		return -EIO;
 	return ret;
@@ -218,11 +265,20 @@ static int m25p_probe(struct spi_device *spi)
 	spi_set_drvdata(spi, flash);
 	flash->spi = spi;
 
-	if (spi->mode & SPI_RX_QUAD)
+	if (spi->mode & SPI_RX_QUAD) {
 		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
-	else if (spi->mode & SPI_RX_DUAL)
+
+		if (spi->mode & SPI_TX_QUAD)
+			hwcaps.mask |= (SNOR_HWCAPS_READ_1_4_4 |
+					SNOR_HWCAPS_PP_1_1_4 |
+					SNOR_HWCAPS_PP_1_4_4);
+	} else if (spi->mode & SPI_RX_DUAL) {
 		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
 
+		if (spi->mode & SPI_TX_DUAL)
+			hwcaps.mask |= SNOR_HWCAPS_READ_1_2_2;
+	}
+
 	if (data && data->name)
 		nor->mtd.name = data->name;
 
-- 
GitLab


From 15f55331527b1422eae683477f8a31fdfae93316 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Tue, 25 Apr 2017 22:08:48 +0200
Subject: [PATCH 0059/1958] mtd: spi-nor: introduce Double Transfer Rate (DTR)
 SPI protocols

This patch introduces support to Double Transfer Rate (DTR) SPI protocols.
DTR is used only for Fast Read operations.

According to manufacturer datasheets, whatever the number of I/O lines
used during instruction (x) and address/mode/dummy (y) clock cycles, DTR
is used only during data (z) clock cycles of SPI x-y-z protocols.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 10 ++++++++
 include/linux/mtd/spi-nor.h   | 48 +++++++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index e653806070a15..2062a3abba72c 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -203,6 +203,10 @@ static inline u8 spi_nor_convert_3to4_read(u8 opcode)
 		{ SPINOR_OP_READ_1_2_2,	SPINOR_OP_READ_1_2_2_4B },
 		{ SPINOR_OP_READ_1_1_4,	SPINOR_OP_READ_1_1_4_4B },
 		{ SPINOR_OP_READ_1_4_4,	SPINOR_OP_READ_1_4_4_4B },
+
+		{ SPINOR_OP_READ_1_1_1_DTR,	SPINOR_OP_READ_1_1_1_DTR_4B },
+		{ SPINOR_OP_READ_1_2_2_DTR,	SPINOR_OP_READ_1_2_2_DTR_4B },
+		{ SPINOR_OP_READ_1_4_4_DTR,	SPINOR_OP_READ_1_4_4_DTR_4B },
 	};
 
 	return spi_nor_convert_opcode(opcode, spi_nor_3to4_read,
@@ -1509,16 +1513,19 @@ struct spi_nor_pp_command {
 enum spi_nor_read_command_index {
 	SNOR_CMD_READ,
 	SNOR_CMD_READ_FAST,
+	SNOR_CMD_READ_1_1_1_DTR,
 
 	/* Dual SPI */
 	SNOR_CMD_READ_1_1_2,
 	SNOR_CMD_READ_1_2_2,
 	SNOR_CMD_READ_2_2_2,
+	SNOR_CMD_READ_1_2_2_DTR,
 
 	/* Quad SPI */
 	SNOR_CMD_READ_1_1_4,
 	SNOR_CMD_READ_1_4_4,
 	SNOR_CMD_READ_4_4_4,
+	SNOR_CMD_READ_1_4_4_DTR,
 
 	SNOR_CMD_READ_MAX
 };
@@ -1646,12 +1653,15 @@ static int spi_nor_hwcaps_read2cmd(u32 hwcaps)
 	static const int hwcaps_read2cmd[][2] = {
 		{ SNOR_HWCAPS_READ,		SNOR_CMD_READ },
 		{ SNOR_HWCAPS_READ_FAST,	SNOR_CMD_READ_FAST },
+		{ SNOR_HWCAPS_READ_1_1_1_DTR,	SNOR_CMD_READ_1_1_1_DTR },
 		{ SNOR_HWCAPS_READ_1_1_2,	SNOR_CMD_READ_1_1_2 },
 		{ SNOR_HWCAPS_READ_1_2_2,	SNOR_CMD_READ_1_2_2 },
 		{ SNOR_HWCAPS_READ_2_2_2,	SNOR_CMD_READ_2_2_2 },
+		{ SNOR_HWCAPS_READ_1_2_2_DTR,	SNOR_CMD_READ_1_2_2_DTR },
 		{ SNOR_HWCAPS_READ_1_1_4,	SNOR_CMD_READ_1_1_4 },
 		{ SNOR_HWCAPS_READ_1_4_4,	SNOR_CMD_READ_1_4_4 },
 		{ SNOR_HWCAPS_READ_4_4_4,	SNOR_CMD_READ_4_4_4 },
+		{ SNOR_HWCAPS_READ_1_4_4_DTR,	SNOR_CMD_READ_1_4_4_DTR },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd,
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 60db1585f94c8..313dbe56f31a0 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -73,6 +73,15 @@
 #define SPINOR_OP_BE_32K_4B	0x5c	/* Erase 32KiB block */
 #define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
+/* Double Transfer Rate opcodes - defined in JEDEC JESD216B. */
+#define SPINOR_OP_READ_1_1_1_DTR	0x0d
+#define SPINOR_OP_READ_1_2_2_DTR	0xbd
+#define SPINOR_OP_READ_1_4_4_DTR	0xed
+
+#define SPINOR_OP_READ_1_1_1_DTR_4B	0x0e
+#define SPINOR_OP_READ_1_2_2_DTR_4B	0xbe
+#define SPINOR_OP_READ_1_4_4_DTR_4B	0xee
+
 /* Used for SST flashes only. */
 #define SPINOR_OP_BP		0x02	/* Byte program */
 #define SPINOR_OP_WRDI		0x04	/* Write disable */
@@ -138,10 +147,15 @@
 	((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \
 	 SNOR_PROTO_DATA_MASK)
 
+#define SNOR_PROTO_IS_DTR	BIT(24)	/* Double Transfer Rate */
+
 #define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits)	\
 	(SNOR_PROTO_INST(_inst_nbits) |				\
 	 SNOR_PROTO_ADDR(_addr_nbits) |				\
 	 SNOR_PROTO_DATA(_data_nbits))
+#define SNOR_PROTO_DTR(_inst_nbits, _addr_nbits, _data_nbits)	\
+	(SNOR_PROTO_IS_DTR |					\
+	 SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits))
 
 enum spi_nor_protocol {
 	SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
@@ -151,8 +165,17 @@ enum spi_nor_protocol {
 	SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
 	SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
 	SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
+
+	SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1),
+	SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2),
+	SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4),
 };
 
+static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto)
+{
+	return !!(proto & SNOR_PROTO_IS_DTR);
+}
+
 static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto)
 {
 	return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >>
@@ -288,19 +311,22 @@ struct spi_nor_hwcaps {
  * As a matter of performances, it is relevant to use Quad SPI protocols first,
  * then Dual SPI protocols before Fast Read and lastly (Slow) Read.
  */
-#define SNOR_HWCAPS_READ_MASK		GENMASK(7, 0)
+#define SNOR_HWCAPS_READ_MASK		GENMASK(10, 0)
 #define SNOR_HWCAPS_READ		BIT(0)
 #define SNOR_HWCAPS_READ_FAST		BIT(1)
-
-#define SNOR_HWCAPS_READ_DUAL		GENMASK(4, 2)
-#define SNOR_HWCAPS_READ_1_1_2		BIT(2)
-#define SNOR_HWCAPS_READ_1_2_2		BIT(3)
-#define SNOR_HWCAPS_READ_2_2_2		BIT(4)
-
-#define SNOR_HWCAPS_READ_QUAD		GENMASK(7, 5)
-#define SNOR_HWCAPS_READ_1_1_4		BIT(5)
-#define SNOR_HWCAPS_READ_1_4_4		BIT(6)
-#define SNOR_HWCAPS_READ_4_4_4		BIT(7)
+#define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
+
+#define SNOR_HWCAPS_READ_DUAL		GENMASK(6, 3)
+#define SNOR_HWCAPS_READ_1_1_2		BIT(3)
+#define SNOR_HWCAPS_READ_1_2_2		BIT(4)
+#define SNOR_HWCAPS_READ_2_2_2		BIT(5)
+#define SNOR_HWCAPS_READ_1_2_2_DTR	BIT(6)
+
+#define SNOR_HWCAPS_READ_QUAD		GENMASK(10, 7)
+#define SNOR_HWCAPS_READ_1_1_4		BIT(7)
+#define SNOR_HWCAPS_READ_1_4_4		BIT(8)
+#define SNOR_HWCAPS_READ_4_4_4		BIT(9)
+#define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
 
 /*
  * Page Program capabilities.
-- 
GitLab


From fe488a5e48c69204c3b1ad6fa3282e12dbfaabe7 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Tue, 25 Apr 2017 22:08:49 +0200
Subject: [PATCH 0060/1958] mtd: spi-nor: introduce Octo SPI protocols

This patch starts adding support to Octo SPI protocols (SPI x-y-8).

Op codes for Fast Read and/or Page Program operations using Octo SPI
protocols are not known yet (no JEDEC specification has defined them yet)
but we'd rather introduce the Octo SPI protocols now so it's done as it
should be.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 22 +++++++++++++++++++++-
 include/linux/mtd/spi-nor.h   | 26 +++++++++++++++++++++-----
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 2062a3abba72c..060a59e716be3 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1527,6 +1527,12 @@ enum spi_nor_read_command_index {
 	SNOR_CMD_READ_4_4_4,
 	SNOR_CMD_READ_1_4_4_DTR,
 
+	/* Octo SPI */
+	SNOR_CMD_READ_1_1_8,
+	SNOR_CMD_READ_1_8_8,
+	SNOR_CMD_READ_8_8_8,
+	SNOR_CMD_READ_1_8_8_DTR,
+
 	SNOR_CMD_READ_MAX
 };
 
@@ -1538,6 +1544,11 @@ enum spi_nor_pp_command_index {
 	SNOR_CMD_PP_1_4_4,
 	SNOR_CMD_PP_4_4_4,
 
+	/* Octo SPI */
+	SNOR_CMD_PP_1_1_8,
+	SNOR_CMD_PP_1_8_8,
+	SNOR_CMD_PP_8_8_8,
+
 	SNOR_CMD_PP_MAX
 };
 
@@ -1662,6 +1673,10 @@ static int spi_nor_hwcaps_read2cmd(u32 hwcaps)
 		{ SNOR_HWCAPS_READ_1_4_4,	SNOR_CMD_READ_1_4_4 },
 		{ SNOR_HWCAPS_READ_4_4_4,	SNOR_CMD_READ_4_4_4 },
 		{ SNOR_HWCAPS_READ_1_4_4_DTR,	SNOR_CMD_READ_1_4_4_DTR },
+		{ SNOR_HWCAPS_READ_1_1_8,	SNOR_CMD_READ_1_1_8 },
+		{ SNOR_HWCAPS_READ_1_8_8,	SNOR_CMD_READ_1_8_8 },
+		{ SNOR_HWCAPS_READ_8_8_8,	SNOR_CMD_READ_8_8_8 },
+		{ SNOR_HWCAPS_READ_1_8_8_DTR,	SNOR_CMD_READ_1_8_8_DTR },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd,
@@ -1675,6 +1690,9 @@ static int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
 		{ SNOR_HWCAPS_PP_1_1_4,		SNOR_CMD_PP_1_1_4 },
 		{ SNOR_HWCAPS_PP_1_4_4,		SNOR_CMD_PP_1_4_4 },
 		{ SNOR_HWCAPS_PP_4_4_4,		SNOR_CMD_PP_4_4_4 },
+		{ SNOR_HWCAPS_PP_1_1_8,		SNOR_CMD_PP_1_1_8 },
+		{ SNOR_HWCAPS_PP_1_8_8,		SNOR_CMD_PP_1_8_8 },
+		{ SNOR_HWCAPS_PP_8_8_8,		SNOR_CMD_PP_8_8_8 },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_pp2cmd,
@@ -1772,7 +1790,9 @@ static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
 	/* SPI n-n-n protocols are not supported yet. */
 	ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
 			SNOR_HWCAPS_READ_4_4_4 |
-			SNOR_HWCAPS_PP_4_4_4);
+			SNOR_HWCAPS_READ_8_8_8 |
+			SNOR_HWCAPS_PP_4_4_4 |
+			SNOR_HWCAPS_PP_8_8_8);
 	if (shared_mask & ignored_mask) {
 		dev_dbg(nor->dev,
 			"SPI n-n-n protocols are not supported yet.\n");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 313dbe56f31a0..55faa2f07ccaf 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -161,14 +161,18 @@ enum spi_nor_protocol {
 	SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
 	SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2),
 	SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4),
+	SNOR_PROTO_1_1_8 = SNOR_PROTO_STR(1, 1, 8),
 	SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2),
 	SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
+	SNOR_PROTO_1_8_8 = SNOR_PROTO_STR(1, 8, 8),
 	SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
 	SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
+	SNOR_PROTO_8_8_8 = SNOR_PROTO_STR(8, 8, 8),
 
 	SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1),
 	SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2),
 	SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4),
+	SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8),
 };
 
 static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto)
@@ -308,10 +312,11 @@ struct spi_nor_hwcaps {
 /*
  *(Fast) Read capabilities.
  * MUST be ordered by priority: the higher bit position, the higher priority.
- * As a matter of performances, it is relevant to use Quad SPI protocols first,
- * then Dual SPI protocols before Fast Read and lastly (Slow) Read.
+ * As a matter of performances, it is relevant to use Octo SPI protocols first,
+ * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
+ * (Slow) Read.
  */
-#define SNOR_HWCAPS_READ_MASK		GENMASK(10, 0)
+#define SNOR_HWCAPS_READ_MASK		GENMASK(14, 0)
 #define SNOR_HWCAPS_READ		BIT(0)
 #define SNOR_HWCAPS_READ_FAST		BIT(1)
 #define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
@@ -328,16 +333,22 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_READ_4_4_4		BIT(9)
 #define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
 
+#define SNOR_HWCPAS_READ_OCTO		GENMASK(14, 11)
+#define SNOR_HWCAPS_READ_1_1_8		BIT(11)
+#define SNOR_HWCAPS_READ_1_8_8		BIT(12)
+#define SNOR_HWCAPS_READ_8_8_8		BIT(13)
+#define SNOR_HWCAPS_READ_1_8_8_DTR	BIT(14)
+
 /*
  * Page Program capabilities.
  * MUST be ordered by priority: the higher bit position, the higher priority.
- * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the
+ * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the
  * legacy SPI 1-1-1 protocol.
  * Note that Dual Page Programs are not supported because there is no existing
  * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
  * implements such commands.
  */
-#define SNOR_HWCAPS_PP_MASK	GENMASK(19, 16)
+#define SNOR_HWCAPS_PP_MASK	GENMASK(22, 16)
 #define SNOR_HWCAPS_PP		BIT(16)
 
 #define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
@@ -345,6 +356,11 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_PP_1_4_4	BIT(18)
 #define SNOR_HWCAPS_PP_4_4_4	BIT(19)
 
+#define SNOR_HWCAPS_PP_OCTO	GENMASK(22, 20)
+#define SNOR_HWCAPS_PP_1_1_8	BIT(20)
+#define SNOR_HWCAPS_PP_1_8_8	BIT(21)
+#define SNOR_HWCAPS_PP_8_8_8	BIT(22)
+
 /**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
-- 
GitLab


From ecca81f8cb54ce74f3d5d4ba77732a216173f7b1 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Date: Thu, 4 May 2017 21:22:07 +0200
Subject: [PATCH 0061/1958] mtd: spi-nor: stm32-quadspi: fix compiler errors
 with COMPILE_TEST

This patch fixes some compiler errors:
- change format strings to use %zx for size_t
- add missing #include <linux/sizes.h>

Cc: Ludovic Barre <ludovic.barre@st.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/stm32-quadspi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/stm32-quadspi.c b/drivers/mtd/spi-nor/stm32-quadspi.c
index 1056e7408d2a1..86c0931543c53 100644
--- a/drivers/mtd/spi-nor/stm32-quadspi.c
+++ b/drivers/mtd/spi-nor/stm32-quadspi.c
@@ -19,6 +19,7 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
+#include <linux/sizes.h>
 
 #define QUADSPI_CR		0x00
 #define CR_EN			BIT(0)
@@ -375,7 +376,7 @@ static ssize_t stm32_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	struct stm32_qspi_cmd cmd;
 	int err;
 
-	dev_dbg(qspi->dev, "read(%#.2x): buf:%p from:%#.8x len:%#x\n",
+	dev_dbg(qspi->dev, "read(%#.2x): buf:%p from:%#.8x len:%#zx\n",
 		nor->read_opcode, buf, (u32)from, len);
 
 	memset(&cmd, 0, sizeof(cmd));
@@ -402,7 +403,7 @@ static ssize_t stm32_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
 	struct stm32_qspi_cmd cmd;
 	int err;
 
-	dev_dbg(dev, "write(%#.2x): buf:%p to:%#.8x len:%#x\n",
+	dev_dbg(dev, "write(%#.2x): buf:%p to:%#.8x len:%#zx\n",
 		nor->program_opcode, buf, (u32)to, len);
 
 	memset(&cmd, 0, sizeof(cmd));
-- 
GitLab


From ddd0503e4d780607865950e14f215bf8660ed683 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Mon, 1 May 2017 18:13:00 -0700
Subject: [PATCH 0062/1958] mtd: spi-nor: stm32-quadspi: allow building with
 COMPILE_TEST

Cc: Ludovic Barre <ludovic.barre@st.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
index bfdfb1e72b38a..293c8a4d1e496 100644
--- a/drivers/mtd/spi-nor/Kconfig
+++ b/drivers/mtd/spi-nor/Kconfig
@@ -108,7 +108,7 @@ config SPI_INTEL_SPI_PLATFORM
 
 config SPI_STM32_QUADSPI
 	tristate "STM32 Quad SPI controller"
-	depends on ARCH_STM32
+	depends on ARCH_STM32 || COMPILE_TEST
 	help
 	  This enables support for the STM32 Quad SPI controller.
 	  We only connect the NOR to this controller.
-- 
GitLab


From 05d090f00203bdcf722b9cb6d5ac57c165e2c95c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 5 May 2017 08:33:56 +0300
Subject: [PATCH 0063/1958] mtd: spi-nor: Potential oops on error path in
 quad_enable()

Before commit cff959958832 ("mtd: spi-nor: introduce SPI 1-2-2 and SPI
1-4-4 protocols") then we treated 1 as -EINVAL in the caller but after
that commit we changed to propagate the return.  My static checker
complains that it's eventually passed to an ERR_PTR() and later
dereferenced, but I'm not totally certain if that's true.  Regardless,
returning 1 is wrong.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/spi-nor.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 060a59e716be3..eef55b597ec73 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1389,8 +1389,9 @@ static int macronix_quad_enable(struct spi_nor *nor)
 
 	write_sr(nor, val | SR_QUAD_EN_MX);
 
-	if (spi_nor_wait_till_ready(nor))
-		return 1;
+	ret = spi_nor_wait_till_ready(nor);
+	if (ret)
+		return ret;
 
 	ret = read_sr(nor);
 	if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) {
-- 
GitLab


From 15d3042a937c13f5d9244241c7a9c8416ff6e82a Mon Sep 17 00:00:00 2001
From: Jin Qian <jinqian@google.com>
Date: Mon, 15 May 2017 10:45:08 -0700
Subject: [PATCH 0064/1958] f2fs: sanity check checkpoint segno and blkoff

Make sure segno and blkoff read from raw image are valid.

Cc: stable@vger.kernel.org
Signed-off-by: Jin Qian <jinqian@google.com>
[Jaegeuk Kim: adjust minor coding style]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 83355ec4a92cd..397b1e816b369 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1521,6 +1521,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned int ovp_segments, reserved_segments;
+	unsigned int main_segs, blocks_per_seg;
+	int i;
 
 	total = le32_to_cpu(raw_super->segment_count);
 	fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -1542,6 +1544,20 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 		return 1;
 	}
 
+	main_segs = le32_to_cpu(raw_super->segment_count_main);
+	blocks_per_seg = sbi->blocks_per_seg;
+
+	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+		if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
+			le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
+			return 1;
+	}
+	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
+		if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
+			le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
+			return 1;
+	}
+
 	if (unlikely(f2fs_cp_error(sbi))) {
 		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
 		return 1;
-- 
GitLab


From 4f1fcfe94c1700f9e891adc1ca364a5cef00bbfd Mon Sep 17 00:00:00 2001
From: Simon Xue <xxm@rock-chips.com>
Date: Wed, 3 May 2017 17:19:40 +0200
Subject: [PATCH 0065/1958] iommu/rockchip: Enable Rockchip IOMMU on ARM64

This patch makes it possible to compile the rockchip-iommu driver on
ARM64, so that it can be used with 64-bit SoCs equipped with this type
of IOMMU.

Signed-off-by: Simon Xue <xxm@rock-chips.com>
Signed-off-by: Shunqian Zheng <zhengsq@rock-chips.com>
Signed-off-by: Tomasz Figa <tfiga@chromium.org>
Reviewed-by: Matthias Brugger <mbrugger@suse.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6ee3a25ae7318..99c6366a25519 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -219,7 +219,7 @@ config OMAP_IOMMU_DEBUG
 
 config ROCKCHIP_IOMMU
 	bool "Rockchip IOMMU Support"
-	depends on ARM
+	depends on ARM || ARM64
 	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	select IOMMU_API
 	select ARM_DMA_USE_IOMMU
-- 
GitLab


From 15060aba717115dc9f204c02213a7c6bf341163e Mon Sep 17 00:00:00 2001
From: CQ Tang <cq.tang@intel.com>
Date: Wed, 10 May 2017 11:39:03 -0700
Subject: [PATCH 0066/1958] iommu/vt-d: Helper function to query if a pasid has
 any active users

A driver would need to know if there are any active references to a
a PASID before cleaning up its resources. This function helps check
if there are any active users of a PASID before it can perform any
recovery on that device.

To: Joerg Roedel <joro@8bytes.org>
To: linux-kernel@vger.kernel.org
To: David Woodhouse <dwmw2@infradead.org>
Cc: Jean-Phillipe Brucker <jean-philippe.brucker@arm.com>
Cc: iommu@lists.linux-foundation.org

Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c | 30 ++++++++++++++++++++++++++++++
 include/linux/intel-svm.h | 20 ++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 23c427602c55b..f167c0d84ebfb 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 }
 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
 
+int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+	struct intel_iommu *iommu;
+	struct intel_svm *svm;
+	int ret = -EINVAL;
+
+	mutex_lock(&pasid_mutex);
+	iommu = intel_svm_device_to_iommu(dev);
+	if (!iommu || !iommu->pasid_table)
+		goto out;
+
+	svm = idr_find(&iommu->pasid_idr, pasid);
+	if (!svm)
+		goto out;
+
+	/* init_mm is used in this case */
+	if (!svm->mm)
+		ret = 1;
+	else if (atomic_read(&svm->mm->mm_users) > 0)
+		ret = 1;
+	else
+		ret = 0;
+
+ out:
+	mutex_unlock(&pasid_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
+
 /* Page request queue descriptor */
 struct page_req_dsc {
 	u64 srr:1;
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 3c25794042f93..99bc5b3ae26e1 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
  */
 extern int intel_svm_unbind_mm(struct device *dev, int pasid);
 
+/**
+ * intel_svm_is_pasid_valid() - check if pasid is valid
+ * @dev:	Device for which PASID was allocated
+ * @pasid:	PASID value to be checked
+ *
+ * This function checks if the specified pasid is still valid. A
+ * valid pasid means the backing mm is still having a valid user.
+ * For kernel callers init_mm is always valid. for other mm, if mm->mm_users
+ * is non-zero, it is valid.
+ *
+ * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid
+ * 1 if pasid is valid.
+ */
+extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
+
 #else /* CONFIG_INTEL_IOMMU_SVM */
 
 static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
@@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
 {
 	BUG();
 }
+
+static int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_INTEL_IOMMU_SVM */
 
 #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))
-- 
GitLab


From 6aa9a30838707f19cd5de4b8710987fbc747cdaf Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:06:27 +0900
Subject: [PATCH 0067/1958] iommu/ipmmu-vmsa: Remove platform data handling

The IPMMU driver is using DT these days, and platform data is no longer
used by the driver. Remove unused code.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index b7e14ee863f92..d10a8d5eee4f0 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -768,11 +768,6 @@ static int ipmmu_probe(struct platform_device *pdev)
 	int irq;
 	int ret;
 
-	if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) {
-		dev_err(&pdev->dev, "missing platform data\n");
-		return -EINVAL;
-	}
-
 	mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL);
 	if (!mmu) {
 		dev_err(&pdev->dev, "cannot allocate device data\n");
-- 
GitLab


From dbb7069223bed0c40511ee28e5be519405e47906 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:06:38 +0900
Subject: [PATCH 0068/1958] iommu/ipmmu-vmsa: Rework interrupt code and use
 bitmap for context

Introduce a bitmap for context handing and convert the
interrupt routine to handle all registered contexts.

At this point the number of contexts are still limited.

Also remove the use of the ARM specific mapping variable
from ipmmu_irq() to allow compile on ARM64.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 76 +++++++++++++++++++++++++++++++++-----
 1 file changed, 66 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index d10a8d5eee4f0..787b675b0e9f8 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -8,6 +8,7 @@
  * the Free Software Foundation; version 2 of the License.
  */
 
+#include <linux/bitmap.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
@@ -26,12 +27,17 @@
 
 #include "io-pgtable.h"
 
+#define IPMMU_CTX_MAX 1
+
 struct ipmmu_vmsa_device {
 	struct device *dev;
 	void __iomem *base;
 	struct list_head list;
 
 	unsigned int num_utlbs;
+	spinlock_t lock;			/* Protects ctx and domains[] */
+	DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
+	struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
 
 	struct dma_iommu_mapping *mapping;
 };
@@ -293,9 +299,29 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
  * Domain/Context Management
  */
 
+static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu,
+					 struct ipmmu_vmsa_domain *domain)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&mmu->lock, flags);
+
+	ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX);
+	if (ret != IPMMU_CTX_MAX) {
+		mmu->domains[ret] = domain;
+		set_bit(ret, mmu->ctx);
+	}
+
+	spin_unlock_irqrestore(&mmu->lock, flags);
+
+	return ret;
+}
+
 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 {
 	u64 ttbr;
+	int ret;
 
 	/*
 	 * Allocate the page table operations.
@@ -327,10 +353,15 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 		return -EINVAL;
 
 	/*
-	 * TODO: When adding support for multiple contexts, find an unused
-	 * context.
+	 * Find an unused context.
 	 */
-	domain->context_id = 0;
+	ret = ipmmu_domain_allocate_context(domain->mmu, domain);
+	if (ret == IPMMU_CTX_MAX) {
+		free_io_pgtable_ops(domain->iop);
+		return -EBUSY;
+	}
+
+	domain->context_id = ret;
 
 	/* TTBR0 */
 	ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
@@ -372,6 +403,19 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 	return 0;
 }
 
+static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
+				      unsigned int context_id)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mmu->lock, flags);
+
+	clear_bit(context_id, mmu->ctx);
+	mmu->domains[context_id] = NULL;
+
+	spin_unlock_irqrestore(&mmu->lock, flags);
+}
+
 static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
 {
 	/*
@@ -382,6 +426,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
 	 */
 	ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH);
 	ipmmu_tlb_sync(domain);
+	ipmmu_domain_free_context(domain->mmu, domain->context_id);
 }
 
 /* -----------------------------------------------------------------------------
@@ -439,16 +484,25 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
 static irqreturn_t ipmmu_irq(int irq, void *dev)
 {
 	struct ipmmu_vmsa_device *mmu = dev;
-	struct iommu_domain *io_domain;
-	struct ipmmu_vmsa_domain *domain;
+	irqreturn_t status = IRQ_NONE;
+	unsigned int i;
+	unsigned long flags;
 
-	if (!mmu->mapping)
-		return IRQ_NONE;
+	spin_lock_irqsave(&mmu->lock, flags);
+
+	/*
+	 * Check interrupts for all active contexts.
+	 */
+	for (i = 0; i < IPMMU_CTX_MAX; i++) {
+		if (!mmu->domains[i])
+			continue;
+		if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
+			status = IRQ_HANDLED;
+	}
 
-	io_domain = mmu->mapping->domain;
-	domain = to_vmsa_domain(io_domain);
+	spin_unlock_irqrestore(&mmu->lock, flags);
 
-	return ipmmu_domain_irq(domain);
+	return status;
 }
 
 /* -----------------------------------------------------------------------------
@@ -776,6 +830,8 @@ static int ipmmu_probe(struct platform_device *pdev)
 
 	mmu->dev = &pdev->dev;
 	mmu->num_utlbs = 32;
+	spin_lock_init(&mmu->lock);
+	bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
 
 	/* Map I/O memory and request IRQ. */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
GitLab


From 383fef5f4b56d8dc05cac12dc04262c8175331b1 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:06:48 +0900
Subject: [PATCH 0069/1958] iommu/ipmmu-vmsa: Break out utlb parsing code

Break out the utlb parsing code and dev_data allocation into a
separate function. This is preparation for future code sharing.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 64 ++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 787b675b0e9f8..c4c35abc1c1a2 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -649,22 +649,15 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev,
 	return 0;
 }
 
-static int ipmmu_add_device(struct device *dev)
+static int ipmmu_init_platform_device(struct device *dev)
 {
 	struct ipmmu_vmsa_archdata *archdata;
 	struct ipmmu_vmsa_device *mmu;
-	struct iommu_group *group = NULL;
 	unsigned int *utlbs;
 	unsigned int i;
 	int num_utlbs;
 	int ret = -ENODEV;
 
-	if (dev->archdata.iommu) {
-		dev_warn(dev, "IOMMU driver already assigned to device %s\n",
-			 dev_name(dev));
-		return -EINVAL;
-	}
-
 	/* Find the master corresponding to the device. */
 
 	num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus",
@@ -701,6 +694,36 @@ static int ipmmu_add_device(struct device *dev)
 		}
 	}
 
+	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);
+	if (!archdata) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	archdata->mmu = mmu;
+	archdata->utlbs = utlbs;
+	archdata->num_utlbs = num_utlbs;
+	dev->archdata.iommu = archdata;
+	return 0;
+
+error:
+	kfree(utlbs);
+	return ret;
+}
+
+static int ipmmu_add_device(struct device *dev)
+{
+	struct ipmmu_vmsa_archdata *archdata;
+	struct ipmmu_vmsa_device *mmu = NULL;
+	struct iommu_group *group;
+	int ret;
+
+	if (dev->archdata.iommu) {
+		dev_warn(dev, "IOMMU driver already assigned to device %s\n",
+			 dev_name(dev));
+		return -EINVAL;
+	}
+
 	/* Create a device group and add the device to it. */
 	group = iommu_group_alloc();
 	if (IS_ERR(group)) {
@@ -718,16 +741,9 @@ static int ipmmu_add_device(struct device *dev)
 		goto error;
 	}
 
-	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);
-	if (!archdata) {
-		ret = -ENOMEM;
+	ret = ipmmu_init_platform_device(dev);
+	if (ret < 0)
 		goto error;
-	}
-
-	archdata->mmu = mmu;
-	archdata->utlbs = utlbs;
-	archdata->num_utlbs = num_utlbs;
-	dev->archdata.iommu = archdata;
 
 	/*
 	 * Create the ARM mapping, used by the ARM DMA mapping core to allocate
@@ -738,6 +754,8 @@ static int ipmmu_add_device(struct device *dev)
 	 * - Make the mapping size configurable ? We currently use a 2GB mapping
 	 *   at a 1GB offset to ensure that NULL VAs will fault.
 	 */
+	archdata = dev->archdata.iommu;
+	mmu = archdata->mmu;
 	if (!mmu->mapping) {
 		struct dma_iommu_mapping *mapping;
 
@@ -762,16 +780,16 @@ static int ipmmu_add_device(struct device *dev)
 	return 0;
 
 error:
-	arm_iommu_release_mapping(mmu->mapping);
-
-	kfree(dev->archdata.iommu);
-	kfree(utlbs);
-
-	dev->archdata.iommu = NULL;
+	if (mmu)
+		arm_iommu_release_mapping(mmu->mapping);
 
 	if (!IS_ERR_OR_NULL(group))
 		iommu_group_remove_device(dev);
 
+	kfree(archdata->utlbs);
+	kfree(archdata);
+	dev->archdata.iommu = NULL;
+
 	return ret;
 }
 
-- 
GitLab


From 8e73bf659135ee7333d9a762bbdadb2ad794452f Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:06:59 +0900
Subject: [PATCH 0070/1958] iommu/ipmmu-vmsa: Break out domain allocation code

Break out the domain allocation code into a separate function.

This is preparation for future code sharing.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index c4c35abc1c1a2..eb5008596051c 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -509,13 +509,10 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
  * IOMMU Operations
  */
 
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
 {
 	struct ipmmu_vmsa_domain *domain;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED)
-		return NULL;
-
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
 		return NULL;
@@ -525,6 +522,14 @@ static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
 	return &domain->io_domain;
 }
 
+static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+{
+	if (type != IOMMU_DOMAIN_UNMANAGED)
+		return NULL;
+
+	return __ipmmu_domain_alloc(type);
+}
+
 static void ipmmu_domain_free(struct iommu_domain *io_domain)
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
-- 
GitLab


From 3ae47292024f85e82b769044c43f0bd13adcd7e8 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:07:10 +0900
Subject: [PATCH 0071/1958] iommu/ipmmu-vmsa: Add new IOMMU_DOMAIN_DMA ops

Introduce an alternative set of iommu_ops suitable for 64-bit ARM
as well as 32-bit ARM when CONFIG_IOMMU_DMA=y. Also adjust the
Kconfig to depend on ARM or IOMMU_DMA. Initialize the device
from ->xlate() when CONFIG_IOMMU_DMA=y.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig      |   1 +
 drivers/iommu/ipmmu-vmsa.c | 164 +++++++++++++++++++++++++++++++++++--
 2 files changed, 156 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6ee3a25ae7318..504ba025a54c7 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -274,6 +274,7 @@ config EXYNOS_IOMMU_DEBUG
 
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
+	depends on ARM || IOMMU_DMA
 	depends on ARM_LPAE
 	depends on ARCH_RENESAS || COMPILE_TEST
 	select IOMMU_API
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index eb5008596051c..8b648f6e0e4d3 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -10,6 +10,7 @@
 
 #include <linux/bitmap.h>
 #include <linux/delay.h>
+#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/export.h>
@@ -22,8 +23,10 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
 #include <asm/dma-iommu.h>
 #include <asm/pgalloc.h>
+#endif
 
 #include "io-pgtable.h"
 
@@ -57,6 +60,8 @@ struct ipmmu_vmsa_archdata {
 	struct ipmmu_vmsa_device *mmu;
 	unsigned int *utlbs;
 	unsigned int num_utlbs;
+	struct device *dev;
+	struct list_head list;
 };
 
 static DEFINE_SPINLOCK(ipmmu_devices_lock);
@@ -522,14 +527,6 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
 	return &domain->io_domain;
 }
 
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
-{
-	if (type != IOMMU_DOMAIN_UNMANAGED)
-		return NULL;
-
-	return __ipmmu_domain_alloc(type);
-}
-
 static void ipmmu_domain_free(struct iommu_domain *io_domain)
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
@@ -572,7 +569,8 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
 		dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n",
 			dev_name(mmu->dev), dev_name(domain->mmu->dev));
 		ret = -EINVAL;
-	}
+	} else
+		dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -708,6 +706,7 @@ static int ipmmu_init_platform_device(struct device *dev)
 	archdata->mmu = mmu;
 	archdata->utlbs = utlbs;
 	archdata->num_utlbs = num_utlbs;
+	archdata->dev = dev;
 	dev->archdata.iommu = archdata;
 	return 0;
 
@@ -716,6 +715,16 @@ static int ipmmu_init_platform_device(struct device *dev)
 	return ret;
 }
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
+
+static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+{
+	if (type != IOMMU_DOMAIN_UNMANAGED)
+		return NULL;
+
+	return __ipmmu_domain_alloc(type);
+}
+
 static int ipmmu_add_device(struct device *dev)
 {
 	struct ipmmu_vmsa_archdata *archdata;
@@ -825,6 +834,141 @@ static const struct iommu_ops ipmmu_ops = {
 	.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
 };
 
+#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */
+
+#ifdef CONFIG_IOMMU_DMA
+
+static DEFINE_SPINLOCK(ipmmu_slave_devices_lock);
+static LIST_HEAD(ipmmu_slave_devices);
+
+static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type)
+{
+	struct iommu_domain *io_domain = NULL;
+
+	switch (type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		io_domain = __ipmmu_domain_alloc(type);
+		break;
+
+	case IOMMU_DOMAIN_DMA:
+		io_domain = __ipmmu_domain_alloc(type);
+		if (io_domain)
+			iommu_get_dma_cookie(io_domain);
+		break;
+	}
+
+	return io_domain;
+}
+
+static void ipmmu_domain_free_dma(struct iommu_domain *io_domain)
+{
+	switch (io_domain->type) {
+	case IOMMU_DOMAIN_DMA:
+		iommu_put_dma_cookie(io_domain);
+		/* fall-through */
+	default:
+		ipmmu_domain_free(io_domain);
+		break;
+	}
+}
+
+static int ipmmu_add_device_dma(struct device *dev)
+{
+	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct iommu_group *group;
+
+	/* The device has been verified in xlate() */
+	if (!archdata)
+		return -ENODEV;
+
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	spin_lock(&ipmmu_slave_devices_lock);
+	list_add(&archdata->list, &ipmmu_slave_devices);
+	spin_unlock(&ipmmu_slave_devices_lock);
+	return 0;
+}
+
+static void ipmmu_remove_device_dma(struct device *dev)
+{
+	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+
+	spin_lock(&ipmmu_slave_devices_lock);
+	list_del(&archdata->list);
+	spin_unlock(&ipmmu_slave_devices_lock);
+
+	iommu_group_remove_device(dev);
+}
+
+static struct device *ipmmu_find_sibling_device(struct device *dev)
+{
+	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct ipmmu_vmsa_archdata *sibling_archdata = NULL;
+	bool found = false;
+
+	spin_lock(&ipmmu_slave_devices_lock);
+
+	list_for_each_entry(sibling_archdata, &ipmmu_slave_devices, list) {
+		if (archdata == sibling_archdata)
+			continue;
+		if (sibling_archdata->mmu == archdata->mmu) {
+			found = true;
+			break;
+		}
+	}
+
+	spin_unlock(&ipmmu_slave_devices_lock);
+
+	return found ? sibling_archdata->dev : NULL;
+}
+
+static struct iommu_group *ipmmu_find_group_dma(struct device *dev)
+{
+	struct iommu_group *group;
+	struct device *sibling;
+
+	sibling = ipmmu_find_sibling_device(dev);
+	if (sibling)
+		group = iommu_group_get(sibling);
+	if (!sibling || IS_ERR(group))
+		group = generic_device_group(dev);
+
+	return group;
+}
+
+static int ipmmu_of_xlate_dma(struct device *dev,
+			      struct of_phandle_args *spec)
+{
+	/* If the IPMMU device is disabled in DT then return error
+	 * to make sure the of_iommu code does not install ops
+	 * even though the iommu device is disabled
+	 */
+	if (!of_device_is_available(spec->np))
+		return -ENODEV;
+
+	return ipmmu_init_platform_device(dev);
+}
+
+static const struct iommu_ops ipmmu_ops = {
+	.domain_alloc = ipmmu_domain_alloc_dma,
+	.domain_free = ipmmu_domain_free_dma,
+	.attach_dev = ipmmu_attach_device,
+	.detach_dev = ipmmu_detach_device,
+	.map = ipmmu_map,
+	.unmap = ipmmu_unmap,
+	.map_sg = default_iommu_map_sg,
+	.iova_to_phys = ipmmu_iova_to_phys,
+	.add_device = ipmmu_add_device_dma,
+	.remove_device = ipmmu_remove_device_dma,
+	.device_group = ipmmu_find_group_dma,
+	.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
+	.of_xlate = ipmmu_of_xlate_dma,
+};
+
+#endif /* CONFIG_IOMMU_DMA */
+
 /* -----------------------------------------------------------------------------
  * Probe/remove and init
  */
@@ -914,7 +1058,9 @@ static int ipmmu_remove(struct platform_device *pdev)
 	list_del(&mmu->list);
 	spin_unlock(&ipmmu_devices_lock);
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
 	arm_iommu_release_mapping(mmu->mapping);
+#endif
 
 	ipmmu_device_reset(mmu);
 
-- 
GitLab


From 0fbc8b04c34fef1d730712135c504978c939a5a3 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:07:20 +0900
Subject: [PATCH 0072/1958] iommu/ipmmu-vmsa: Use fwspec iommu_priv on ARM64

Convert from archdata to iommu_priv via iommu_fwspec on ARM64 but
let 32-bit ARM keep on using archdata for now.

Once the 32-bit ARM code and the IPMMU driver is able to move over
to CONFIG_IOMMU_DMA=y then coverting to fwspec via ->of_xlate() will
be easy.

For now fwspec ids and num_ids are not used to allow code sharing between
32-bit and 64-bit ARM code inside the driver.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 97 +++++++++++++++++++++++---------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 8b648f6e0e4d3..a04babbd7de97 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -56,7 +56,7 @@ struct ipmmu_vmsa_domain {
 	spinlock_t lock;			/* Protects mappings */
 };
 
-struct ipmmu_vmsa_archdata {
+struct ipmmu_vmsa_iommu_priv {
 	struct ipmmu_vmsa_device *mmu;
 	unsigned int *utlbs;
 	unsigned int num_utlbs;
@@ -72,6 +72,24 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
 	return container_of(dom, struct ipmmu_vmsa_domain, io_domain);
 }
 
+
+static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
+{
+#if defined(CONFIG_ARM)
+	return dev->archdata.iommu;
+#else
+	return dev->iommu_fwspec->iommu_priv;
+#endif
+}
+static void set_priv(struct device *dev, struct ipmmu_vmsa_iommu_priv *p)
+{
+#if defined(CONFIG_ARM)
+	dev->archdata.iommu = p;
+#else
+	dev->iommu_fwspec->iommu_priv = p;
+#endif
+}
+
 #define TLB_LOOP_TIMEOUT		100	/* 100us */
 
 /* -----------------------------------------------------------------------------
@@ -543,8 +561,8 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
 static int ipmmu_attach_device(struct iommu_domain *io_domain,
 			       struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
-	struct ipmmu_vmsa_device *mmu = archdata->mmu;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
+	struct ipmmu_vmsa_device *mmu = priv->mmu;
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 	unsigned long flags;
 	unsigned int i;
@@ -577,8 +595,8 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < archdata->num_utlbs; ++i)
-		ipmmu_utlb_enable(domain, archdata->utlbs[i]);
+	for (i = 0; i < priv->num_utlbs; ++i)
+		ipmmu_utlb_enable(domain, priv->utlbs[i]);
 
 	return 0;
 }
@@ -586,12 +604,12 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
 static void ipmmu_detach_device(struct iommu_domain *io_domain,
 				struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 	unsigned int i;
 
-	for (i = 0; i < archdata->num_utlbs; ++i)
-		ipmmu_utlb_disable(domain, archdata->utlbs[i]);
+	for (i = 0; i < priv->num_utlbs; ++i)
+		ipmmu_utlb_disable(domain, priv->utlbs[i]);
 
 	/*
 	 * TODO: Optimize by disabling the context when no device is attached.
@@ -654,7 +672,7 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev,
 
 static int ipmmu_init_platform_device(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata;
+	struct ipmmu_vmsa_iommu_priv *priv;
 	struct ipmmu_vmsa_device *mmu;
 	unsigned int *utlbs;
 	unsigned int i;
@@ -697,17 +715,17 @@ static int ipmmu_init_platform_device(struct device *dev)
 		}
 	}
 
-	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);
-	if (!archdata) {
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
 		ret = -ENOMEM;
 		goto error;
 	}
 
-	archdata->mmu = mmu;
-	archdata->utlbs = utlbs;
-	archdata->num_utlbs = num_utlbs;
-	archdata->dev = dev;
-	dev->archdata.iommu = archdata;
+	priv->mmu = mmu;
+	priv->utlbs = utlbs;
+	priv->num_utlbs = num_utlbs;
+	priv->dev = dev;
+	set_priv(dev, priv);
 	return 0;
 
 error:
@@ -727,12 +745,11 @@ static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
 
 static int ipmmu_add_device(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata;
 	struct ipmmu_vmsa_device *mmu = NULL;
 	struct iommu_group *group;
 	int ret;
 
-	if (dev->archdata.iommu) {
+	if (to_priv(dev)) {
 		dev_warn(dev, "IOMMU driver already assigned to device %s\n",
 			 dev_name(dev));
 		return -EINVAL;
@@ -768,8 +785,7 @@ static int ipmmu_add_device(struct device *dev)
 	 * - Make the mapping size configurable ? We currently use a 2GB mapping
 	 *   at a 1GB offset to ensure that NULL VAs will fault.
 	 */
-	archdata = dev->archdata.iommu;
-	mmu = archdata->mmu;
+	mmu = to_priv(dev)->mmu;
 	if (!mmu->mapping) {
 		struct dma_iommu_mapping *mapping;
 
@@ -800,24 +816,24 @@ static int ipmmu_add_device(struct device *dev)
 	if (!IS_ERR_OR_NULL(group))
 		iommu_group_remove_device(dev);
 
-	kfree(archdata->utlbs);
-	kfree(archdata);
-	dev->archdata.iommu = NULL;
+	kfree(to_priv(dev)->utlbs);
+	kfree(to_priv(dev));
+	set_priv(dev, NULL);
 
 	return ret;
 }
 
 static void ipmmu_remove_device(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
 
 	arm_iommu_detach_device(dev);
 	iommu_group_remove_device(dev);
 
-	kfree(archdata->utlbs);
-	kfree(archdata);
+	kfree(priv->utlbs);
+	kfree(priv);
 
-	dev->archdata.iommu = NULL;
+	set_priv(dev, NULL);
 }
 
 static const struct iommu_ops ipmmu_ops = {
@@ -874,11 +890,14 @@ static void ipmmu_domain_free_dma(struct iommu_domain *io_domain)
 
 static int ipmmu_add_device_dma(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
 	struct iommu_group *group;
 
-	/* The device has been verified in xlate() */
-	if (!archdata)
+	/*
+	 * Only let through devices that have been verified in xlate()
+	 * We may get called with dev->iommu_fwspec set to NULL.
+	 */
+	if (!fwspec || !fwspec->iommu_priv)
 		return -ENODEV;
 
 	group = iommu_group_get_for_dev(dev);
@@ -886,17 +905,17 @@ static int ipmmu_add_device_dma(struct device *dev)
 		return PTR_ERR(group);
 
 	spin_lock(&ipmmu_slave_devices_lock);
-	list_add(&archdata->list, &ipmmu_slave_devices);
+	list_add(&to_priv(dev)->list, &ipmmu_slave_devices);
 	spin_unlock(&ipmmu_slave_devices_lock);
 	return 0;
 }
 
 static void ipmmu_remove_device_dma(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
 
 	spin_lock(&ipmmu_slave_devices_lock);
-	list_del(&archdata->list);
+	list_del(&priv->list);
 	spin_unlock(&ipmmu_slave_devices_lock);
 
 	iommu_group_remove_device(dev);
@@ -904,16 +923,16 @@ static void ipmmu_remove_device_dma(struct device *dev)
 
 static struct device *ipmmu_find_sibling_device(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
-	struct ipmmu_vmsa_archdata *sibling_archdata = NULL;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
+	struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL;
 	bool found = false;
 
 	spin_lock(&ipmmu_slave_devices_lock);
 
-	list_for_each_entry(sibling_archdata, &ipmmu_slave_devices, list) {
-		if (archdata == sibling_archdata)
+	list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) {
+		if (priv == sibling_priv)
 			continue;
-		if (sibling_archdata->mmu == archdata->mmu) {
+		if (sibling_priv->mmu == priv->mmu) {
 			found = true;
 			break;
 		}
@@ -921,7 +940,7 @@ static struct device *ipmmu_find_sibling_device(struct device *dev)
 
 	spin_unlock(&ipmmu_slave_devices_lock);
 
-	return found ? sibling_archdata->dev : NULL;
+	return found ? sibling_priv->dev : NULL;
 }
 
 static struct iommu_group *ipmmu_find_group_dma(struct device *dev)
-- 
GitLab


From d74c67d46b8e03883326163587de783adf61ef6a Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:07:30 +0900
Subject: [PATCH 0073/1958] iommu/ipmmu-vmsa: Drop LPAE Kconfig dependency

Neither the ARM page table code enabled by IOMMU_IO_PGTABLE_LPAE
nor the IPMMU_VMSA driver actually depends on ARM_LPAE, so get
rid of the dependency.

Tested with ipmmu-vmsa on r8a7794 ALT and a kernel config using:
 # CONFIG_ARM_LPAE is not set

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 504ba025a54c7..45cb1905b9bc9 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -275,7 +275,6 @@ config EXYNOS_IOMMU_DEBUG
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
 	depends on ARM || IOMMU_DMA
-	depends on ARM_LPAE
 	depends on ARCH_RENESAS || COMPILE_TEST
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
-- 
GitLab


From 26b6aec6e726597149b4bcd4cc8583f402f3da14 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Wed, 17 May 2017 19:07:41 +0900
Subject: [PATCH 0074/1958] iommu/ipmmu-vmsa: Fix pgsize_bitmap semicolon typo

Fix comma-instead-of-semicolon typo error present
in the latest version of the IPMMU driver.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index a04babbd7de97..2a38aa15be17d 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -358,7 +358,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 	 * non-secure mode.
 	 */
 	domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS;
-	domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
+	domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
 	domain->cfg.ias = 32;
 	domain->cfg.oas = 40;
 	domain->cfg.tlb = &ipmmu_gather_ops;
-- 
GitLab


From 757c370f036e1f9f9a816cd481a13cdbcb346eb9 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 16 May 2017 12:26:48 +0100
Subject: [PATCH 0075/1958] iommu/iova: Sort out rbtree limit_pfn handling

When walking the rbtree, the fact that iovad->start_pfn and limit_pfn
are both inclusive limits creates an ambiguity once limit_pfn reaches
the bottom of the address space and they overlap. Commit 5016bdb796b3
("iommu/iova: Fix underflow bug in __alloc_and_insert_iova_range") fixed
the worst side-effect of this, that of underflow wraparound leading to
bogus allocations, but the remaining fallout is that any attempt to
allocate start_pfn itself erroneously fails.

The cleanest way to resolve the ambiguity is to simply make limit_pfn an
exclusive limit when inside the guts of the rbtree. Since we're working
with PFNs, representing one past the top of the address space is always
possible without fear of overflow, and elsewhere it just makes life a
little more straightforward.

Reported-by: Aaron Sierra <asierra@xes-inc.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c |  2 +-
 drivers/iommu/iova.c      | 21 +++++++++------------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8348f366ddd1a..aaf6fa304240e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -314,7 +314,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		 * If we have devices with different DMA masks, move the free
 		 * area cache limit down for the benefit of the smaller one.
 		 */
-		iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn);
+		iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn);
 
 		return 0;
 	}
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 5c88ba70e4e0f..3f24c9a831c9b 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -48,7 +48,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->cached32_node = NULL;
 	iovad->granule = granule;
 	iovad->start_pfn = start_pfn;
-	iovad->dma_32bit_pfn = pfn_32bit;
+	iovad->dma_32bit_pfn = pfn_32bit + 1;
 	init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
@@ -63,7 +63,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
 		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
 		struct iova *curr_iova =
 			rb_entry(iovad->cached32_node, struct iova, node);
-		*limit_pfn = curr_iova->pfn_lo - 1;
+		*limit_pfn = curr_iova->pfn_lo;
 		return prev_node;
 	}
 }
@@ -135,7 +135,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 static unsigned int
 iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
 {
-	return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1);
+	return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
 }
 
 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
@@ -155,18 +155,15 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	while (curr) {
 		struct iova *curr_iova = rb_entry(curr, struct iova, node);
 
-		if (limit_pfn < curr_iova->pfn_lo)
+		if (limit_pfn <= curr_iova->pfn_lo) {
 			goto move_left;
-		else if (limit_pfn < curr_iova->pfn_hi)
-			goto adjust_limit_pfn;
-		else {
+		} else if (limit_pfn > curr_iova->pfn_hi) {
 			if (size_aligned)
 				pad_size = iova_get_pad_size(size, limit_pfn);
-			if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
+			if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
 				break;	/* found a free slot */
 		}
-adjust_limit_pfn:
-		limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0;
+		limit_pfn = curr_iova->pfn_lo;
 move_left:
 		prev = curr;
 		curr = rb_prev(curr);
@@ -182,7 +179,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	}
 
 	/* pfn_lo will point to size aligned address if size_aligned is set */
-	new->pfn_lo = limit_pfn - (size + pad_size) + 1;
+	new->pfn_lo = limit_pfn - (size + pad_size);
 	new->pfn_hi = new->pfn_lo + size - 1;
 
 	/* If we have 'prev', it's a valid place to start the insertion. */
@@ -269,7 +266,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 	if (!new_iova)
 		return NULL;
 
-	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn,
+	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
 			new_iova, size_aligned);
 
 	if (ret) {
-- 
GitLab


From bf10ff69dd6e27710b21863ebd8e6504d9516222 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 7 May 2017 19:05:16 +0100
Subject: [PATCH 0076/1958] ipmi_ssif: remove redundant null check on array
 client->adapter->name

The null check on client->adapter->name is redundant as name is an
array of I2C_NAME_SIZE chars and hence can never be null. We may as
well remove this redundant check.

Detected by CoverityScan, CID#1375918 ("Array compared against 0")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 6dd6476ea5d31..1d4fd846e4574 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1419,8 +1419,7 @@ static int find_slave_address(struct i2c_client *client, int slave_addr)
 	list_for_each_entry(info, &ssif_infos, link) {
 		if (info->binfo.addr != client->addr)
 			continue;
-		if (info->adapter_name && client->adapter->name &&
-		    strcmp_nospace(info->adapter_name,
+		if (info->adapter_name && strcmp_nospace(info->adapter_name,
 				   client->adapter->name))
 			continue;
 		if (info->slave_addr) {
-- 
GitLab


From 490194269665d6d4915a4a5774f002885c5a2d8f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 21 Apr 2017 15:35:26 -0700
Subject: [PATCH 0077/1958] module: Pass struct load_info into symbol checks

Since we're already using values from struct load_info, just pass this
pointer in directly and use what's needed as we need it. This allows us
to access future fields in struct load_info too.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 kernel/module.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 4a3665f8f8372..ca4509b13400c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1278,12 +1278,13 @@ static u32 resolve_rel_crc(const s32 *crc)
 	return *(u32 *)((void *)crc + *crc);
 }
 
-static int check_version(Elf_Shdr *sechdrs,
-			 unsigned int versindex,
+static int check_version(const struct load_info *info,
 			 const char *symname,
 			 struct module *mod,
 			 const s32 *crc)
 {
+	Elf_Shdr *sechdrs = info->sechdrs;
+	unsigned int versindex = info->index.vers;
 	unsigned int i, num_versions;
 	struct modversion_info *versions;
 
@@ -1326,8 +1327,7 @@ static int check_version(Elf_Shdr *sechdrs,
 	return 0;
 }
 
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
-					  unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
 					  struct module *mod)
 {
 	const s32 *crc;
@@ -1343,8 +1343,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 		BUG();
 	}
 	preempt_enable();
-	return check_version(sechdrs, versindex,
-			     VMLINUX_SYMBOL_STR(module_layout), mod, crc);
+	return check_version(info, VMLINUX_SYMBOL_STR(module_layout),
+			     mod, crc);
 }
 
 /* First part is kernel version, which we ignore if module has crcs. */
@@ -1358,8 +1358,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
 	return strcmp(amagic, bmagic) == 0;
 }
 #else
-static inline int check_version(Elf_Shdr *sechdrs,
-				unsigned int versindex,
+static inline int check_version(const struct load_info *info,
 				const char *symname,
 				struct module *mod,
 				const s32 *crc)
@@ -1367,8 +1366,7 @@ static inline int check_version(Elf_Shdr *sechdrs,
 	return 1;
 }
 
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
-					  unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
 					  struct module *mod)
 {
 	return 1;
@@ -1404,7 +1402,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
 	if (!sym)
 		goto unlock;
 
-	if (!check_version(info->sechdrs, info->index.vers, name, mod, crc)) {
+	if (!check_version(info, name, mod, crc)) {
 		sym = ERR_PTR(-EINVAL);
 		goto getname;
 	}
@@ -2971,7 +2969,7 @@ static struct module *setup_load_info(struct load_info *info, int flags)
 	info->index.pcpu = find_pcpusec(info);
 
 	/* Check module struct version now, before we try to use module. */
-	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
+	if (!check_modstruct_version(info, mod))
 		return ERR_PTR(-ENOEXEC);
 
 	return mod;
-- 
GitLab


From 3e2e857f9c3a19d55ee0ba7b428b8be5008960bf Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 21 Apr 2017 15:35:27 -0700
Subject: [PATCH 0078/1958] module: Add module name to modinfo

Accessing the mod structure (e.g. for mod->name) prior to having completed
check_modstruct_version() can result in writing garbage to the error logs
if the layout of the mod structure loaded from disk doesn't match the
running kernel's mod structure layout. This kind of mismatch will become
much more likely if a kernel is built with different randomization seed
for the struct layout randomization plugin.

Instead, add and use a new modinfo string for logging the module name.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 kernel/module.c       | 29 ++++++++++++++++++++++-------
 scripts/mod/modpost.c |  1 +
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index ca4509b13400c..3803449ca2191 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -302,6 +302,7 @@ int unregister_module_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL(unregister_module_notifier);
 
 struct load_info {
+	char *name;
 	Elf_Ehdr *hdr;
 	unsigned long len;
 	Elf_Shdr *sechdrs;
@@ -1318,12 +1319,12 @@ static int check_version(const struct load_info *info,
 	}
 
 	/* Broken toolchain. Warn once, then let it go.. */
-	pr_warn_once("%s: no symbol version for %s\n", mod->name, symname);
+	pr_warn_once("%s: no symbol version for %s\n", info->name, symname);
 	return 1;
 
 bad_version:
 	pr_warn("%s: disagrees about version of symbol %s\n",
-	       mod->name, symname);
+	       info->name, symname);
 	return 0;
 }
 
@@ -2913,9 +2914,15 @@ static int rewrite_section_headers(struct load_info *info, int flags)
 		info->index.vers = 0; /* Pretend no __versions section! */
 	else
 		info->index.vers = find_sec(info, "__versions");
+	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
 	info->index.info = find_sec(info, ".modinfo");
+	if (!info->index.info)
+		info->name = "(missing .modinfo section)";
+	else
+		info->name = get_modinfo(info, "name");
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
-	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
 	return 0;
 }
 
@@ -2955,14 +2962,22 @@ static struct module *setup_load_info(struct load_info *info, int flags)
 
 	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
 	if (!info->index.mod) {
-		pr_warn("No module found in object\n");
+		pr_warn("%s: No module found in object\n",
+			info->name ?: "(missing .modinfo name field)");
 		return ERR_PTR(-ENOEXEC);
 	}
 	/* This is temporary: point mod into copy of data. */
 	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
 
+	/*
+	 * If we didn't load the .modinfo 'name' field, fall back to
+	 * on-disk struct mod 'name' field.
+	 */
+	if (!info->name)
+		info->name = mod->name;
+
 	if (info->index.sym == 0) {
-		pr_warn("%s: module has no symbols (stripped?)\n", mod->name);
+		pr_warn("%s: module has no symbols (stripped?)\n", info->name);
 		return ERR_PTR(-ENOEXEC);
 	}
 
@@ -2990,7 +3005,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 			return err;
 	} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
 		pr_err("%s: version magic '%s' should be '%s'\n",
-		       mod->name, modmagic, vermagic);
+		       info->name, modmagic, vermagic);
 		return -ENOEXEC;
 	}
 
@@ -3270,7 +3285,7 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
 	if (IS_ERR(mod))
 		return mod;
 
-	if (blacklisted(mod->name))
+	if (blacklisted(info->name))
 		return ERR_PTR(-EPERM);
 
 	err = check_modinfo(mod, info, flags);
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 30d752a4a6a60..48397feb08fba 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2126,6 +2126,7 @@ static void add_header(struct buffer *b, struct module *mod)
 	buf_printf(b, "#include <linux/compiler.h>\n");
 	buf_printf(b, "\n");
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
+	buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
 	buf_printf(b, "\n");
 	buf_printf(b, "__visible struct module __this_module\n");
 	buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n");
-- 
GitLab


From 93607124c5450148e592c3d18ac533b4e5f25b8b Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 16 May 2017 13:20:16 -0700
Subject: [PATCH 0079/1958] f2fs: load inode's flag from disk

This patch fixes missing inode flag loaded from disk, reported by Tom.

[tom@localhost ~]$ sudo mount /dev/loop0 /mnt/
[tom@localhost ~]$ sudo chown tom:tom /mnt/
[tom@localhost ~]$ touch /mnt/testfile
[tom@localhost ~]$ sudo chattr +i /mnt/testfile
[tom@localhost ~]$ echo test > /mnt/testfile
bash: /mnt/testfile: Operation not permitted
[tom@localhost ~]$ rm /mnt/testfile
rm: cannot remove '/mnt/testfile': Operation not permitted
[tom@localhost ~]$ sudo umount /mnt/
[tom@localhost ~]$ sudo mount /dev/loop0 /mnt/
[tom@localhost ~]$ lsattr /mnt/testfile
----i-------------- /mnt/testfile
[tom@localhost ~]$ echo test > /mnt/testfile
[tom@localhost ~]$ rm /mnt/testfile
[tom@localhost ~]$ sudo umount /mnt/

Cc: stable@vger.kernel.org
Reported-by: Tom Yan <tom.ty89@outlook.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c  | 1 +
 fs/f2fs/inode.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index abb0403d34148..9b3e7635222c9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1493,6 +1493,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
 	inode->i_ctime = current_time(inode);
 	f2fs_set_inode_flags(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
 
 	inode_unlock(inode);
 out:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 518f496430925..e53c784ab11e3 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -44,7 +44,6 @@ void f2fs_set_inode_flags(struct inode *inode)
 		new_fl |= S_DIRSYNC;
 	inode_set_flags(inode, new_fl,
 			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-	f2fs_mark_inode_dirty_sync(inode, false);
 }
 
 static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -226,6 +225,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
 		ret = -EIO;
 		goto bad_inode;
 	}
+	f2fs_set_inode_flags(inode);
 	unlock_new_inode(inode);
 	trace_f2fs_iget(inode);
 	return inode;
-- 
GitLab


From 1c6d8ee4b8aaadc3645497658007ca007312351d Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 3 May 2017 23:59:12 +0800
Subject: [PATCH 0080/1958] f2fs: support statx

Last kernel has already support new syscall statx() in commit a528d35e8bfc
("statx: Add a system call to make enhanced file info available"), with
this interface we can show more file info including file creation and some
attribute flags to user.

This patch tries to support this functionality.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9b3e7635222c9..8ccbfe53c03c2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -633,9 +633,30 @@ int f2fs_truncate(struct inode *inode)
 }
 
 int f2fs_getattr(const struct path *path, struct kstat *stat,
-		 u32 request_mask, unsigned int flags)
+		 u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	unsigned int flags;
+
+	flags = fi->i_flags & FS_FL_USER_VISIBLE;
+	if (flags & FS_APPEND_FL)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (flags & FS_COMPR_FL)
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+	if (f2fs_encrypted_inode(inode))
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+	if (flags & FS_IMMUTABLE_FL)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+	if (flags & FS_NODUMP_FL)
+		stat->attributes |= STATX_ATTR_NODUMP;
+
+	stat->attributes_mask |= (STATX_ATTR_APPEND |
+				  STATX_ATTR_COMPRESSED |
+				  STATX_ATTR_ENCRYPTED |
+				  STATX_ATTR_IMMUTABLE |
+				  STATX_ATTR_NODUMP);
+
 	generic_fillattr(inode, stat);
 	stat->blocks <<= 3;
 	return 0;
-- 
GitLab


From e5dbd9563e5528f98728ba0bc8361f804ace5aae Mon Sep 17 00:00:00 2001
From: Weichao Guo <guoweichao@huawei.com>
Date: Thu, 11 May 2017 04:28:00 +0800
Subject: [PATCH 0081/1958] f2fs: make sure f2fs_gc returns consistent errno

By default, f2fs_gc returns -EINVAL in general error cases, e.g., no victim
was selected. However, the default errno may be overwritten in two cases:
gc_more and BG_GC -> FG_GC. We should return consistent errno in such cases.

Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 026522107ca3f..965fbf5d0a2eb 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -955,7 +955,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 {
 	int gc_type = sync ? FG_GC : BG_GC;
 	int sec_freed = 0;
-	int ret = -EINVAL;
+	int ret;
 	struct cp_control cpc;
 	unsigned int init_segno = segno;
 	struct gc_inode_list gc_list = {
@@ -965,8 +965,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 
 	cpc.reason = __get_cp_reason(sbi);
 gc_more:
-	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
+	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
+		ret = -EINVAL;
 		goto stop;
+	}
 	if (unlikely(f2fs_cp_error(sbi))) {
 		ret = -EIO;
 		goto stop;
@@ -987,6 +989,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 			gc_type = FG_GC;
 	}
 
+	ret = -EINVAL;
 	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
 	if (gc_type == BG_GC && !background)
 		goto stop;
-- 
GitLab


From 1919ffc0d7a300d4d8002e92ab3c6dea1974defc Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 10 May 2017 11:23:36 -0700
Subject: [PATCH 0082/1958] f2fs: use f2fs_submit_page_bio for ra_meta_pages

This patch avoids to use f2fs_submit_merged_bio for read, which was the only
read case.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ea9c317b5916e..8d92f8249000c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -207,12 +207,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
 		}
 
 		fio.page = page;
-		fio.old_blkaddr = fio.new_blkaddr;
-		f2fs_submit_page_mbio(&fio);
+		f2fs_submit_page_bio(&fio);
 		f2fs_put_page(page, 0);
 	}
 out:
-	f2fs_submit_merged_bio(sbi, META, READ);
 	blk_finish_plug(&plug);
 	return blkno - start;
 }
-- 
GitLab


From b9109b0e49b93b0ae663330acb36561b8f4f6905 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 10 May 2017 11:28:38 -0700
Subject: [PATCH 0083/1958] f2fs: remove unnecessary read cases in merged IO
 flow

Merged IO flow doesn't need to care about read IOs.

f2fs_submit_merged_bio -> f2fs_submit_merged_write
f2fs_submit_merged_bios -> f2fs_submit_merged_writes
f2fs_submit_merged_bio_cond -> f2fs_submit_merged_write_cond

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c        | 14 +++++-----
 fs/f2fs/data.c              | 55 ++++++++++++++++---------------------
 fs/f2fs/f2fs.h              | 12 ++++----
 fs/f2fs/gc.c                |  6 ++--
 fs/f2fs/node.c              | 11 ++++----
 fs/f2fs/segment.c           | 11 ++++----
 fs/f2fs/super.c             |  5 +---
 include/trace/events/f2fs.h |  2 +-
 8 files changed, 51 insertions(+), 65 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 8d92f8249000c..13828f63a8710 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -31,7 +31,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
 	set_ckpt_flags(sbi, CP_ERROR_FLAG);
 	sbi->sb->s_flags |= MS_RDONLY;
 	if (!end_io)
-		f2fs_flush_merged_bios(sbi);
+		f2fs_flush_merged_writes(sbi);
 }
 
 /*
@@ -247,13 +247,13 @@ static int f2fs_write_meta_page(struct page *page,
 	dec_page_count(sbi, F2FS_DIRTY_META);
 
 	if (wbc->for_reclaim)
-		f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
-						0, page->index, META, WRITE);
+		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
+						0, page->index, META);
 
 	unlock_page(page);
 
 	if (unlikely(f2fs_cp_error(sbi)))
-		f2fs_submit_merged_bio(sbi, META, WRITE);
+		f2fs_submit_merged_write(sbi, META);
 
 	return 0;
 
@@ -356,7 +356,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 	}
 stop:
 	if (nwritten)
-		f2fs_submit_merged_bio(sbi, type, WRITE);
+		f2fs_submit_merged_write(sbi, type);
 
 	blk_finish_plug(&plug);
 
@@ -904,7 +904,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
 		 * We should submit bio, since it exists several
 		 * wribacking dentry pages in the freeing inode.
 		 */
-		f2fs_submit_merged_bio(sbi, DATA, WRITE);
+		f2fs_submit_merged_write(sbi, DATA);
 		cond_resched();
 	}
 	goto retry;
@@ -1293,7 +1293,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
 
-	f2fs_flush_merged_bios(sbi);
+	f2fs_flush_merged_writes(sbi);
 
 	/* this is the case of multiple fstrims without any changes */
 	if (cpc->reason & CP_DISCARD) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bdf817d4..06bb2042385ea 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -291,14 +291,12 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
 	return ret;
 }
 
-static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type, int rw)
+				enum page_type type)
 {
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
-	struct f2fs_bio_info *io;
-
-	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
+	struct f2fs_bio_info *io = &sbi->write_io[btype];
 
 	down_write(&io->io_rwsem);
 
@@ -318,25 +316,24 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
 	up_write(&io->io_rwsem);
 }
 
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
-									int rw)
+void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 {
-	__f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
+	__f2fs_submit_merged_write(sbi, NULL, 0, 0, type);
 }
 
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type, int rw)
+				enum page_type type)
 {
 	if (has_merged_page(sbi, inode, ino, idx, type))
-		__f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
+		__f2fs_submit_merged_write(sbi, inode, ino, idx, type);
 }
 
-void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
+void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
 {
-	f2fs_submit_merged_bio(sbi, DATA, WRITE);
-	f2fs_submit_merged_bio(sbi, NODE, WRITE);
-	f2fs_submit_merged_bio(sbi, META, WRITE);
+	f2fs_submit_merged_write(sbi, DATA);
+	f2fs_submit_merged_write(sbi, NODE);
+	f2fs_submit_merged_write(sbi, META);
 }
 
 /*
@@ -368,16 +365,15 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 	return 0;
 }
 
-int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_write(struct f2fs_io_info *fio)
 {
 	struct f2fs_sb_info *sbi = fio->sbi;
 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
-	struct f2fs_bio_info *io;
-	bool is_read = is_read_io(fio->op);
+	struct f2fs_bio_info *io = &sbi->write_io[btype];
 	struct page *bio_page;
 	int err = 0;
 
-	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+	f2fs_bug_on(sbi, is_read_io(fio->op));
 
 	if (fio->old_blkaddr != NEW_ADDR)
 		verify_block_addr(sbi, fio->old_blkaddr);
@@ -388,8 +384,7 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 	/* set submitted = 1 as a return value */
 	fio->submitted = 1;
 
-	if (!is_read)
-		inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+	inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 
 	down_write(&io->io_rwsem);
 
@@ -402,12 +397,11 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 		if ((fio->type == DATA || fio->type == NODE) &&
 				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
 			err = -EAGAIN;
-			if (!is_read)
-				dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
 			goto out_fail;
 		}
 		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
-						BIO_MAX_PAGES, is_read);
+						BIO_MAX_PAGES, false);
 		io->fio = *fio;
 	}
 
@@ -421,7 +415,7 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 	f2fs_trace_ios(fio, 0);
 out_fail:
 	up_write(&io->io_rwsem);
-	trace_f2fs_submit_page_mbio(fio->page, fio);
+	trace_f2fs_submit_page_write(fio->page, fio);
 	return err;
 }
 
@@ -1321,7 +1315,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
 
 	/* flush pending IOs and wait for a while in the ENOMEM case */
 	if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
-		f2fs_flush_merged_bios(fio->sbi);
+		f2fs_flush_merged_writes(fio->sbi);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 		gfp_flags |= __GFP_NOFAIL;
 		goto retry_encrypt;
@@ -1513,8 +1507,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 		ClearPageUptodate(page);
 
 	if (wbc->for_reclaim) {
-		f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
-						DATA, WRITE);
+		f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
 		clear_inode_flag(inode, FI_HOT_DATA);
 		remove_dirty_inode(inode);
 		submitted = NULL;
@@ -1525,7 +1518,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 		f2fs_balance_fs(sbi, need_balance_fs);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
-		f2fs_submit_merged_bio(sbi, DATA, WRITE);
+		f2fs_submit_merged_write(sbi, DATA);
 		submitted = NULL;
 	}
 
@@ -1684,8 +1677,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 		mapping->writeback_index = done_index;
 
 	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
-						0, last_idx, DATA, WRITE);
+		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
+						0, last_idx, DATA);
 
 	return ret;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e26999a745220..58697bd588fa2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -879,7 +879,6 @@ struct f2fs_sb_info {
 	struct f2fs_sm_info *sm_info;		/* segment manager */
 
 	/* for bio operations */
-	struct f2fs_bio_info read_io;			/* for read bios */
 	struct f2fs_bio_info write_io[NR_PAGE_TYPE];	/* for write bios */
 	struct mutex wio_mutex[NODE + 1];	/* bio ordering for NODE/DATA */
 	int write_io_size_bits;			/* Write IO size bits */
@@ -2325,14 +2324,13 @@ void destroy_checkpoint_caches(void);
 /*
  * data.c
  */
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
-			int rw);
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
+void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type, int rw);
-void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi);
+				enum page_type type);
+void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
 int f2fs_submit_page_bio(struct f2fs_io_info *fio);
-int f2fs_submit_page_mbio(struct f2fs_io_info *fio);
+int f2fs_submit_page_write(struct f2fs_io_info *fio);
 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 			block_t blk_addr, struct bio *bio);
 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 965fbf5d0a2eb..67b87155bc482 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -670,7 +670,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 	fio.op = REQ_OP_WRITE;
 	fio.op_flags = REQ_SYNC;
 	fio.new_blkaddr = newaddr;
-	f2fs_submit_page_mbio(&fio);
+	f2fs_submit_page_write(&fio);
 
 	f2fs_update_data_blkaddr(&dn, newaddr);
 	set_inode_flag(inode, FI_APPEND_WRITE);
@@ -936,8 +936,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
 	}
 
 	if (gc_type == FG_GC)
-		f2fs_submit_merged_bio(sbi,
-				(type == SUM_TYPE_NODE) ? NODE : DATA, WRITE);
+		f2fs_submit_merged_write(sbi,
+				(type == SUM_TYPE_NODE) ? NODE : DATA);
 
 	blk_finish_plug(&plug);
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 98351a4a4da3f..41bb632ac2e0d 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1373,15 +1373,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
 	up_read(&sbi->node_write);
 
 	if (wbc->for_reclaim) {
-		f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
-						page->index, NODE, WRITE);
+		f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
+						page->index, NODE);
 		submitted = NULL;
 	}
 
 	unlock_page(page);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
-		f2fs_submit_merged_bio(sbi, NODE, WRITE);
+		f2fs_submit_merged_write(sbi, NODE);
 		submitted = NULL;
 	}
 	if (submitted)
@@ -1518,8 +1518,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
 	}
 out:
 	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
-							NODE, WRITE);
+		f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
 	return ret ? -EIO: 0;
 }
 
@@ -1625,7 +1624,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
 	}
 out:
 	if (nwritten)
-		f2fs_submit_merged_bio(sbi, NODE, WRITE);
+		f2fs_submit_merged_write(sbi, NODE);
 	return ret;
 }
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index de31030b5041c..38bb675976e22 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -328,8 +328,7 @@ static int __commit_inmem_pages(struct inode *inode,
 	}
 
 	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
-							DATA, WRITE);
+		f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
 
 	if (!err)
 		__revoke_inmem_pages(inode, revoke_list, false, false);
@@ -2150,7 +2149,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 					&fio->new_blkaddr, sum, type);
 
 	/* writeout dirty page into bdev */
-	err = f2fs_submit_page_mbio(fio);
+	err = f2fs_submit_page_write(fio);
 	if (err == -EAGAIN) {
 		fio->old_blkaddr = fio->new_blkaddr;
 		goto reallocate;
@@ -2177,7 +2176,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
 		fio.op_flags &= ~REQ_META;
 
 	set_page_writeback(page);
-	f2fs_submit_page_mbio(&fio);
+	f2fs_submit_page_write(&fio);
 }
 
 void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
@@ -2296,8 +2295,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
 	if (PageWriteback(page)) {
 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
 
-		f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
-						0, page->index, type, WRITE);
+		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
+						0, page->index, type);
 		if (ordered)
 			wait_on_page_writeback(page);
 		else
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 397b1e816b369..90599397425ac 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -817,7 +817,7 @@ static void f2fs_put_super(struct super_block *sb)
 	mutex_unlock(&sbi->umount_mutex);
 
 	/* our cp_error case, we can wait for any writeback page */
-	f2fs_flush_merged_bios(sbi);
+	f2fs_flush_merged_writes(sbi);
 
 	iput(sbi->node_inode);
 	iput(sbi->meta_inode);
@@ -1966,9 +1966,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	set_sbi_flag(sbi, SBI_POR_DOING);
 	spin_lock_init(&sbi->stat_lock);
 
-	init_rwsem(&sbi->read_io.io_rwsem);
-	sbi->read_io.sbi = sbi;
-	sbi->read_io.bio = NULL;
 	for (i = 0; i < NR_PAGE_TYPE; i++) {
 		init_rwsem(&sbi->write_io[i].io_rwsem);
 		sbi->write_io[i].sbi = sbi;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 15da88c5c3a4d..5805d92893a8d 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -790,7 +790,7 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio,
 	TP_CONDITION(page->mapping)
 );
 
-DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
+DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write,
 
 	TP_PROTO(struct page *page, struct f2fs_io_info *fio),
 
-- 
GitLab


From 81377bd62837c8113b1c49c5dfa6b1af8f9ee5c2 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 10 May 2017 14:19:54 -0700
Subject: [PATCH 0084/1958] f2fs: use fio instead of multiple parameters

This patch just changes using fio instead of parameters.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 38bb675976e22..c9f3a2faee21f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2039,61 +2039,62 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
 	return false;
 }
 
-static int __get_segment_type_2(struct page *page, enum page_type p_type)
+static int __get_segment_type_2(struct f2fs_io_info *fio)
 {
-	if (p_type == DATA)
+	if (fio->type == DATA)
 		return CURSEG_HOT_DATA;
 	else
 		return CURSEG_HOT_NODE;
 }
 
-static int __get_segment_type_4(struct page *page, enum page_type p_type)
+static int __get_segment_type_4(struct f2fs_io_info *fio)
 {
-	if (p_type == DATA) {
-		struct inode *inode = page->mapping->host;
+	if (fio->type == DATA) {
+		struct inode *inode = fio->page->mapping->host;
 
 		if (S_ISDIR(inode->i_mode))
 			return CURSEG_HOT_DATA;
 		else
 			return CURSEG_COLD_DATA;
 	} else {
-		if (IS_DNODE(page) && is_cold_node(page))
+		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
 			return CURSEG_WARM_NODE;
 		else
 			return CURSEG_COLD_NODE;
 	}
 }
 
-static int __get_segment_type_6(struct page *page, enum page_type p_type)
+static int __get_segment_type_6(struct f2fs_io_info *fio)
 {
-	if (p_type == DATA) {
-		struct inode *inode = page->mapping->host;
+	if (fio->type == DATA) {
+		struct inode *inode = fio->page->mapping->host;
 
-		if (is_cold_data(page) || file_is_cold(inode))
+		if (is_cold_data(fio->page) || file_is_cold(inode))
 			return CURSEG_COLD_DATA;
 		if (is_inode_flag_set(inode, FI_HOT_DATA))
 			return CURSEG_HOT_DATA;
 		return CURSEG_WARM_DATA;
 	} else {
-		if (IS_DNODE(page))
-			return is_cold_node(page) ? CURSEG_WARM_NODE :
+		if (IS_DNODE(fio->page))
+			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
 						CURSEG_HOT_NODE;
 		return CURSEG_COLD_NODE;
 	}
 }
 
-static int __get_segment_type(struct page *page, enum page_type p_type)
+static int __get_segment_type(struct f2fs_io_info *fio)
 {
-	switch (F2FS_P_SB(page)->active_logs) {
+	switch (fio->sbi->active_logs) {
 	case 2:
-		return __get_segment_type_2(page, p_type);
+		return __get_segment_type_2(fio);
 	case 4:
-		return __get_segment_type_4(page, p_type);
+		return __get_segment_type_4(fio);
 	}
+
 	/* NR_CURSEG_TYPE(6) logs by default */
-	f2fs_bug_on(F2FS_P_SB(page),
-		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
-	return __get_segment_type_6(page, p_type);
+	f2fs_bug_on(fio->sbi, fio->sbi->active_logs != NR_CURSEG_TYPE);
+
+	return __get_segment_type_6(fio);
 }
 
 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
@@ -2139,7 +2140,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 {
-	int type = __get_segment_type(fio->page, fio->type);
+	int type = __get_segment_type(fio);
 	int err;
 
 	if (fio->type == NODE || fio->type == DATA)
-- 
GitLab


From a912b54d3aaa011266dc266e3694f782f27233cf Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 10 May 2017 11:18:25 -0700
Subject: [PATCH 0085/1958] f2fs: split bio cache

Split DATA/NODE type bio cache according to different temperature,
so write IOs with the same temperature can be merged in corresponding
bio cache as much as possible, otherwise, different temperature write
IOs submitting into one bio cache will always cause split of bio.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c              | 57 +++++++++++++++++++++++++------------
 fs/f2fs/f2fs.h              | 10 ++++++-
 fs/f2fs/gc.c                |  2 ++
 fs/f2fs/segment.c           | 24 ++++++++++++----
 fs/f2fs/segment.h           |  4 +++
 fs/f2fs/super.c             | 21 ++++++++++++--
 include/trace/events/f2fs.h | 14 ++++++++-
 7 files changed, 103 insertions(+), 29 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 06bb2042385ea..4ca0899621a0c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -282,27 +282,32 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
 				nid_t ino, pgoff_t idx, enum page_type type)
 {
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
-	struct f2fs_bio_info *io = &sbi->write_io[btype];
-	bool ret;
+	enum temp_type temp;
+	struct f2fs_bio_info *io;
+	bool ret = false;
 
-	down_read(&io->io_rwsem);
-	ret = __has_merged_page(io, inode, ino, idx);
-	up_read(&io->io_rwsem);
+	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+		io = sbi->write_io[btype] + temp;
+
+		down_read(&io->io_rwsem);
+		ret = __has_merged_page(io, inode, ino, idx);
+		up_read(&io->io_rwsem);
+
+		/* TODO: use HOT temp only for meta pages now. */
+		if (ret || btype == META)
+			break;
+	}
 	return ret;
 }
 
 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
-				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type)
+				enum page_type type, enum temp_type temp)
 {
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
-	struct f2fs_bio_info *io = &sbi->write_io[btype];
+	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 
 	down_write(&io->io_rwsem);
 
-	if (!__has_merged_page(io, inode, ino, idx))
-		goto out;
-
 	/* change META to META_FLUSH in the checkpoint procedure */
 	if (type >= META_FLUSH) {
 		io->fio.type = META_FLUSH;
@@ -312,21 +317,38 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
 	}
 	__submit_merged_bio(io);
-out:
 	up_write(&io->io_rwsem);
 }
 
+static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
+				struct inode *inode, nid_t ino, pgoff_t idx,
+				enum page_type type, bool force)
+{
+	enum temp_type temp;
+
+	if (!force && !has_merged_page(sbi, inode, ino, idx, type))
+		return;
+
+	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+
+		__f2fs_submit_merged_write(sbi, type, temp);
+
+		/* TODO: use HOT temp only for meta pages now. */
+		if (type >= META)
+			break;
+	}
+}
+
 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 {
-	__f2fs_submit_merged_write(sbi, NULL, 0, 0, type);
+	__submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
 }
 
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 				struct inode *inode, nid_t ino, pgoff_t idx,
 				enum page_type type)
 {
-	if (has_merged_page(sbi, inode, ino, idx, type))
-		__f2fs_submit_merged_write(sbi, inode, ino, idx, type);
+	__submit_merged_write_cond(sbi, inode, ino, idx, type, false);
 }
 
 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -369,7 +391,7 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio)
 {
 	struct f2fs_sb_info *sbi = fio->sbi;
 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
-	struct f2fs_bio_info *io = &sbi->write_io[btype];
+	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
 	struct page *bio_page;
 	int err = 0;
 
@@ -405,8 +427,7 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio)
 		io->fio = *fio;
 	}
 
-	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
-							PAGE_SIZE) {
+	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
 		__submit_merged_bio(io);
 		goto alloc_new;
 	}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 58697bd588fa2..c6643783adfff 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -792,9 +792,17 @@ enum page_type {
 	OPU,
 };
 
+enum temp_type {
+	HOT = 0,	/* must be zero for meta bio */
+	WARM,
+	COLD,
+	NR_TEMP_TYPE,
+};
+
 struct f2fs_io_info {
 	struct f2fs_sb_info *sbi;	/* f2fs_sb_info pointer */
 	enum page_type type;	/* contains DATA/NODE/META/META_FLUSH */
+	enum temp_type temp;	/* contains HOT/WARM/COLD */
 	int op;			/* contains REQ_OP_ */
 	int op_flags;		/* req_flag_bits */
 	block_t new_blkaddr;	/* new block address to be written */
@@ -879,7 +887,7 @@ struct f2fs_sb_info {
 	struct f2fs_sm_info *sm_info;		/* segment manager */
 
 	/* for bio operations */
-	struct f2fs_bio_info write_io[NR_PAGE_TYPE];	/* for write bios */
+	struct f2fs_bio_info *write_io[NR_PAGE_TYPE];	/* for write bios */
 	struct mutex wio_mutex[NODE + 1];	/* bio ordering for NODE/DATA */
 	int write_io_size_bits;			/* Write IO size bits */
 	mempool_t *write_io_dummy;		/* Dummy pages */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 67b87155bc482..e2b13558a915c 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -586,6 +586,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 	struct f2fs_io_info fio = {
 		.sbi = F2FS_I_SB(inode),
 		.type = DATA,
+		.temp = COLD,
 		.op = REQ_OP_READ,
 		.op_flags = 0,
 		.encrypted_page = NULL,
@@ -712,6 +713,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
 		struct f2fs_io_info fio = {
 			.sbi = F2FS_I_SB(inode),
 			.type = DATA,
+			.temp = COLD,
 			.op = REQ_OP_WRITE,
 			.op_flags = REQ_SYNC,
 			.old_blkaddr = NULL_ADDR,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c9f3a2faee21f..fcada9d03817b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2084,17 +2084,29 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
 
 static int __get_segment_type(struct f2fs_io_info *fio)
 {
+	int type = 0;
+
 	switch (fio->sbi->active_logs) {
 	case 2:
-		return __get_segment_type_2(fio);
+		type = __get_segment_type_2(fio);
+		break;
 	case 4:
-		return __get_segment_type_4(fio);
+		type = __get_segment_type_4(fio);
+		break;
+	case 6:
+		type = __get_segment_type_6(fio);
+		break;
+	default:
+		f2fs_bug_on(fio->sbi, true);
 	}
 
-	/* NR_CURSEG_TYPE(6) logs by default */
-	f2fs_bug_on(fio->sbi, fio->sbi->active_logs != NR_CURSEG_TYPE);
-
-	return __get_segment_type_6(fio);
+	if (IS_HOT(type))
+		fio->temp = HOT;
+	else if (IS_WARM(type))
+		fio->temp = WARM;
+	else
+		fio->temp = COLD;
+	return type;
 }
 
 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 10bf05d4cff49..e9ba1f1d97232 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -27,6 +27,10 @@
 #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
 #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
 
+#define IS_HOT(t)	((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
+#define IS_WARM(t)	((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
+#define IS_COLD(t)	((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA)
+
 #define IS_CURSEG(sbi, seg)						\
 	(((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) ||	\
 	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) ||	\
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 90599397425ac..aa451ec9fb809 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -768,6 +768,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 static void f2fs_put_super(struct super_block *sb)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int i;
 
 	if (sbi->s_proc) {
 		remove_proc_entry("segment_info", sbi->s_proc);
@@ -838,6 +839,8 @@ static void f2fs_put_super(struct super_block *sb)
 	destroy_device_list(sbi);
 	mempool_destroy(sbi->write_io_dummy);
 	destroy_percpu_info(sbi);
+	for (i = 0; i < NR_PAGE_TYPE; i++)
+		kfree(sbi->write_io[i]);
 	kfree(sbi);
 }
 
@@ -1967,9 +1970,19 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	spin_lock_init(&sbi->stat_lock);
 
 	for (i = 0; i < NR_PAGE_TYPE; i++) {
-		init_rwsem(&sbi->write_io[i].io_rwsem);
-		sbi->write_io[i].sbi = sbi;
-		sbi->write_io[i].bio = NULL;
+		int n = (i == META) ? 1: NR_TEMP_TYPE;
+		int j;
+
+		sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
+								GFP_KERNEL);
+		if (!sbi->write_io[i])
+			goto free_options;
+
+		for (j = HOT; j < n; j++) {
+			init_rwsem(&sbi->write_io[i][j].io_rwsem);
+			sbi->write_io[i][j].sbi = sbi;
+			sbi->write_io[i][j].bio = NULL;
+		}
 	}
 
 	init_rwsem(&sbi->cp_rwsem);
@@ -2215,6 +2228,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 free_io_dummy:
 	mempool_destroy(sbi->write_io_dummy);
 free_options:
+	for (i = 0; i < NR_PAGE_TYPE; i++)
+		kfree(sbi->write_io[i]);
 	destroy_percpu_info(sbi);
 	kfree(options);
 free_sb_buf:
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 5805d92893a8d..6f77a2755abbf 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -19,6 +19,9 @@ TRACE_DEFINE_ENUM(INMEM_INVALIDATE);
 TRACE_DEFINE_ENUM(INMEM_REVOKE);
 TRACE_DEFINE_ENUM(IPU);
 TRACE_DEFINE_ENUM(OPU);
+TRACE_DEFINE_ENUM(HOT);
+TRACE_DEFINE_ENUM(WARM);
+TRACE_DEFINE_ENUM(COLD);
 TRACE_DEFINE_ENUM(CURSEG_HOT_DATA);
 TRACE_DEFINE_ENUM(CURSEG_WARM_DATA);
 TRACE_DEFINE_ENUM(CURSEG_COLD_DATA);
@@ -59,6 +62,12 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
 		{ IPU,		"IN-PLACE" },				\
 		{ OPU,		"OUT-OF-PLACE" })
 
+#define show_block_temp(temp)						\
+	__print_symbolic(temp,						\
+		{ HOT,		"HOT" },				\
+		{ WARM,		"WARM" },				\
+		{ COLD,		"COLD" })
+
 #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO |	\
 			REQ_PREFLUSH | REQ_FUA)
 #define F2FS_BIO_FLAG_MASK(t)	(t & F2FS_OP_FLAGS)
@@ -757,6 +766,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
 		__field(block_t, new_blkaddr)
 		__field(int, op)
 		__field(int, op_flags)
+		__field(int, temp)
 		__field(int, type)
 	),
 
@@ -768,16 +778,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
 		__entry->new_blkaddr	= fio->new_blkaddr;
 		__entry->op		= fio->op;
 		__entry->op_flags	= fio->op_flags;
+		__entry->temp		= fio->temp;
 		__entry->type		= fio->type;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
-		"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s",
+		"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s",
 		show_dev_ino(__entry),
 		(unsigned long)__entry->index,
 		(unsigned long long)__entry->old_blkaddr,
 		(unsigned long long)__entry->new_blkaddr,
 		show_bio_type(__entry->op, __entry->op_flags),
+		show_block_temp(__entry->temp),
 		show_block_type(__entry->type))
 );
 
-- 
GitLab


From cc15620bc826b14006956fd321e026ae96aff53a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 12 May 2017 13:51:34 -0700
Subject: [PATCH 0086/1958] f2fs: avoid f2fs_lock_op for IPU writes

Currently, if we do get_node_of_data before f2fs_lock_op, there may be dead lock
as follows, where process A would be in infinite loop, and B will NOT be awaked.

Process A(cp):            Process B:
f2fs_lock_all(sbi)
                        get_dnode_of_data <---- lock dn.node_page
flush_nodes             f2fs_lock_op

So, this patch adds f2fs_trylock_op to avoid f2fs_lock_op done by IPU.

Signed-off-by: Hou Pengyang <houpengyang@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c    | 44 +++++++++++++++++++++++++++++++-------------
 fs/f2fs/f2fs.h    | 13 ++++++++++++-
 fs/f2fs/gc.c      |  2 +-
 fs/f2fs/segment.c |  2 +-
 4 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 4ca0899621a0c..779a306858a22 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1383,12 +1383,12 @@ int do_write_data_page(struct f2fs_io_info *fio)
 
 		if (valid_ipu_blkaddr(fio)) {
 			ipu_force = true;
-			fio->need_lock = false;
+			fio->need_lock = LOCK_DONE;
 			goto got_it;
 		}
 	}
 
-	if (fio->need_lock)
+	if (fio->need_lock == LOCK_REQ)
 		f2fs_lock_op(fio->sbi);
 
 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
@@ -1403,19 +1403,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
 		goto out_writepage;
 	}
 got_it:
-	err = encrypt_one_page(fio);
-	if (err)
-		goto out_writepage;
-
-	set_page_writeback(page);
-
 	/*
 	 * If current allocation needs SSR,
 	 * it had better in-place writes for updated data.
 	 */
 	if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+		err = encrypt_one_page(fio);
+		if (err)
+			goto out_writepage;
+
+		set_page_writeback(page);
 		f2fs_put_dnode(&dn);
-		if (fio->need_lock)
+		if (fio->need_lock == LOCK_REQ)
 			f2fs_unlock_op(fio->sbi);
 		err = rewrite_data_page(fio);
 		trace_f2fs_do_write_data_page(fio->page, IPU);
@@ -1423,6 +1422,20 @@ int do_write_data_page(struct f2fs_io_info *fio)
 		return err;
 	}
 
+	if (fio->need_lock == LOCK_RETRY) {
+		if (!f2fs_trylock_op(fio->sbi)) {
+			err = -EAGAIN;
+			goto out_writepage;
+		}
+		fio->need_lock = LOCK_REQ;
+	}
+
+	err = encrypt_one_page(fio);
+	if (err)
+		goto out_writepage;
+
+	set_page_writeback(page);
+
 	/* LFS mode write path */
 	write_data_page(&dn, fio);
 	trace_f2fs_do_write_data_page(page, OPU);
@@ -1432,7 +1445,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
 out_writepage:
 	f2fs_put_dnode(&dn);
 out:
-	if (fio->need_lock)
+	if (fio->need_lock == LOCK_REQ)
 		f2fs_unlock_op(fio->sbi);
 	return err;
 }
@@ -1458,7 +1471,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 		.page = page,
 		.encrypted_page = NULL,
 		.submitted = false,
-		.need_lock = true,
+		.need_lock = LOCK_RETRY,
 	};
 
 	trace_f2fs_writepage(page, DATA);
@@ -1494,7 +1507,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 
 	/* Dentry blocks are controlled by checkpoint */
 	if (S_ISDIR(inode->i_mode)) {
-		fio.need_lock = false;
+		fio.need_lock = LOCK_DONE;
 		err = do_write_data_page(&fio);
 		goto done;
 	}
@@ -1513,8 +1526,13 @@ static int __write_data_page(struct page *page, bool *submitted,
 			goto out;
 	}
 
-	if (err == -EAGAIN)
+	if (err == -EAGAIN) {
 		err = do_write_data_page(&fio);
+		if (err == -EAGAIN) {
+			fio.need_lock = LOCK_REQ;
+			err = do_write_data_page(&fio);
+		}
+	}
 	if (F2FS_I(inode)->last_disk_size < psize)
 		F2FS_I(inode)->last_disk_size = psize;
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c6643783adfff..f61e095947c81 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -799,6 +799,12 @@ enum temp_type {
 	NR_TEMP_TYPE,
 };
 
+enum need_lock_type {
+	LOCK_REQ = 0,
+	LOCK_DONE,
+	LOCK_RETRY,
+};
+
 struct f2fs_io_info {
 	struct f2fs_sb_info *sbi;	/* f2fs_sb_info pointer */
 	enum page_type type;	/* contains DATA/NODE/META/META_FLUSH */
@@ -810,7 +816,7 @@ struct f2fs_io_info {
 	struct page *page;	/* page to be written */
 	struct page *encrypted_page;	/* encrypted page */
 	bool submitted;		/* indicate IO submission */
-	bool need_lock;		/* indicate we need to lock cp_rwsem */
+	int need_lock;		/* indicate we need to lock cp_rwsem */
 };
 
 #define is_read_io(rw) ((rw) == READ)
@@ -1279,6 +1285,11 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 	down_read(&sbi->cp_rwsem);
 }
 
+static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
+{
+	return down_read_trylock(&sbi->cp_rwsem);
+}
+
 static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
 	up_read(&sbi->cp_rwsem);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e2b13558a915c..14c71ac760621 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -719,7 +719,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
 			.old_blkaddr = NULL_ADDR,
 			.page = page,
 			.encrypted_page = NULL,
-			.need_lock = true,
+			.need_lock = LOCK_REQ,
 		};
 		bool is_dirty = PageDirty(page);
 		int err;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fcada9d03817b..3bc36769ec9f7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -312,7 +312,7 @@ static int __commit_inmem_pages(struct inode *inode,
 			fio.page = page;
 			fio.old_blkaddr = NULL_ADDR;
 			fio.encrypted_page = NULL;
-			fio.need_lock = false,
+			fio.need_lock = LOCK_DONE;
 			err = do_write_data_page(&fio);
 			if (err) {
 				unlock_page(page);
-- 
GitLab


From bd80a4b9812c0d74ecfc0b1b14ca77732faa2259 Mon Sep 17 00:00:00 2001
From: Hou Pengyang <houpengyang@huawei.com>
Date: Wed, 17 May 2017 02:48:48 +0000
Subject: [PATCH 0087/1958] f2fs: declare load_free_nid_bitmap static

Signed-off-by: Hou Pengyang <houpengyang@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 41bb632ac2e0d..d22db8ce0a692 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2552,7 +2552,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
-inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned int i = 0;
-- 
GitLab


From b7b7c4cf1c9ef0272a65f1480457cbfdadcda19d Mon Sep 17 00:00:00 2001
From: Yunlei He <heyunlei@huawei.com>
Date: Wed, 17 May 2017 17:22:51 +0800
Subject: [PATCH 0088/1958] f2fs: add a new function get_ssr_cost

This patch add a new method get_ssr_cost to select
SSR segment more accurately.

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 14c71ac760621..81392970fb2da 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -258,11 +258,20 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
 				valid_blocks * 2 : valid_blocks;
 }
 
+static unsigned int get_ssr_cost(struct f2fs_sb_info *sbi,
+						unsigned int segno)
+{
+	struct seg_entry *se = get_seg_entry(sbi, segno);
+
+	return se->ckpt_valid_blocks > se->valid_blocks ?
+				se->ckpt_valid_blocks : se->valid_blocks;
+}
+
 static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
 			unsigned int segno, struct victim_sel_policy *p)
 {
 	if (p->alloc_mode == SSR)
-		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+		return get_ssr_cost(sbi, segno);
 
 	/* alloc_mode == LFS */
 	if (p->gc_mode == GC_GREEDY)
-- 
GitLab


From 1d7be2708277edfef95171d52fb65ee26eaa076b Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 17 May 2017 10:36:58 -0700
Subject: [PATCH 0089/1958] f2fs: try to freeze in gc and discard threads

This allows to freeze gc and discard threads.

Cc: stable@vger.kernel.org
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c      |  9 +++++----
 fs/f2fs/segment.c | 25 ++++++++++++++++---------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 81392970fb2da..570480571d722 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -32,13 +32,14 @@ static int gc_thread_func(void *data)
 
 	wait_ms = gc_th->min_sleep_time;
 
+	set_freezable();
 	do {
+		wait_event_interruptible_timeout(*wq,
+				kthread_should_stop() || freezing(current),
+				msecs_to_jiffies(wait_ms));
+
 		if (try_to_freeze())
 			continue;
-		else
-			wait_event_interruptible_timeout(*wq,
-						kthread_should_stop(),
-						msecs_to_jiffies(wait_ms));
 		if (kthread_should_stop())
 			break;
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 3bc36769ec9f7..4591239dbae22 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -16,6 +16,7 @@
 #include <linux/kthread.h>
 #include <linux/swap.h>
 #include <linux/timer.h>
+#include <linux/freezer.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -1059,18 +1060,24 @@ static int issue_discard_thread(void *data)
 	struct f2fs_sb_info *sbi = data;
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	wait_queue_head_t *q = &dcc->discard_wait_queue;
-repeat:
-	if (kthread_should_stop())
-		return 0;
 
-	__issue_discard_cmd(sbi, true);
-	__wait_discard_cmd(sbi, true);
+	set_freezable();
 
-	congestion_wait(BLK_RW_SYNC, HZ/50);
+	do {
+		wait_event_interruptible(*q, kthread_should_stop() ||
+					freezing(current) ||
+					atomic_read(&dcc->discard_cmd_cnt));
+		if (try_to_freeze())
+			continue;
+		if (kthread_should_stop())
+			return 0;
 
-	wait_event_interruptible(*q, kthread_should_stop() ||
-				atomic_read(&dcc->discard_cmd_cnt));
-	goto repeat;
+		__issue_discard_cmd(sbi, true);
+		__wait_discard_cmd(sbi, true);
+
+		congestion_wait(BLK_RW_SYNC, HZ/50);
+	} while (!kthread_should_stop());
+	return 0;
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-- 
GitLab


From dad48e73127ba10279ea33e6dbc8d3905c4d31c0 Mon Sep 17 00:00:00 2001
From: Yunlei He <heyunlei@huawei.com>
Date: Fri, 19 May 2017 15:06:12 +0800
Subject: [PATCH 0090/1958] f2fs: fix a bug caused by NULL extent tree

Thread A:					Thread B:

-f2fs_remount
    -sbi->mount_opt.opt = 0;
						<--- -f2fs_iget
						         -do_read_inode
							     -f2fs_init_extent_tree
							         -F2FS_I(inode)->extent_tree is NULL
        -default_options && parse_options
	    -remount return
						<---  -f2fs_map_blocks
						          -f2fs_lookup_extent_tree
                                                              -f2fs_bug_on(sbi, !et);

The same problem with f2fs_new_inode.

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/extent_cache.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2f98d70397013..ff2352a0ed157 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -320,7 +320,7 @@ static void __drop_largest_extent(struct inode *inode,
 }
 
 /* return true, if inode page is changed */
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct extent_tree *et;
@@ -358,6 +358,16 @@ bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
 	return false;
 }
 
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+{
+	bool ret =  __f2fs_init_extent_tree(inode, i_ext);
+
+	if (!F2FS_I(inode)->extent_tree)
+		set_inode_flag(inode, FI_NO_EXTENT);
+
+	return ret;
+}
+
 static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 							struct extent_info *ei)
 {
-- 
GitLab


From 963932a93ceb6bdb0d45572056d8daebf2948cd0 Mon Sep 17 00:00:00 2001
From: Yunlei He <heyunlei@huawei.com>
Date: Fri, 19 May 2017 14:42:12 +0800
Subject: [PATCH 0091/1958] f2fs: combine huge num of discard rb tree
 consistence checks

Came across a hungtask caused by huge number of rb tree traversing
during adding discard addrs in cp. This patch combine these consistence
checks and move it to discard thread.

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4591239dbae22..7ac3a0c84bfd1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -838,7 +838,6 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
 		dc->len = blkaddr - dc->lstart;
 		dcc->undiscard_blks += dc->len;
 		__relocate_discard_cmd(dcc, dc);
-		f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
 		modified = true;
 	}
 
@@ -848,16 +847,12 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
 					di.start + blkaddr + 1 - di.lstart,
 					di.lstart + di.len - 1 - blkaddr,
 					NULL, NULL);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 		} else {
 			dc->lstart++;
 			dc->len--;
 			dc->start++;
 			dcc->undiscard_blks += dc->len;
 			__relocate_discard_cmd(dcc, dc);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 		}
 	}
 }
@@ -918,8 +913,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
 			prev_dc->di.len += di.len;
 			dcc->undiscard_blks += di.len;
 			__relocate_discard_cmd(dcc, prev_dc);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 			di = prev_dc->di;
 			tdc = prev_dc;
 			merged = true;
@@ -935,16 +928,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
 			__relocate_discard_cmd(dcc, next_dc);
 			if (tdc)
 				__remove_discard_cmd(sbi, tdc);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 			merged = true;
 		}
 
 		if (!merged) {
 			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
 							di.len, NULL, NULL);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 		}
  next:
 		prev_dc = next_dc;
@@ -983,6 +972,8 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
 	int i, iter = 0;
 
 	mutex_lock(&dcc->cmd_lock);
+	f2fs_bug_on(sbi,
+		!__check_rb_tree_consistence(sbi, &dcc->root));
 	blk_start_plug(&plug);
 	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
 		pend_list = &dcc->pend_list[i];
-- 
GitLab


From e41e6d75e5010741f01f8aa4f77a5e8a1786652d Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 19 May 2017 23:37:00 +0800
Subject: [PATCH 0092/1958] f2fs: split wio_mutex

Split wio_mutex to adjust different temperature bio cache.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h    | 3 ++-
 fs/f2fs/segment.c | 4 ++--
 fs/f2fs/super.c   | 7 ++++---
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f61e095947c81..093d68a7ae478 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -894,7 +894,8 @@ struct f2fs_sb_info {
 
 	/* for bio operations */
 	struct f2fs_bio_info *write_io[NR_PAGE_TYPE];	/* for write bios */
-	struct mutex wio_mutex[NODE + 1];	/* bio ordering for NODE/DATA */
+	struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
+						/* bio ordering for NODE/DATA */
 	int write_io_size_bits;			/* Write IO size bits */
 	mempool_t *write_io_dummy;		/* Dummy pages */
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7ac3a0c84bfd1..63850e023c106 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2154,7 +2154,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 	int err;
 
 	if (fio->type == NODE || fio->type == DATA)
-		mutex_lock(&fio->sbi->wio_mutex[fio->type]);
+		mutex_lock(&fio->sbi->wio_mutex[fio->type][fio->temp]);
 reallocate:
 	allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
 					&fio->new_blkaddr, sum, type);
@@ -2167,7 +2167,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 	}
 
 	if (fio->type == NODE || fio->type == DATA)
-		mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
+		mutex_unlock(&fio->sbi->wio_mutex[fio->type][fio->temp]);
 }
 
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index aa451ec9fb809..b700766d0cbfc 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1571,7 +1571,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 static void init_sb_info(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = sbi->raw_super;
-	int i;
+	int i, j;
 
 	sbi->log_sectors_per_block =
 		le32_to_cpu(raw_super->log_sectors_per_block);
@@ -1603,8 +1603,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 
 	INIT_LIST_HEAD(&sbi->s_list);
 	mutex_init(&sbi->umount_mutex);
-	mutex_init(&sbi->wio_mutex[NODE]);
-	mutex_init(&sbi->wio_mutex[DATA]);
+	for (i = 0; i < NR_PAGE_TYPE - 1; i++)
+		for (j = HOT; j < NR_TEMP_TYPE; j++)
+			mutex_init(&sbi->wio_mutex[i][j]);
 	spin_lock_init(&sbi->cp_lock);
 }
 
-- 
GitLab


From fb830fc5cfc90ba8236921aacb72c6d70bf78af7 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 19 May 2017 23:37:01 +0800
Subject: [PATCH 0093/1958] f2fs: introduce io_list for serialize data/node IOs

Serialize data/node IOs by using fifo list instead of mutex lock,
it will help to enhance concurrency of f2fs, meanwhile keeping LFS
IO semantics.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  1 +
 fs/f2fs/data.c       | 24 ++++++++++++++++++++----
 fs/f2fs/f2fs.h       |  7 ++++++-
 fs/f2fs/gc.c         |  3 ++-
 fs/f2fs/segment.c    | 22 +++++++++++++++-------
 fs/f2fs/super.c      |  2 ++
 6 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 13828f63a8710..12559a4b6c244 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -162,6 +162,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
 		.op = REQ_OP_READ,
 		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
 		.encrypted_page = NULL,
+		.in_list = false,
 	};
 	struct blk_plug plug;
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 779a306858a22..2ed90f5db8320 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -397,6 +397,20 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio)
 
 	f2fs_bug_on(sbi, is_read_io(fio->op));
 
+	down_write(&io->io_rwsem);
+next:
+	if (fio->in_list) {
+		spin_lock(&io->io_lock);
+		if (list_empty(&io->io_list)) {
+			spin_unlock(&io->io_lock);
+			goto out_fail;
+		}
+		fio = list_first_entry(&io->io_list,
+						struct f2fs_io_info, list);
+		list_del(&fio->list);
+		spin_unlock(&io->io_lock);
+	}
+
 	if (fio->old_blkaddr != NEW_ADDR)
 		verify_block_addr(sbi, fio->old_blkaddr);
 	verify_block_addr(sbi, fio->new_blkaddr);
@@ -408,8 +422,6 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio)
 
 	inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 
-	down_write(&io->io_rwsem);
-
 	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
 	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
 			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
@@ -434,9 +446,13 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio)
 
 	io->last_block_in_bio = fio->new_blkaddr;
 	f2fs_trace_ios(fio, 0);
+
+	trace_f2fs_submit_page_write(fio->page, fio);
+
+	if (fio->in_list)
+		goto next;
 out_fail:
 	up_write(&io->io_rwsem);
-	trace_f2fs_submit_page_write(fio->page, fio);
 	return err;
 }
 
@@ -749,7 +765,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
-						&sum, CURSEG_WARM_DATA);
+					&sum, CURSEG_WARM_DATA, NULL, false);
 	set_data_blkaddr(dn);
 
 	/* update i_size */
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 093d68a7ae478..cd777cf30be2a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -815,8 +815,10 @@ struct f2fs_io_info {
 	block_t old_blkaddr;	/* old block address before Cow */
 	struct page *page;	/* page to be written */
 	struct page *encrypted_page;	/* encrypted page */
+	struct list_head list;		/* serialize IOs */
 	bool submitted;		/* indicate IO submission */
 	int need_lock;		/* indicate we need to lock cp_rwsem */
+	bool in_list;		/* indicate fio is in io_list */
 };
 
 #define is_read_io(rw) ((rw) == READ)
@@ -826,6 +828,8 @@ struct f2fs_bio_info {
 	sector_t last_block_in_bio;	/* last block number */
 	struct f2fs_io_info fio;	/* store buffered io info. */
 	struct rw_semaphore io_rwsem;	/* blocking op for bio */
+	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
+	struct list_head io_list;	/* track fios */
 };
 
 #define FDEV(i)				(sbi->devs[i])
@@ -2294,7 +2298,8 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 			bool recover_newaddr);
 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 			block_t old_blkaddr, block_t *new_blkaddr,
-			struct f2fs_summary *sum, int type);
+			struct f2fs_summary *sum, int type,
+			struct f2fs_io_info *fio, bool add_list);
 void f2fs_wait_on_page_writeback(struct page *page,
 			enum page_type type, bool ordered);
 void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 570480571d722..fa3d2e2df8e70 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -600,6 +600,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 		.op = REQ_OP_READ,
 		.op_flags = 0,
 		.encrypted_page = NULL,
+		.in_list = false,
 	};
 	struct dnode_of_data dn;
 	struct f2fs_summary sum;
@@ -643,7 +644,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
 
 	allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
-							&sum, CURSEG_COLD_DATA);
+					&sum, CURSEG_COLD_DATA, NULL, false);
 
 	fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
 					FGP_LOCK | FGP_CREAT, GFP_NOFS);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 63850e023c106..a3766bc1ba4d9 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2109,7 +2109,8 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 
 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		block_t old_blkaddr, block_t *new_blkaddr,
-		struct f2fs_summary *sum, int type)
+		struct f2fs_summary *sum, int type,
+		struct f2fs_io_info *fio, bool add_list)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2145,6 +2146,17 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 	if (page && IS_NODESEG(type))
 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
 
+	if (add_list) {
+		struct f2fs_bio_info *io;
+
+		INIT_LIST_HEAD(&fio->list);
+		fio->in_list = true;
+		io = sbi->write_io[fio->type] + fio->temp;
+		spin_lock(&io->io_lock);
+		list_add_tail(&fio->list, &io->io_list);
+		spin_unlock(&io->io_lock);
+	}
+
 	mutex_unlock(&curseg->curseg_mutex);
 }
 
@@ -2153,11 +2165,9 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 	int type = __get_segment_type(fio);
 	int err;
 
-	if (fio->type == NODE || fio->type == DATA)
-		mutex_lock(&fio->sbi->wio_mutex[fio->type][fio->temp]);
 reallocate:
 	allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-					&fio->new_blkaddr, sum, type);
+			&fio->new_blkaddr, sum, type, fio, true);
 
 	/* writeout dirty page into bdev */
 	err = f2fs_submit_page_write(fio);
@@ -2165,9 +2175,6 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 		fio->old_blkaddr = fio->new_blkaddr;
 		goto reallocate;
 	}
-
-	if (fio->type == NODE || fio->type == DATA)
-		mutex_unlock(&fio->sbi->wio_mutex[fio->type][fio->temp]);
 }
 
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2181,6 +2188,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
 		.new_blkaddr = page->index,
 		.page = page,
 		.encrypted_page = NULL,
+		.in_list = false,
 	};
 
 	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b700766d0cbfc..d6af34d1e6a83 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1983,6 +1983,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 			init_rwsem(&sbi->write_io[i][j].io_rwsem);
 			sbi->write_io[i][j].sbi = sbi;
 			sbi->write_io[i][j].bio = NULL;
+			spin_lock_init(&sbi->write_io[i][j].io_lock);
+			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
 		}
 	}
 
-- 
GitLab


From 04dfc23006a200865132ef404778a07b896a0280 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 19 May 2017 23:46:43 +0800
Subject: [PATCH 0094/1958] f2fs: show more info if fail to issue discard

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a3766bc1ba4d9..22cca26990955 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -741,7 +741,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
 
 	if (dc->error)
 		f2fs_msg(sbi->sb, KERN_INFO,
-				"Issue discard failed, ret: %d", dc->error);
+			"Issue discard(%u, %u, %u) failed, ret: %d",
+			dc->lstart, dc->start, dc->len, dc->error);
 	__detach_discard_cmd(dcc, dc);
 }
 
-- 
GitLab


From e31b98215779e66a490471c6ad886ae231316699 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 19 May 2017 23:46:44 +0800
Subject: [PATCH 0095/1958] f2fs: wake up all waiters in
 f2fs_submit_discard_endio

There could be more than one waiter waiting discard IO completion, so we
need use complete_all() instead of complete() in f2fs_submit_discard_endio
to avoid hungtask.

Fixes: 	ec9895add2c5 ("f2fs: don't hold cmd_lock during waiting discard
command")
Cc: <stable@vger.kernel.org>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 22cca26990955..24a2d5ab6f456 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -752,7 +752,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 
 	dc->error = bio->bi_error;
 	dc->state = D_DONE;
-	complete(&dc->wait);
+	complete_all(&dc->wait);
 	bio_put(bio);
 }
 
-- 
GitLab


From 6afae6336ac9c19b5956c003e882b1ee4bca2a9d Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 19 May 2017 23:46:45 +0800
Subject: [PATCH 0096/1958] f2fs: wait discard IO completion without cmd_lock
 held

Wait discard IO completion outside cmd_lock to avoid long latency
of holding cmd_lock in IO busy scenario.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 24a2d5ab6f456..a8f4c81467141 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -997,17 +997,34 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	struct list_head *wait_list = &(dcc->wait_list);
 	struct discard_cmd *dc, *tmp;
+	bool need_wait;
+
+next:
+	need_wait = false;
 
 	mutex_lock(&dcc->cmd_lock);
 	list_for_each_entry_safe(dc, tmp, wait_list, list) {
-		if (!wait_cond || dc->state == D_DONE) {
-			if (dc->ref)
-				continue;
+		if (!wait_cond || (dc->state == D_DONE && !dc->ref)) {
 			wait_for_completion_io(&dc->wait);
 			__remove_discard_cmd(sbi, dc);
+		} else {
+			dc->ref++;
+			need_wait = true;
+			break;
 		}
 	}
 	mutex_unlock(&dcc->cmd_lock);
+
+	if (need_wait) {
+		wait_for_completion_io(&dc->wait);
+		mutex_lock(&dcc->cmd_lock);
+		f2fs_bug_on(sbi, dc->state != D_DONE);
+		dc->ref--;
+		if (!dc->ref)
+			__remove_discard_cmd(sbi, dc);
+		mutex_unlock(&dcc->cmd_lock);
+		goto next;
+	}
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
-- 
GitLab


From b82a6ea6ec39f1b5ef949a38e334eed0dc29f4d1 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 May 2017 17:39:43 -0700
Subject: [PATCH 0097/1958] f2fs: don't bother checking for encryption key in
 ->mmap()

Since only an open file can be mmap'ed, and we only allow open()ing an
encrypted file when its key is available, there is no need to check for
the key again before permitting each mmap().

This f2fs copy of this code was also broken in that it wouldn't actually
have failed if the key was in fact unavailable.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: David Gstir <david@sigma-star.at>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8ccbfe53c03c2..30fbb4ce6a735 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -415,14 +415,6 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	struct inode *inode = file_inode(file);
 	int err;
 
-	if (f2fs_encrypted_inode(inode)) {
-		err = fscrypt_get_encryption_info(inode);
-		if (err)
-			return 0;
-		if (!f2fs_encrypted_inode(inode))
-			return -ENOKEY;
-	}
-
 	/* we don't need to use inline_data strictly */
 	err = f2fs_convert_inline_inode(inode);
 	if (err)
-- 
GitLab


From aaebdee8b88225ebe28af2afc60446f9fd7228f9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 May 2017 17:39:45 -0700
Subject: [PATCH 0098/1958] f2fs: don't bother checking for encryption key in
 ->write_iter()

Since only an open file can be written to, and we only allow open()ing
an encrypted file when its key is available, there is no need to check
for the key again before permitting each ->write_iter().

This code was also broken in that it wouldn't actually have failed if
the key was in fact unavailable.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: David Gstir <david@sigma-star.at>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 30fbb4ce6a735..65915b4ce14be 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2340,11 +2340,6 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct blk_plug plug;
 	ssize_t ret;
 
-	if (f2fs_encrypted_inode(inode) &&
-				!fscrypt_has_encryption_key(inode) &&
-				fscrypt_get_encryption_info(inode))
-		return -EACCES;
-
 	inode_lock(inode);
 	ret = generic_write_checks(iocb, from);
 	if (ret > 0) {
-- 
GitLab


From 11e5890b5342c82eefbaa39aec4767ae21ae8803 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 10 Mar 2017 18:52:34 +0100
Subject: [PATCH 0099/1958] rtc: ds1307: convert driver to regmap

This patch converts the ds1307 driver to using regmap. It's a rather
big patch and I can test with DS3231 only. With this chip it's
working fine.

I'd appreciate if people with other supported hardware could test as
well.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 603 ++++++++++++++-------------------------
 1 file changed, 221 insertions(+), 382 deletions(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 77339b3d50a1e..94e13d7037358 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -24,6 +24,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/clk-provider.h>
+#include <linux/regmap.h>
 
 /*
  * We can't determine type by probing, but if we expect pre-Linux code
@@ -120,12 +121,11 @@ struct ds1307 {
 	unsigned long		flags;
 #define HAS_NVRAM	0		/* bit 0 == sysfs file active */
 #define HAS_ALARM	1		/* bit 1 == irq claimed */
-	struct i2c_client	*client;
+	struct device		*dev;
+	struct regmap		*regmap;
+	const char		*name;
+	int			irq;
 	struct rtc_device	*rtc;
-	s32 (*read_block_data)(const struct i2c_client *client, u8 command,
-			       u8 length, u8 *values);
-	s32 (*write_block_data)(const struct i2c_client *client, u8 command,
-				u8 length, const u8 *values);
 #ifdef CONFIG_COMMON_CLK
 	struct clk_hw		clks[2];
 #endif
@@ -137,11 +137,11 @@ struct chip_desc {
 	u16			nvram_size;
 	u16			trickle_charger_reg;
 	u8			trickle_charger_setup;
-	u8			(*do_trickle_setup)(struct i2c_client *, uint32_t, bool);
+	u8			(*do_trickle_setup)(struct ds1307 *, uint32_t,
+						    bool);
 };
 
-static u8 do_trickle_setup_ds1339(struct i2c_client *,
-				  uint32_t ohms, bool diode);
+static u8 do_trickle_setup_ds1339(struct ds1307 *, uint32_t ohms, bool diode);
 
 static struct chip_desc chips[last_ds_type] = {
 	[ds_1307] = {
@@ -280,136 +280,6 @@ static const struct acpi_device_id ds1307_acpi_ids[] = {
 MODULE_DEVICE_TABLE(acpi, ds1307_acpi_ids);
 #endif
 
-/*----------------------------------------------------------------------*/
-
-#define BLOCK_DATA_MAX_TRIES 10
-
-static s32 ds1307_read_block_data_once(const struct i2c_client *client,
-				       u8 command, u8 length, u8 *values)
-{
-	s32 i, data;
-
-	for (i = 0; i < length; i++) {
-		data = i2c_smbus_read_byte_data(client, command + i);
-		if (data < 0)
-			return data;
-		values[i] = data;
-	}
-	return i;
-}
-
-static s32 ds1307_read_block_data(const struct i2c_client *client, u8 command,
-				  u8 length, u8 *values)
-{
-	u8 oldvalues[255];
-	s32 ret;
-	int tries = 0;
-
-	dev_dbg(&client->dev, "ds1307_read_block_data (length=%d)\n", length);
-	ret = ds1307_read_block_data_once(client, command, length, values);
-	if (ret < 0)
-		return ret;
-	do {
-		if (++tries > BLOCK_DATA_MAX_TRIES) {
-			dev_err(&client->dev,
-				"ds1307_read_block_data failed\n");
-			return -EIO;
-		}
-		memcpy(oldvalues, values, length);
-		ret = ds1307_read_block_data_once(client, command, length,
-						  values);
-		if (ret < 0)
-			return ret;
-	} while (memcmp(oldvalues, values, length));
-	return length;
-}
-
-static s32 ds1307_write_block_data(const struct i2c_client *client, u8 command,
-				   u8 length, const u8 *values)
-{
-	u8 currvalues[255];
-	int tries = 0;
-
-	dev_dbg(&client->dev, "ds1307_write_block_data (length=%d)\n", length);
-	do {
-		s32 i, ret;
-
-		if (++tries > BLOCK_DATA_MAX_TRIES) {
-			dev_err(&client->dev,
-				"ds1307_write_block_data failed\n");
-			return -EIO;
-		}
-		for (i = 0; i < length; i++) {
-			ret = i2c_smbus_write_byte_data(client, command + i,
-							values[i]);
-			if (ret < 0)
-				return ret;
-		}
-		ret = ds1307_read_block_data_once(client, command, length,
-						  currvalues);
-		if (ret < 0)
-			return ret;
-	} while (memcmp(currvalues, values, length));
-	return length;
-}
-
-/*----------------------------------------------------------------------*/
-
-/* These RTC devices are not designed to be connected to a SMbus adapter.
-   SMbus limits block operations length to 32 bytes, whereas it's not
-   limited on I2C buses. As a result, accesses may exceed 32 bytes;
-   in that case, split them into smaller blocks */
-
-static s32 ds1307_native_smbus_write_block_data(const struct i2c_client *client,
-				u8 command, u8 length, const u8 *values)
-{
-	u8 suboffset = 0;
-
-	if (length <= I2C_SMBUS_BLOCK_MAX) {
-		s32 retval = i2c_smbus_write_i2c_block_data(client,
-					command, length, values);
-		if (retval < 0)
-			return retval;
-		return length;
-	}
-
-	while (suboffset < length) {
-		s32 retval = i2c_smbus_write_i2c_block_data(client,
-				command + suboffset,
-				min(I2C_SMBUS_BLOCK_MAX, length - suboffset),
-				values + suboffset);
-		if (retval < 0)
-			return retval;
-
-		suboffset += I2C_SMBUS_BLOCK_MAX;
-	}
-	return length;
-}
-
-static s32 ds1307_native_smbus_read_block_data(const struct i2c_client *client,
-				u8 command, u8 length, u8 *values)
-{
-	u8 suboffset = 0;
-
-	if (length <= I2C_SMBUS_BLOCK_MAX)
-		return i2c_smbus_read_i2c_block_data(client,
-					command, length, values);
-
-	while (suboffset < length) {
-		s32 retval = i2c_smbus_read_i2c_block_data(client,
-				command + suboffset,
-				min(I2C_SMBUS_BLOCK_MAX, length - suboffset),
-				values + suboffset);
-		if (retval < 0)
-			return retval;
-
-		suboffset += I2C_SMBUS_BLOCK_MAX;
-	}
-	return length;
-}
-
-/*----------------------------------------------------------------------*/
-
 /*
  * The ds1337 and ds1339 both have two alarms, but we only use the first
  * one (with a "seconds" field).  For ds1337 we expect nINTA is our alarm
@@ -417,26 +287,25 @@ static s32 ds1307_native_smbus_read_block_data(const struct i2c_client *client,
  */
 static irqreturn_t ds1307_irq(int irq, void *dev_id)
 {
-	struct i2c_client	*client = dev_id;
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
+	struct ds1307		*ds1307 = dev_id;
 	struct mutex		*lock = &ds1307->rtc->ops_lock;
-	int			stat, control;
+	int			stat, control, ret;
 
 	mutex_lock(lock);
-	stat = i2c_smbus_read_byte_data(client, DS1337_REG_STATUS);
-	if (stat < 0)
+	ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &stat);
+	if (ret)
 		goto out;
 
 	if (stat & DS1337_BIT_A1I) {
 		stat &= ~DS1337_BIT_A1I;
-		i2c_smbus_write_byte_data(client, DS1337_REG_STATUS, stat);
+		regmap_write(ds1307->regmap, DS1337_REG_STATUS, stat);
 
-		control = i2c_smbus_read_byte_data(client, DS1337_REG_CONTROL);
-		if (control < 0)
+		ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+		if (ret)
 			goto out;
 
 		control &= ~DS1337_BIT_A1IE;
-		i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, control);
+		regmap_write(ds1307->regmap, DS1337_REG_CONTROL, control);
 
 		rtc_update_irq(ds1307->rtc, 1, RTC_AF | RTC_IRQF);
 	}
@@ -452,14 +321,13 @@ static irqreturn_t ds1307_irq(int irq, void *dev_id)
 static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 {
 	struct ds1307	*ds1307 = dev_get_drvdata(dev);
-	int		tmp;
+	int		tmp, ret;
 
 	/* read the RTC date and time registers all at once */
-	tmp = ds1307->read_block_data(ds1307->client,
-		ds1307->offset, 7, ds1307->regs);
-	if (tmp != 7) {
-		dev_err(dev, "%s error %d\n", "read", tmp);
-		return -EIO;
+	ret = regmap_bulk_read(ds1307->regmap, ds1307->offset, ds1307->regs, 7);
+	if (ret) {
+		dev_err(dev, "%s error %d\n", "read", ret);
+		return ret;
 	}
 
 	dev_dbg(dev, "%s: %7ph\n", "read", ds1307->regs);
@@ -580,9 +448,8 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 
 	dev_dbg(dev, "%s: %7ph\n", "write", buf);
 
-	result = ds1307->write_block_data(ds1307->client,
-		ds1307->offset, 7, buf);
-	if (result < 0) {
+	result = regmap_bulk_write(ds1307->regmap, ds1307->offset, buf, 7);
+	if (result) {
 		dev_err(dev, "%s error %d\n", "write", result);
 		return result;
 	}
@@ -591,19 +458,18 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 
 static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client       *client = to_i2c_client(dev);
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
+	struct ds1307		*ds1307 = dev_get_drvdata(dev);
 	int			ret;
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -EINVAL;
 
 	/* read all ALARM1, ALARM2, and status registers at once */
-	ret = ds1307->read_block_data(client,
-			DS1339_REG_ALARM1_SECS, 9, ds1307->regs);
-	if (ret != 9) {
+	ret = regmap_bulk_read(ds1307->regmap, DS1339_REG_ALARM1_SECS,
+			       ds1307->regs, 9);
+	if (ret) {
 		dev_err(dev, "%s error %d\n", "alarm read", ret);
-		return -EIO;
+		return ret;
 	}
 
 	dev_dbg(dev, "%s: %4ph, %3ph, %2ph\n", "alarm read",
@@ -633,8 +499,7 @@ static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client	*client = to_i2c_client(dev);
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
+	struct ds1307		*ds1307 = dev_get_drvdata(dev);
 	unsigned char		*buf = ds1307->regs;
 	u8			control, status;
 	int			ret;
@@ -649,11 +514,10 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 		t->enabled, t->pending);
 
 	/* read current status of both alarms and the chip */
-	ret = ds1307->read_block_data(client,
-			DS1339_REG_ALARM1_SECS, 9, buf);
-	if (ret != 9) {
+	ret = regmap_bulk_read(ds1307->regmap, DS1339_REG_ALARM1_SECS, buf, 9);
+	if (ret) {
 		dev_err(dev, "%s error %d\n", "alarm write", ret);
-		return -EIO;
+		return ret;
 	}
 	control = ds1307->regs[7];
 	status = ds1307->regs[8];
@@ -676,9 +540,8 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	buf[7] = control & ~(DS1337_BIT_A1IE | DS1337_BIT_A2IE);
 	buf[8] = status & ~(DS1337_BIT_A1I | DS1337_BIT_A2I);
 
-	ret = ds1307->write_block_data(client,
-			DS1339_REG_ALARM1_SECS, 9, buf);
-	if (ret < 0) {
+	ret = regmap_bulk_write(ds1307->regmap, DS1339_REG_ALARM1_SECS, buf, 9);
+	if (ret) {
 		dev_err(dev, "can't set alarm time\n");
 		return ret;
 	}
@@ -687,7 +550,7 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	if (t->enabled) {
 		dev_dbg(dev, "alarm IRQ armed\n");
 		buf[7] |= DS1337_BIT_A1IE;	/* only ALARM1 is used */
-		i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, buf[7]);
+		regmap_write(ds1307->regmap, DS1337_REG_CONTROL, buf[7]);
 	}
 
 	return 0;
@@ -695,27 +558,22 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 static int ds1307_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
-	struct i2c_client	*client = to_i2c_client(dev);
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
-	int			ret;
+	struct ds1307		*ds1307 = dev_get_drvdata(dev);
+	int			control, ret;
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -ENOTTY;
 
-	ret = i2c_smbus_read_byte_data(client, DS1337_REG_CONTROL);
-	if (ret < 0)
+	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+	if (ret)
 		return ret;
 
 	if (enabled)
-		ret |= DS1337_BIT_A1IE;
+		control |= DS1337_BIT_A1IE;
 	else
-		ret &= ~DS1337_BIT_A1IE;
-
-	ret = i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, ret);
-	if (ret < 0)
-		return ret;
+		control &= ~DS1337_BIT_A1IE;
 
-	return 0;
+	return regmap_write(ds1307->regmap, DS1337_REG_CONTROL, control);
 }
 
 static const struct rtc_class_ops ds13xx_rtc_ops = {
@@ -752,31 +610,30 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
 
 static irqreturn_t mcp794xx_irq(int irq, void *dev_id)
 {
-	struct i2c_client       *client = dev_id;
-	struct ds1307           *ds1307 = i2c_get_clientdata(client);
+	struct ds1307           *ds1307 = dev_id;
 	struct mutex            *lock = &ds1307->rtc->ops_lock;
 	int reg, ret;
 
 	mutex_lock(lock);
 
 	/* Check and clear alarm 0 interrupt flag. */
-	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_ALARM0_CTRL);
-	if (reg < 0)
+	ret = regmap_read(ds1307->regmap, MCP794XX_REG_ALARM0_CTRL, &reg);
+	if (ret)
 		goto out;
 	if (!(reg & MCP794XX_BIT_ALMX_IF))
 		goto out;
 	reg &= ~MCP794XX_BIT_ALMX_IF;
-	ret = i2c_smbus_write_byte_data(client, MCP794XX_REG_ALARM0_CTRL, reg);
-	if (ret < 0)
+	ret = regmap_write(ds1307->regmap, MCP794XX_REG_ALARM0_CTRL, reg);
+	if (ret)
 		goto out;
 
 	/* Disable alarm 0. */
-	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_CONTROL);
-	if (reg < 0)
+	ret = regmap_read(ds1307->regmap, MCP794XX_REG_CONTROL, &reg);
+	if (ret)
 		goto out;
 	reg &= ~MCP794XX_BIT_ALM0_EN;
-	ret = i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, reg);
-	if (ret < 0)
+	ret = regmap_write(ds1307->regmap, MCP794XX_REG_CONTROL, reg);
+	if (ret)
 		goto out;
 
 	rtc_update_irq(ds1307->rtc, 1, RTC_AF | RTC_IRQF);
@@ -789,8 +646,7 @@ static irqreturn_t mcp794xx_irq(int irq, void *dev_id)
 
 static int mcp794xx_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
 	u8 *regs = ds1307->regs;
 	int ret;
 
@@ -798,8 +654,8 @@ static int mcp794xx_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 		return -EINVAL;
 
 	/* Read control and alarm 0 registers. */
-	ret = ds1307->read_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
-	if (ret < 0)
+	ret = regmap_bulk_read(ds1307->regmap, MCP794XX_REG_CONTROL, regs, 10);
+	if (ret)
 		return ret;
 
 	t->enabled = !!(regs[0] & MCP794XX_BIT_ALM0_EN);
@@ -828,8 +684,7 @@ static int mcp794xx_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
 	unsigned char *regs = ds1307->regs;
 	int ret;
 
@@ -843,8 +698,8 @@ static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 		t->enabled, t->pending);
 
 	/* Read control and alarm 0 registers. */
-	ret = ds1307->read_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
-	if (ret < 0)
+	ret = regmap_bulk_read(ds1307->regmap, MCP794XX_REG_CONTROL, regs, 10);
+	if (ret)
 		return ret;
 
 	/* Set alarm 0, using 24-hour and day-of-month modes. */
@@ -862,35 +717,34 @@ static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	/* Disable interrupt. We will not enable until completely programmed */
 	regs[0] &= ~MCP794XX_BIT_ALM0_EN;
 
-	ret = ds1307->write_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
-	if (ret < 0)
+	ret = regmap_bulk_write(ds1307->regmap, MCP794XX_REG_CONTROL, regs, 10);
+	if (ret)
 		return ret;
 
 	if (!t->enabled)
 		return 0;
 	regs[0] |= MCP794XX_BIT_ALM0_EN;
-	return i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, regs[0]);
+	return regmap_write(ds1307->regmap, MCP794XX_REG_CONTROL, regs[0]);
 }
 
 static int mcp794xx_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
-	int reg;
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
+	int reg, ret;
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -EINVAL;
 
-	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_CONTROL);
-	if (reg < 0)
-		return reg;
+	ret = regmap_read(ds1307->regmap, MCP794XX_REG_CONTROL, &reg);
+	if (ret)
+		return ret;
 
 	if (enabled)
 		reg |= MCP794XX_BIT_ALM0_EN;
 	else
 		reg &= ~MCP794XX_BIT_ALM0_EN;
 
-	return i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, reg);
+	return regmap_write(ds1307->regmap, MCP794XX_REG_CONTROL, reg);
 }
 
 static const struct rtc_class_ops mcp794xx_rtc_ops = {
@@ -908,17 +762,15 @@ ds1307_nvram_read(struct file *filp, struct kobject *kobj,
 		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
-	struct i2c_client	*client;
 	struct ds1307		*ds1307;
 	int			result;
 
-	client = kobj_to_i2c_client(kobj);
-	ds1307 = i2c_get_clientdata(client);
+	ds1307 = dev_get_drvdata(kobj_to_dev(kobj));
 
-	result = ds1307->read_block_data(client, ds1307->nvram_offset + off,
-								count, buf);
-	if (result < 0)
-		dev_err(&client->dev, "%s error %d\n", "nvram read", result);
+	result = regmap_bulk_read(ds1307->regmap, ds1307->nvram_offset + off,
+				  buf, count);
+	if (result)
+		dev_err(ds1307->dev, "%s error %d\n", "nvram read", result);
 	return result;
 }
 
@@ -927,17 +779,15 @@ ds1307_nvram_write(struct file *filp, struct kobject *kobj,
 		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
-	struct i2c_client	*client;
 	struct ds1307		*ds1307;
 	int			result;
 
-	client = kobj_to_i2c_client(kobj);
-	ds1307 = i2c_get_clientdata(client);
+	ds1307 = dev_get_drvdata(kobj_to_dev(kobj));
 
-	result = ds1307->write_block_data(client, ds1307->nvram_offset + off,
-								count, buf);
-	if (result < 0) {
-		dev_err(&client->dev, "%s error %d\n", "nvram write", result);
+	result = regmap_bulk_write(ds1307->regmap, ds1307->nvram_offset + off,
+				   buf, count);
+	if (result) {
+		dev_err(ds1307->dev, "%s error %d\n", "nvram write", result);
 		return result;
 	}
 	return count;
@@ -946,7 +796,7 @@ ds1307_nvram_write(struct file *filp, struct kobject *kobj,
 
 /*----------------------------------------------------------------------*/
 
-static u8 do_trickle_setup_ds1339(struct i2c_client *client,
+static u8 do_trickle_setup_ds1339(struct ds1307 *ds1307,
 				  uint32_t ohms, bool diode)
 {
 	u8 setup = (diode) ? DS1307_TRICKLE_CHARGER_DIODE :
@@ -963,14 +813,14 @@ static u8 do_trickle_setup_ds1339(struct i2c_client *client,
 		setup |= DS1307_TRICKLE_CHARGER_4K_OHM;
 		break;
 	default:
-		dev_warn(&client->dev,
+		dev_warn(ds1307->dev,
 			 "Unsupported ohm value %u in dt\n", ohms);
 		return 0;
 	}
 	return setup;
 }
 
-static void ds1307_trickle_init(struct i2c_client *client,
+static void ds1307_trickle_init(struct ds1307 *ds1307,
 				struct chip_desc *chip)
 {
 	uint32_t ohms = 0;
@@ -978,11 +828,12 @@ static void ds1307_trickle_init(struct i2c_client *client,
 
 	if (!chip->do_trickle_setup)
 		goto out;
-	if (device_property_read_u32(&client->dev, "trickle-resistor-ohms", &ohms))
+	if (device_property_read_u32(ds1307->dev, "trickle-resistor-ohms",
+				     &ohms))
 		goto out;
-	if (device_property_read_bool(&client->dev, "trickle-diode-disable"))
+	if (device_property_read_bool(ds1307->dev, "trickle-diode-disable"))
 		diode = false;
-	chip->trickle_charger_setup = chip->do_trickle_setup(client,
+	chip->trickle_charger_setup = chip->do_trickle_setup(ds1307,
 							     ohms, diode);
 out:
 	return;
@@ -1009,13 +860,10 @@ static int ds3231_hwmon_read_temp(struct device *dev, s32 *mC)
 	s16 temp;
 	int ret;
 
-	ret = ds1307->read_block_data(ds1307->client, DS3231_REG_TEMPERATURE,
-					sizeof(temp_buf), temp_buf);
-	if (ret < 0)
+	ret = regmap_bulk_read(ds1307->regmap, DS3231_REG_TEMPERATURE,
+			       temp_buf, sizeof(temp_buf));
+	if (ret)
 		return ret;
-	if (ret != sizeof(temp_buf))
-		return -EIO;
-
 	/*
 	 * Temperature is represented as a 10-bit code with a resolution of
 	 * 0.25 degree celsius and encoded in two's complement format.
@@ -1055,12 +903,11 @@ static void ds1307_hwmon_register(struct ds1307 *ds1307)
 	if (ds1307->type != ds_3231)
 		return;
 
-	dev = devm_hwmon_device_register_with_groups(&ds1307->client->dev,
-						ds1307->client->name,
+	dev = devm_hwmon_device_register_with_groups(ds1307->dev, ds1307->name,
 						ds1307, ds3231_hwmon_groups);
 	if (IS_ERR(dev)) {
-		dev_warn(&ds1307->client->dev,
-			"unable to register hwmon device %ld\n", PTR_ERR(dev));
+		dev_warn(ds1307->dev, "unable to register hwmon device %ld\n",
+			 PTR_ERR(dev));
 	}
 }
 
@@ -1099,23 +946,20 @@ static int ds3231_clk_sqw_rates[] = {
 
 static int ds1337_write_control(struct ds1307 *ds1307, u8 mask, u8 value)
 {
-	struct i2c_client *client = ds1307->client;
 	struct mutex *lock = &ds1307->rtc->ops_lock;
 	int control;
 	int ret;
 
 	mutex_lock(lock);
 
-	control = i2c_smbus_read_byte_data(client, DS1337_REG_CONTROL);
-	if (control < 0) {
-		ret = control;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+	if (ret)
 		goto out;
-	}
 
 	control &= ~mask;
 	control |= value;
 
-	ret = i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, control);
+	ret = regmap_write(ds1307->regmap, DS1337_REG_CONTROL, control);
 out:
 	mutex_unlock(lock);
 
@@ -1126,12 +970,12 @@ static unsigned long ds3231_clk_sqw_recalc_rate(struct clk_hw *hw,
 						unsigned long parent_rate)
 {
 	struct ds1307 *ds1307 = clk_sqw_to_ds1307(hw);
-	int control;
+	int control, ret;
 	int rate_sel = 0;
 
-	control = i2c_smbus_read_byte_data(ds1307->client, DS1337_REG_CONTROL);
-	if (control < 0)
-		return control;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+	if (ret)
+		return ret;
 	if (control & DS1337_BIT_RS1)
 		rate_sel += 1;
 	if (control & DS1337_BIT_RS2)
@@ -1195,11 +1039,11 @@ static void ds3231_clk_sqw_unprepare(struct clk_hw *hw)
 static int ds3231_clk_sqw_is_prepared(struct clk_hw *hw)
 {
 	struct ds1307 *ds1307 = clk_sqw_to_ds1307(hw);
-	int control;
+	int control, ret;
 
-	control = i2c_smbus_read_byte_data(ds1307->client, DS1337_REG_CONTROL);
-	if (control < 0)
-		return control;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+	if (ret)
+		return ret;
 
 	return !(control & DS1337_BIT_INTCN);
 }
@@ -1221,25 +1065,22 @@ static unsigned long ds3231_clk_32khz_recalc_rate(struct clk_hw *hw,
 
 static int ds3231_clk_32khz_control(struct ds1307 *ds1307, bool enable)
 {
-	struct i2c_client *client = ds1307->client;
 	struct mutex *lock = &ds1307->rtc->ops_lock;
 	int status;
 	int ret;
 
 	mutex_lock(lock);
 
-	status = i2c_smbus_read_byte_data(client, DS1337_REG_STATUS);
-	if (status < 0) {
-		ret = status;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &status);
+	if (ret)
 		goto out;
-	}
 
 	if (enable)
 		status |= DS3231_BIT_EN32KHZ;
 	else
 		status &= ~DS3231_BIT_EN32KHZ;
 
-	ret = i2c_smbus_write_byte_data(client, DS1337_REG_STATUS, status);
+	ret = regmap_write(ds1307->regmap, DS1337_REG_STATUS, status);
 out:
 	mutex_unlock(lock);
 
@@ -1263,11 +1104,11 @@ static void ds3231_clk_32khz_unprepare(struct clk_hw *hw)
 static int ds3231_clk_32khz_is_prepared(struct clk_hw *hw)
 {
 	struct ds1307 *ds1307 = clk_32khz_to_ds1307(hw);
-	int status;
+	int status, ret;
 
-	status = i2c_smbus_read_byte_data(ds1307->client, DS1337_REG_STATUS);
-	if (status < 0)
-		return status;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &status);
+	if (ret)
+		return ret;
 
 	return !!(status & DS3231_BIT_EN32KHZ);
 }
@@ -1292,18 +1133,17 @@ static struct clk_init_data ds3231_clks_init[] = {
 
 static int ds3231_clks_register(struct ds1307 *ds1307)
 {
-	struct i2c_client *client = ds1307->client;
-	struct device_node *node = client->dev.of_node;
+	struct device_node *node = ds1307->dev->of_node;
 	struct clk_onecell_data	*onecell;
 	int i;
 
-	onecell = devm_kzalloc(&client->dev, sizeof(*onecell), GFP_KERNEL);
+	onecell = devm_kzalloc(ds1307->dev, sizeof(*onecell), GFP_KERNEL);
 	if (!onecell)
 		return -ENOMEM;
 
 	onecell->clk_num = ARRAY_SIZE(ds3231_clks_init);
-	onecell->clks = devm_kcalloc(&client->dev, onecell->clk_num,
-					sizeof(onecell->clks[0]), GFP_KERNEL);
+	onecell->clks = devm_kcalloc(ds1307->dev, onecell->clk_num,
+				     sizeof(onecell->clks[0]), GFP_KERNEL);
 	if (!onecell->clks)
 		return -ENOMEM;
 
@@ -1322,8 +1162,8 @@ static int ds3231_clks_register(struct ds1307 *ds1307)
 						&init.name);
 		ds1307->clks[i].init = &init;
 
-		onecell->clks[i] = devm_clk_register(&client->dev,
-							&ds1307->clks[i]);
+		onecell->clks[i] = devm_clk_register(ds1307->dev,
+						     &ds1307->clks[i]);
 		if (IS_ERR(onecell->clks[i]))
 			return PTR_ERR(onecell->clks[i]);
 	}
@@ -1345,8 +1185,8 @@ static void ds1307_clks_register(struct ds1307 *ds1307)
 
 	ret = ds3231_clks_register(ds1307);
 	if (ret) {
-		dev_warn(&ds1307->client->dev,
-			"unable to register clock device %d\n", ret);
+		dev_warn(ds1307->dev, "unable to register clock device %d\n",
+			 ret);
 	}
 }
 
@@ -1358,6 +1198,12 @@ static void ds1307_clks_register(struct ds1307 *ds1307)
 
 #endif /* CONFIG_COMMON_CLK */
 
+static const struct regmap_config regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0x12,
+};
+
 static int ds1307_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
@@ -1365,7 +1211,6 @@ static int ds1307_probe(struct i2c_client *client,
 	int			err = -ENODEV;
 	int			tmp, wday;
 	struct chip_desc	*chip;
-	struct i2c_adapter	*adapter = to_i2c_adapter(client->dev.parent);
 	bool			want_irq = false;
 	bool			ds1307_can_wakeup_device = false;
 	unsigned char		*buf;
@@ -1382,17 +1227,22 @@ static int ds1307_probe(struct i2c_client *client,
 	};
 	const struct rtc_class_ops *rtc_ops = &ds13xx_rtc_ops;
 
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)
-	    && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK))
-		return -EIO;
-
 	ds1307 = devm_kzalloc(&client->dev, sizeof(struct ds1307), GFP_KERNEL);
 	if (!ds1307)
 		return -ENOMEM;
 
-	i2c_set_clientdata(client, ds1307);
+	dev_set_drvdata(&client->dev, ds1307);
+	ds1307->dev = &client->dev;
+	ds1307->name = client->name;
+	ds1307->irq = client->irq;
+
+	ds1307->regmap = devm_regmap_init_i2c(client, &regmap_config);
+	if (IS_ERR(ds1307->regmap)) {
+		dev_err(ds1307->dev, "regmap allocation failed\n");
+		return PTR_ERR(ds1307->regmap);
+	}
 
-	ds1307->client	= client;
+	i2c_set_clientdata(client, ds1307);
 
 	if (client->dev.of_node) {
 		ds1307->type = (enum ds_type)
@@ -1405,7 +1255,7 @@ static int ds1307_probe(struct i2c_client *client,
 		const struct acpi_device_id *acpi_id;
 
 		acpi_id = acpi_match_device(ACPI_PTR(ds1307_acpi_ids),
-					    &client->dev);
+					    ds1307->dev);
 		if (!acpi_id)
 			return -ENODEV;
 		chip = &chips[acpi_id->driver_data];
@@ -1413,27 +1263,21 @@ static int ds1307_probe(struct i2c_client *client,
 	}
 
 	if (!pdata)
-		ds1307_trickle_init(client, chip);
+		ds1307_trickle_init(ds1307, chip);
 	else if (pdata->trickle_charger_setup)
 		chip->trickle_charger_setup = pdata->trickle_charger_setup;
 
 	if (chip->trickle_charger_setup && chip->trickle_charger_reg) {
-		dev_dbg(&client->dev, "writing trickle charger info 0x%x to 0x%x\n",
+		dev_dbg(ds1307->dev,
+			"writing trickle charger info 0x%x to 0x%x\n",
 		    DS13XX_TRICKLE_CHARGER_MAGIC | chip->trickle_charger_setup,
 		    chip->trickle_charger_reg);
-		i2c_smbus_write_byte_data(client, chip->trickle_charger_reg,
+		regmap_write(ds1307->regmap, chip->trickle_charger_reg,
 		    DS13XX_TRICKLE_CHARGER_MAGIC |
 		    chip->trickle_charger_setup);
 	}
 
 	buf = ds1307->regs;
-	if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) {
-		ds1307->read_block_data = ds1307_native_smbus_read_block_data;
-		ds1307->write_block_data = ds1307_native_smbus_write_block_data;
-	} else {
-		ds1307->read_block_data = ds1307_read_block_data;
-		ds1307->write_block_data = ds1307_write_block_data;
-	}
 
 #ifdef CONFIG_OF
 /*
@@ -1459,11 +1303,10 @@ static int ds1307_probe(struct i2c_client *client,
 	case ds_1339:
 	case ds_3231:
 		/* get registers that the "rtc" read below won't read... */
-		tmp = ds1307->read_block_data(ds1307->client,
-				DS1337_REG_CONTROL, 2, buf);
-		if (tmp != 2) {
-			dev_dbg(&client->dev, "read error %d\n", tmp);
-			err = -EIO;
+		err = regmap_bulk_read(ds1307->regmap, DS1337_REG_CONTROL,
+				       buf, 2);
+		if (err) {
+			dev_dbg(ds1307->dev, "read error %d\n", err);
 			goto exit;
 		}
 
@@ -1477,8 +1320,8 @@ static int ds1307_probe(struct i2c_client *client,
 		 * For some variants, be sure alarms can trigger when we're
 		 * running on Vbackup (BBSQI/BBSQW)
 		 */
-		if (chip->alarm && (ds1307->client->irq > 0 ||
-						ds1307_can_wakeup_device)) {
+		if (chip->alarm && (ds1307->irq > 0 ||
+				    ds1307_can_wakeup_device)) {
 			ds1307->regs[0] |= DS1337_BIT_INTCN
 					| bbsqi_bitpos[ds1307->type];
 			ds1307->regs[0] &= ~(DS1337_BIT_A2IE | DS1337_BIT_A1IE);
@@ -1486,50 +1329,49 @@ static int ds1307_probe(struct i2c_client *client,
 			want_irq = true;
 		}
 
-		i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL,
-							ds1307->regs[0]);
+		regmap_write(ds1307->regmap, DS1337_REG_CONTROL,
+			     ds1307->regs[0]);
 
 		/* oscillator fault?  clear flag, and warn */
 		if (ds1307->regs[1] & DS1337_BIT_OSF) {
-			i2c_smbus_write_byte_data(client, DS1337_REG_STATUS,
-				ds1307->regs[1] & ~DS1337_BIT_OSF);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1337_REG_STATUS,
+				     ds1307->regs[1] & ~DS1337_BIT_OSF);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 		}
 		break;
 
 	case rx_8025:
-		tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
-				RX8025_REG_CTRL1 << 4 | 0x08, 2, buf);
-		if (tmp != 2) {
-			dev_dbg(&client->dev, "read error %d\n", tmp);
-			err = -EIO;
+		err = regmap_bulk_read(ds1307->regmap,
+				       RX8025_REG_CTRL1 << 4 | 0x08, buf, 2);
+		if (err) {
+			dev_dbg(ds1307->dev, "read error %d\n", err);
 			goto exit;
 		}
 
 		/* oscillator off?  turn it on, so clock can tick. */
 		if (!(ds1307->regs[1] & RX8025_BIT_XST)) {
 			ds1307->regs[1] |= RX8025_BIT_XST;
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL2 << 4 | 0x08,
-						  ds1307->regs[1]);
-			dev_warn(&client->dev,
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL2 << 4 | 0x08,
+				     ds1307->regs[1]);
+			dev_warn(ds1307->dev,
 				 "oscillator stop detected - SET TIME!\n");
 		}
 
 		if (ds1307->regs[1] & RX8025_BIT_PON) {
 			ds1307->regs[1] &= ~RX8025_BIT_PON;
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL2 << 4 | 0x08,
-						  ds1307->regs[1]);
-			dev_warn(&client->dev, "power-on detected\n");
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL2 << 4 | 0x08,
+				     ds1307->regs[1]);
+			dev_warn(ds1307->dev, "power-on detected\n");
 		}
 
 		if (ds1307->regs[1] & RX8025_BIT_VDET) {
 			ds1307->regs[1] &= ~RX8025_BIT_VDET;
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL2 << 4 | 0x08,
-						  ds1307->regs[1]);
-			dev_warn(&client->dev, "voltage drop detected\n");
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL2 << 4 | 0x08,
+				     ds1307->regs[1]);
+			dev_warn(ds1307->dev, "voltage drop detected\n");
 		}
 
 		/* make sure we are running in 24hour mode */
@@ -1537,16 +1379,15 @@ static int ds1307_probe(struct i2c_client *client,
 			u8 hour;
 
 			/* switch to 24 hour mode */
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL1 << 4 | 0x08,
-						  ds1307->regs[0] |
-						  RX8025_BIT_2412);
-
-			tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
-					RX8025_REG_CTRL1 << 4 | 0x08, 2, buf);
-			if (tmp != 2) {
-				dev_dbg(&client->dev, "read error %d\n", tmp);
-				err = -EIO;
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL1 << 4 | 0x08,
+				     ds1307->regs[0] | RX8025_BIT_2412);
+
+			err = regmap_bulk_read(ds1307->regmap,
+					       RX8025_REG_CTRL1 << 4 | 0x08,
+					       buf, 2);
+			if (err) {
+				dev_dbg(ds1307->dev, "read error %d\n", err);
 				goto exit;
 			}
 
@@ -1557,9 +1398,8 @@ static int ds1307_probe(struct i2c_client *client,
 			if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
 				hour += 12;
 
-			i2c_smbus_write_byte_data(client,
-						  DS1307_REG_HOUR << 4 | 0x08,
-						  hour);
+			regmap_write(ds1307->regmap,
+				     DS1307_REG_HOUR << 4 | 0x08, hour);
 		}
 		break;
 	case ds_1388:
@@ -1567,7 +1407,7 @@ static int ds1307_probe(struct i2c_client *client,
 		break;
 	case mcp794xx:
 		rtc_ops = &mcp794xx_rtc_ops;
-		if (ds1307->client->irq > 0 && chip->alarm) {
+		if (ds1307->irq > 0 && chip->alarm) {
 			irq_handler = mcp794xx_irq;
 			want_irq = true;
 		}
@@ -1578,10 +1418,9 @@ static int ds1307_probe(struct i2c_client *client,
 
 read_rtc:
 	/* read RTC registers */
-	tmp = ds1307->read_block_data(ds1307->client, ds1307->offset, 8, buf);
-	if (tmp != 8) {
-		dev_dbg(&client->dev, "read error %d\n", tmp);
-		err = -EIO;
+	err = regmap_bulk_read(ds1307->regmap, ds1307->offset, buf, 8);
+	if (err) {
+		dev_dbg(ds1307->dev, "read error %d\n", err);
 		goto exit;
 	}
 
@@ -1597,56 +1436,55 @@ static int ds1307_probe(struct i2c_client *client,
 	case m41t00:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1307_BIT_CH) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS, 0);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1307_REG_SECS, 0);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 			goto read_rtc;
 		}
 		break;
 	case ds_1338:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1307_BIT_CH)
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS, 0);
+			regmap_write(ds1307->regmap, DS1307_REG_SECS, 0);
 
 		/* oscillator fault?  clear flag, and warn */
 		if (ds1307->regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_CONTROL,
-					ds1307->regs[DS1307_REG_CONTROL]
-					& ~DS1338_BIT_OSF);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1307_REG_CONTROL,
+					ds1307->regs[DS1307_REG_CONTROL] &
+					~DS1338_BIT_OSF);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 			goto read_rtc;
 		}
 		break;
 	case ds_1340:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1340_BIT_nEOSC)
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS, 0);
+			regmap_write(ds1307->regmap, DS1307_REG_SECS, 0);
 
-		tmp = i2c_smbus_read_byte_data(client, DS1340_REG_FLAG);
-		if (tmp < 0) {
-			dev_dbg(&client->dev, "read error %d\n", tmp);
-			err = -EIO;
+		err = regmap_read(ds1307->regmap, DS1340_REG_FLAG, &tmp);
+		if (err) {
+			dev_dbg(ds1307->dev, "read error %d\n", err);
 			goto exit;
 		}
 
 		/* oscillator fault?  clear flag, and warn */
 		if (tmp & DS1340_BIT_OSF) {
-			i2c_smbus_write_byte_data(client, DS1340_REG_FLAG, 0);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1340_REG_FLAG, 0);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 		}
 		break;
 	case mcp794xx:
 		/* make sure that the backup battery is enabled */
 		if (!(ds1307->regs[DS1307_REG_WDAY] & MCP794XX_BIT_VBATEN)) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_WDAY,
-					ds1307->regs[DS1307_REG_WDAY]
-					| MCP794XX_BIT_VBATEN);
+			regmap_write(ds1307->regmap, DS1307_REG_WDAY,
+				     ds1307->regs[DS1307_REG_WDAY] |
+				     MCP794XX_BIT_VBATEN);
 		}
 
 		/* clock halted?  turn it on, so clock can tick. */
 		if (!(tmp & MCP794XX_BIT_ST)) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS,
-					MCP794XX_BIT_ST);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1307_REG_SECS,
+				     MCP794XX_BIT_ST);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 			goto read_rtc;
 		}
 
@@ -1680,16 +1518,15 @@ static int ds1307_probe(struct i2c_client *client,
 			tmp = 0;
 		if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
 			tmp += 12;
-		i2c_smbus_write_byte_data(client,
-				ds1307->offset + DS1307_REG_HOUR,
-				bin2bcd(tmp));
+		regmap_write(ds1307->regmap, ds1307->offset + DS1307_REG_HOUR,
+			     bin2bcd(tmp));
 	}
 
 	/*
 	 * Some IPs have weekday reset value = 0x1 which might not correct
 	 * hence compute the wday using the current date/month/year values
 	 */
-	ds1307_get_time(&client->dev, &tm);
+	ds1307_get_time(ds1307->dev, &tm);
 	wday = tm.tm_wday;
 	timestamp = rtc_tm_to_time64(&tm);
 	rtc_time64_to_tm(timestamp, &tm);
@@ -1700,51 +1537,53 @@ static int ds1307_probe(struct i2c_client *client,
 	 * timestamp
 	 */
 	if (wday != tm.tm_wday) {
-		wday = i2c_smbus_read_byte_data(client, MCP794XX_REG_WEEKDAY);
+		regmap_read(ds1307->regmap, MCP794XX_REG_WEEKDAY, &wday);
 		wday = wday & ~MCP794XX_REG_WEEKDAY_WDAY_MASK;
 		wday = wday | (tm.tm_wday + 1);
-		i2c_smbus_write_byte_data(client, MCP794XX_REG_WEEKDAY, wday);
+		regmap_write(ds1307->regmap, MCP794XX_REG_WEEKDAY, wday);
 	}
 
 	if (want_irq) {
-		device_set_wakeup_capable(&client->dev, true);
+		device_set_wakeup_capable(ds1307->dev, true);
 		set_bit(HAS_ALARM, &ds1307->flags);
 	}
-	ds1307->rtc = devm_rtc_device_register(&client->dev, client->name,
+	ds1307->rtc = devm_rtc_device_register(ds1307->dev, ds1307->name,
 				rtc_ops, THIS_MODULE);
 	if (IS_ERR(ds1307->rtc)) {
 		return PTR_ERR(ds1307->rtc);
 	}
 
-	if (ds1307_can_wakeup_device && ds1307->client->irq <= 0) {
+	if (ds1307_can_wakeup_device && ds1307->irq <= 0) {
 		/* Disable request for an IRQ */
 		want_irq = false;
-		dev_info(&client->dev, "'wakeup-source' is set, request for an IRQ is disabled!\n");
+		dev_info(ds1307->dev,
+			 "'wakeup-source' is set, request for an IRQ is disabled!\n");
 		/* We cannot support UIE mode if we do not have an IRQ line */
 		ds1307->rtc->uie_unsupported = 1;
 	}
 
 	if (want_irq) {
-		err = devm_request_threaded_irq(&client->dev,
-						client->irq, NULL, irq_handler,
+		err = devm_request_threaded_irq(ds1307->dev,
+						ds1307->irq, NULL, irq_handler,
 						IRQF_SHARED | IRQF_ONESHOT,
-						ds1307->rtc->name, client);
+						ds1307->rtc->name, ds1307);
 		if (err) {
 			client->irq = 0;
-			device_set_wakeup_capable(&client->dev, false);
+			device_set_wakeup_capable(ds1307->dev, false);
 			clear_bit(HAS_ALARM, &ds1307->flags);
-			dev_err(&client->dev, "unable to request IRQ!\n");
+			dev_err(ds1307->dev, "unable to request IRQ!\n");
 		} else
-			dev_dbg(&client->dev, "got IRQ %d\n", client->irq);
+			dev_dbg(ds1307->dev, "got IRQ %d\n", client->irq);
 	}
 
 	if (chip->nvram_size) {
 
-		ds1307->nvram = devm_kzalloc(&client->dev,
+		ds1307->nvram = devm_kzalloc(ds1307->dev,
 					sizeof(struct bin_attribute),
 					GFP_KERNEL);
 		if (!ds1307->nvram) {
-			dev_err(&client->dev, "cannot allocate memory for nvram sysfs\n");
+			dev_err(ds1307->dev,
+				"cannot allocate memory for nvram sysfs\n");
 		} else {
 
 			ds1307->nvram->attr.name = "nvram";
@@ -1757,15 +1596,15 @@ static int ds1307_probe(struct i2c_client *client,
 			ds1307->nvram->size = chip->nvram_size;
 			ds1307->nvram_offset = chip->nvram_offset;
 
-			err = sysfs_create_bin_file(&client->dev.kobj,
+			err = sysfs_create_bin_file(&ds1307->dev->kobj,
 						    ds1307->nvram);
 			if (err) {
-				dev_err(&client->dev,
+				dev_err(ds1307->dev,
 					"unable to create sysfs file: %s\n",
 					ds1307->nvram->attr.name);
 			} else {
 				set_bit(HAS_NVRAM, &ds1307->flags);
-				dev_info(&client->dev, "%zu bytes nvram\n",
+				dev_info(ds1307->dev, "%zu bytes nvram\n",
 					 ds1307->nvram->size);
 			}
 		}
@@ -1785,7 +1624,7 @@ static int ds1307_remove(struct i2c_client *client)
 	struct ds1307 *ds1307 = i2c_get_clientdata(client);
 
 	if (test_and_clear_bit(HAS_NVRAM, &ds1307->flags))
-		sysfs_remove_bin_file(&client->dev.kobj, ds1307->nvram);
+		sysfs_remove_bin_file(&ds1307->dev->kobj, ds1307->nvram);
 
 	return 0;
 }
-- 
GitLab


From 289d72afddf83440117c35d864bf0c6309c1d011 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:08 +0530
Subject: [PATCH 0100/1958] thermal: cpu_cooling: Avoid accessing potentially
 freed structures

After the lock is dropped, it is possible that the cpufreq_dev gets
freed before we call get_level() and that can cause kernel to crash.

Drop the lock after we are done using the structure.

Cc: 4.2+ <stable@vger.kernel.org> # 4.2+
Fixes: 02373d7c69b4 ("thermal: cpu_cooling: fix lockdep problems in cpu_cooling")
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 69d0f430b2d19..be29489dd2475 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -153,8 +153,10 @@ unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
 	mutex_lock(&cooling_list_lock);
 	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
 		if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
+			unsigned long level = get_level(cpufreq_dev, freq);
+
 			mutex_unlock(&cooling_list_lock);
-			return get_level(cpufreq_dev, freq);
+			return level;
 		}
 	}
 	mutex_unlock(&cooling_list_lock);
-- 
GitLab


From fb8ea3082169612933b5cd14f30415e81f926f7b Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:09 +0530
Subject: [PATCH 0101/1958] thermal: cpu_cooling: rearrange globals

Just to make it look better.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index be29489dd2475..ce94aafed25db 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -105,8 +105,8 @@ struct cpufreq_cooling_device {
 	struct device *cpu_dev;
 	get_static_t plat_get_static_power;
 };
-static DEFINE_IDA(cpufreq_ida);
 
+static DEFINE_IDA(cpufreq_ida);
 static DEFINE_MUTEX(cooling_list_lock);
 static LIST_HEAD(cpufreq_dev_list);
 
-- 
GitLab


From 1dea432a671aa87068c28194a6a602a6f358f749 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:10 +0530
Subject: [PATCH 0102/1958] thermal: cpu_cooling: Name cpufreq cooling devices
 as cpufreq_cdev

Objects of "struct cpufreq_cooling_device" are named a bit
inconsistently. Lets use cpufreq_cdev everywhere. Also note that the
lists containing such devices is renamed similarly too.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 248 +++++++++++++++++-----------------
 1 file changed, 124 insertions(+), 124 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index ce94aafed25db..80a46a80817b0 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -108,27 +108,27 @@ struct cpufreq_cooling_device {
 
 static DEFINE_IDA(cpufreq_ida);
 static DEFINE_MUTEX(cooling_list_lock);
-static LIST_HEAD(cpufreq_dev_list);
+static LIST_HEAD(cpufreq_cdev_list);
 
 /* Below code defines functions to be used for cpufreq as cooling device */
 
 /**
  * get_level: Find the level for a particular frequency
- * @cpufreq_dev: cpufreq_dev for which the property is required
+ * @cpufreq_cdev: cpufreq_cdev for which the property is required
  * @freq: Frequency
  *
  * Return: level on success, THERMAL_CSTATE_INVALID on error.
  */
-static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
+static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 			       unsigned int freq)
 {
 	unsigned long level;
 
-	for (level = 0; level <= cpufreq_dev->max_level; level++) {
-		if (freq == cpufreq_dev->freq_table[level])
+	for (level = 0; level <= cpufreq_cdev->max_level; level++) {
+		if (freq == cpufreq_cdev->freq_table[level])
 			return level;
 
-		if (freq > cpufreq_dev->freq_table[level])
+		if (freq > cpufreq_cdev->freq_table[level])
 			break;
 	}
 
@@ -148,12 +148,12 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
  */
 unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
 {
-	struct cpufreq_cooling_device *cpufreq_dev;
+	struct cpufreq_cooling_device *cpufreq_cdev;
 
 	mutex_lock(&cooling_list_lock);
-	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
-		if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
-			unsigned long level = get_level(cpufreq_dev, freq);
+	list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
+		if (cpumask_test_cpu(cpu, &cpufreq_cdev->allowed_cpus)) {
+			unsigned long level = get_level(cpufreq_cdev, freq);
 
 			mutex_unlock(&cooling_list_lock);
 			return level;
@@ -183,14 +183,14 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 {
 	struct cpufreq_policy *policy = data;
 	unsigned long clipped_freq;
-	struct cpufreq_cooling_device *cpufreq_dev;
+	struct cpufreq_cooling_device *cpufreq_cdev;
 
 	if (event != CPUFREQ_ADJUST)
 		return NOTIFY_DONE;
 
 	mutex_lock(&cooling_list_lock);
-	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
-		if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
+	list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
+		if (!cpumask_test_cpu(policy->cpu, &cpufreq_cdev->allowed_cpus))
 			continue;
 
 		/*
@@ -204,7 +204,7 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 		 * But, if clipped_freq is greater than policy->max, we don't
 		 * need to do anything.
 		 */
-		clipped_freq = cpufreq_dev->clipped_freq;
+		clipped_freq = cpufreq_cdev->clipped_freq;
 
 		if (policy->max > clipped_freq)
 			cpufreq_verify_within_limits(policy, 0, clipped_freq);
@@ -217,11 +217,11 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 
 /**
  * build_dyn_power_table() - create a dynamic power to frequency table
- * @cpufreq_device:	the cpufreq cooling device in which to store the table
+ * @cpufreq_cdev:	the cpufreq cooling device in which to store the table
  * @capacitance: dynamic power coefficient for these cpus
  *
  * Build a dynamic power to frequency table for this cpu and store it
- * in @cpufreq_device.  This table will be used in cpu_power_to_freq() and
+ * in @cpufreq_cdev.  This table will be used in cpu_power_to_freq() and
  * cpu_freq_to_power() to convert between power and frequency
  * efficiently.  Power is stored in mW, frequency in KHz.  The
  * resulting table is in ascending order.
@@ -230,7 +230,7 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
  * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
  * added/enabled while the function was executing.
  */
-static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
+static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev,
 				 u32 capacitance)
 {
 	struct power_table *power_table;
@@ -239,10 +239,10 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
 	int num_opps = 0, cpu, i, ret = 0;
 	unsigned long freq;
 
-	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+	for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) {
 		dev = get_cpu_device(cpu);
 		if (!dev) {
-			dev_warn(&cpufreq_device->cool_dev->device,
+			dev_warn(&cpufreq_cdev->cool_dev->device,
 				 "No cpu device for cpu %d\n", cpu);
 			continue;
 		}
@@ -295,9 +295,9 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
 		goto free_power_table;
 	}
 
-	cpufreq_device->cpu_dev = dev;
-	cpufreq_device->dyn_power_table = power_table;
-	cpufreq_device->dyn_power_table_entries = i;
+	cpufreq_cdev->cpu_dev = dev;
+	cpufreq_cdev->dyn_power_table = power_table;
+	cpufreq_cdev->dyn_power_table_entries = i;
 
 	return 0;
 
@@ -307,26 +307,26 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
 	return ret;
 }
 
-static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
+static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 freq)
 {
 	int i;
-	struct power_table *pt = cpufreq_device->dyn_power_table;
+	struct power_table *pt = cpufreq_cdev->dyn_power_table;
 
-	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+	for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++)
 		if (freq < pt[i].frequency)
 			break;
 
 	return pt[i - 1].power;
 }
 
-static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
+static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 power)
 {
 	int i;
-	struct power_table *pt = cpufreq_device->dyn_power_table;
+	struct power_table *pt = cpufreq_cdev->dyn_power_table;
 
-	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+	for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++)
 		if (power < pt[i].power)
 			break;
 
@@ -335,37 +335,37 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
 
 /**
  * get_load() - get load for a cpu since last updated
- * @cpufreq_device:	&struct cpufreq_cooling_device for this cpu
+ * @cpufreq_cdev:	&struct cpufreq_cooling_device for this cpu
  * @cpu:	cpu number
- * @cpu_idx:	index of the cpu in cpufreq_device->allowed_cpus
+ * @cpu_idx:	index of the cpu in cpufreq_cdev->allowed_cpus
  *
  * Return: The average load of cpu @cpu in percentage since this
  * function was last called.
  */
-static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
 		    int cpu_idx)
 {
 	u32 load;
 	u64 now, now_idle, delta_time, delta_idle;
 
 	now_idle = get_cpu_idle_time(cpu, &now, 0);
-	delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
-	delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
+	delta_idle = now_idle - cpufreq_cdev->time_in_idle[cpu_idx];
+	delta_time = now - cpufreq_cdev->time_in_idle_timestamp[cpu_idx];
 
 	if (delta_time <= delta_idle)
 		load = 0;
 	else
 		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
 
-	cpufreq_device->time_in_idle[cpu_idx] = now_idle;
-	cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
+	cpufreq_cdev->time_in_idle[cpu_idx] = now_idle;
+	cpufreq_cdev->time_in_idle_timestamp[cpu_idx] = now;
 
 	return load;
 }
 
 /**
  * get_static_power() - calculate the static power consumed by the cpus
- * @cpufreq_device:	struct &cpufreq_cooling_device for this cpu cdev
+ * @cpufreq_cdev:	struct &cpufreq_cooling_device for this cpu cdev
  * @tz:		thermal zone device in which we're operating
  * @freq:	frequency in KHz
  * @power:	pointer in which to store the calculated static power
@@ -378,25 +378,24 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
  *
  * Return: 0 on success, -E* on failure.
  */
-static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
+static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			    struct thermal_zone_device *tz, unsigned long freq,
 			    u32 *power)
 {
 	struct dev_pm_opp *opp;
 	unsigned long voltage;
-	struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
+	struct cpumask *cpumask = &cpufreq_cdev->allowed_cpus;
 	unsigned long freq_hz = freq * 1000;
 
-	if (!cpufreq_device->plat_get_static_power ||
-	    !cpufreq_device->cpu_dev) {
+	if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) {
 		*power = 0;
 		return 0;
 	}
 
-	opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
+	opp = dev_pm_opp_find_freq_exact(cpufreq_cdev->cpu_dev, freq_hz,
 					 true);
 	if (IS_ERR(opp)) {
-		dev_warn_ratelimited(cpufreq_device->cpu_dev,
+		dev_warn_ratelimited(cpufreq_cdev->cpu_dev,
 				     "Failed to find OPP for frequency %lu: %ld\n",
 				     freq_hz, PTR_ERR(opp));
 		return -EINVAL;
@@ -406,31 +405,31 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
 	dev_pm_opp_put(opp);
 
 	if (voltage == 0) {
-		dev_err_ratelimited(cpufreq_device->cpu_dev,
+		dev_err_ratelimited(cpufreq_cdev->cpu_dev,
 				    "Failed to get voltage for frequency %lu\n",
 				    freq_hz);
 		return -EINVAL;
 	}
 
-	return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
-						     voltage, power);
+	return cpufreq_cdev->plat_get_static_power(cpumask, tz->passive_delay,
+						  voltage, power);
 }
 
 /**
  * get_dynamic_power() - calculate the dynamic power
- * @cpufreq_device:	&cpufreq_cooling_device for this cdev
+ * @cpufreq_cdev:	&cpufreq_cooling_device for this cdev
  * @freq:	current frequency
  *
  * Return: the dynamic power consumed by the cpus described by
- * @cpufreq_device.
+ * @cpufreq_cdev.
  */
-static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
+static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			     unsigned long freq)
 {
 	u32 raw_cpu_power;
 
-	raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
-	return (raw_cpu_power * cpufreq_device->last_load) / 100;
+	raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
+	return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
 }
 
 /* cpufreq cooling device callback functions are defined below */
@@ -448,9 +447,9 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
 static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
 				 unsigned long *state)
 {
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
-	*state = cpufreq_device->max_level;
+	*state = cpufreq_cdev->max_level;
 	return 0;
 }
 
@@ -467,9 +466,9 @@ static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
 static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
 				 unsigned long *state)
 {
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
-	*state = cpufreq_device->cpufreq_state;
+	*state = cpufreq_cdev->cpufreq_state;
 
 	return 0;
 }
@@ -487,21 +486,21 @@ static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
 static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 				 unsigned long state)
 {
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
-	unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
+	unsigned int cpu = cpumask_any(&cpufreq_cdev->allowed_cpus);
 	unsigned int clip_freq;
 
 	/* Request state should be less than max_level */
-	if (WARN_ON(state > cpufreq_device->max_level))
+	if (WARN_ON(state > cpufreq_cdev->max_level))
 		return -EINVAL;
 
 	/* Check if the old cooling action is same as new cooling action */
-	if (cpufreq_device->cpufreq_state == state)
+	if (cpufreq_cdev->cpufreq_state == state)
 		return 0;
 
-	clip_freq = cpufreq_device->freq_table[state];
-	cpufreq_device->cpufreq_state = state;
-	cpufreq_device->clipped_freq = clip_freq;
+	clip_freq = cpufreq_cdev->freq_table[state];
+	cpufreq_cdev->cpufreq_state = state;
+	cpufreq_cdev->clipped_freq = clip_freq;
 
 	cpufreq_update_policy(cpu);
 
@@ -538,10 +537,10 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 	unsigned long freq;
 	int i = 0, cpu, ret;
 	u32 static_power, dynamic_power, total_load = 0;
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 	u32 *load_cpu = NULL;
 
-	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+	cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask);
 
 	/*
 	 * All the CPUs are offline, thus the requested power by
@@ -555,16 +554,16 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 	freq = cpufreq_quick_get(cpu);
 
 	if (trace_thermal_power_cpu_get_power_enabled()) {
-		u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
+		u32 ncpus = cpumask_weight(&cpufreq_cdev->allowed_cpus);
 
 		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
 	}
 
-	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+	for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) {
 		u32 load;
 
 		if (cpu_online(cpu))
-			load = get_load(cpufreq_device, cpu, i);
+			load = get_load(cpufreq_cdev, cpu, i);
 		else
 			load = 0;
 
@@ -575,10 +574,10 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 		i++;
 	}
 
-	cpufreq_device->last_load = total_load;
+	cpufreq_cdev->last_load = total_load;
 
-	dynamic_power = get_dynamic_power(cpufreq_device, freq);
-	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+	dynamic_power = get_dynamic_power(cpufreq_cdev, freq);
+	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
 	if (ret) {
 		kfree(load_cpu);
 		return ret;
@@ -586,7 +585,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 
 	if (load_cpu) {
 		trace_thermal_power_cpu_get_power(
-			&cpufreq_device->allowed_cpus,
+			&cpufreq_cdev->allowed_cpus,
 			freq, load_cpu, i, dynamic_power, static_power);
 
 		kfree(load_cpu);
@@ -619,12 +618,12 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 	cpumask_var_t cpumask;
 	u32 static_power, dynamic_power;
 	int ret;
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
 		return -ENOMEM;
 
-	cpumask_and(cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
+	cpumask_and(cpumask, &cpufreq_cdev->allowed_cpus, cpu_online_mask);
 	num_cpus = cpumask_weight(cpumask);
 
 	/* None of our cpus are online, so no power */
@@ -634,14 +633,14 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 		goto out;
 	}
 
-	freq = cpufreq_device->freq_table[state];
+	freq = cpufreq_cdev->freq_table[state];
 	if (!freq) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
-	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+	dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
+	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
 	if (ret)
 		goto out;
 
@@ -679,24 +678,24 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 	int ret;
 	s32 dyn_power;
 	u32 last_load, normalised_power, static_power;
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
-	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+	cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask);
 
 	/* None of our cpus are online */
 	if (cpu >= nr_cpu_ids)
 		return -ENODEV;
 
 	cur_freq = cpufreq_quick_get(cpu);
-	ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
+	ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power);
 	if (ret)
 		return ret;
 
 	dyn_power = power - static_power;
 	dyn_power = dyn_power > 0 ? dyn_power : 0;
-	last_load = cpufreq_device->last_load ?: 1;
+	last_load = cpufreq_cdev->last_load ?: 1;
 	normalised_power = (dyn_power * 100) / last_load;
-	target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
+	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
 
 	*state = cpufreq_cooling_get_level(cpu, target_freq);
 	if (*state == THERMAL_CSTATE_INVALID) {
@@ -706,7 +705,7 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 		return -EINVAL;
 	}
 
-	trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
+	trace_thermal_power_cpu_limit(&cpufreq_cdev->allowed_cpus,
 				      target_freq, *state, power);
 	return 0;
 }
@@ -771,7 +770,7 @@ __cpufreq_cooling_register(struct device_node *np,
 {
 	struct cpufreq_policy *policy;
 	struct thermal_cooling_device *cool_dev;
-	struct cpufreq_cooling_device *cpufreq_dev;
+	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
 	struct cpufreq_frequency_table *pos, *table;
 	cpumask_var_t temp_mask;
@@ -798,49 +797,49 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto put_policy;
 	}
 
-	cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
-	if (!cpufreq_dev) {
+	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
+	if (!cpufreq_cdev) {
 		cool_dev = ERR_PTR(-ENOMEM);
 		goto put_policy;
 	}
 
 	num_cpus = cpumask_weight(clip_cpus);
-	cpufreq_dev->time_in_idle = kcalloc(num_cpus,
-					    sizeof(*cpufreq_dev->time_in_idle),
+	cpufreq_cdev->time_in_idle = kcalloc(num_cpus,
+					    sizeof(*cpufreq_cdev->time_in_idle),
 					    GFP_KERNEL);
-	if (!cpufreq_dev->time_in_idle) {
+	if (!cpufreq_cdev->time_in_idle) {
 		cool_dev = ERR_PTR(-ENOMEM);
 		goto free_cdev;
 	}
 
-	cpufreq_dev->time_in_idle_timestamp =
-		kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
+	cpufreq_cdev->time_in_idle_timestamp =
+		kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle_timestamp),
 			GFP_KERNEL);
-	if (!cpufreq_dev->time_in_idle_timestamp) {
+	if (!cpufreq_cdev->time_in_idle_timestamp) {
 		cool_dev = ERR_PTR(-ENOMEM);
 		goto free_time_in_idle;
 	}
 
 	/* Find max levels */
 	cpufreq_for_each_valid_entry(pos, table)
-		cpufreq_dev->max_level++;
+		cpufreq_cdev->max_level++;
 
-	cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
-					  cpufreq_dev->max_level, GFP_KERNEL);
-	if (!cpufreq_dev->freq_table) {
+	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) *
+					  cpufreq_cdev->max_level, GFP_KERNEL);
+	if (!cpufreq_cdev->freq_table) {
 		cool_dev = ERR_PTR(-ENOMEM);
 		goto free_time_in_idle_timestamp;
 	}
 
 	/* max_level is an index, not a counter */
-	cpufreq_dev->max_level--;
+	cpufreq_cdev->max_level--;
 
-	cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
+	cpumask_copy(&cpufreq_cdev->allowed_cpus, clip_cpus);
 
 	if (capacitance) {
-		cpufreq_dev->plat_get_static_power = plat_static_func;
+		cpufreq_cdev->plat_get_static_power = plat_static_func;
 
-		ret = build_dyn_power_table(cpufreq_dev, capacitance);
+		ret = build_dyn_power_table(cpufreq_cdev, capacitance);
 		if (ret) {
 			cool_dev = ERR_PTR(ret);
 			goto free_table;
@@ -856,12 +855,12 @@ __cpufreq_cooling_register(struct device_node *np,
 		cool_dev = ERR_PTR(ret);
 		goto free_power_table;
 	}
-	cpufreq_dev->id = ret;
+	cpufreq_cdev->id = ret;
 
 	/* Fill freq-table in descending order of frequencies */
-	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
+	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
 		freq = find_next_max(table, freq);
-		cpufreq_dev->freq_table[i] = freq;
+		cpufreq_cdev->freq_table[i] = freq;
 
 		/* Warn for duplicate entries */
 		if (!freq)
@@ -871,20 +870,21 @@ __cpufreq_cooling_register(struct device_node *np,
 	}
 
 	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
-		 cpufreq_dev->id);
+		 cpufreq_cdev->id);
 
-	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
+	cool_dev = thermal_of_cooling_device_register(np, dev_name,
+						      cpufreq_cdev,
 						      cooling_ops);
 	if (IS_ERR(cool_dev))
 		goto remove_ida;
 
-	cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
-	cpufreq_dev->cool_dev = cool_dev;
+	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0];
+	cpufreq_cdev->cool_dev = cool_dev;
 
 	mutex_lock(&cooling_list_lock);
 	/* Register the notifier for first cpufreq cooling device */
-	first = list_empty(&cpufreq_dev_list);
-	list_add(&cpufreq_dev->node, &cpufreq_dev_list);
+	first = list_empty(&cpufreq_cdev_list);
+	list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
 	mutex_unlock(&cooling_list_lock);
 
 	if (first)
@@ -894,17 +894,17 @@ __cpufreq_cooling_register(struct device_node *np,
 	goto put_policy;
 
 remove_ida:
-	ida_simple_remove(&cpufreq_ida, cpufreq_dev->id);
+	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
 free_power_table:
-	kfree(cpufreq_dev->dyn_power_table);
+	kfree(cpufreq_cdev->dyn_power_table);
 free_table:
-	kfree(cpufreq_dev->freq_table);
+	kfree(cpufreq_cdev->freq_table);
 free_time_in_idle_timestamp:
-	kfree(cpufreq_dev->time_in_idle_timestamp);
+	kfree(cpufreq_cdev->time_in_idle_timestamp);
 free_time_in_idle:
-	kfree(cpufreq_dev->time_in_idle);
+	kfree(cpufreq_cdev->time_in_idle);
 free_cdev:
-	kfree(cpufreq_dev);
+	kfree(cpufreq_cdev);
 put_policy:
 	cpufreq_cpu_put(policy);
 free_cpumask:
@@ -1029,30 +1029,30 @@ EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
  */
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
-	struct cpufreq_cooling_device *cpufreq_dev;
+	struct cpufreq_cooling_device *cpufreq_cdev;
 	bool last;
 
 	if (!cdev)
 		return;
 
-	cpufreq_dev = cdev->devdata;
+	cpufreq_cdev = cdev->devdata;
 
 	mutex_lock(&cooling_list_lock);
-	list_del(&cpufreq_dev->node);
+	list_del(&cpufreq_cdev->node);
 	/* Unregister the notifier for the last cpufreq cooling device */
-	last = list_empty(&cpufreq_dev_list);
+	last = list_empty(&cpufreq_cdev_list);
 	mutex_unlock(&cooling_list_lock);
 
 	if (last)
 		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
 					    CPUFREQ_POLICY_NOTIFIER);
 
-	thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
-	ida_simple_remove(&cpufreq_ida, cpufreq_dev->id);
-	kfree(cpufreq_dev->dyn_power_table);
-	kfree(cpufreq_dev->time_in_idle_timestamp);
-	kfree(cpufreq_dev->time_in_idle);
-	kfree(cpufreq_dev->freq_table);
-	kfree(cpufreq_dev);
+	thermal_cooling_device_unregister(cpufreq_cdev->cool_dev);
+	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
+	kfree(cpufreq_cdev->dyn_power_table);
+	kfree(cpufreq_cdev->time_in_idle_timestamp);
+	kfree(cpufreq_cdev->time_in_idle);
+	kfree(cpufreq_cdev->freq_table);
+	kfree(cpufreq_cdev);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
-- 
GitLab


From 04bdbdf93cedc728ceff6af175fcce2bedfdd25f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:11 +0530
Subject: [PATCH 0103/1958] thermal: cpu_cooling: replace cool_dev with cdev

Objects of "struct thermal_cooling_device" are named a bit
inconsistently. Lets use cdev everywhere.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 37 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 80a46a80817b0..f1e784c22c5ad 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -65,7 +65,7 @@ struct power_table {
  * struct cpufreq_cooling_device - data for cooling device with cpufreq
  * @id: unique integer value corresponding to each cpufreq_cooling_device
  *	registered.
- * @cool_dev: thermal_cooling_device pointer to keep track of the
+ * @cdev: thermal_cooling_device pointer to keep track of the
  *	registered cooling device.
  * @cpufreq_state: integer value representing the current state of cpufreq
  *	cooling	devices.
@@ -90,7 +90,7 @@ struct power_table {
  */
 struct cpufreq_cooling_device {
 	int id;
-	struct thermal_cooling_device *cool_dev;
+	struct thermal_cooling_device *cdev;
 	unsigned int cpufreq_state;
 	unsigned int clipped_freq;
 	unsigned int max_level;
@@ -242,7 +242,7 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev,
 	for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) {
 		dev = get_cpu_device(cpu);
 		if (!dev) {
-			dev_warn(&cpufreq_cdev->cool_dev->device,
+			dev_warn(&cpufreq_cdev->cdev->device,
 				 "No cpu device for cpu %d\n", cpu);
 			continue;
 		}
@@ -769,7 +769,7 @@ __cpufreq_cooling_register(struct device_node *np,
 			get_static_t plat_static_func)
 {
 	struct cpufreq_policy *policy;
-	struct thermal_cooling_device *cool_dev;
+	struct thermal_cooling_device *cdev;
 	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
 	struct cpufreq_frequency_table *pos, *table;
@@ -786,20 +786,20 @@ __cpufreq_cooling_register(struct device_node *np,
 	policy = cpufreq_cpu_get(cpumask_first(temp_mask));
 	if (!policy) {
 		pr_debug("%s: CPUFreq policy not found\n", __func__);
-		cool_dev = ERR_PTR(-EPROBE_DEFER);
+		cdev = ERR_PTR(-EPROBE_DEFER);
 		goto free_cpumask;
 	}
 
 	table = policy->freq_table;
 	if (!table) {
 		pr_debug("%s: CPUFreq table not found\n", __func__);
-		cool_dev = ERR_PTR(-ENODEV);
+		cdev = ERR_PTR(-ENODEV);
 		goto put_policy;
 	}
 
 	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
 	if (!cpufreq_cdev) {
-		cool_dev = ERR_PTR(-ENOMEM);
+		cdev = ERR_PTR(-ENOMEM);
 		goto put_policy;
 	}
 
@@ -808,7 +808,7 @@ __cpufreq_cooling_register(struct device_node *np,
 					    sizeof(*cpufreq_cdev->time_in_idle),
 					    GFP_KERNEL);
 	if (!cpufreq_cdev->time_in_idle) {
-		cool_dev = ERR_PTR(-ENOMEM);
+		cdev = ERR_PTR(-ENOMEM);
 		goto free_cdev;
 	}
 
@@ -816,7 +816,7 @@ __cpufreq_cooling_register(struct device_node *np,
 		kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle_timestamp),
 			GFP_KERNEL);
 	if (!cpufreq_cdev->time_in_idle_timestamp) {
-		cool_dev = ERR_PTR(-ENOMEM);
+		cdev = ERR_PTR(-ENOMEM);
 		goto free_time_in_idle;
 	}
 
@@ -827,7 +827,7 @@ __cpufreq_cooling_register(struct device_node *np,
 	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) *
 					  cpufreq_cdev->max_level, GFP_KERNEL);
 	if (!cpufreq_cdev->freq_table) {
-		cool_dev = ERR_PTR(-ENOMEM);
+		cdev = ERR_PTR(-ENOMEM);
 		goto free_time_in_idle_timestamp;
 	}
 
@@ -841,7 +841,7 @@ __cpufreq_cooling_register(struct device_node *np,
 
 		ret = build_dyn_power_table(cpufreq_cdev, capacitance);
 		if (ret) {
-			cool_dev = ERR_PTR(ret);
+			cdev = ERR_PTR(ret);
 			goto free_table;
 		}
 
@@ -852,7 +852,7 @@ __cpufreq_cooling_register(struct device_node *np,
 
 	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
 	if (ret < 0) {
-		cool_dev = ERR_PTR(ret);
+		cdev = ERR_PTR(ret);
 		goto free_power_table;
 	}
 	cpufreq_cdev->id = ret;
@@ -872,14 +872,13 @@ __cpufreq_cooling_register(struct device_node *np,
 	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
 		 cpufreq_cdev->id);
 
-	cool_dev = thermal_of_cooling_device_register(np, dev_name,
-						      cpufreq_cdev,
-						      cooling_ops);
-	if (IS_ERR(cool_dev))
+	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
+						  cooling_ops);
+	if (IS_ERR(cdev))
 		goto remove_ida;
 
 	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0];
-	cpufreq_cdev->cool_dev = cool_dev;
+	cpufreq_cdev->cdev = cdev;
 
 	mutex_lock(&cooling_list_lock);
 	/* Register the notifier for first cpufreq cooling device */
@@ -909,7 +908,7 @@ __cpufreq_cooling_register(struct device_node *np,
 	cpufreq_cpu_put(policy);
 free_cpumask:
 	free_cpumask_var(temp_mask);
-	return cool_dev;
+	return cdev;
 }
 
 /**
@@ -1047,7 +1046,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
 					    CPUFREQ_POLICY_NOTIFIER);
 
-	thermal_cooling_device_unregister(cpufreq_cdev->cool_dev);
+	thermal_cooling_device_unregister(cpufreq_cdev->cdev);
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
 	kfree(cpufreq_cdev->dyn_power_table);
 	kfree(cpufreq_cdev->time_in_idle_timestamp);
-- 
GitLab


From 3e08b2df12983159ea2d9a1aa2b2bc601093b3cb Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:12 +0530
Subject: [PATCH 0104/1958] thermal: cpu_cooling: remove
 cpufreq_cooling_get_level()

There is only one user of cpufreq_cooling_get_level() and that already
has pointer to the cpufreq_cdev structure. It can directly call
get_level() instead and we can get rid of cpufreq_cooling_get_level().

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 33 +--------------------------------
 include/linux/cpu_cooling.h   |  6 ------
 2 files changed, 1 insertion(+), 38 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index f1e784c22c5ad..1f4b6a719d05a 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -135,37 +135,6 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 	return THERMAL_CSTATE_INVALID;
 }
 
-/**
- * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
- * @cpu: cpu for which the level is required
- * @freq: the frequency of interest
- *
- * This function will match the cooling level corresponding to the
- * requested @freq and return it.
- *
- * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
- * otherwise.
- */
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
-{
-	struct cpufreq_cooling_device *cpufreq_cdev;
-
-	mutex_lock(&cooling_list_lock);
-	list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
-		if (cpumask_test_cpu(cpu, &cpufreq_cdev->allowed_cpus)) {
-			unsigned long level = get_level(cpufreq_cdev, freq);
-
-			mutex_unlock(&cooling_list_lock);
-			return level;
-		}
-	}
-	mutex_unlock(&cooling_list_lock);
-
-	pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
-	return THERMAL_CSTATE_INVALID;
-}
-EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
-
 /**
  * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  * @nb:	struct notifier_block * with callback info.
@@ -697,7 +666,7 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 	normalised_power = (dyn_power * 100) / last_load;
 	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
 
-	*state = cpufreq_cooling_get_level(cpu, target_freq);
+	*state = get_level(cpufreq_cdev, target_freq);
 	if (*state == THERMAL_CSTATE_INVALID) {
 		dev_err_ratelimited(&cdev->device,
 				    "Failed to convert %dKHz for cpu %d into a cdev state\n",
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index c156f50827589..96c5e4c2f9c8e 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -82,7 +82,6 @@ of_cpufreq_power_cooling_register(struct device_node *np,
  */
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
 
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq);
 #else /* !CONFIG_CPU_THERMAL */
 static inline struct thermal_cooling_device *
 cpufreq_cooling_register(const struct cpumask *clip_cpus)
@@ -117,11 +116,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
 	return;
 }
-static inline
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
-{
-	return THERMAL_CSTATE_INVALID;
-}
 #endif	/* CONFIG_CPU_THERMAL */
 
 #endif /* __CPU_COOLING_H__ */
-- 
GitLab


From 18f301c934db0f7bae4e7c12e941bce11d67aec5 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:13 +0530
Subject: [PATCH 0105/1958] thermal: cpu_cooling: get rid of a variable in
 cpufreq_set_cur_state()

'cpu' is used at only one place and there is no need to keep a separate
variable for it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 1f4b6a719d05a..002b48dc6beaa 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -456,7 +456,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 				 unsigned long state)
 {
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
-	unsigned int cpu = cpumask_any(&cpufreq_cdev->allowed_cpus);
 	unsigned int clip_freq;
 
 	/* Request state should be less than max_level */
@@ -471,7 +470,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 	cpufreq_cdev->cpufreq_state = state;
 	cpufreq_cdev->clipped_freq = clip_freq;
 
-	cpufreq_update_policy(cpu);
+	cpufreq_update_policy(cpumask_any(&cpufreq_cdev->allowed_cpus));
 
 	return 0;
 }
-- 
GitLab


From 4d753aa7b6279e4b7d338947a434689962f430d1 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:14 +0530
Subject: [PATCH 0106/1958] thermal: cpu_cooling: use cpufreq_policy to
 register cooling device

The CPU cooling driver uses the cpufreq policy, to get clip_cpus, the
frequency table, etc. Most of the callers of CPU cooling driver's
registration routines have the cpufreq policy with them, but they only
pass the policy->related_cpus cpumask. The __cpufreq_cooling_register()
routine then gets the policy by itself and uses it.

It would be much better if the callers can pass the policy instead
directly. This also fixes a basic design flaw, where the policy can be
freed while the CPU cooling driver is still active.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/cpufreq/arm_big_little.c              |  2 +-
 drivers/cpufreq/cpufreq-dt.c                  |  2 +-
 drivers/cpufreq/dbx500-cpufreq.c              |  2 +-
 drivers/cpufreq/mt8173-cpufreq.c              |  4 +-
 drivers/cpufreq/qoriq-cpufreq.c               |  3 +-
 drivers/thermal/cpu_cooling.c                 | 61 +++++++------------
 drivers/thermal/imx_thermal.c                 | 22 +++++--
 .../ti-soc-thermal/ti-thermal-common.c        | 22 ++++---
 include/linux/cpu_cooling.h                   | 26 ++++----
 9 files changed, 74 insertions(+), 70 deletions(-)

diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 418042201e6da..ea6d62547b10d 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -540,7 +540,7 @@ static void bL_cpufreq_ready(struct cpufreq_policy *policy)
 				     &power_coefficient);
 
 		cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
-				policy->related_cpus, power_coefficient, NULL);
+				policy, power_coefficient, NULL);
 		if (IS_ERR(cdev[cur_cluster])) {
 			dev_err(cpu_dev,
 				"running cpufreq without cooling device: %ld\n",
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index c943787d761ee..fef3c21606911 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -326,7 +326,7 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
 				     &power_coefficient);
 
 		priv->cdev = of_cpufreq_power_cooling_register(np,
-				policy->related_cpus, power_coefficient, NULL);
+				policy, power_coefficient, NULL);
 		if (IS_ERR(priv->cdev)) {
 			dev_err(priv->cpu_dev,
 				"running cpufreq without cooling device: %ld\n",
diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index 3575b82210ba0..4ee0431579c17 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c
@@ -43,7 +43,7 @@ static int dbx500_cpufreq_exit(struct cpufreq_policy *policy)
 
 static void dbx500_cpufreq_ready(struct cpufreq_policy *policy)
 {
-	cdev = cpufreq_cooling_register(policy->cpus);
+	cdev = cpufreq_cooling_register(policy);
 	if (IS_ERR(cdev))
 		pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev));
 	else
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index fd1886faf33ab..f9f00fb4bc3a8 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -320,9 +320,7 @@ static void mtk_cpufreq_ready(struct cpufreq_policy *policy)
 		of_property_read_u32(np, DYNAMIC_POWER, &capacitance);
 
 		info->cdev = of_cpufreq_power_cooling_register(np,
-						policy->related_cpus,
-						capacitance,
-						NULL);
+						policy, capacitance, NULL);
 
 		if (IS_ERR(info->cdev)) {
 			dev_err(info->cpu_dev,
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index e2ea433a5f9c3..4ada55b8856e1 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -278,8 +278,7 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
 	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
 
 	if (of_find_property(np, "#cooling-cells", NULL)) {
-		cpud->cdev = of_cpufreq_cooling_register(np,
-							 policy->related_cpus);
+		cpud->cdev = of_cpufreq_cooling_register(np, policy);
 
 		if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
 			pr_err("cpu%d is not running as cooling device: %ld\n",
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 002b48dc6beaa..58e58065b6500 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -717,7 +717,7 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
 /**
  * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  * @np: a valid struct device_node to the cooling device device tree node
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  * Normally this should be same as cpufreq policy->related_cpus.
  * @capacitance: dynamic power coefficient for these cpus
  * @plat_static_func: function to calculate the static power consumed by these
@@ -733,45 +733,34 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  */
 static struct thermal_cooling_device *
 __cpufreq_cooling_register(struct device_node *np,
-			const struct cpumask *clip_cpus, u32 capacitance,
+			struct cpufreq_policy *policy, u32 capacitance,
 			get_static_t plat_static_func)
 {
-	struct cpufreq_policy *policy;
 	struct thermal_cooling_device *cdev;
 	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
 	struct cpufreq_frequency_table *pos, *table;
-	cpumask_var_t temp_mask;
 	unsigned int freq, i, num_cpus;
 	int ret;
 	struct thermal_cooling_device_ops *cooling_ops;
 	bool first;
 
-	if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL))
-		return ERR_PTR(-ENOMEM);
-
-	cpumask_and(temp_mask, clip_cpus, cpu_online_mask);
-	policy = cpufreq_cpu_get(cpumask_first(temp_mask));
-	if (!policy) {
-		pr_debug("%s: CPUFreq policy not found\n", __func__);
-		cdev = ERR_PTR(-EPROBE_DEFER);
-		goto free_cpumask;
+	if (IS_ERR_OR_NULL(policy)) {
+		pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy);
+		return ERR_PTR(-EINVAL);
 	}
 
 	table = policy->freq_table;
 	if (!table) {
 		pr_debug("%s: CPUFreq table not found\n", __func__);
-		cdev = ERR_PTR(-ENODEV);
-		goto put_policy;
+		return ERR_PTR(-ENODEV);
 	}
 
 	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
-	if (!cpufreq_cdev) {
-		cdev = ERR_PTR(-ENOMEM);
-		goto put_policy;
-	}
+	if (!cpufreq_cdev)
+		return ERR_PTR(-ENOMEM);
 
-	num_cpus = cpumask_weight(clip_cpus);
+	num_cpus = cpumask_weight(policy->related_cpus);
 	cpufreq_cdev->time_in_idle = kcalloc(num_cpus,
 					    sizeof(*cpufreq_cdev->time_in_idle),
 					    GFP_KERNEL);
@@ -802,7 +791,7 @@ __cpufreq_cooling_register(struct device_node *np,
 	/* max_level is an index, not a counter */
 	cpufreq_cdev->max_level--;
 
-	cpumask_copy(&cpufreq_cdev->allowed_cpus, clip_cpus);
+	cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus);
 
 	if (capacitance) {
 		cpufreq_cdev->plat_get_static_power = plat_static_func;
@@ -858,7 +847,7 @@ __cpufreq_cooling_register(struct device_node *np,
 		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
 					  CPUFREQ_POLICY_NOTIFIER);
 
-	goto put_policy;
+	return cdev;
 
 remove_ida:
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
@@ -872,16 +861,12 @@ __cpufreq_cooling_register(struct device_node *np,
 	kfree(cpufreq_cdev->time_in_idle);
 free_cdev:
 	kfree(cpufreq_cdev);
-put_policy:
-	cpufreq_cpu_put(policy);
-free_cpumask:
-	free_cpumask_var(temp_mask);
 	return cdev;
 }
 
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -891,16 +876,16 @@ __cpufreq_cooling_register(struct device_node *np,
  * on failure, it returns a corresponding ERR_PTR().
  */
 struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+cpufreq_cooling_register(struct cpufreq_policy *policy)
 {
-	return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
+	return __cpufreq_cooling_register(NULL, policy, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
 
 /**
  * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  * @np: a valid struct device_node to the cooling device device tree node
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -912,18 +897,18 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  */
 struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	if (!np)
 		return ERR_PTR(-EINVAL);
 
-	return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
+	return __cpufreq_cooling_register(np, policy, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
 
 /**
  * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
- * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @policy:		cpufreq policy
  * @capacitance:	dynamic power coefficient for these cpus
  * @plat_static_func:	function to calculate the static power consumed by these
  *			cpus (optional)
@@ -943,10 +928,10 @@ EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  * on failure, it returns a corresponding ERR_PTR().
  */
 struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance,
 			       get_static_t plat_static_func)
 {
-	return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
+	return __cpufreq_cooling_register(NULL, policy, capacitance,
 				plat_static_func);
 }
 EXPORT_SYMBOL(cpufreq_power_cooling_register);
@@ -954,7 +939,7 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register);
 /**
  * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  * @np:	a valid struct device_node to the cooling device device tree node
- * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy
  * @capacitance:	dynamic power coefficient for these cpus
  * @plat_static_func:	function to calculate the static power consumed by these
  *			cpus (optional)
@@ -976,14 +961,14 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register);
  */
 struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
 	if (!np)
 		return ERR_PTR(-EINVAL);
 
-	return __cpufreq_cooling_register(np, clip_cpus, capacitance,
+	return __cpufreq_cooling_register(np, policy, capacitance,
 				plat_static_func);
 }
 EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index fb648a45754ee..f7ec39f46ee4d 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/cpufreq.h>
 #include <linux/cpu_cooling.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -88,6 +89,7 @@ static struct thermal_soc_data thermal_imx6sx_data = {
 };
 
 struct imx_thermal_data {
+	struct cpufreq_policy *policy;
 	struct thermal_zone_device *tz;
 	struct thermal_cooling_device *cdev;
 	enum thermal_device_mode mode;
@@ -525,13 +527,18 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
 
-	data->cdev = cpufreq_cooling_register(cpu_present_mask);
+	data->policy = cpufreq_cpu_get(0);
+	if (!data->policy) {
+		pr_debug("%s: CPUFreq policy not found\n", __func__);
+		return -EPROBE_DEFER;
+	}
+
+	data->cdev = cpufreq_cooling_register(data->policy);
 	if (IS_ERR(data->cdev)) {
 		ret = PTR_ERR(data->cdev);
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev,
-				"failed to register cpufreq cooling device: %d\n",
-				ret);
+		dev_err(&pdev->dev,
+			"failed to register cpufreq cooling device: %d\n", ret);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -542,6 +549,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev,
 				"failed to get thermal clk: %d\n", ret);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -556,6 +564,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -571,6 +580,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			"failed to register thermal zone device %d\n", ret);
 		clk_disable_unprepare(data->thermal_clk);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -599,6 +609,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 		clk_disable_unprepare(data->thermal_clk);
 		thermal_zone_device_unregister(data->tz);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -620,6 +631,7 @@ static int imx_thermal_remove(struct platform_device *pdev)
 
 	thermal_zone_device_unregister(data->tz);
 	cpufreq_cooling_unregister(data->cdev);
+	cpufreq_cpu_put(data->policy);
 
 	return 0;
 }
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 02790f69e26ce..c211a8e4a2105 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -28,6 +28,7 @@
 #include <linux/kernel.h>
 #include <linux/workqueue.h>
 #include <linux/thermal.h>
+#include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
 #include <linux/of.h>
@@ -37,6 +38,7 @@
 
 /* common data structures */
 struct ti_thermal_data {
+	struct cpufreq_policy *policy;
 	struct thermal_zone_device *ti_thermal;
 	struct thermal_zone_device *pcb_tz;
 	struct thermal_cooling_device *cool_dev;
@@ -247,15 +249,19 @@ int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id)
 	if (!data)
 		return -EINVAL;
 
+	data->policy = cpufreq_cpu_get(0);
+	if (!data->policy) {
+		pr_debug("%s: CPUFreq policy not found\n", __func__);
+		return -EPROBE_DEFER;
+	}
+
 	/* Register cooling device */
-	data->cool_dev = cpufreq_cooling_register(cpu_present_mask);
+	data->cool_dev = cpufreq_cooling_register(data->policy);
 	if (IS_ERR(data->cool_dev)) {
 		int ret = PTR_ERR(data->cool_dev);
-
-		if (ret != -EPROBE_DEFER)
-			dev_err(bgp->dev,
-				"Failed to register cpu cooling device %d\n",
-				ret);
+		dev_err(bgp->dev, "Failed to register cpu cooling device %d\n",
+			ret);
+		cpufreq_cpu_put(data->policy);
 
 		return ret;
 	}
@@ -270,8 +276,10 @@ int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id)
 
 	data = ti_bandgap_get_sensor_data(bgp, id);
 
-	if (data)
+	if (data) {
 		cpufreq_cooling_unregister(data->cool_dev);
+		cpufreq_cpu_put(data->policy);
+	}
 
 	return 0;
 }
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index 96c5e4c2f9c8e..d4292ebc5c8b0 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -28,47 +28,49 @@
 #include <linux/thermal.h>
 #include <linux/cpumask.h>
 
+struct cpufreq_policy;
+
 typedef int (*get_static_t)(cpumask_t *cpumask, int interval,
 			    unsigned long voltage, u32 *power);
 
 #ifdef CONFIG_CPU_THERMAL
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
  */
 struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus);
+cpufreq_cooling_register(struct cpufreq_policy *policy);
 
 struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
 			       u32 capacitance, get_static_t plat_static_func);
 
 /**
  * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
  * @np: a valid struct device_node to the cooling device device tree node.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
  */
 #ifdef CONFIG_THERMAL_OF
 struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus);
+			    struct cpufreq_policy *policy);
 
 struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func);
 #else
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
@@ -84,12 +86,12 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
 
 #else /* !CONFIG_CPU_THERMAL */
 static inline struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+cpufreq_cooling_register(struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 static inline struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
 			       u32 capacitance, get_static_t plat_static_func)
 {
 	return NULL;
@@ -97,14 +99,14 @@ cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
 
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
-- 
GitLab


From 55d852931319d2e3ccde86cd426405231ce6c6ac Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:15 +0530
Subject: [PATCH 0107/1958] cpufreq: create cpufreq_table_count_valid_entries()

We need such a routine at two places already, lets create one.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/cpufreq/cpufreq_stats.c | 13 ++++---------
 drivers/thermal/cpu_cooling.c   | 22 +++++++++-------------
 include/linux/cpufreq.h         | 14 ++++++++++++++
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index f570ead624547..9c3d319dc1290 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -170,11 +170,10 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	unsigned int i = 0, count = 0, ret = -ENOMEM;
 	struct cpufreq_stats *stats;
 	unsigned int alloc_size;
-	struct cpufreq_frequency_table *pos, *table;
+	struct cpufreq_frequency_table *pos;
 
-	/* We need cpufreq table for creating stats table */
-	table = policy->freq_table;
-	if (unlikely(!table))
+	count = cpufreq_table_count_valid_entries(policy);
+	if (!count)
 		return;
 
 	/* stats already initialized */
@@ -185,10 +184,6 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	if (!stats)
 		return;
 
-	/* Find total allocation size */
-	cpufreq_for_each_valid_entry(pos, table)
-		count++;
-
 	alloc_size = count * sizeof(int) + count * sizeof(u64);
 
 	alloc_size += count * count * sizeof(int);
@@ -205,7 +200,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	stats->max_state = count;
 
 	/* Find valid-unique entries */
-	cpufreq_for_each_valid_entry(pos, table)
+	cpufreq_for_each_valid_entry(pos, policy->freq_table)
 		if (freq_table_get_index(stats, pos->frequency) == -1)
 			stats->freq_table[i++] = pos->frequency;
 
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 58e58065b6500..55ff45c1e917a 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -739,7 +739,6 @@ __cpufreq_cooling_register(struct device_node *np,
 	struct thermal_cooling_device *cdev;
 	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
-	struct cpufreq_frequency_table *pos, *table;
 	unsigned int freq, i, num_cpus;
 	int ret;
 	struct thermal_cooling_device_ops *cooling_ops;
@@ -750,9 +749,10 @@ __cpufreq_cooling_register(struct device_node *np,
 		return ERR_PTR(-EINVAL);
 	}
 
-	table = policy->freq_table;
-	if (!table) {
-		pr_debug("%s: CPUFreq table not found\n", __func__);
+	i = cpufreq_table_count_valid_entries(policy);
+	if (!i) {
+		pr_debug("%s: CPUFreq table not found or has no valid entries\n",
+			 __func__);
 		return ERR_PTR(-ENODEV);
 	}
 
@@ -777,20 +777,16 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto free_time_in_idle;
 	}
 
-	/* Find max levels */
-	cpufreq_for_each_valid_entry(pos, table)
-		cpufreq_cdev->max_level++;
+	/* max_level is an index, not a counter */
+	cpufreq_cdev->max_level = i - 1;
 
-	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) *
-					  cpufreq_cdev->max_level, GFP_KERNEL);
+	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i,
+					  GFP_KERNEL);
 	if (!cpufreq_cdev->freq_table) {
 		cdev = ERR_PTR(-ENOMEM);
 		goto free_time_in_idle_timestamp;
 	}
 
-	/* max_level is an index, not a counter */
-	cpufreq_cdev->max_level--;
-
 	cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus);
 
 	if (capacitance) {
@@ -816,7 +812,7 @@ __cpufreq_cooling_register(struct device_node *np,
 
 	/* Fill freq-table in descending order of frequencies */
 	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
-		freq = find_next_max(table, freq);
+		freq = find_next_max(policy->freq_table, freq);
 		cpufreq_cdev->freq_table[i] = freq;
 
 		/* Warn for duplicate entries */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a5ce0bbeadb5d..eb9abfadaeac4 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -862,6 +862,20 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 		return -EINVAL;
 	}
 }
+
+static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *pos;
+	int count = 0;
+
+	if (unlikely(!policy->freq_table))
+		return 0;
+
+	cpufreq_for_each_valid_entry(pos, policy->freq_table)
+		count++;
+
+	return count;
+}
 #else
 static inline int cpufreq_boost_trigger_state(int state)
 {
-- 
GitLab


From b12b6519496bb07b0845ed50eee1ca75c3b7fa81 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:16 +0530
Subject: [PATCH 0108/1958] thermal: cpu_cooling: store cpufreq policy

The cpufreq policy can be used by the cpu_cooling driver, lets store it
in the cpufreq_cooling_device structure.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 55ff45c1e917a..7dddc7443f5d0 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -67,6 +67,7 @@ struct power_table {
  *	registered.
  * @cdev: thermal_cooling_device pointer to keep track of the
  *	registered cooling device.
+ * @policy: cpufreq policy.
  * @cpufreq_state: integer value representing the current state of cpufreq
  *	cooling	devices.
  * @clipped_freq: integer value representing the absolute value of the clipped
@@ -91,6 +92,7 @@ struct power_table {
 struct cpufreq_cooling_device {
 	int id;
 	struct thermal_cooling_device *cdev;
+	struct cpufreq_policy *policy;
 	unsigned int cpufreq_state;
 	unsigned int clipped_freq;
 	unsigned int max_level;
@@ -760,6 +762,7 @@ __cpufreq_cooling_register(struct device_node *np,
 	if (!cpufreq_cdev)
 		return ERR_PTR(-ENOMEM);
 
+	cpufreq_cdev->policy = policy;
 	num_cpus = cpumask_weight(policy->related_cpus);
 	cpufreq_cdev->time_in_idle = kcalloc(num_cpus,
 					    sizeof(*cpufreq_cdev->time_in_idle),
-- 
GitLab


From 02bacb21c6575ce408645ddb22fef44940d10257 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:17 +0530
Subject: [PATCH 0109/1958] thermal: cpu_cooling: OPPs are registered for all
 CPUs

The OPPs are registered for all CPUs of a cpufreq policy now and we
don't need to run the loop in build_dyn_power_table(). Just check for
the policy->cpu and we should be fine.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 7dddc7443f5d0..ce387f62c93e8 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -83,7 +83,7 @@ struct power_table {
  * @dyn_power_table: array of struct power_table for frequency to power
  *	conversion, sorted in ascending order.
  * @dyn_power_table_entries: number of entries in the @dyn_power_table array
- * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
+ * @cpu_dev: the cpu_device of policy->cpu.
  * @plat_get_static_power: callback to calculate the static power
  *
  * This structure is required for keeping information of each registered
@@ -207,24 +207,20 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev,
 	struct power_table *power_table;
 	struct dev_pm_opp *opp;
 	struct device *dev = NULL;
-	int num_opps = 0, cpu, i, ret = 0;
+	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i, ret = 0;
 	unsigned long freq;
 
-	for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) {
-		dev = get_cpu_device(cpu);
-		if (!dev) {
-			dev_warn(&cpufreq_cdev->cdev->device,
-				 "No cpu device for cpu %d\n", cpu);
-			continue;
-		}
-
-		num_opps = dev_pm_opp_get_opp_count(dev);
-		if (num_opps > 0)
-			break;
-		else if (num_opps < 0)
-			return num_opps;
+	dev = get_cpu_device(cpu);
+	if (unlikely(!dev)) {
+		dev_warn(&cpufreq_cdev->cdev->device,
+			 "No cpu device for cpu %d\n", cpu);
+		return -ENODEV;
 	}
 
+	num_opps = dev_pm_opp_get_opp_count(dev);
+	if (num_opps < 0)
+		return num_opps;
+
 	if (num_opps == 0)
 		return -EINVAL;
 
-- 
GitLab


From ba76dd9ddd4046ece873444c2256e09afbd5d32e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:18 +0530
Subject: [PATCH 0110/1958] thermal: cpu_cooling: get rid of 'allowed_cpus'

'allowed_cpus' is a copy of policy->related_cpus and can be replaced by
it directly. At some places we are only concerned about online CPUs and
policy->cpus can be used there.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 81 +++++++++++------------------------
 1 file changed, 25 insertions(+), 56 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index ce387f62c93e8..94e7121817bf6 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -74,7 +74,6 @@ struct power_table {
  *	frequency.
  * @max_level: maximum cooling level. One less than total number of valid
  *	cpufreq frequencies.
- * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
  * @node: list_head to link all cpufreq_cooling_device together.
  * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  * @time_in_idle: previous reading of the absolute time that this cpu was idle
@@ -97,7 +96,6 @@ struct cpufreq_cooling_device {
 	unsigned int clipped_freq;
 	unsigned int max_level;
 	unsigned int *freq_table;	/* In descending order */
-	struct cpumask allowed_cpus;
 	struct list_head node;
 	u32 last_load;
 	u64 *time_in_idle;
@@ -161,7 +159,11 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 
 	mutex_lock(&cooling_list_lock);
 	list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
-		if (!cpumask_test_cpu(policy->cpu, &cpufreq_cdev->allowed_cpus))
+		/*
+		 * A new copy of the policy is sent to the notifier and can't
+		 * compare that directly.
+		 */
+		if (policy->cpu != cpufreq_cdev->policy->cpu)
 			continue;
 
 		/*
@@ -304,7 +306,7 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
  * get_load() - get load for a cpu since last updated
  * @cpufreq_cdev:	&struct cpufreq_cooling_device for this cpu
  * @cpu:	cpu number
- * @cpu_idx:	index of the cpu in cpufreq_cdev->allowed_cpus
+ * @cpu_idx:	index of the cpu in time_in_idle*
  *
  * Return: The average load of cpu @cpu in percentage since this
  * function was last called.
@@ -351,7 +353,7 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
 {
 	struct dev_pm_opp *opp;
 	unsigned long voltage;
-	struct cpumask *cpumask = &cpufreq_cdev->allowed_cpus;
+	struct cpumask *cpumask = cpufreq_cdev->policy->related_cpus;
 	unsigned long freq_hz = freq * 1000;
 
 	if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) {
@@ -468,7 +470,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 	cpufreq_cdev->cpufreq_state = state;
 	cpufreq_cdev->clipped_freq = clip_freq;
 
-	cpufreq_update_policy(cpumask_any(&cpufreq_cdev->allowed_cpus));
+	cpufreq_update_policy(cpufreq_cdev->policy->cpu);
 
 	return 0;
 }
@@ -504,28 +506,18 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 	int i = 0, cpu, ret;
 	u32 static_power, dynamic_power, total_load = 0;
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
+	struct cpufreq_policy *policy = cpufreq_cdev->policy;
 	u32 *load_cpu = NULL;
 
-	cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask);
-
-	/*
-	 * All the CPUs are offline, thus the requested power by
-	 * the cdev is 0
-	 */
-	if (cpu >= nr_cpu_ids) {
-		*power = 0;
-		return 0;
-	}
-
-	freq = cpufreq_quick_get(cpu);
+	freq = cpufreq_quick_get(policy->cpu);
 
 	if (trace_thermal_power_cpu_get_power_enabled()) {
-		u32 ncpus = cpumask_weight(&cpufreq_cdev->allowed_cpus);
+		u32 ncpus = cpumask_weight(policy->related_cpus);
 
 		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
 	}
 
-	for_each_cpu(cpu, &cpufreq_cdev->allowed_cpus) {
+	for_each_cpu(cpu, policy->related_cpus) {
 		u32 load;
 
 		if (cpu_online(cpu))
@@ -550,9 +542,9 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 	}
 
 	if (load_cpu) {
-		trace_thermal_power_cpu_get_power(
-			&cpufreq_cdev->allowed_cpus,
-			freq, load_cpu, i, dynamic_power, static_power);
+		trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
+						  load_cpu, i, dynamic_power,
+						  static_power);
 
 		kfree(load_cpu);
 	}
@@ -581,38 +573,22 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 			       unsigned long state, u32 *power)
 {
 	unsigned int freq, num_cpus;
-	cpumask_var_t cpumask;
 	u32 static_power, dynamic_power;
 	int ret;
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
-	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
-		return -ENOMEM;
-
-	cpumask_and(cpumask, &cpufreq_cdev->allowed_cpus, cpu_online_mask);
-	num_cpus = cpumask_weight(cpumask);
-
-	/* None of our cpus are online, so no power */
-	if (num_cpus == 0) {
-		*power = 0;
-		ret = 0;
-		goto out;
-	}
+	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
 
 	freq = cpufreq_cdev->freq_table[state];
-	if (!freq) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!freq)
+		return -EINVAL;
 
 	dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
 	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
 	if (ret)
-		goto out;
+		return ret;
 
 	*power = static_power + dynamic_power;
-out:
-	free_cpumask_var(cpumask);
 	return ret;
 }
 
@@ -640,19 +616,14 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 			       struct thermal_zone_device *tz, u32 power,
 			       unsigned long *state)
 {
-	unsigned int cpu, cur_freq, target_freq;
+	unsigned int cur_freq, target_freq;
 	int ret;
 	s32 dyn_power;
 	u32 last_load, normalised_power, static_power;
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
+	struct cpufreq_policy *policy = cpufreq_cdev->policy;
 
-	cpu = cpumask_any_and(&cpufreq_cdev->allowed_cpus, cpu_online_mask);
-
-	/* None of our cpus are online */
-	if (cpu >= nr_cpu_ids)
-		return -ENODEV;
-
-	cur_freq = cpufreq_quick_get(cpu);
+	cur_freq = cpufreq_quick_get(policy->cpu);
 	ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power);
 	if (ret)
 		return ret;
@@ -667,12 +638,12 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 	if (*state == THERMAL_CSTATE_INVALID) {
 		dev_err_ratelimited(&cdev->device,
 				    "Failed to convert %dKHz for cpu %d into a cdev state\n",
-				    target_freq, cpu);
+				    target_freq, policy->cpu);
 		return -EINVAL;
 	}
 
-	trace_thermal_power_cpu_limit(&cpufreq_cdev->allowed_cpus,
-				      target_freq, *state, power);
+	trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
+				      power);
 	return 0;
 }
 
@@ -786,8 +757,6 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto free_time_in_idle_timestamp;
 	}
 
-	cpumask_copy(&cpufreq_cdev->allowed_cpus, policy->related_cpus);
-
 	if (capacitance) {
 		cpufreq_cdev->plat_get_static_power = plat_static_func;
 
-- 
GitLab


From 349d39dc57396e3e9f39170905ff8d626eea9e44 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:19 +0530
Subject: [PATCH 0111/1958] thermal: cpu_cooling: merge frequency and power
 tables

The cpu_cooling driver keeps two tables:

- freq_table: table of frequencies in descending order, built from
  policy->freq_table.

- power_table: table of frequencies and power in ascending order, built
  from OPP table.

If the OPPs are used for the CPU device then both these tables are
actually built using the OPP core and should have the same frequency
entries. And there is no need to keep separate tables for this.

Lets merge them both.

Note that the new table is in descending order of frequencies and so the
'for' loops were required to be fixed at few places to make it work.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 153 +++++++++++++++-------------------
 1 file changed, 67 insertions(+), 86 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 94e7121817bf6..67ec52d5f7fc5 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -49,14 +49,14 @@
  */
 
 /**
- * struct power_table - frequency to power conversion
+ * struct freq_table - frequency table along with power entries
  * @frequency:	frequency in KHz
  * @power:	power in mW
  *
  * This structure is built when the cooling device registers and helps
- * in translating frequency to power and viceversa.
+ * in translating frequency to power and vice versa.
  */
-struct power_table {
+struct freq_table {
 	u32 frequency;
 	u32 power;
 };
@@ -79,9 +79,6 @@ struct power_table {
  * @time_in_idle: previous reading of the absolute time that this cpu was idle
  * @time_in_idle_timestamp: wall time of the last invocation of
  *	get_cpu_idle_time_us()
- * @dyn_power_table: array of struct power_table for frequency to power
- *	conversion, sorted in ascending order.
- * @dyn_power_table_entries: number of entries in the @dyn_power_table array
  * @cpu_dev: the cpu_device of policy->cpu.
  * @plat_get_static_power: callback to calculate the static power
  *
@@ -95,13 +92,11 @@ struct cpufreq_cooling_device {
 	unsigned int cpufreq_state;
 	unsigned int clipped_freq;
 	unsigned int max_level;
-	unsigned int *freq_table;	/* In descending order */
+	struct freq_table *freq_table;	/* In descending order */
 	struct list_head node;
 	u32 last_load;
 	u64 *time_in_idle;
 	u64 *time_in_idle_timestamp;
-	struct power_table *dyn_power_table;
-	int dyn_power_table_entries;
 	struct device *cpu_dev;
 	get_static_t plat_get_static_power;
 };
@@ -125,10 +120,10 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 	unsigned long level;
 
 	for (level = 0; level <= cpufreq_cdev->max_level; level++) {
-		if (freq == cpufreq_cdev->freq_table[level])
+		if (freq == cpufreq_cdev->freq_table[level].frequency)
 			return level;
 
-		if (freq > cpufreq_cdev->freq_table[level])
+		if (freq > cpufreq_cdev->freq_table[level].frequency)
 			break;
 	}
 
@@ -189,28 +184,25 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 }
 
 /**
- * build_dyn_power_table() - create a dynamic power to frequency table
- * @cpufreq_cdev:	the cpufreq cooling device in which to store the table
+ * update_freq_table() - Update the freq table with power numbers
+ * @cpufreq_cdev:	the cpufreq cooling device in which to update the table
  * @capacitance: dynamic power coefficient for these cpus
  *
- * Build a dynamic power to frequency table for this cpu and store it
- * in @cpufreq_cdev.  This table will be used in cpu_power_to_freq() and
- * cpu_freq_to_power() to convert between power and frequency
- * efficiently.  Power is stored in mW, frequency in KHz.  The
- * resulting table is in ascending order.
+ * Update the freq table with power numbers.  This table will be used in
+ * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
+ * frequency efficiently.  Power is stored in mW, frequency in KHz.  The
+ * resulting table is in descending order.
  *
  * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
- * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
- * added/enabled while the function was executing.
+ * or -ENOMEM if we run out of memory.
  */
-static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev,
-				 u32 capacitance)
+static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
+			     u32 capacitance)
 {
-	struct power_table *power_table;
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 	struct dev_pm_opp *opp;
 	struct device *dev = NULL;
-	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i, ret = 0;
-	unsigned long freq;
+	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
 
 	dev = get_cpu_device(cpu);
 	if (unlikely(!dev)) {
@@ -223,25 +215,32 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev,
 	if (num_opps < 0)
 		return num_opps;
 
-	if (num_opps == 0)
+	/*
+	 * The cpufreq table is also built from the OPP table and so the count
+	 * should match.
+	 */
+	if (num_opps != cpufreq_cdev->max_level + 1) {
+		dev_warn(dev, "Number of OPPs not matching with max_levels\n");
 		return -EINVAL;
+	}
 
-	power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
-	if (!power_table)
-		return -ENOMEM;
-
-	for (freq = 0, i = 0;
-	     opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
-	     freq++, i++) {
-		u32 freq_mhz, voltage_mv;
+	for (i = 0; i <= cpufreq_cdev->max_level; i++) {
+		unsigned long freq = freq_table[i].frequency * 1000;
+		u32 freq_mhz = freq_table[i].frequency / 1000;
 		u64 power;
+		u32 voltage_mv;
 
-		if (i >= num_opps) {
-			ret = -EAGAIN;
-			goto free_power_table;
+		/*
+		 * Find ceil frequency as 'freq' may be slightly lower than OPP
+		 * freq due to truncation while converting to kHz.
+		 */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		if (IS_ERR(opp)) {
+			dev_err(dev, "failed to get opp for %lu frequency\n",
+				freq);
+			return -EINVAL;
 		}
 
-		freq_mhz = freq / 1000000;
 		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
 		dev_pm_opp_put(opp);
 
@@ -252,54 +251,39 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_cdev,
 		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
 		do_div(power, 1000000000);
 
-		/* frequency is stored in power_table in KHz */
-		power_table[i].frequency = freq / 1000;
-
 		/* power is stored in mW */
-		power_table[i].power = power;
-	}
-
-	if (i != num_opps) {
-		ret = PTR_ERR(opp);
-		goto free_power_table;
+		freq_table[i].power = power;
 	}
 
 	cpufreq_cdev->cpu_dev = dev;
-	cpufreq_cdev->dyn_power_table = power_table;
-	cpufreq_cdev->dyn_power_table_entries = i;
 
 	return 0;
-
-free_power_table:
-	kfree(power_table);
-
-	return ret;
 }
 
 static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 freq)
 {
 	int i;
-	struct power_table *pt = cpufreq_cdev->dyn_power_table;
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 
-	for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++)
-		if (freq < pt[i].frequency)
+	for (i = 1; i <= cpufreq_cdev->max_level; i++)
+		if (freq > freq_table[i].frequency)
 			break;
 
-	return pt[i - 1].power;
+	return freq_table[i - 1].power;
 }
 
 static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 power)
 {
 	int i;
-	struct power_table *pt = cpufreq_cdev->dyn_power_table;
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 
-	for (i = 1; i < cpufreq_cdev->dyn_power_table_entries; i++)
-		if (power < pt[i].power)
+	for (i = 1; i <= cpufreq_cdev->max_level; i++)
+		if (power > freq_table[i].power)
 			break;
 
-	return pt[i - 1].frequency;
+	return freq_table[i - 1].frequency;
 }
 
 /**
@@ -466,7 +450,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 	if (cpufreq_cdev->cpufreq_state == state)
 		return 0;
 
-	clip_freq = cpufreq_cdev->freq_table[state];
+	clip_freq = cpufreq_cdev->freq_table[state].frequency;
 	cpufreq_cdev->cpufreq_state = state;
 	cpufreq_cdev->clipped_freq = clip_freq;
 
@@ -579,7 +563,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 
 	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
 
-	freq = cpufreq_cdev->freq_table[state];
+	freq = cpufreq_cdev->freq_table[state].frequency;
 	if (!freq)
 		return -EINVAL;
 
@@ -757,31 +741,20 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto free_time_in_idle_timestamp;
 	}
 
-	if (capacitance) {
-		cpufreq_cdev->plat_get_static_power = plat_static_func;
-
-		ret = build_dyn_power_table(cpufreq_cdev, capacitance);
-		if (ret) {
-			cdev = ERR_PTR(ret);
-			goto free_table;
-		}
-
-		cooling_ops = &cpufreq_power_cooling_ops;
-	} else {
-		cooling_ops = &cpufreq_cooling_ops;
-	}
-
 	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
 	if (ret < 0) {
 		cdev = ERR_PTR(ret);
-		goto free_power_table;
+		goto free_table;
 	}
 	cpufreq_cdev->id = ret;
 
+	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
+		 cpufreq_cdev->id);
+
 	/* Fill freq-table in descending order of frequencies */
 	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
 		freq = find_next_max(policy->freq_table, freq);
-		cpufreq_cdev->freq_table[i] = freq;
+		cpufreq_cdev->freq_table[i].frequency = freq;
 
 		/* Warn for duplicate entries */
 		if (!freq)
@@ -790,15 +763,26 @@ __cpufreq_cooling_register(struct device_node *np,
 			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 	}
 
-	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
-		 cpufreq_cdev->id);
+	if (capacitance) {
+		cpufreq_cdev->plat_get_static_power = plat_static_func;
+
+		ret = update_freq_table(cpufreq_cdev, capacitance);
+		if (ret) {
+			cdev = ERR_PTR(ret);
+			goto remove_ida;
+		}
+
+		cooling_ops = &cpufreq_power_cooling_ops;
+	} else {
+		cooling_ops = &cpufreq_cooling_ops;
+	}
 
 	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
 						  cooling_ops);
 	if (IS_ERR(cdev))
 		goto remove_ida;
 
-	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0];
+	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
 	cpufreq_cdev->cdev = cdev;
 
 	mutex_lock(&cooling_list_lock);
@@ -815,8 +799,6 @@ __cpufreq_cooling_register(struct device_node *np,
 
 remove_ida:
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
-free_power_table:
-	kfree(cpufreq_cdev->dyn_power_table);
 free_table:
 	kfree(cpufreq_cdev->freq_table);
 free_time_in_idle_timestamp:
@@ -965,7 +947,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 
 	thermal_cooling_device_unregister(cpufreq_cdev->cdev);
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
-	kfree(cpufreq_cdev->dyn_power_table);
 	kfree(cpufreq_cdev->time_in_idle_timestamp);
 	kfree(cpufreq_cdev->time_in_idle);
 	kfree(cpufreq_cdev->freq_table);
-- 
GitLab


From 81ee14da1051ee87c243b9a0e55d83f1aa274b76 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:20 +0530
Subject: [PATCH 0112/1958] thermal: cpu_cooling: create structure for idle
 time stats

We keep two arrays for idle time stats and allocate memory for them
separately. It would be much easier to follow if we create an array of
idle stats structure instead and allocate it once.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 53 +++++++++++++++++------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 67ec52d5f7fc5..11735dba1456b 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -61,6 +61,16 @@ struct freq_table {
 	u32 power;
 };
 
+/**
+ * struct time_in_idle - Idle time stats
+ * @time: previous reading of the absolute time that this cpu was idle
+ * @timestamp: wall time of the last invocation of get_cpu_idle_time_us()
+ */
+struct time_in_idle {
+	u64 time;
+	u64 timestamp;
+};
+
 /**
  * struct cpufreq_cooling_device - data for cooling device with cpufreq
  * @id: unique integer value corresponding to each cpufreq_cooling_device
@@ -76,9 +86,7 @@ struct freq_table {
  *	cpufreq frequencies.
  * @node: list_head to link all cpufreq_cooling_device together.
  * @last_load: load measured by the latest call to cpufreq_get_requested_power()
- * @time_in_idle: previous reading of the absolute time that this cpu was idle
- * @time_in_idle_timestamp: wall time of the last invocation of
- *	get_cpu_idle_time_us()
+ * @idle_time: idle time stats
  * @cpu_dev: the cpu_device of policy->cpu.
  * @plat_get_static_power: callback to calculate the static power
  *
@@ -95,8 +103,7 @@ struct cpufreq_cooling_device {
 	struct freq_table *freq_table;	/* In descending order */
 	struct list_head node;
 	u32 last_load;
-	u64 *time_in_idle;
-	u64 *time_in_idle_timestamp;
+	struct time_in_idle *idle_time;
 	struct device *cpu_dev;
 	get_static_t plat_get_static_power;
 };
@@ -300,18 +307,19 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
 {
 	u32 load;
 	u64 now, now_idle, delta_time, delta_idle;
+	struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];
 
 	now_idle = get_cpu_idle_time(cpu, &now, 0);
-	delta_idle = now_idle - cpufreq_cdev->time_in_idle[cpu_idx];
-	delta_time = now - cpufreq_cdev->time_in_idle_timestamp[cpu_idx];
+	delta_idle = now_idle - idle_time->time;
+	delta_time = now - idle_time->timestamp;
 
 	if (delta_time <= delta_idle)
 		load = 0;
 	else
 		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
 
-	cpufreq_cdev->time_in_idle[cpu_idx] = now_idle;
-	cpufreq_cdev->time_in_idle_timestamp[cpu_idx] = now;
+	idle_time->time = now_idle;
+	idle_time->timestamp = now;
 
 	return load;
 }
@@ -715,22 +723,14 @@ __cpufreq_cooling_register(struct device_node *np,
 
 	cpufreq_cdev->policy = policy;
 	num_cpus = cpumask_weight(policy->related_cpus);
-	cpufreq_cdev->time_in_idle = kcalloc(num_cpus,
-					    sizeof(*cpufreq_cdev->time_in_idle),
-					    GFP_KERNEL);
-	if (!cpufreq_cdev->time_in_idle) {
+	cpufreq_cdev->idle_time = kcalloc(num_cpus,
+					 sizeof(*cpufreq_cdev->idle_time),
+					 GFP_KERNEL);
+	if (!cpufreq_cdev->idle_time) {
 		cdev = ERR_PTR(-ENOMEM);
 		goto free_cdev;
 	}
 
-	cpufreq_cdev->time_in_idle_timestamp =
-		kcalloc(num_cpus, sizeof(*cpufreq_cdev->time_in_idle_timestamp),
-			GFP_KERNEL);
-	if (!cpufreq_cdev->time_in_idle_timestamp) {
-		cdev = ERR_PTR(-ENOMEM);
-		goto free_time_in_idle;
-	}
-
 	/* max_level is an index, not a counter */
 	cpufreq_cdev->max_level = i - 1;
 
@@ -738,7 +738,7 @@ __cpufreq_cooling_register(struct device_node *np,
 					  GFP_KERNEL);
 	if (!cpufreq_cdev->freq_table) {
 		cdev = ERR_PTR(-ENOMEM);
-		goto free_time_in_idle_timestamp;
+		goto free_idle_time;
 	}
 
 	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
@@ -801,10 +801,8 @@ __cpufreq_cooling_register(struct device_node *np,
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
 free_table:
 	kfree(cpufreq_cdev->freq_table);
-free_time_in_idle_timestamp:
-	kfree(cpufreq_cdev->time_in_idle_timestamp);
-free_time_in_idle:
-	kfree(cpufreq_cdev->time_in_idle);
+free_idle_time:
+	kfree(cpufreq_cdev->idle_time);
 free_cdev:
 	kfree(cpufreq_cdev);
 	return cdev;
@@ -947,8 +945,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 
 	thermal_cooling_device_unregister(cpufreq_cdev->cdev);
 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
-	kfree(cpufreq_cdev->time_in_idle_timestamp);
-	kfree(cpufreq_cdev->time_in_idle);
+	kfree(cpufreq_cdev->idle_time);
 	kfree(cpufreq_cdev->freq_table);
 	kfree(cpufreq_cdev);
 }
-- 
GitLab


From da27f69d6a4eaf7bdaaad9d9e1b997a1c8f186aa Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:21 +0530
Subject: [PATCH 0113/1958] thermal: cpu_cooling: get_level() can't fail

The frequency passed to get_level() is returned by cpu_power_to_freq()
and it is guaranteed that get_level() can't fail.

Get rid of error code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 11735dba1456b..2c2d76ac04c38 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -119,22 +119,19 @@ static LIST_HEAD(cpufreq_cdev_list);
  * @cpufreq_cdev: cpufreq_cdev for which the property is required
  * @freq: Frequency
  *
- * Return: level on success, THERMAL_CSTATE_INVALID on error.
+ * Return: level corresponding to the frequency.
  */
 static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 			       unsigned int freq)
 {
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 	unsigned long level;
 
-	for (level = 0; level <= cpufreq_cdev->max_level; level++) {
-		if (freq == cpufreq_cdev->freq_table[level].frequency)
-			return level;
-
-		if (freq > cpufreq_cdev->freq_table[level].frequency)
+	for (level = 1; level <= cpufreq_cdev->max_level; level++)
+		if (freq > freq_table[level].frequency)
 			break;
-	}
 
-	return THERMAL_CSTATE_INVALID;
+	return level - 1;
 }
 
 /**
@@ -627,13 +624,6 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
 
 	*state = get_level(cpufreq_cdev, target_freq);
-	if (*state == THERMAL_CSTATE_INVALID) {
-		dev_err_ratelimited(&cdev->device,
-				    "Failed to convert %dKHz for cpu %d into a cdev state\n",
-				    target_freq, policy->cpu);
-		return -EINVAL;
-	}
-
 	trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
 				      power);
 	return 0;
-- 
GitLab


From 53ca1ece60a719e23f686c647cb05d9e12a31974 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:22 +0530
Subject: [PATCH 0114/1958] thermal: cpu_cooling: don't store cpu_dev in
 cpufreq_cdev

'cpu_dev' is used by only one function, get_static_power(), and it
wouldn't be time consuming to get the cpu device structure within it.
This would help removing cpu_dev from struct cpufreq_cooling_device.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 2c2d76ac04c38..bfa9731e7cdb2 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -87,7 +87,6 @@ struct time_in_idle {
  * @node: list_head to link all cpufreq_cooling_device together.
  * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  * @idle_time: idle time stats
- * @cpu_dev: the cpu_device of policy->cpu.
  * @plat_get_static_power: callback to calculate the static power
  *
  * This structure is required for keeping information of each registered
@@ -104,7 +103,6 @@ struct cpufreq_cooling_device {
 	struct list_head node;
 	u32 last_load;
 	struct time_in_idle *idle_time;
-	struct device *cpu_dev;
 	get_static_t plat_get_static_power;
 };
 
@@ -259,8 +257,6 @@ static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
 		freq_table[i].power = power;
 	}
 
-	cpufreq_cdev->cpu_dev = dev;
-
 	return 0;
 }
 
@@ -342,19 +338,22 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
 {
 	struct dev_pm_opp *opp;
 	unsigned long voltage;
-	struct cpumask *cpumask = cpufreq_cdev->policy->related_cpus;
+	struct cpufreq_policy *policy = cpufreq_cdev->policy;
+	struct cpumask *cpumask = policy->related_cpus;
 	unsigned long freq_hz = freq * 1000;
+	struct device *dev;
 
-	if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) {
+	if (!cpufreq_cdev->plat_get_static_power) {
 		*power = 0;
 		return 0;
 	}
 
-	opp = dev_pm_opp_find_freq_exact(cpufreq_cdev->cpu_dev, freq_hz,
-					 true);
+	dev = get_cpu_device(policy->cpu);
+	WARN_ON(!dev);
+
+	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
 	if (IS_ERR(opp)) {
-		dev_warn_ratelimited(cpufreq_cdev->cpu_dev,
-				     "Failed to find OPP for frequency %lu: %ld\n",
+		dev_warn_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
 				     freq_hz, PTR_ERR(opp));
 		return -EINVAL;
 	}
@@ -363,8 +362,7 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
 	dev_pm_opp_put(opp);
 
 	if (voltage == 0) {
-		dev_err_ratelimited(cpufreq_cdev->cpu_dev,
-				    "Failed to get voltage for frequency %lu\n",
+		dev_err_ratelimited(dev, "Failed to get voltage for frequency %lu\n",
 				    freq_hz);
 		return -EINVAL;
 	}
-- 
GitLab


From cb1b631864ad7e34324ceccb73d0226dd00ad90d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:23 +0530
Subject: [PATCH 0115/1958] thermal: cpu_cooling: 'freq' can't be zero in
 cpufreq_state2power()

The frequency table shouldn't have any zero frequency entries and so
such a check isn't required. Though it would be better to make sure
'state' is within limits.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index bfa9731e7cdb2..9cbf7bdbeb7a3 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -564,12 +564,13 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 	int ret;
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
+	/* Request state should be less than max_level */
+	if (WARN_ON(state > cpufreq_cdev->max_level))
+		return -EINVAL;
+
 	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
 
 	freq = cpufreq_cdev->freq_table[state].frequency;
-	if (!freq)
-		return -EINVAL;
-
 	dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
 	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
 	if (ret)
-- 
GitLab


From d72b4015839cca333a8e0108b1739a50f03d2acc Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 25 Apr 2017 15:57:24 +0530
Subject: [PATCH 0116/1958] thermal: cpu_cooling: Rearrange struct
 cpufreq_cooling_device

This shrinks the size of the structure on arm64 by 8 bytes by avoiding
padding of 4 bytes at two places.

Also add missing doc comment for freq_table

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 9cbf7bdbeb7a3..1305020790b28 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -75,17 +75,18 @@ struct time_in_idle {
  * struct cpufreq_cooling_device - data for cooling device with cpufreq
  * @id: unique integer value corresponding to each cpufreq_cooling_device
  *	registered.
- * @cdev: thermal_cooling_device pointer to keep track of the
- *	registered cooling device.
- * @policy: cpufreq policy.
+ * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  * @cpufreq_state: integer value representing the current state of cpufreq
  *	cooling	devices.
  * @clipped_freq: integer value representing the absolute value of the clipped
  *	frequency.
  * @max_level: maximum cooling level. One less than total number of valid
  *	cpufreq frequencies.
+ * @freq_table: Freq table in descending order of frequencies
+ * @cdev: thermal_cooling_device pointer to keep track of the
+ *	registered cooling device.
+ * @policy: cpufreq policy.
  * @node: list_head to link all cpufreq_cooling_device together.
- * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  * @idle_time: idle time stats
  * @plat_get_static_power: callback to calculate the static power
  *
@@ -94,14 +95,14 @@ struct time_in_idle {
  */
 struct cpufreq_cooling_device {
 	int id;
-	struct thermal_cooling_device *cdev;
-	struct cpufreq_policy *policy;
+	u32 last_load;
 	unsigned int cpufreq_state;
 	unsigned int clipped_freq;
 	unsigned int max_level;
 	struct freq_table *freq_table;	/* In descending order */
+	struct thermal_cooling_device *cdev;
+	struct cpufreq_policy *policy;
 	struct list_head node;
-	u32 last_load;
 	struct time_in_idle *idle_time;
 	get_static_t plat_get_static_power;
 };
-- 
GitLab


From f19b1a1735f7453c35031ed5ca3883f20176dde6 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 23 May 2017 12:33:06 +0530
Subject: [PATCH 0117/1958] thermal: cpu_cooling: Replace kmalloc with
 kmalloc_array

Checkpatch reports following:

WARNING: Prefer kmalloc_array over kmalloc with multiply
+	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i,

Fix that.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/cpu_cooling.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 1305020790b28..908a8014cf767 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -724,8 +724,9 @@ __cpufreq_cooling_register(struct device_node *np,
 	/* max_level is an index, not a counter */
 	cpufreq_cdev->max_level = i - 1;
 
-	cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i,
-					  GFP_KERNEL);
+	cpufreq_cdev->freq_table = kmalloc_array(i,
+					sizeof(*cpufreq_cdev->freq_table),
+					GFP_KERNEL);
 	if (!cpufreq_cdev->freq_table) {
 		cdev = ERR_PTR(-ENOMEM);
 		goto free_idle_time;
-- 
GitLab


From 4a78cc644eed3cf2dae00c3a959910a86c140fd6 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 26 May 2017 17:10:15 +0200
Subject: [PATCH 0118/1958] mtd: nand: Make sure drivers not supporting
 SET/GET_FEATURES return -ENOTSUPP

A lot of drivers are providing their own ->cmdfunc(), and most of the
time this implementation does not support all possible NAND operations.
But since ->cmdfunc() cannot return an error code, the core has no way
to know that the operation it requested is not supported.

This is a problem we cannot address for all kind of operations with the
current design, but we can prevent these silent failures for the
GET/SET FEATURES operation by overloading the default
->onfi_{set,get}_features() methods with one returning -ENOTSUPP.

Reported-by: Chris Packham <Chris.Packham@alliedtelesis.co.nz>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Chris Packham <Chris.Packham@alliedtelesis.co.nz>
---
 drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c  |  2 ++
 drivers/mtd/nand/cafe_nand.c                  |  2 ++
 drivers/mtd/nand/denali.c                     |  2 ++
 drivers/mtd/nand/docg4.c                      |  2 ++
 drivers/mtd/nand/fsl_elbc_nand.c              |  2 ++
 drivers/mtd/nand/fsl_ifc_nand.c               |  2 ++
 drivers/mtd/nand/hisi504_nand.c               |  2 ++
 drivers/mtd/nand/mpc5121_nfc.c                |  2 ++
 drivers/mtd/nand/nand_base.c                  | 19 +++++++++++++++++++
 drivers/mtd/nand/pxa3xx_nand.c                |  2 ++
 drivers/mtd/nand/qcom_nandc.c                 |  2 ++
 drivers/mtd/nand/sh_flctl.c                   |  2 ++
 drivers/mtd/nand/vf610_nfc.c                  |  2 ++
 drivers/staging/mt29f_spinand/mt29f_spinand.c |  2 ++
 include/linux/mtd/nand.h                      |  5 +++++
 15 files changed, 50 insertions(+)

diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
index f1da4ea88f2c0..54bac5b73f0ab 100644
--- a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
@@ -392,6 +392,8 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 	b47n->nand_chip.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
 	b47n->nand_chip.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
 	b47n->nand_chip.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
+	b47n->nand_chip.onfi_set_features = nand_onfi_get_set_features_notsupp;
+	b47n->nand_chip.onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	nand_chip->chip_delay = 50;
 	b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH;
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index d40c32d311d8f..2fd733eba0a30 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -654,6 +654,8 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	cafe->nand.read_buf = cafe_read_buf;
 	cafe->nand.write_buf = cafe_write_buf;
 	cafe->nand.select_chip = cafe_select_chip;
+	cafe->nand.onfi_set_features = nand_onfi_get_set_features_notsupp;
+	cafe->nand.onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	cafe->nand.chip_delay = 0;
 
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 16634df2e39a7..b3c99d98fdee9 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1531,6 +1531,8 @@ int denali_init(struct denali_nand_info *denali)
 	chip->cmdfunc = denali_cmdfunc;
 	chip->read_byte = denali_read_byte;
 	chip->waitfunc = denali_waitfunc;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	/*
 	 * scan for NAND devices attached to the controller
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 7af2a3cd949ee..a27a84fbfb840 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -1260,6 +1260,8 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->read_buf = docg4_read_buf;
 	nand->write_buf = docg4_write_buf16;
 	nand->erase = docg4_erase_block;
+	nand->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	nand->onfi_get_features = nand_onfi_get_set_features_notsupp;
 	nand->ecc.read_page = docg4_read_page;
 	nand->ecc.write_page = docg4_write_page;
 	nand->ecc.read_page_raw = docg4_read_page_raw;
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 113f76e599372..b9ac16f05057c 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -775,6 +775,8 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	chip->select_chip = fsl_elbc_select_chip;
 	chip->cmdfunc = fsl_elbc_cmdfunc;
 	chip->waitfunc = fsl_elbc_wait;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index d1570f512f0bb..89e14daeaba69 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -831,6 +831,8 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->select_chip = fsl_ifc_select_chip;
 	chip->cmdfunc = fsl_ifc_cmdfunc;
 	chip->waitfunc = fsl_ifc_wait;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c
index e40364eeb556b..530caa80b1b69 100644
--- a/drivers/mtd/nand/hisi504_nand.c
+++ b/drivers/mtd/nand/hisi504_nand.c
@@ -764,6 +764,8 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	chip->write_buf		= hisi_nfc_write_buf;
 	chip->read_buf		= hisi_nfc_read_buf;
 	chip->chip_delay	= HINFC504_CHIP_DELAY;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 
 	hisi_nfc_host_init(host);
 
diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index 6d6eaed2d20c2..0e86fb6277c3a 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c
@@ -708,6 +708,8 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	chip->read_buf = mpc5121_nfc_read_buf;
 	chip->write_buf = mpc5121_nfc_write_buf;
 	chip->select_chip = mpc5121_nfc_select_chip;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 	chip->ecc.mode = NAND_ECC_SOFT;
 	chip->ecc.algo = NAND_ECC_HAMMING;
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 0b3b1a88091a7..ed08e3946727e 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3420,6 +3420,25 @@ static int nand_onfi_get_features(struct mtd_info *mtd, struct nand_chip *chip,
 	return 0;
 }
 
+/**
+ * nand_onfi_get_set_features_notsupp - set/get features stub returning
+ *					-ENOTSUPP
+ * @mtd: MTD device structure
+ * @chip: nand chip info structure
+ * @addr: feature address.
+ * @subfeature_param: the subfeature parameters, a four bytes array.
+ *
+ * Should be used by NAND controller drivers that do not support the SET/GET
+ * FEATURES operations.
+ */
+int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd,
+				       struct nand_chip *chip, int addr,
+				       u8 *subfeature_param)
+{
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(nand_onfi_get_set_features_notsupp);
+
 /**
  * nand_suspend - [MTD Interface] Suspend the NAND flash
  * @mtd: MTD device structure
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 649ba8200832d..74dae4bbdac8f 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1812,6 +1812,8 @@ static int alloc_nand_resource(struct platform_device *pdev)
 		chip->write_buf		= pxa3xx_nand_write_buf;
 		chip->options		|= NAND_NO_SUBPAGE_WRITE;
 		chip->cmdfunc		= nand_cmdfunc;
+		chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+		chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 	}
 
 	nand_hw_control_init(chip->controller);
diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index 57d483ac5765a..88af7145a51a2 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -2008,6 +2008,8 @@ static int qcom_nand_host_init(struct qcom_nand_controller *nandc,
 	chip->read_byte		= qcom_nandc_read_byte;
 	chip->read_buf		= qcom_nandc_read_buf;
 	chip->write_buf		= qcom_nandc_write_buf;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 
 	/*
 	 * the bad block marker is readable only when we read the last codeword
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 442ce619b3b6d..891ac7b993050 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -1183,6 +1183,8 @@ static int flctl_probe(struct platform_device *pdev)
 	nand->read_buf = flctl_read_buf;
 	nand->select_chip = flctl_select_chip;
 	nand->cmdfunc = flctl_cmdfunc;
+	nand->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	nand->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	if (pdata->flcmncr_val & SEL_16BIT)
 		nand->options |= NAND_BUSWIDTH_16;
diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 3ea4bb19e12d9..744ab10e89621 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c
@@ -703,6 +703,8 @@ static int vf610_nfc_probe(struct platform_device *pdev)
 	chip->read_buf = vf610_nfc_read_buf;
 	chip->write_buf = vf610_nfc_write_buf;
 	chip->select_chip = vf610_nfc_select_chip;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->options |= NAND_NO_SUBPAGE_WRITE;
 
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index e389009fca42c..a4e3ae8f0c85f 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -915,6 +915,8 @@ static int spinand_probe(struct spi_device *spi_nand)
 	chip->waitfunc	= spinand_wait;
 	chip->options	|= NAND_CACHEPRG;
 	chip->select_chip = spinand_select_chip;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	mtd = nand_to_mtd(chip);
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7a01d2eb74430..28f7dd9177e92 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1259,6 +1259,11 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page);
 
+/* Stub used by drivers that do not support GET/SET FEATURES operations */
+int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd,
+				       struct nand_chip *chip, int addr,
+				       u8 *subfeature_param);
+
 /* Default read_page_raw implementation */
 int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 		       uint8_t *buf, int oob_required, int page);
-- 
GitLab


From b316d02a13c3a8ebe681b7232defbdf3fa28c41b Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 22 May 2017 18:28:51 +0800
Subject: [PATCH 0119/1958] iommu/vt-d: Unwrap __get_valid_domain_for_dev()

We do find_domain() in __get_valid_domain_for_dev(), while we do the
same thing in get_valid_domain_for_dev().  No need to do it twice.

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 90ab0115d78e8..ee9c258d3ae0c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2387,7 +2387,7 @@ static struct dmar_domain *find_domain(struct device *dev)
 
 	/* No lock here, assumes no domain exit in normal case */
 	info = dev->archdata.iommu;
-	if (info)
+	if (likely(info))
 		return info->domain;
 	return NULL;
 }
@@ -3475,7 +3475,7 @@ static unsigned long intel_alloc_iova(struct device *dev,
 	return iova_pfn;
 }
 
-static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
+static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
 {
 	struct dmar_domain *domain, *tmp;
 	struct dmar_rmrr_unit *rmrr;
@@ -3522,18 +3522,6 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
 	return domain;
 }
 
-static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
-{
-	struct device_domain_info *info;
-
-	/* No lock here, assumes no domain exit in normal case */
-	info = dev->archdata.iommu;
-	if (likely(info))
-		return info->domain;
-
-	return __get_valid_domain_for_dev(dev);
-}
-
 /* Check if the dev needs to go through non-identity map and unmap process.*/
 static int iommu_no_mapping(struct device *dev)
 {
-- 
GitLab


From 71bb620df634b22a08efd62a0f93c3f2aceaa8e2 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 24 May 2017 16:31:23 +0200
Subject: [PATCH 0120/1958] iommu/vt-d: Constify irq_domain_ops

struct irq_domain_ops is not modified, so it can be made const.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel_irq_remapping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index a190cbd76ef71..f7ef4a5d47854 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  * the dmar_global_lock.
  */
 static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
-static struct irq_domain_ops intel_ir_domain_ops;
+static const struct irq_domain_ops intel_ir_domain_ops;
 
 static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
 static int __init parse_ioapics_under_ir(void);
@@ -1396,7 +1396,7 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
 	modify_irte(&data->irq_2_iommu, &entry);
 }
 
-static struct irq_domain_ops intel_ir_domain_ops = {
+static const struct irq_domain_ops intel_ir_domain_ops = {
 	.alloc = intel_irq_remapping_alloc,
 	.free = intel_irq_remapping_free,
 	.activate = intel_irq_remapping_activate,
-- 
GitLab


From 30bf2df65b0d18f53ee9c92131239f66c8f55d63 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 15 May 2017 16:25:03 +0200
Subject: [PATCH 0121/1958] iommu/amd: Ratelimit io-page-faults per device

Misbehaving devices can cause an endless chain of
io-page-faults, flooding dmesg and making the system-log
unusable or even prevent the system from booting.

So ratelimit the error messages about io-page-faults on a
per-device basis.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 40 ++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 63cacf5d6cf23..d3ff766a8ca6d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -138,6 +138,8 @@ struct iommu_dev_data {
 					     PPR completions */
 	u32 errata;			  /* Bitmap for errata to apply */
 	bool use_vapic;			  /* Enable device to use vapic mode */
+
+	struct ratelimit_state rs;	  /* Ratelimit IOPF messages */
 };
 
 /*
@@ -253,6 +255,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
 	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
 	spin_unlock_irqrestore(&dev_data_list_lock, flags);
 
+	ratelimit_default_init(&dev_data->rs);
+
 	return dev_data;
 }
 
@@ -551,6 +555,29 @@ static void dump_command(unsigned long phys_addr)
 		pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
 }
 
+static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
+					u64 address, int flags)
+{
+	struct iommu_dev_data *dev_data = NULL;
+	struct pci_dev *pdev;
+
+	pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff);
+	if (pdev)
+		dev_data = get_dev_data(&pdev->dev);
+
+	if (dev_data && __ratelimit(&dev_data->rs)) {
+		dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			domain_id, address, flags);
+	} else if (printk_ratelimit()) {
+		pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			domain_id, address, flags);
+	}
+
+	if (pdev)
+		pci_dev_put(pdev);
+}
+
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
 	int type, devid, domid, flags;
@@ -575,7 +602,12 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		goto retry;
 	}
 
-	printk(KERN_ERR "AMD-Vi: Event logged [");
+	if (type == EVENT_TYPE_IO_FAULT) {
+		amd_iommu_report_page_fault(devid, domid, address, flags);
+		return;
+	} else {
+		printk(KERN_ERR "AMD-Vi: Event logged [");
+	}
 
 	switch (type) {
 	case EVENT_TYPE_ILL_DEV:
@@ -585,12 +617,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		       address, flags);
 		dump_dte_entry(devid);
 		break;
-	case EVENT_TYPE_IO_FAULT:
-		printk("IO_PAGE_FAULT device=%02x:%02x.%x "
-		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       domid, address, flags);
-		break;
 	case EVENT_TYPE_DEV_TAB_ERR:
 		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
 		       "address=0x%016llx flags=0x%04x]\n",
-- 
GitLab


From e2f9d45fb452c60c9aaab55ce6f40601f9ec6516 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 24 May 2017 16:31:16 +0200
Subject: [PATCH 0122/1958] iommu/amd: Constify irq_domain_ops

struct irq_domain_ops is not modified, so it can be made const.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d3ff766a8ca6d..d7748955184b5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4299,7 +4299,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
 					    irte_info->index);
 }
 
-static struct irq_domain_ops amd_ir_domain_ops = {
+static const struct irq_domain_ops amd_ir_domain_ops = {
 	.alloc = irq_remapping_alloc,
 	.free = irq_remapping_free,
 	.activate = irq_remapping_activate,
-- 
GitLab


From 8066360744743fb7f2dee1e2041fc9fc92f9d6b5 Mon Sep 17 00:00:00 2001
From: David Lowe <dave-lowe@ntlworld.com>
Date: Sat, 22 Apr 2017 18:28:00 +0100
Subject: [PATCH 0123/1958] rtc: rtc-ds1307: enable support for mcp794xx as a
 wakeup source without IRQ

This patch extends the fixes for ds1337, ds1339, ds3231 in commit
8bc2a40730ec ("rtc: ds1307: add support for the DT property
'wakeup-source'") to mcp794xx devices, so that those parts can similarly be
used as a wakeup source without an IRQ to the processor.

Tested on Raspberry Pi ZeroW with MCP79400.

Signed-off-by: David Lowe <dave-lowe@ntlworld.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 94e13d7037358..a264938500afd 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -1407,7 +1407,8 @@ static int ds1307_probe(struct i2c_client *client,
 		break;
 	case mcp794xx:
 		rtc_ops = &mcp794xx_rtc_ops;
-		if (ds1307->irq > 0 && chip->alarm) {
+		if (chip->alarm && (ds1307->irq > 0 ||
+				    ds1307_can_wakeup_device)) {
 			irq_handler = mcp794xx_irq;
 			want_irq = true;
 		}
-- 
GitLab


From 2de9261c18c2b09a8025872bb470853dc8db726c Mon Sep 17 00:00:00 2001
From: Gary Bisson <gary.bisson@boundarydevices.com>
Date: Tue, 25 Apr 2017 16:45:14 +0200
Subject: [PATCH 0124/1958] rtc: m41t80: fix SQWE override when setting an
 alarm

Currently setting an alarm clears the SQWE bit which means that the
clock output is disabled no matter its previous state.

Signed-off-by: Gary Bisson <gary.bisson@boundarydevices.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-m41t80.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 5ec4653022fff..b0749645aa48c 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -332,6 +332,9 @@ static int m41t80_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		return err;
 	}
 
+	/* Keep SQWE bit value */
+	alarmvals[0] |= (ret & M41T80_ALMON_SQWE);
+
 	ret = i2c_smbus_read_byte_data(client, M41T80_REG_FLAGS);
 	if (ret < 0)
 		return ret;
-- 
GitLab


From 0f546b058b86ea2f661cc7a6e931cee5a29959ef Mon Sep 17 00:00:00 2001
From: Gary Bisson <gary.bisson@boundarydevices.com>
Date: Tue, 25 Apr 2017 16:45:15 +0200
Subject: [PATCH 0125/1958] rtc: m41t80: fix SQW dividers override when setting
 a date

This patch is only relevant for RTC with the SQ_ALT feature which
means the clock output frequency divider is stored in the weekday
register.

Current implementation discards the previous dividers value and clear
them as soon as the time is set.

Signed-off-by: Gary Bisson <gary.bisson@boundarydevices.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-m41t80.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index b0749645aa48c..1e8ba7a469ac9 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -227,6 +227,7 @@ static int m41t80_get_datetime(struct i2c_client *client,
 /* Sets the given date and time to the real time clock. */
 static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 {
+	struct m41t80_data *clientdata = i2c_get_clientdata(client);
 	unsigned char buf[8];
 	int err, flags;
 
@@ -242,6 +243,17 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year - 100);
 	buf[M41T80_REG_WDAY] = tm->tm_wday;
 
+	/* If the square wave output is controlled in the weekday register */
+	if (clientdata->features & M41T80_FEATURE_SQ_ALT) {
+		int val;
+
+		val = i2c_smbus_read_byte_data(client, M41T80_REG_WDAY);
+		if (val < 0)
+			return val;
+
+		buf[M41T80_REG_WDAY] |= (val & 0xf0);
+	}
+
 	err = i2c_smbus_write_i2c_block_data(client, M41T80_REG_SSEC,
 					     sizeof(buf), buf);
 	if (err < 0) {
-- 
GitLab


From 17e296eb35dc5b15511ffa4ef5de67e53cb5a284 Mon Sep 17 00:00:00 2001
From: Gary Bisson <gary.bisson@boundarydevices.com>
Date: Tue, 25 Apr 2017 16:45:16 +0200
Subject: [PATCH 0126/1958] rtc: m41t80: remove sqw sysfs entry

In order to use the proper clock framework to control this feature.

Signed-off-by: Gary Bisson <gary.bisson@boundarydevices.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-m41t80.c | 88 ----------------------------------------
 1 file changed, 88 deletions(-)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 1e8ba7a469ac9..6145bc1641622 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -446,96 +446,8 @@ static ssize_t flags_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(flags);
 
-static ssize_t sqwfreq_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	struct m41t80_data *clientdata = i2c_get_clientdata(client);
-	int val, reg_sqw;
-
-	if (!(clientdata->features & M41T80_FEATURE_SQ))
-		return -EINVAL;
-
-	reg_sqw = M41T80_REG_SQW;
-	if (clientdata->features & M41T80_FEATURE_SQ_ALT)
-		reg_sqw = M41T80_REG_WDAY;
-	val = i2c_smbus_read_byte_data(client, reg_sqw);
-	if (val < 0)
-		return val;
-	val = (val >> 4) & 0xf;
-	switch (val) {
-	case 0:
-		break;
-	case 1:
-		val = 32768;
-		break;
-	default:
-		val = 32768 >> val;
-	}
-	return sprintf(buf, "%d\n", val);
-}
-
-static ssize_t sqwfreq_store(struct device *dev,
-			     struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	struct m41t80_data *clientdata = i2c_get_clientdata(client);
-	int almon, sqw, reg_sqw, rc;
-	unsigned long val;
-
-	rc = kstrtoul(buf, 0, &val);
-	if (rc < 0)
-		return rc;
-
-	if (!(clientdata->features & M41T80_FEATURE_SQ))
-		return -EINVAL;
-
-	if (val) {
-		if (!is_power_of_2(val))
-			return -EINVAL;
-		val = ilog2(val);
-		if (val == 15)
-			val = 1;
-		else if (val < 14)
-			val = 15 - val;
-		else
-			return -EINVAL;
-	}
-	/* disable SQW, set SQW frequency & re-enable */
-	almon = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
-	if (almon < 0)
-		return almon;
-	reg_sqw = M41T80_REG_SQW;
-	if (clientdata->features & M41T80_FEATURE_SQ_ALT)
-		reg_sqw = M41T80_REG_WDAY;
-	sqw = i2c_smbus_read_byte_data(client, reg_sqw);
-	if (sqw < 0)
-		return sqw;
-	sqw = (sqw & 0x0f) | (val << 4);
-
-	rc = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
-				       almon & ~M41T80_ALMON_SQWE);
-	if (rc < 0)
-		return rc;
-
-	if (val) {
-		rc = i2c_smbus_write_byte_data(client, reg_sqw, sqw);
-		if (rc < 0)
-			return rc;
-
-		rc = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
-					       almon | M41T80_ALMON_SQWE);
-		if (rc < 0)
-			return rc;
-	}
-	return count;
-}
-static DEVICE_ATTR_RW(sqwfreq);
-
 static struct attribute *attrs[] = {
 	&dev_attr_flags.attr,
-	&dev_attr_sqwfreq.attr,
 	NULL,
 };
 
-- 
GitLab


From 1373e77b4f108929caa448bb23f341a4af774cad Mon Sep 17 00:00:00 2001
From: Gary Bisson <gary.bisson@boundarydevices.com>
Date: Tue, 25 Apr 2017 16:45:17 +0200
Subject: [PATCH 0127/1958] rtc: m41t80: add clock provider support

Some devices supported by the m41t80 driver have a programmable
square-wave output signal (see M41T80_FEATURE_SQ).

This enables to use this feature as a clock provider of common
clock framework.

Signed-off-by: Gary Bisson <gary.bisson@boundarydevices.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-m41t80.c | 172 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 6145bc1641622..8940e9e43ea0f 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -16,6 +16,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bcd.h>
+#include <linux/clk-provider.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -53,6 +54,8 @@
 #define M41T80_ALARM_REG_SIZE	\
 	(M41T80_REG_ALARM_SEC + 1 - M41T80_REG_ALARM_MON)
 
+#define M41T80_SQW_MAX_FREQ	32768
+
 #define M41T80_SEC_ST		BIT(7)	/* ST: Stop Bit */
 #define M41T80_ALMON_AFE	BIT(7)	/* AFE: AF Enable Bit */
 #define M41T80_ALMON_SQWE	BIT(6)	/* SQWE: SQW Enable Bit */
@@ -147,7 +150,11 @@ MODULE_DEVICE_TABLE(of, m41t80_of_match);
 
 struct m41t80_data {
 	unsigned long features;
+	struct i2c_client *client;
 	struct rtc_device *rtc;
+#ifdef CONFIG_COMMON_CLK
+	struct clk_hw sqw;
+#endif
 };
 
 static irqreturn_t m41t80_handle_irq(int irq, void *dev_id)
@@ -455,6 +462,166 @@ static struct attribute_group attr_group = {
 	.attrs = attrs,
 };
 
+#ifdef CONFIG_COMMON_CLK
+#define sqw_to_m41t80_data(_hw) container_of(_hw, struct m41t80_data, sqw)
+
+static unsigned long m41t80_sqw_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int reg_sqw = (m41t80->features & M41T80_FEATURE_SQ_ALT) ?
+		M41T80_REG_WDAY : M41T80_REG_SQW;
+	int ret = i2c_smbus_read_byte_data(client, reg_sqw);
+	unsigned long val = M41T80_SQW_MAX_FREQ;
+
+	if (ret < 0)
+		return 0;
+
+	ret >>= 4;
+	if (ret == 0)
+		val = 0;
+	else if (ret > 1)
+		val = val / (1 << ret);
+
+	return val;
+}
+
+static long m41t80_sqw_round_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long *prate)
+{
+	int i, freq = M41T80_SQW_MAX_FREQ;
+
+	if (freq <= rate)
+		return freq;
+
+	for (i = 2; i <= ilog2(M41T80_SQW_MAX_FREQ); i++) {
+		freq /= 1 << i;
+		if (freq <= rate)
+			return freq;
+	}
+
+	return 0;
+}
+
+static int m41t80_sqw_set_rate(struct clk_hw *hw, unsigned long rate,
+			       unsigned long parent_rate)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int reg_sqw = (m41t80->features & M41T80_FEATURE_SQ_ALT) ?
+		M41T80_REG_WDAY : M41T80_REG_SQW;
+	int reg, ret, val = 0;
+
+	if (rate) {
+		if (!is_power_of_2(rate))
+			return -EINVAL;
+		val = ilog2(rate);
+		if (val == ilog2(M41T80_SQW_MAX_FREQ))
+			val = 1;
+		else if (val < (ilog2(M41T80_SQW_MAX_FREQ) - 1))
+			val = ilog2(M41T80_SQW_MAX_FREQ) - val;
+		else
+			return -EINVAL;
+	}
+
+	reg = i2c_smbus_read_byte_data(client, reg_sqw);
+	if (reg < 0)
+		return reg;
+
+	reg = (reg & 0x0f) | (val << 4);
+
+	ret = i2c_smbus_write_byte_data(client, reg_sqw, reg);
+	if (ret < 0)
+		return ret;
+
+	return -EINVAL;
+}
+
+static int m41t80_sqw_control(struct clk_hw *hw, bool enable)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int ret = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
+
+	if (ret < 0)
+		return ret;
+
+	if (enable)
+		ret |= M41T80_ALMON_SQWE;
+	else
+		ret &= ~M41T80_ALMON_SQWE;
+
+	return i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON, ret);
+}
+
+static int m41t80_sqw_prepare(struct clk_hw *hw)
+{
+	return m41t80_sqw_control(hw, 1);
+}
+
+static void m41t80_sqw_unprepare(struct clk_hw *hw)
+{
+	m41t80_sqw_control(hw, 0);
+}
+
+static int m41t80_sqw_is_prepared(struct clk_hw *hw)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int ret = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
+
+	if (ret < 0)
+		return ret;
+
+	return !!(ret & M41T80_ALMON_SQWE);
+}
+
+static const struct clk_ops m41t80_sqw_ops = {
+	.prepare = m41t80_sqw_prepare,
+	.unprepare = m41t80_sqw_unprepare,
+	.is_prepared = m41t80_sqw_is_prepared,
+	.recalc_rate = m41t80_sqw_recalc_rate,
+	.round_rate = m41t80_sqw_round_rate,
+	.set_rate = m41t80_sqw_set_rate,
+};
+
+static struct clk *m41t80_sqw_register_clk(struct m41t80_data *m41t80)
+{
+	struct i2c_client *client = m41t80->client;
+	struct device_node *node = client->dev.of_node;
+	struct clk *clk;
+	struct clk_init_data init;
+	int ret;
+
+	/* First disable the clock */
+	ret = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	ret = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
+					ret & ~(M41T80_ALMON_SQWE));
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	init.name = "m41t80-sqw";
+	init.ops = &m41t80_sqw_ops;
+	init.flags = 0;
+	init.parent_names = NULL;
+	init.num_parents = 0;
+	m41t80->sqw.init = &init;
+
+	/* optional override of the clockname */
+	of_property_read_string(node, "clock-output-names", &init.name);
+
+	/* register the clock */
+	clk = clk_register(&client->dev, &m41t80->sqw);
+	if (!IS_ERR(clk))
+		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+
+	return clk;
+}
+#endif
+
 #ifdef CONFIG_RTC_DRV_M41T80_WDT
 /*
  *****************************************************************************
@@ -772,6 +939,7 @@ static int m41t80_probe(struct i2c_client *client,
 	if (!m41t80_data)
 		return -ENOMEM;
 
+	m41t80_data->client = client;
 	if (client->dev.of_node)
 		m41t80_data->features = (unsigned long)
 			of_device_get_match_data(&client->dev);
@@ -863,6 +1031,10 @@ static int m41t80_probe(struct i2c_client *client,
 			return rc;
 		}
 	}
+#endif
+#ifdef CONFIG_COMMON_CLK
+	if (m41t80_data->features & M41T80_FEATURE_SQ)
+		m41t80_sqw_register_clk(m41t80_data);
 #endif
 	return 0;
 }
-- 
GitLab


From 6dc1cf6f932bb0ea4d8f5e913a0a401ecacd2f03 Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Date: Fri, 19 May 2017 15:35:09 +0530
Subject: [PATCH 0128/1958] rtc: opal: Handle disabled TPO in
 opal_get_tpo_time()

On PowerNV platform when Timed-Power-On(TPO) is disabled, read of
stored TPO yields value with all date components set to '0' inside
opal_get_tpo_time(). The function opal_to_tm() then converts it to an
offset from year 1900 yielding alarm-time == "1900-00-01
00:00:00". This causes problems with __rtc_read_alarm() that
expecting an offset from "1970-00-01 00:00:00" and returned alarm-time
results in a -ve value for time64_t. Which ultimately results in this
error reported in kernel logs with a seemingly garbage value:

"rtc rtc0: invalid alarm value: -2-1--1041528741
2005511117:71582844:32"

We fix this by explicitly handling the case of all alarm date-time
components being '0' inside opal_get_tpo_time() and returning -ENOENT
in such a case. This signals generic rtc that no alarm is set and it
bails out from the alarm initialization flow without reporting the
above error.

Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Reported-by: Steve Best <sbest@redhat.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-opal.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c
index ea20f627dabea..2d84e2a28772c 100644
--- a/drivers/rtc/rtc-opal.c
+++ b/drivers/rtc/rtc-opal.c
@@ -142,6 +142,16 @@ static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm)
 
 	y_m_d = be32_to_cpu(__y_m_d);
 	h_m_s_ms = ((u64)be32_to_cpu(__h_m) << 32);
+
+	/* check if no alarm is set */
+	if (y_m_d == 0 && h_m_s_ms == 0) {
+		pr_debug("No alarm is set\n");
+		rc = -ENOENT;
+		goto exit;
+	} else {
+		pr_debug("Alarm set to %x %llx\n", y_m_d, h_m_s_ms);
+	}
+
 	opal_to_tm(y_m_d, h_m_s_ms, &alarm->time);
 
 exit:
-- 
GitLab


From da96aea0ed177105cb13ee83b328f6c61e061d3f Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Date: Fri, 19 May 2017 22:18:55 +0530
Subject: [PATCH 0129/1958] rtc: interface: Validate alarm-time before handling
 rollover

In function __rtc_read_alarm() its possible for an alarm time-stamp to
be invalid even after replacing missing components with current
time-stamp. The condition 'alarm->time.tm_year < 70' will trigger this
case and will cause the call to 'rtc_tm_to_time64(&alarm->time)'
return a negative value for variable t_alm.

While handling alarm rollover this negative t_alm (assumed to seconds
offset from '1970-01-01 00:00:00') is converted back to rtc_time via
rtc_time64_to_tm() which results in this error log with seemingly
garbage values:

"rtc rtc0: invalid alarm value: -2-1--1041528741
2005511117:71582844:32"

This error was generated when the rtc driver (rtc-opal in this case)
returned an alarm time-stamp of '00-00-00 00:00:00' to indicate that
the alarm is disabled. Though I have submitted a separate fix for the
rtc-opal driver, this issue may potentially impact other
existing/future rtc drivers.

To fix this issue the patch validates the alarm time-stamp just after
filling up the missing datetime components and if rtc_valid_tm() still
reports it to be invalid then bails out of the function without
handling the rollover.

Reported-by: Steve Best <sbest@redhat.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/interface.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index fc0fa7577636d..8cec9a02c0b89 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -227,6 +227,13 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 			missing = year;
 	}
 
+	/* Can't proceed if alarm is still invalid after replacing
+	 * missing fields.
+	 */
+	err = rtc_valid_tm(&alarm->time);
+	if (err)
+		goto done;
+
 	/* with luck, no rollover is needed */
 	t_now = rtc_tm_to_time64(&now);
 	t_alm = rtc_tm_to_time64(&alarm->time);
@@ -278,9 +285,9 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 		dev_warn(&rtc->dev, "alarm rollover not handled\n");
 	}
 
-done:
 	err = rtc_valid_tm(&alarm->time);
 
+done:
 	if (err) {
 		dev_warn(&rtc->dev, "invalid alarm value: %d-%d-%d %d:%d:%d\n",
 			alarm->time.tm_year + 1900, alarm->time.tm_mon + 1,
-- 
GitLab


From a21512c1698d8106bcece0d24ff590dc92682678 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 18 May 2017 22:25:56 -0300
Subject: [PATCH 0130/1958] rtc.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- adjust identation of the titles;
- mark a table as such;
- don't capitalize chapter names.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 Documentation/rtc.txt | 44 ++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index ddc366026e00f..47feb4414b7e1 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -1,6 +1,6 @@
-
-	Real Time Clock (RTC) Drivers for Linux
-	=======================================
+=======================================
+Real Time Clock (RTC) Drivers for Linux
+=======================================
 
 When Linux developers talk about a "Real Time Clock", they usually mean
 something that tracks wall clock time and is battery backed so that it
@@ -32,8 +32,8 @@ only issue an alarm up to 24 hours in the future, other hardware may
 be able to schedule one any time in the upcoming century.
 
 
-	Old PC/AT-Compatible driver:  /dev/rtc
-	--------------------------------------
+Old PC/AT-Compatible driver:  /dev/rtc
+--------------------------------------
 
 All PCs (even Alpha machines) have a Real Time Clock built into them.
 Usually they are built into the chipset of the computer, but some may
@@ -105,8 +105,8 @@ that will be using this driver.  See the code at the end of this document.
 (The original /dev/rtc driver was written by Paul Gortmaker.)
 
 
-	New portable "RTC Class" drivers:  /dev/rtcN
-	--------------------------------------------
+New portable "RTC Class" drivers:  /dev/rtcN
+--------------------------------------------
 
 Because Linux supports many non-ACPI and non-PC platforms, some of which
 have more than one RTC style clock, it needed a more portable solution
@@ -136,35 +136,37 @@ a high functionality RTC is integrated into the SOC.  That system might read
 the system clock from the discrete RTC, but use the integrated one for all
 other tasks, because of its greater functionality.
 
-SYSFS INTERFACE
+SYSFS interface
 ---------------
 
 The sysfs interface under /sys/class/rtc/rtcN provides access to various
 rtc attributes without requiring the use of ioctls. All dates and times
 are in the RTC's timezone, rather than in system time.
 
-date:  	   	 RTC-provided date
-hctosys:   	 1 if the RTC provided the system time at boot via the
+================ ==============================================================
+date  	   	 RTC-provided date
+hctosys   	 1 if the RTC provided the system time at boot via the
 		 CONFIG_RTC_HCTOSYS kernel option, 0 otherwise
-max_user_freq:	 The maximum interrupt rate an unprivileged user may request
+max_user_freq	 The maximum interrupt rate an unprivileged user may request
 		 from this RTC.
-name:		 The name of the RTC corresponding to this sysfs directory
-since_epoch:	 The number of seconds since the epoch according to the RTC
-time:		 RTC-provided time
-wakealarm:	 The time at which the clock will generate a system wakeup
+name		 The name of the RTC corresponding to this sysfs directory
+since_epoch	 The number of seconds since the epoch according to the RTC
+time		 RTC-provided time
+wakealarm	 The time at which the clock will generate a system wakeup
 		 event. This is a one shot wakeup event, so must be reset
-		 after wake if a daily wakeup is required. Format is seconds since
-		 the epoch by default, or if there's a leading +, seconds in the
-		 future, or if there is a leading +=, seconds ahead of the current
-		 alarm.
-offset:		 The amount which the rtc clock has been adjusted in firmware.
+		 after wake if a daily wakeup is required. Format is seconds
+		 since the epoch by default, or if there's a leading +, seconds
+		 in the future, or if there is a leading +=, seconds ahead of
+		 the current alarm.
+offset		 The amount which the rtc clock has been adjusted in firmware.
 		 Visible only if the driver supports clock offset adjustment.
 		 The unit is parts per billion, i.e. The number of clock ticks
 		 which are added to or removed from the rtc's base clock per
 		 billion ticks. A positive value makes a day pass more slowly,
 		 longer, and a negative value makes a day pass more quickly.
+================ ==============================================================
 
-IOCTL INTERFACE
+IOCTL interface
 ---------------
 
 The ioctl() calls supported by /dev/rtc are also supported by the RTC class
-- 
GitLab


From 91ed53491f4fa31879e0965a7e920dc10f78996c Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 23 May 2017 11:08:04 +0200
Subject: [PATCH 0131/1958] i2c: rename core source file to allow
 refactorization

The I2C core became quite huge and its monolithic structure makes
maintenance hard. So, prepare to break out some functionality into
separate files by renaming the source file. Note that we keep the
resulting object name constant to avoid regressions.

Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/driver-api/i2c.rst            | 2 +-
 drivers/i2c/Makefile                        | 4 +++-
 drivers/i2c/busses/i2c-designware-core.c    | 2 +-
 drivers/i2c/{i2c-core.c => i2c-core-base.c} | 0
 4 files changed, 5 insertions(+), 3 deletions(-)
 rename drivers/i2c/{i2c-core.c => i2c-core-base.c} (100%)

diff --git a/Documentation/driver-api/i2c.rst b/Documentation/driver-api/i2c.rst
index f3939f7852bd5..e6d4808ffbab3 100644
--- a/Documentation/driver-api/i2c.rst
+++ b/Documentation/driver-api/i2c.rst
@@ -42,5 +42,5 @@ i2c_adapter devices which don't support those I2C operations.
 .. kernel-doc:: drivers/i2c/i2c-boardinfo.c
    :functions: i2c_register_board_info
 
-.. kernel-doc:: drivers/i2c/i2c-core.c
+.. kernel-doc:: drivers/i2c/i2c-core-base.c
    :export:
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index 45095b3d16a91..d459c7e590760 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -4,6 +4,8 @@
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2c-core.o
+i2c-core-objs 			:= i2c-core-base.o
+
 obj-$(CONFIG_I2C_SMBUS)		+= i2c-smbus.o
 obj-$(CONFIG_I2C_CHARDEV)	+= i2c-dev.o
 obj-$(CONFIG_I2C_MUX)		+= i2c-mux.o
@@ -12,4 +14,4 @@ obj-$(CONFIG_I2C_STUB)		+= i2c-stub.o
 obj-$(CONFIG_I2C_SLAVE_EEPROM)	+= i2c-slave-eeprom.o
 
 ccflags-$(CONFIG_I2C_DEBUG_CORE) := -DDEBUG
-CFLAGS_i2c-core.o := -Wno-deprecated-declarations
+CFLAGS_i2c-core-base.o := -Wno-deprecated-declarations
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index c453717b753b7..3c41995634c2f 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -583,7 +583,7 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
 			 */
 
 			/*
-			 * i2c-core.c always sets the buffer length of
+			 * i2c-core always sets the buffer length of
 			 * I2C_FUNC_SMBUS_BLOCK_DATA to 1. The length will
 			 * be adjusted when receiving the first byte.
 			 * Thus we can't stop the transaction here.
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core-base.c
similarity index 100%
rename from drivers/i2c/i2c-core.c
rename to drivers/i2c/i2c-core-base.c
-- 
GitLab


From e4991ecdc6b8ad2b21f3d6e90ef826b8871103a2 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 23 May 2017 11:14:17 +0200
Subject: [PATCH 0132/1958] i2c: break out slave support into separate file

Also removes some ifdeffery.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/Makefile         |   1 +
 drivers/i2c/i2c-core-base.c  | 102 +------------------------------
 drivers/i2c/i2c-core-slave.c | 115 +++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-core.h       |   1 +
 4 files changed, 118 insertions(+), 101 deletions(-)
 create mode 100644 drivers/i2c/i2c-core-slave.c

diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index d459c7e590760..6c54716e7f28c 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -5,6 +5,7 @@
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2c-core.o
 i2c-core-objs 			:= i2c-core-base.o
+i2c-core-$(CONFIG_I2C_SLAVE) 	+= i2c-core-slave.o
 
 obj-$(CONFIG_I2C_SMBUS)		+= i2c-smbus.o
 obj-$(CONFIG_I2C_CHARDEV)	+= i2c-dev.o
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 82576aaccc909..88c0ca664a7b8 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -24,7 +24,6 @@
    (c) 2013  Wolfram Sang <wsa@the-dreams.de>
    I2C ACPI code Copyright (C) 2014 Intel Corp
    Author: Lan Tianyu <tianyu.lan@intel.com>
-   I2C slave support (c) 2014 by Wolfram Sang <wsa@sang-engineering.com>
  */
 
 #define pr_fmt(fmt) "i2c-core: " fmt
@@ -1213,7 +1212,7 @@ static int i2c_check_addr_validity(unsigned addr, unsigned short flags)
  * device uses a reserved address, then it shouldn't be probed. 7-bit
  * addressing is assumed, 10-bit address devices are rare and should be
  * explicitly enumerated. */
-static int i2c_check_7bit_addr_validity_strict(unsigned short addr)
+int i2c_check_7bit_addr_validity_strict(unsigned short addr)
 {
 	/*
 	 * Reserved addresses per I2C specification:
@@ -3727,105 +3726,6 @@ s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client,
 }
 EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data_or_emulated);
 
-#if IS_ENABLED(CONFIG_I2C_SLAVE)
-int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
-{
-	int ret;
-
-	if (!client || !slave_cb) {
-		WARN(1, "insufficient data\n");
-		return -EINVAL;
-	}
-
-	if (!(client->flags & I2C_CLIENT_SLAVE))
-		dev_warn(&client->dev, "%s: client slave flag not set. You might see address collisions\n",
-			 __func__);
-
-	if (!(client->flags & I2C_CLIENT_TEN)) {
-		/* Enforce stricter address checking */
-		ret = i2c_check_7bit_addr_validity_strict(client->addr);
-		if (ret) {
-			dev_err(&client->dev, "%s: invalid address\n", __func__);
-			return ret;
-		}
-	}
-
-	if (!client->adapter->algo->reg_slave) {
-		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
-		return -EOPNOTSUPP;
-	}
-
-	client->slave_cb = slave_cb;
-
-	i2c_lock_adapter(client->adapter);
-	ret = client->adapter->algo->reg_slave(client);
-	i2c_unlock_adapter(client->adapter);
-
-	if (ret) {
-		client->slave_cb = NULL;
-		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i2c_slave_register);
-
-int i2c_slave_unregister(struct i2c_client *client)
-{
-	int ret;
-
-	if (!client->adapter->algo->unreg_slave) {
-		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
-		return -EOPNOTSUPP;
-	}
-
-	i2c_lock_adapter(client->adapter);
-	ret = client->adapter->algo->unreg_slave(client);
-	i2c_unlock_adapter(client->adapter);
-
-	if (ret == 0)
-		client->slave_cb = NULL;
-	else
-		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i2c_slave_unregister);
-
-/**
- * i2c_detect_slave_mode - detect operation mode
- * @dev: The device owning the bus
- *
- * This checks the device nodes for an I2C slave by checking the address
- * used in the reg property. If the address match the I2C_OWN_SLAVE_ADDRESS
- * flag this means the device is configured to act as a I2C slave and it will
- * be listening at that address.
- *
- * Returns true if an I2C own slave address is detected, otherwise returns
- * false.
- */
-bool i2c_detect_slave_mode(struct device *dev)
-{
-	if (IS_BUILTIN(CONFIG_OF) && dev->of_node) {
-		struct device_node *child;
-		u32 reg;
-
-		for_each_child_of_node(dev->of_node, child) {
-			of_property_read_u32(child, "reg", &reg);
-			if (reg & I2C_OWN_SLAVE_ADDRESS) {
-				of_node_put(child);
-				return true;
-			}
-		}
-	} else if (IS_BUILTIN(CONFIG_ACPI) && ACPI_HANDLE(dev)) {
-		dev_dbg(dev, "ACPI slave is not supported yet\n");
-	}
-	return false;
-}
-EXPORT_SYMBOL_GPL(i2c_detect_slave_mode);
-
-#endif
-
 MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
 MODULE_DESCRIPTION("I2C-Bus main module");
 MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/i2c-core-slave.c b/drivers/i2c/i2c-core-slave.c
new file mode 100644
index 0000000000000..4a78c65e99713
--- /dev/null
+++ b/drivers/i2c/i2c-core-slave.c
@@ -0,0 +1,115 @@
+/*
+ * Linux I2C core slave support code
+ *
+ * Copyright (C) 2014 by Wolfram Sang <wsa@sang-engineering.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <dt-bindings/i2c/i2c.h>
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/of.h>
+
+#include "i2c-core.h"
+
+int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
+{
+	int ret;
+
+	if (!client || !slave_cb) {
+		WARN(1, "insufficient data\n");
+		return -EINVAL;
+	}
+
+	if (!(client->flags & I2C_CLIENT_SLAVE))
+		dev_warn(&client->dev, "%s: client slave flag not set. You might see address collisions\n",
+			 __func__);
+
+	if (!(client->flags & I2C_CLIENT_TEN)) {
+		/* Enforce stricter address checking */
+		ret = i2c_check_7bit_addr_validity_strict(client->addr);
+		if (ret) {
+			dev_err(&client->dev, "%s: invalid address\n", __func__);
+			return ret;
+		}
+	}
+
+	if (!client->adapter->algo->reg_slave) {
+		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
+		return -EOPNOTSUPP;
+	}
+
+	client->slave_cb = slave_cb;
+
+	i2c_lock_adapter(client->adapter);
+	ret = client->adapter->algo->reg_slave(client);
+	i2c_unlock_adapter(client->adapter);
+
+	if (ret) {
+		client->slave_cb = NULL;
+		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_slave_register);
+
+int i2c_slave_unregister(struct i2c_client *client)
+{
+	int ret;
+
+	if (!client->adapter->algo->unreg_slave) {
+		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
+		return -EOPNOTSUPP;
+	}
+
+	i2c_lock_adapter(client->adapter);
+	ret = client->adapter->algo->unreg_slave(client);
+	i2c_unlock_adapter(client->adapter);
+
+	if (ret == 0)
+		client->slave_cb = NULL;
+	else
+		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_slave_unregister);
+
+/**
+ * i2c_detect_slave_mode - detect operation mode
+ * @dev: The device owning the bus
+ *
+ * This checks the device nodes for an I2C slave by checking the address
+ * used in the reg property. If the address match the I2C_OWN_SLAVE_ADDRESS
+ * flag this means the device is configured to act as a I2C slave and it will
+ * be listening at that address.
+ *
+ * Returns true if an I2C own slave address is detected, otherwise returns
+ * false.
+ */
+bool i2c_detect_slave_mode(struct device *dev)
+{
+	if (IS_BUILTIN(CONFIG_OF) && dev->of_node) {
+		struct device_node *child;
+		u32 reg;
+
+		for_each_child_of_node(dev->of_node, child) {
+			of_property_read_u32(child, "reg", &reg);
+			if (reg & I2C_OWN_SLAVE_ADDRESS) {
+				of_node_put(child);
+				return true;
+			}
+		}
+	} else if (IS_BUILTIN(CONFIG_ACPI) && ACPI_HANDLE(dev)) {
+		dev_dbg(dev, "ACPI slave is not supported yet\n");
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(i2c_detect_slave_mode);
diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h
index 17700bfddcf58..77c22b03ff951 100644
--- a/drivers/i2c/i2c-core.h
+++ b/drivers/i2c/i2c-core.h
@@ -27,3 +27,4 @@ extern struct rw_semaphore	__i2c_board_lock;
 extern struct list_head	__i2c_board_list;
 extern int		__i2c_first_dynamic_bus_num;
 
+int i2c_check_7bit_addr_validity_strict(unsigned short addr);
-- 
GitLab


From 22c78d1cce104072747023d2ae0351bf3f97d725 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 23 May 2017 12:27:17 +0200
Subject: [PATCH 0133/1958] i2c: break out smbus support into separate file

Break out the exported SMBus functions and the emulation layer into a
separate file. This also involved splitting up the tracing header into
an I2C and an SMBus part.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/driver-api/i2c.rst |   3 +
 drivers/i2c/Makefile             |   2 +-
 drivers/i2c/i2c-core-base.c      | 574 -----------------------------
 drivers/i2c/i2c-core-smbus.c     | 594 +++++++++++++++++++++++++++++++
 include/trace/events/i2c.h       | 226 +-----------
 include/trace/events/smbus.h     | 249 +++++++++++++
 6 files changed, 849 insertions(+), 799 deletions(-)
 create mode 100644 drivers/i2c/i2c-core-smbus.c
 create mode 100644 include/trace/events/smbus.h

diff --git a/Documentation/driver-api/i2c.rst b/Documentation/driver-api/i2c.rst
index e6d4808ffbab3..c215503801f0f 100644
--- a/Documentation/driver-api/i2c.rst
+++ b/Documentation/driver-api/i2c.rst
@@ -44,3 +44,6 @@ i2c_adapter devices which don't support those I2C operations.
 
 .. kernel-doc:: drivers/i2c/i2c-core-base.c
    :export:
+
+.. kernel-doc:: drivers/i2c/i2c-core-smbus.c
+   :export:
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index 6c54716e7f28c..a6a90fe2db887 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2c-core.o
-i2c-core-objs 			:= i2c-core-base.o
+i2c-core-objs 			:= i2c-core-base.o i2c-core-smbus.o
 i2c-core-$(CONFIG_I2C_SLAVE) 	+= i2c-core-slave.o
 
 obj-$(CONFIG_I2C_SMBUS)		+= i2c-smbus.o
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 88c0ca664a7b8..70fc4624c69c2 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -14,9 +14,6 @@
 /* ------------------------------------------------------------------------- */
 
 /* With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi>.
-   All SMBus-related things are written by Frodo Looijaard <frodol@dds.nl>
-   SMBus 2.0 support by Mark Studebaker <mdsxyz123@yahoo.com> and
-   Jean Delvare <jdelvare@suse.de>
    Mux support by Rodolfo Giometti <giometti@enneenne.com> and
    Michael Lawnick <michael.lawnick.ext@nsn.com>
    OF support is copyright (c) 2008 Jochen Friedrich <jochen@scram.de>
@@ -3155,577 +3152,6 @@ void i2c_put_adapter(struct i2c_adapter *adap)
 }
 EXPORT_SYMBOL(i2c_put_adapter);
 
-/* The SMBus parts */
-
-#define POLY    (0x1070U << 3)
-static u8 crc8(u16 data)
-{
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		if (data & 0x8000)
-			data = data ^ POLY;
-		data = data << 1;
-	}
-	return (u8)(data >> 8);
-}
-
-/* Incremental CRC8 over count bytes in the array pointed to by p */
-static u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count)
-{
-	int i;
-
-	for (i = 0; i < count; i++)
-		crc = crc8((crc ^ p[i]) << 8);
-	return crc;
-}
-
-/* Assume a 7-bit address, which is reasonable for SMBus */
-static u8 i2c_smbus_msg_pec(u8 pec, struct i2c_msg *msg)
-{
-	/* The address will be sent first */
-	u8 addr = i2c_8bit_addr_from_msg(msg);
-	pec = i2c_smbus_pec(pec, &addr, 1);
-
-	/* The data buffer follows */
-	return i2c_smbus_pec(pec, msg->buf, msg->len);
-}
-
-/* Used for write only transactions */
-static inline void i2c_smbus_add_pec(struct i2c_msg *msg)
-{
-	msg->buf[msg->len] = i2c_smbus_msg_pec(0, msg);
-	msg->len++;
-}
-
-/* Return <0 on CRC error
-   If there was a write before this read (most cases) we need to take the
-   partial CRC from the write part into account.
-   Note that this function does modify the message (we need to decrease the
-   message length to hide the CRC byte from the caller). */
-static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg)
-{
-	u8 rpec = msg->buf[--msg->len];
-	cpec = i2c_smbus_msg_pec(cpec, msg);
-
-	if (rpec != cpec) {
-		pr_debug("Bad PEC 0x%02x vs. 0x%02x\n",
-			rpec, cpec);
-		return -EBADMSG;
-	}
-	return 0;
-}
-
-/**
- * i2c_smbus_read_byte - SMBus "receive byte" protocol
- * @client: Handle to slave device
- *
- * This executes the SMBus "receive byte" protocol, returning negative errno
- * else the byte received from the device.
- */
-s32 i2c_smbus_read_byte(const struct i2c_client *client)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, 0,
-				I2C_SMBUS_BYTE, &data);
-	return (status < 0) ? status : data.byte;
-}
-EXPORT_SYMBOL(i2c_smbus_read_byte);
-
-/**
- * i2c_smbus_write_byte - SMBus "send byte" protocol
- * @client: Handle to slave device
- * @value: Byte to be sent
- *
- * This executes the SMBus "send byte" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value)
-{
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-	                      I2C_SMBUS_WRITE, value, I2C_SMBUS_BYTE, NULL);
-}
-EXPORT_SYMBOL(i2c_smbus_write_byte);
-
-/**
- * i2c_smbus_read_byte_data - SMBus "read byte" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- *
- * This executes the SMBus "read byte" protocol, returning negative errno
- * else a data byte received from the device.
- */
-s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_BYTE_DATA, &data);
-	return (status < 0) ? status : data.byte;
-}
-EXPORT_SYMBOL(i2c_smbus_read_byte_data);
-
-/**
- * i2c_smbus_write_byte_data - SMBus "write byte" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @value: Byte being written
- *
- * This executes the SMBus "write byte" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command,
-			      u8 value)
-{
-	union i2c_smbus_data data;
-	data.byte = value;
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_BYTE_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_byte_data);
-
-/**
- * i2c_smbus_read_word_data - SMBus "read word" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- *
- * This executes the SMBus "read word" protocol, returning negative errno
- * else a 16-bit unsigned "word" received from the device.
- */
-s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_WORD_DATA, &data);
-	return (status < 0) ? status : data.word;
-}
-EXPORT_SYMBOL(i2c_smbus_read_word_data);
-
-/**
- * i2c_smbus_write_word_data - SMBus "write word" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @value: 16-bit "word" being written
- *
- * This executes the SMBus "write word" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command,
-			      u16 value)
-{
-	union i2c_smbus_data data;
-	data.word = value;
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_WORD_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_word_data);
-
-/**
- * i2c_smbus_read_block_data - SMBus "block read" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @values: Byte array into which data will be read; big enough to hold
- *	the data returned by the slave.  SMBus allows at most 32 bytes.
- *
- * This executes the SMBus "block read" protocol, returning negative errno
- * else the number of data bytes in the slave's response.
- *
- * Note that using this function requires that the client's adapter support
- * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality.  Not all adapter drivers
- * support this; its emulation through I2C messaging relies on a specific
- * mechanism (I2C_M_RECV_LEN) which may not be implemented.
- */
-s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command,
-			      u8 *values)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_BLOCK_DATA, &data);
-	if (status)
-		return status;
-
-	memcpy(values, &data.block[1], data.block[0]);
-	return data.block[0];
-}
-EXPORT_SYMBOL(i2c_smbus_read_block_data);
-
-/**
- * i2c_smbus_write_block_data - SMBus "block write" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @length: Size of data block; SMBus allows at most 32 bytes
- * @values: Byte array which will be written.
- *
- * This executes the SMBus "block write" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command,
-			       u8 length, const u8 *values)
-{
-	union i2c_smbus_data data;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-	data.block[0] = length;
-	memcpy(&data.block[1], values, length);
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_BLOCK_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_block_data);
-
-/* Returns the number of read bytes */
-s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command,
-				  u8 length, u8 *values)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-	data.block[0] = length;
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_I2C_BLOCK_DATA, &data);
-	if (status < 0)
-		return status;
-
-	memcpy(values, &data.block[1], data.block[0]);
-	return data.block[0];
-}
-EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data);
-
-s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command,
-				   u8 length, const u8 *values)
-{
-	union i2c_smbus_data data;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-	data.block[0] = length;
-	memcpy(data.block + 1, values, length);
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_I2C_BLOCK_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_i2c_block_data);
-
-/* Simulate a SMBus command using the i2c protocol
-   No checking of parameters is done!  */
-static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
-				   unsigned short flags,
-				   char read_write, u8 command, int size,
-				   union i2c_smbus_data *data)
-{
-	/* So we need to generate a series of msgs. In the case of writing, we
-	  need to use only one message; when reading, we need two. We initialize
-	  most things with sane defaults, to keep the code below somewhat
-	  simpler. */
-	unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3];
-	unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2];
-	int num = read_write == I2C_SMBUS_READ ? 2 : 1;
-	int i;
-	u8 partial_pec = 0;
-	int status;
-	struct i2c_msg msg[2] = {
-		{
-			.addr = addr,
-			.flags = flags,
-			.len = 1,
-			.buf = msgbuf0,
-		}, {
-			.addr = addr,
-			.flags = flags | I2C_M_RD,
-			.len = 0,
-			.buf = msgbuf1,
-		},
-	};
-
-	msgbuf0[0] = command;
-	switch (size) {
-	case I2C_SMBUS_QUICK:
-		msg[0].len = 0;
-		/* Special case: The read/write field is used as data */
-		msg[0].flags = flags | (read_write == I2C_SMBUS_READ ?
-					I2C_M_RD : 0);
-		num = 1;
-		break;
-	case I2C_SMBUS_BYTE:
-		if (read_write == I2C_SMBUS_READ) {
-			/* Special case: only a read! */
-			msg[0].flags = I2C_M_RD | flags;
-			num = 1;
-		}
-		break;
-	case I2C_SMBUS_BYTE_DATA:
-		if (read_write == I2C_SMBUS_READ)
-			msg[1].len = 1;
-		else {
-			msg[0].len = 2;
-			msgbuf0[1] = data->byte;
-		}
-		break;
-	case I2C_SMBUS_WORD_DATA:
-		if (read_write == I2C_SMBUS_READ)
-			msg[1].len = 2;
-		else {
-			msg[0].len = 3;
-			msgbuf0[1] = data->word & 0xff;
-			msgbuf0[2] = data->word >> 8;
-		}
-		break;
-	case I2C_SMBUS_PROC_CALL:
-		num = 2; /* Special case */
-		read_write = I2C_SMBUS_READ;
-		msg[0].len = 3;
-		msg[1].len = 2;
-		msgbuf0[1] = data->word & 0xff;
-		msgbuf0[2] = data->word >> 8;
-		break;
-	case I2C_SMBUS_BLOCK_DATA:
-		if (read_write == I2C_SMBUS_READ) {
-			msg[1].flags |= I2C_M_RECV_LEN;
-			msg[1].len = 1; /* block length will be added by
-					   the underlying bus driver */
-		} else {
-			msg[0].len = data->block[0] + 2;
-			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) {
-				dev_err(&adapter->dev,
-					"Invalid block write size %d\n",
-					data->block[0]);
-				return -EINVAL;
-			}
-			for (i = 1; i < msg[0].len; i++)
-				msgbuf0[i] = data->block[i-1];
-		}
-		break;
-	case I2C_SMBUS_BLOCK_PROC_CALL:
-		num = 2; /* Another special case */
-		read_write = I2C_SMBUS_READ;
-		if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
-			dev_err(&adapter->dev,
-				"Invalid block write size %d\n",
-				data->block[0]);
-			return -EINVAL;
-		}
-		msg[0].len = data->block[0] + 2;
-		for (i = 1; i < msg[0].len; i++)
-			msgbuf0[i] = data->block[i-1];
-		msg[1].flags |= I2C_M_RECV_LEN;
-		msg[1].len = 1; /* block length will be added by
-				   the underlying bus driver */
-		break;
-	case I2C_SMBUS_I2C_BLOCK_DATA:
-		if (read_write == I2C_SMBUS_READ) {
-			msg[1].len = data->block[0];
-		} else {
-			msg[0].len = data->block[0] + 1;
-			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) {
-				dev_err(&adapter->dev,
-					"Invalid block write size %d\n",
-					data->block[0]);
-				return -EINVAL;
-			}
-			for (i = 1; i <= data->block[0]; i++)
-				msgbuf0[i] = data->block[i];
-		}
-		break;
-	default:
-		dev_err(&adapter->dev, "Unsupported transaction %d\n", size);
-		return -EOPNOTSUPP;
-	}
-
-	i = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK
-				      && size != I2C_SMBUS_I2C_BLOCK_DATA);
-	if (i) {
-		/* Compute PEC if first message is a write */
-		if (!(msg[0].flags & I2C_M_RD)) {
-			if (num == 1) /* Write only */
-				i2c_smbus_add_pec(&msg[0]);
-			else /* Write followed by read */
-				partial_pec = i2c_smbus_msg_pec(0, &msg[0]);
-		}
-		/* Ask for PEC if last message is a read */
-		if (msg[num-1].flags & I2C_M_RD)
-			msg[num-1].len++;
-	}
-
-	status = i2c_transfer(adapter, msg, num);
-	if (status < 0)
-		return status;
-
-	/* Check PEC if last message is a read */
-	if (i && (msg[num-1].flags & I2C_M_RD)) {
-		status = i2c_smbus_check_pec(partial_pec, &msg[num-1]);
-		if (status < 0)
-			return status;
-	}
-
-	if (read_write == I2C_SMBUS_READ)
-		switch (size) {
-		case I2C_SMBUS_BYTE:
-			data->byte = msgbuf0[0];
-			break;
-		case I2C_SMBUS_BYTE_DATA:
-			data->byte = msgbuf1[0];
-			break;
-		case I2C_SMBUS_WORD_DATA:
-		case I2C_SMBUS_PROC_CALL:
-			data->word = msgbuf1[0] | (msgbuf1[1] << 8);
-			break;
-		case I2C_SMBUS_I2C_BLOCK_DATA:
-			for (i = 0; i < data->block[0]; i++)
-				data->block[i+1] = msgbuf1[i];
-			break;
-		case I2C_SMBUS_BLOCK_DATA:
-		case I2C_SMBUS_BLOCK_PROC_CALL:
-			for (i = 0; i < msgbuf1[0] + 1; i++)
-				data->block[i] = msgbuf1[i];
-			break;
-		}
-	return 0;
-}
-
-/**
- * i2c_smbus_xfer - execute SMBus protocol operations
- * @adapter: Handle to I2C bus
- * @addr: Address of SMBus slave on that bus
- * @flags: I2C_CLIENT_* flags (usually zero or I2C_CLIENT_PEC)
- * @read_write: I2C_SMBUS_READ or I2C_SMBUS_WRITE
- * @command: Byte interpreted by slave, for protocols which use such bytes
- * @protocol: SMBus protocol operation to execute, such as I2C_SMBUS_PROC_CALL
- * @data: Data to be read or written
- *
- * This executes an SMBus protocol operation, and returns a negative
- * errno code else zero on success.
- */
-s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
-		   char read_write, u8 command, int protocol,
-		   union i2c_smbus_data *data)
-{
-	unsigned long orig_jiffies;
-	int try;
-	s32 res;
-
-	/* If enabled, the following two tracepoints are conditional on
-	 * read_write and protocol.
-	 */
-	trace_smbus_write(adapter, addr, flags, read_write,
-			  command, protocol, data);
-	trace_smbus_read(adapter, addr, flags, read_write,
-			 command, protocol);
-
-	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
-
-	if (adapter->algo->smbus_xfer) {
-		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
-
-		/* Retry automatically on arbitration loss */
-		orig_jiffies = jiffies;
-		for (res = 0, try = 0; try <= adapter->retries; try++) {
-			res = adapter->algo->smbus_xfer(adapter, addr, flags,
-							read_write, command,
-							protocol, data);
-			if (res != -EAGAIN)
-				break;
-			if (time_after(jiffies,
-				       orig_jiffies + adapter->timeout))
-				break;
-		}
-		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
-
-		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
-			goto trace;
-		/*
-		 * Fall back to i2c_smbus_xfer_emulated if the adapter doesn't
-		 * implement native support for the SMBus operation.
-		 */
-	}
-
-	res = i2c_smbus_xfer_emulated(adapter, addr, flags, read_write,
-				      command, protocol, data);
-
-trace:
-	/* If enabled, the reply tracepoint is conditional on read_write. */
-	trace_smbus_reply(adapter, addr, flags, read_write,
-			  command, protocol, data);
-	trace_smbus_result(adapter, addr, flags, read_write,
-			   command, protocol, res);
-
-	return res;
-}
-EXPORT_SYMBOL(i2c_smbus_xfer);
-
-/**
- * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @length: Size of data block; SMBus allows at most I2C_SMBUS_BLOCK_MAX bytes
- * @values: Byte array into which data will be read; big enough to hold
- *	the data returned by the slave.  SMBus allows at most
- *	I2C_SMBUS_BLOCK_MAX bytes.
- *
- * This executes the SMBus "block read" protocol if supported by the adapter.
- * If block read is not supported, it emulates it using either word or byte
- * read protocols depending on availability.
- *
- * The addresses of the I2C slave device that are accessed with this function
- * must be mapped to a linear region, so that a block read will have the same
- * effect as a byte read. Before using this function you must double-check
- * if the I2C slave does support exchanging a block transfer with a byte
- * transfer.
- */
-s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client,
-					      u8 command, u8 length, u8 *values)
-{
-	u8 i = 0;
-	int status;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-
-	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
-		return i2c_smbus_read_i2c_block_data(client, command, length, values);
-
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA))
-		return -EOPNOTSUPP;
-
-	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)) {
-		while ((i + 2) <= length) {
-			status = i2c_smbus_read_word_data(client, command + i);
-			if (status < 0)
-				return status;
-			values[i] = status & 0xff;
-			values[i + 1] = status >> 8;
-			i += 2;
-		}
-	}
-
-	while (i < length) {
-		status = i2c_smbus_read_byte_data(client, command + i);
-		if (status < 0)
-			return status;
-		values[i] = status;
-		i++;
-	}
-
-	return i;
-}
-EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data_or_emulated);
-
 MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
 MODULE_DESCRIPTION("I2C-Bus main module");
 MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
new file mode 100644
index 0000000000000..10f00a82ec9db
--- /dev/null
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -0,0 +1,594 @@
+/*
+ * Linux I2C core SMBus and SMBus emulation code
+ *
+ * This file contains the SMBus functions which are always included in the I2C
+ * core because they can be emulated via I2C. SMBus specific extensions
+ * (e.g. smbalert) are handled in a seperate i2c-smbus module.
+ *
+ * All SMBus-related things are written by Frodo Looijaard <frodol@dds.nl>
+ * SMBus 2.0 support by Mark Studebaker <mdsxyz123@yahoo.com> and
+ * Jean Delvare <jdelvare@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/smbus.h>
+
+
+/* The SMBus parts */
+
+#define POLY    (0x1070U << 3)
+static u8 crc8(u16 data)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		if (data & 0x8000)
+			data = data ^ POLY;
+		data = data << 1;
+	}
+	return (u8)(data >> 8);
+}
+
+/* Incremental CRC8 over count bytes in the array pointed to by p */
+static u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		crc = crc8((crc ^ p[i]) << 8);
+	return crc;
+}
+
+/* Assume a 7-bit address, which is reasonable for SMBus */
+static u8 i2c_smbus_msg_pec(u8 pec, struct i2c_msg *msg)
+{
+	/* The address will be sent first */
+	u8 addr = i2c_8bit_addr_from_msg(msg);
+	pec = i2c_smbus_pec(pec, &addr, 1);
+
+	/* The data buffer follows */
+	return i2c_smbus_pec(pec, msg->buf, msg->len);
+}
+
+/* Used for write only transactions */
+static inline void i2c_smbus_add_pec(struct i2c_msg *msg)
+{
+	msg->buf[msg->len] = i2c_smbus_msg_pec(0, msg);
+	msg->len++;
+}
+
+/* Return <0 on CRC error
+   If there was a write before this read (most cases) we need to take the
+   partial CRC from the write part into account.
+   Note that this function does modify the message (we need to decrease the
+   message length to hide the CRC byte from the caller). */
+static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg)
+{
+	u8 rpec = msg->buf[--msg->len];
+	cpec = i2c_smbus_msg_pec(cpec, msg);
+
+	if (rpec != cpec) {
+		pr_debug("Bad PEC 0x%02x vs. 0x%02x\n",
+			rpec, cpec);
+		return -EBADMSG;
+	}
+	return 0;
+}
+
+/**
+ * i2c_smbus_read_byte - SMBus "receive byte" protocol
+ * @client: Handle to slave device
+ *
+ * This executes the SMBus "receive byte" protocol, returning negative errno
+ * else the byte received from the device.
+ */
+s32 i2c_smbus_read_byte(const struct i2c_client *client)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, 0,
+				I2C_SMBUS_BYTE, &data);
+	return (status < 0) ? status : data.byte;
+}
+EXPORT_SYMBOL(i2c_smbus_read_byte);
+
+/**
+ * i2c_smbus_write_byte - SMBus "send byte" protocol
+ * @client: Handle to slave device
+ * @value: Byte to be sent
+ *
+ * This executes the SMBus "send byte" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value)
+{
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+	                      I2C_SMBUS_WRITE, value, I2C_SMBUS_BYTE, NULL);
+}
+EXPORT_SYMBOL(i2c_smbus_write_byte);
+
+/**
+ * i2c_smbus_read_byte_data - SMBus "read byte" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ *
+ * This executes the SMBus "read byte" protocol, returning negative errno
+ * else a data byte received from the device.
+ */
+s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_BYTE_DATA, &data);
+	return (status < 0) ? status : data.byte;
+}
+EXPORT_SYMBOL(i2c_smbus_read_byte_data);
+
+/**
+ * i2c_smbus_write_byte_data - SMBus "write byte" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @value: Byte being written
+ *
+ * This executes the SMBus "write byte" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command,
+			      u8 value)
+{
+	union i2c_smbus_data data;
+	data.byte = value;
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_BYTE_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_byte_data);
+
+/**
+ * i2c_smbus_read_word_data - SMBus "read word" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ *
+ * This executes the SMBus "read word" protocol, returning negative errno
+ * else a 16-bit unsigned "word" received from the device.
+ */
+s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_WORD_DATA, &data);
+	return (status < 0) ? status : data.word;
+}
+EXPORT_SYMBOL(i2c_smbus_read_word_data);
+
+/**
+ * i2c_smbus_write_word_data - SMBus "write word" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @value: 16-bit "word" being written
+ *
+ * This executes the SMBus "write word" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command,
+			      u16 value)
+{
+	union i2c_smbus_data data;
+	data.word = value;
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_WORD_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_word_data);
+
+/**
+ * i2c_smbus_read_block_data - SMBus "block read" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @values: Byte array into which data will be read; big enough to hold
+ *	the data returned by the slave.  SMBus allows at most 32 bytes.
+ *
+ * This executes the SMBus "block read" protocol, returning negative errno
+ * else the number of data bytes in the slave's response.
+ *
+ * Note that using this function requires that the client's adapter support
+ * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality.  Not all adapter drivers
+ * support this; its emulation through I2C messaging relies on a specific
+ * mechanism (I2C_M_RECV_LEN) which may not be implemented.
+ */
+s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command,
+			      u8 *values)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_BLOCK_DATA, &data);
+	if (status)
+		return status;
+
+	memcpy(values, &data.block[1], data.block[0]);
+	return data.block[0];
+}
+EXPORT_SYMBOL(i2c_smbus_read_block_data);
+
+/**
+ * i2c_smbus_write_block_data - SMBus "block write" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @length: Size of data block; SMBus allows at most 32 bytes
+ * @values: Byte array which will be written.
+ *
+ * This executes the SMBus "block write" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command,
+			       u8 length, const u8 *values)
+{
+	union i2c_smbus_data data;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+	data.block[0] = length;
+	memcpy(&data.block[1], values, length);
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_BLOCK_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_block_data);
+
+/* Returns the number of read bytes */
+s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command,
+				  u8 length, u8 *values)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+	data.block[0] = length;
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_I2C_BLOCK_DATA, &data);
+	if (status < 0)
+		return status;
+
+	memcpy(values, &data.block[1], data.block[0]);
+	return data.block[0];
+}
+EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data);
+
+s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command,
+				   u8 length, const u8 *values)
+{
+	union i2c_smbus_data data;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+	data.block[0] = length;
+	memcpy(data.block + 1, values, length);
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_I2C_BLOCK_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_i2c_block_data);
+
+/* Simulate a SMBus command using the i2c protocol
+   No checking of parameters is done!  */
+static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
+				   unsigned short flags,
+				   char read_write, u8 command, int size,
+				   union i2c_smbus_data *data)
+{
+	/* So we need to generate a series of msgs. In the case of writing, we
+	  need to use only one message; when reading, we need two. We initialize
+	  most things with sane defaults, to keep the code below somewhat
+	  simpler. */
+	unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3];
+	unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2];
+	int num = read_write == I2C_SMBUS_READ ? 2 : 1;
+	int i;
+	u8 partial_pec = 0;
+	int status;
+	struct i2c_msg msg[2] = {
+		{
+			.addr = addr,
+			.flags = flags,
+			.len = 1,
+			.buf = msgbuf0,
+		}, {
+			.addr = addr,
+			.flags = flags | I2C_M_RD,
+			.len = 0,
+			.buf = msgbuf1,
+		},
+	};
+
+	msgbuf0[0] = command;
+	switch (size) {
+	case I2C_SMBUS_QUICK:
+		msg[0].len = 0;
+		/* Special case: The read/write field is used as data */
+		msg[0].flags = flags | (read_write == I2C_SMBUS_READ ?
+					I2C_M_RD : 0);
+		num = 1;
+		break;
+	case I2C_SMBUS_BYTE:
+		if (read_write == I2C_SMBUS_READ) {
+			/* Special case: only a read! */
+			msg[0].flags = I2C_M_RD | flags;
+			num = 1;
+		}
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		if (read_write == I2C_SMBUS_READ)
+			msg[1].len = 1;
+		else {
+			msg[0].len = 2;
+			msgbuf0[1] = data->byte;
+		}
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		if (read_write == I2C_SMBUS_READ)
+			msg[1].len = 2;
+		else {
+			msg[0].len = 3;
+			msgbuf0[1] = data->word & 0xff;
+			msgbuf0[2] = data->word >> 8;
+		}
+		break;
+	case I2C_SMBUS_PROC_CALL:
+		num = 2; /* Special case */
+		read_write = I2C_SMBUS_READ;
+		msg[0].len = 3;
+		msg[1].len = 2;
+		msgbuf0[1] = data->word & 0xff;
+		msgbuf0[2] = data->word >> 8;
+		break;
+	case I2C_SMBUS_BLOCK_DATA:
+		if (read_write == I2C_SMBUS_READ) {
+			msg[1].flags |= I2C_M_RECV_LEN;
+			msg[1].len = 1; /* block length will be added by
+					   the underlying bus driver */
+		} else {
+			msg[0].len = data->block[0] + 2;
+			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) {
+				dev_err(&adapter->dev,
+					"Invalid block write size %d\n",
+					data->block[0]);
+				return -EINVAL;
+			}
+			for (i = 1; i < msg[0].len; i++)
+				msgbuf0[i] = data->block[i-1];
+		}
+		break;
+	case I2C_SMBUS_BLOCK_PROC_CALL:
+		num = 2; /* Another special case */
+		read_write = I2C_SMBUS_READ;
+		if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
+			dev_err(&adapter->dev,
+				"Invalid block write size %d\n",
+				data->block[0]);
+			return -EINVAL;
+		}
+		msg[0].len = data->block[0] + 2;
+		for (i = 1; i < msg[0].len; i++)
+			msgbuf0[i] = data->block[i-1];
+		msg[1].flags |= I2C_M_RECV_LEN;
+		msg[1].len = 1; /* block length will be added by
+				   the underlying bus driver */
+		break;
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		if (read_write == I2C_SMBUS_READ) {
+			msg[1].len = data->block[0];
+		} else {
+			msg[0].len = data->block[0] + 1;
+			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) {
+				dev_err(&adapter->dev,
+					"Invalid block write size %d\n",
+					data->block[0]);
+				return -EINVAL;
+			}
+			for (i = 1; i <= data->block[0]; i++)
+				msgbuf0[i] = data->block[i];
+		}
+		break;
+	default:
+		dev_err(&adapter->dev, "Unsupported transaction %d\n", size);
+		return -EOPNOTSUPP;
+	}
+
+	i = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK
+				      && size != I2C_SMBUS_I2C_BLOCK_DATA);
+	if (i) {
+		/* Compute PEC if first message is a write */
+		if (!(msg[0].flags & I2C_M_RD)) {
+			if (num == 1) /* Write only */
+				i2c_smbus_add_pec(&msg[0]);
+			else /* Write followed by read */
+				partial_pec = i2c_smbus_msg_pec(0, &msg[0]);
+		}
+		/* Ask for PEC if last message is a read */
+		if (msg[num-1].flags & I2C_M_RD)
+			msg[num-1].len++;
+	}
+
+	status = i2c_transfer(adapter, msg, num);
+	if (status < 0)
+		return status;
+
+	/* Check PEC if last message is a read */
+	if (i && (msg[num-1].flags & I2C_M_RD)) {
+		status = i2c_smbus_check_pec(partial_pec, &msg[num-1]);
+		if (status < 0)
+			return status;
+	}
+
+	if (read_write == I2C_SMBUS_READ)
+		switch (size) {
+		case I2C_SMBUS_BYTE:
+			data->byte = msgbuf0[0];
+			break;
+		case I2C_SMBUS_BYTE_DATA:
+			data->byte = msgbuf1[0];
+			break;
+		case I2C_SMBUS_WORD_DATA:
+		case I2C_SMBUS_PROC_CALL:
+			data->word = msgbuf1[0] | (msgbuf1[1] << 8);
+			break;
+		case I2C_SMBUS_I2C_BLOCK_DATA:
+			for (i = 0; i < data->block[0]; i++)
+				data->block[i+1] = msgbuf1[i];
+			break;
+		case I2C_SMBUS_BLOCK_DATA:
+		case I2C_SMBUS_BLOCK_PROC_CALL:
+			for (i = 0; i < msgbuf1[0] + 1; i++)
+				data->block[i] = msgbuf1[i];
+			break;
+		}
+	return 0;
+}
+
+/**
+ * i2c_smbus_xfer - execute SMBus protocol operations
+ * @adapter: Handle to I2C bus
+ * @addr: Address of SMBus slave on that bus
+ * @flags: I2C_CLIENT_* flags (usually zero or I2C_CLIENT_PEC)
+ * @read_write: I2C_SMBUS_READ or I2C_SMBUS_WRITE
+ * @command: Byte interpreted by slave, for protocols which use such bytes
+ * @protocol: SMBus protocol operation to execute, such as I2C_SMBUS_PROC_CALL
+ * @data: Data to be read or written
+ *
+ * This executes an SMBus protocol operation, and returns a negative
+ * errno code else zero on success.
+ */
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
+		   char read_write, u8 command, int protocol,
+		   union i2c_smbus_data *data)
+{
+	unsigned long orig_jiffies;
+	int try;
+	s32 res;
+
+	/* If enabled, the following two tracepoints are conditional on
+	 * read_write and protocol.
+	 */
+	trace_smbus_write(adapter, addr, flags, read_write,
+			  command, protocol, data);
+	trace_smbus_read(adapter, addr, flags, read_write,
+			 command, protocol);
+
+	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
+
+	if (adapter->algo->smbus_xfer) {
+		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
+
+		/* Retry automatically on arbitration loss */
+		orig_jiffies = jiffies;
+		for (res = 0, try = 0; try <= adapter->retries; try++) {
+			res = adapter->algo->smbus_xfer(adapter, addr, flags,
+							read_write, command,
+							protocol, data);
+			if (res != -EAGAIN)
+				break;
+			if (time_after(jiffies,
+				       orig_jiffies + adapter->timeout))
+				break;
+		}
+		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
+
+		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
+			goto trace;
+		/*
+		 * Fall back to i2c_smbus_xfer_emulated if the adapter doesn't
+		 * implement native support for the SMBus operation.
+		 */
+	}
+
+	res = i2c_smbus_xfer_emulated(adapter, addr, flags, read_write,
+				      command, protocol, data);
+
+trace:
+	/* If enabled, the reply tracepoint is conditional on read_write. */
+	trace_smbus_reply(adapter, addr, flags, read_write,
+			  command, protocol, data);
+	trace_smbus_result(adapter, addr, flags, read_write,
+			   command, protocol, res);
+
+	return res;
+}
+EXPORT_SYMBOL(i2c_smbus_xfer);
+
+/**
+ * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @length: Size of data block; SMBus allows at most I2C_SMBUS_BLOCK_MAX bytes
+ * @values: Byte array into which data will be read; big enough to hold
+ *	the data returned by the slave.  SMBus allows at most
+ *	I2C_SMBUS_BLOCK_MAX bytes.
+ *
+ * This executes the SMBus "block read" protocol if supported by the adapter.
+ * If block read is not supported, it emulates it using either word or byte
+ * read protocols depending on availability.
+ *
+ * The addresses of the I2C slave device that are accessed with this function
+ * must be mapped to a linear region, so that a block read will have the same
+ * effect as a byte read. Before using this function you must double-check
+ * if the I2C slave does support exchanging a block transfer with a byte
+ * transfer.
+ */
+s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client,
+					      u8 command, u8 length, u8 *values)
+{
+	u8 i = 0;
+	int status;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
+		return i2c_smbus_read_i2c_block_data(client, command, length, values);
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA))
+		return -EOPNOTSUPP;
+
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)) {
+		while ((i + 2) <= length) {
+			status = i2c_smbus_read_word_data(client, command + i);
+			if (status < 0)
+				return status;
+			values[i] = status & 0xff;
+			values[i + 1] = status >> 8;
+			i += 2;
+		}
+	}
+
+	while (i < length) {
+		status = i2c_smbus_read_byte_data(client, command + i);
+		if (status < 0)
+			return status;
+		values[i] = status;
+		i++;
+	}
+
+	return i;
+}
+EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data_or_emulated);
diff --git a/include/trace/events/i2c.h b/include/trace/events/i2c.h
index 4abb8eab34d31..86a401190df9d 100644
--- a/include/trace/events/i2c.h
+++ b/include/trace/events/i2c.h
@@ -1,4 +1,4 @@
-/* I2C and SMBUS message transfer tracepoints
+/* I2C message transfer tracepoints
  *
  * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -18,7 +18,7 @@
 #include <linux/tracepoint.h>
 
 /*
- * drivers/i2c/i2c-core.c
+ * drivers/i2c/i2c-core-base.c
  */
 extern int i2c_transfer_trace_reg(void);
 extern void i2c_transfer_trace_unreg(void);
@@ -144,228 +144,6 @@ TRACE_EVENT_FN(i2c_result,
 	       i2c_transfer_trace_reg,
 	       i2c_transfer_trace_unreg);
 
-/*
- * i2c_smbus_xfer() write data or procedure call request
- */
-TRACE_EVENT_CONDITION(smbus_write,
-	TP_PROTO(const struct i2c_adapter *adap,
-		 u16 addr, unsigned short flags,
-		 char read_write, u8 command, int protocol,
-		 const union i2c_smbus_data *data),
-	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
-	TP_CONDITION(read_write == I2C_SMBUS_WRITE ||
-		     protocol == I2C_SMBUS_PROC_CALL ||
-		     protocol == I2C_SMBUS_BLOCK_PROC_CALL),
-	TP_STRUCT__entry(
-		__field(int,	adapter_nr		)
-		__field(__u16,	addr			)
-		__field(__u16,	flags			)
-		__field(__u8,	command			)
-		__field(__u8,	len			)
-		__field(__u32,	protocol		)
-		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
-	TP_fast_assign(
-		__entry->adapter_nr = adap->nr;
-		__entry->addr = addr;
-		__entry->flags = flags;
-		__entry->command = command;
-		__entry->protocol = protocol;
-
-		switch (protocol) {
-		case I2C_SMBUS_BYTE_DATA:
-			__entry->len = 1;
-			goto copy;
-		case I2C_SMBUS_WORD_DATA:
-		case I2C_SMBUS_PROC_CALL:
-			__entry->len = 2;
-			goto copy;
-		case I2C_SMBUS_BLOCK_DATA:
-		case I2C_SMBUS_BLOCK_PROC_CALL:
-		case I2C_SMBUS_I2C_BLOCK_DATA:
-			__entry->len = data->block[0] + 1;
-		copy:
-			memcpy(__entry->buf, data->block, __entry->len);
-			break;
-		case I2C_SMBUS_QUICK:
-		case I2C_SMBUS_BYTE:
-		case I2C_SMBUS_I2C_BLOCK_BROKEN:
-		default:
-			__entry->len = 0;
-		}
-		       ),
-	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
-		  __entry->adapter_nr,
-		  __entry->addr,
-		  __entry->flags,
-		  __entry->command,
-		  __print_symbolic(__entry->protocol,
-				   { I2C_SMBUS_QUICK,		"QUICK"	},
-				   { I2C_SMBUS_BYTE,		"BYTE"	},
-				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
-		  __entry->len,
-		  __entry->len, __entry->buf
-		  ));
-
-/*
- * i2c_smbus_xfer() read data request
- */
-TRACE_EVENT_CONDITION(smbus_read,
-	TP_PROTO(const struct i2c_adapter *adap,
-		 u16 addr, unsigned short flags,
-		 char read_write, u8 command, int protocol),
-	TP_ARGS(adap, addr, flags, read_write, command, protocol),
-	TP_CONDITION(!(read_write == I2C_SMBUS_WRITE ||
-		       protocol == I2C_SMBUS_PROC_CALL ||
-		       protocol == I2C_SMBUS_BLOCK_PROC_CALL)),
-	TP_STRUCT__entry(
-		__field(int,	adapter_nr		)
-		__field(__u16,	flags			)
-		__field(__u16,	addr			)
-		__field(__u8,	command			)
-		__field(__u32,	protocol		)
-		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
-	TP_fast_assign(
-		__entry->adapter_nr = adap->nr;
-		__entry->addr = addr;
-		__entry->flags = flags;
-		__entry->command = command;
-		__entry->protocol = protocol;
-		       ),
-	TP_printk("i2c-%d a=%03x f=%04x c=%x %s",
-		  __entry->adapter_nr,
-		  __entry->addr,
-		  __entry->flags,
-		  __entry->command,
-		  __print_symbolic(__entry->protocol,
-				   { I2C_SMBUS_QUICK,		"QUICK"	},
-				   { I2C_SMBUS_BYTE,		"BYTE"	},
-				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" })
-		  ));
-
-/*
- * i2c_smbus_xfer() read data or procedure call reply
- */
-TRACE_EVENT_CONDITION(smbus_reply,
-	TP_PROTO(const struct i2c_adapter *adap,
-		 u16 addr, unsigned short flags,
-		 char read_write, u8 command, int protocol,
-		 const union i2c_smbus_data *data),
-	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
-	TP_CONDITION(read_write == I2C_SMBUS_READ),
-	TP_STRUCT__entry(
-		__field(int,	adapter_nr		)
-		__field(__u16,	addr			)
-		__field(__u16,	flags			)
-		__field(__u8,	command			)
-		__field(__u8,	len			)
-		__field(__u32,	protocol		)
-		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
-	TP_fast_assign(
-		__entry->adapter_nr = adap->nr;
-		__entry->addr = addr;
-		__entry->flags = flags;
-		__entry->command = command;
-		__entry->protocol = protocol;
-
-		switch (protocol) {
-		case I2C_SMBUS_BYTE:
-		case I2C_SMBUS_BYTE_DATA:
-			__entry->len = 1;
-			goto copy;
-		case I2C_SMBUS_WORD_DATA:
-		case I2C_SMBUS_PROC_CALL:
-			__entry->len = 2;
-			goto copy;
-		case I2C_SMBUS_BLOCK_DATA:
-		case I2C_SMBUS_BLOCK_PROC_CALL:
-		case I2C_SMBUS_I2C_BLOCK_DATA:
-			__entry->len = data->block[0] + 1;
-		copy:
-			memcpy(__entry->buf, data->block, __entry->len);
-			break;
-		case I2C_SMBUS_QUICK:
-		case I2C_SMBUS_I2C_BLOCK_BROKEN:
-		default:
-			__entry->len = 0;
-		}
-		       ),
-	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
-		  __entry->adapter_nr,
-		  __entry->addr,
-		  __entry->flags,
-		  __entry->command,
-		  __print_symbolic(__entry->protocol,
-				   { I2C_SMBUS_QUICK,		"QUICK"	},
-				   { I2C_SMBUS_BYTE,		"BYTE"	},
-				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
-		  __entry->len,
-		  __entry->len, __entry->buf
-		  ));
-
-/*
- * i2c_smbus_xfer() result
- */
-TRACE_EVENT(smbus_result,
-	    TP_PROTO(const struct i2c_adapter *adap,
-		     u16 addr, unsigned short flags,
-		     char read_write, u8 command, int protocol,
-		     int res),
-	    TP_ARGS(adap, addr, flags, read_write, command, protocol, res),
-	    TP_STRUCT__entry(
-		    __field(int,	adapter_nr		)
-		    __field(__u16,	addr			)
-		    __field(__u16,	flags			)
-		    __field(__u8,	read_write		)
-		    __field(__u8,	command			)
-		    __field(__s16,	res			)
-		    __field(__u32,	protocol		)
-			     ),
-	    TP_fast_assign(
-		    __entry->adapter_nr = adap->nr;
-		    __entry->addr = addr;
-		    __entry->flags = flags;
-		    __entry->read_write = read_write;
-		    __entry->command = command;
-		    __entry->protocol = protocol;
-		    __entry->res = res;
-			   ),
-	    TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d",
-		      __entry->adapter_nr,
-		      __entry->addr,
-		      __entry->flags,
-		      __entry->command,
-		      __print_symbolic(__entry->protocol,
-				       { I2C_SMBUS_QUICK,		"QUICK"	},
-				       { I2C_SMBUS_BYTE,		"BYTE"	},
-				       { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				       { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				       { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				       { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				       { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				       { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				       { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
-		      __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd",
-		      __entry->res
-		      ));
-
 #endif /* _TRACE_I2C_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/smbus.h b/include/trace/events/smbus.h
new file mode 100644
index 0000000000000..d2fb6e1d3e10a
--- /dev/null
+++ b/include/trace/events/smbus.h
@@ -0,0 +1,249 @@
+/* SMBUS message transfer tracepoints
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM smbus
+
+#if !defined(_TRACE_SMBUS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SMBUS_H
+
+#include <linux/i2c.h>
+#include <linux/tracepoint.h>
+
+/*
+ * drivers/i2c/i2c-core-smbus.c
+ */
+
+/*
+ * i2c_smbus_xfer() write data or procedure call request
+ */
+TRACE_EVENT_CONDITION(smbus_write,
+	TP_PROTO(const struct i2c_adapter *adap,
+		 u16 addr, unsigned short flags,
+		 char read_write, u8 command, int protocol,
+		 const union i2c_smbus_data *data),
+	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
+	TP_CONDITION(read_write == I2C_SMBUS_WRITE ||
+		     protocol == I2C_SMBUS_PROC_CALL ||
+		     protocol == I2C_SMBUS_BLOCK_PROC_CALL),
+	TP_STRUCT__entry(
+		__field(int,	adapter_nr		)
+		__field(__u16,	addr			)
+		__field(__u16,	flags			)
+		__field(__u8,	command			)
+		__field(__u8,	len			)
+		__field(__u32,	protocol		)
+		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
+	TP_fast_assign(
+		__entry->adapter_nr = adap->nr;
+		__entry->addr = addr;
+		__entry->flags = flags;
+		__entry->command = command;
+		__entry->protocol = protocol;
+
+		switch (protocol) {
+		case I2C_SMBUS_BYTE_DATA:
+			__entry->len = 1;
+			goto copy;
+		case I2C_SMBUS_WORD_DATA:
+		case I2C_SMBUS_PROC_CALL:
+			__entry->len = 2;
+			goto copy;
+		case I2C_SMBUS_BLOCK_DATA:
+		case I2C_SMBUS_BLOCK_PROC_CALL:
+		case I2C_SMBUS_I2C_BLOCK_DATA:
+			__entry->len = data->block[0] + 1;
+		copy:
+			memcpy(__entry->buf, data->block, __entry->len);
+			break;
+		case I2C_SMBUS_QUICK:
+		case I2C_SMBUS_BYTE:
+		case I2C_SMBUS_I2C_BLOCK_BROKEN:
+		default:
+			__entry->len = 0;
+		}
+		       ),
+	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
+		  __entry->adapter_nr,
+		  __entry->addr,
+		  __entry->flags,
+		  __entry->command,
+		  __print_symbolic(__entry->protocol,
+				   { I2C_SMBUS_QUICK,		"QUICK"	},
+				   { I2C_SMBUS_BYTE,		"BYTE"	},
+				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
+		  __entry->len,
+		  __entry->len, __entry->buf
+		  ));
+
+/*
+ * i2c_smbus_xfer() read data request
+ */
+TRACE_EVENT_CONDITION(smbus_read,
+	TP_PROTO(const struct i2c_adapter *adap,
+		 u16 addr, unsigned short flags,
+		 char read_write, u8 command, int protocol),
+	TP_ARGS(adap, addr, flags, read_write, command, protocol),
+	TP_CONDITION(!(read_write == I2C_SMBUS_WRITE ||
+		       protocol == I2C_SMBUS_PROC_CALL ||
+		       protocol == I2C_SMBUS_BLOCK_PROC_CALL)),
+	TP_STRUCT__entry(
+		__field(int,	adapter_nr		)
+		__field(__u16,	flags			)
+		__field(__u16,	addr			)
+		__field(__u8,	command			)
+		__field(__u32,	protocol		)
+		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
+	TP_fast_assign(
+		__entry->adapter_nr = adap->nr;
+		__entry->addr = addr;
+		__entry->flags = flags;
+		__entry->command = command;
+		__entry->protocol = protocol;
+		       ),
+	TP_printk("i2c-%d a=%03x f=%04x c=%x %s",
+		  __entry->adapter_nr,
+		  __entry->addr,
+		  __entry->flags,
+		  __entry->command,
+		  __print_symbolic(__entry->protocol,
+				   { I2C_SMBUS_QUICK,		"QUICK"	},
+				   { I2C_SMBUS_BYTE,		"BYTE"	},
+				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" })
+		  ));
+
+/*
+ * i2c_smbus_xfer() read data or procedure call reply
+ */
+TRACE_EVENT_CONDITION(smbus_reply,
+	TP_PROTO(const struct i2c_adapter *adap,
+		 u16 addr, unsigned short flags,
+		 char read_write, u8 command, int protocol,
+		 const union i2c_smbus_data *data),
+	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
+	TP_CONDITION(read_write == I2C_SMBUS_READ),
+	TP_STRUCT__entry(
+		__field(int,	adapter_nr		)
+		__field(__u16,	addr			)
+		__field(__u16,	flags			)
+		__field(__u8,	command			)
+		__field(__u8,	len			)
+		__field(__u32,	protocol		)
+		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
+	TP_fast_assign(
+		__entry->adapter_nr = adap->nr;
+		__entry->addr = addr;
+		__entry->flags = flags;
+		__entry->command = command;
+		__entry->protocol = protocol;
+
+		switch (protocol) {
+		case I2C_SMBUS_BYTE:
+		case I2C_SMBUS_BYTE_DATA:
+			__entry->len = 1;
+			goto copy;
+		case I2C_SMBUS_WORD_DATA:
+		case I2C_SMBUS_PROC_CALL:
+			__entry->len = 2;
+			goto copy;
+		case I2C_SMBUS_BLOCK_DATA:
+		case I2C_SMBUS_BLOCK_PROC_CALL:
+		case I2C_SMBUS_I2C_BLOCK_DATA:
+			__entry->len = data->block[0] + 1;
+		copy:
+			memcpy(__entry->buf, data->block, __entry->len);
+			break;
+		case I2C_SMBUS_QUICK:
+		case I2C_SMBUS_I2C_BLOCK_BROKEN:
+		default:
+			__entry->len = 0;
+		}
+		       ),
+	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
+		  __entry->adapter_nr,
+		  __entry->addr,
+		  __entry->flags,
+		  __entry->command,
+		  __print_symbolic(__entry->protocol,
+				   { I2C_SMBUS_QUICK,		"QUICK"	},
+				   { I2C_SMBUS_BYTE,		"BYTE"	},
+				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
+		  __entry->len,
+		  __entry->len, __entry->buf
+		  ));
+
+/*
+ * i2c_smbus_xfer() result
+ */
+TRACE_EVENT(smbus_result,
+	    TP_PROTO(const struct i2c_adapter *adap,
+		     u16 addr, unsigned short flags,
+		     char read_write, u8 command, int protocol,
+		     int res),
+	    TP_ARGS(adap, addr, flags, read_write, command, protocol, res),
+	    TP_STRUCT__entry(
+		    __field(int,	adapter_nr		)
+		    __field(__u16,	addr			)
+		    __field(__u16,	flags			)
+		    __field(__u8,	read_write		)
+		    __field(__u8,	command			)
+		    __field(__s16,	res			)
+		    __field(__u32,	protocol		)
+			     ),
+	    TP_fast_assign(
+		    __entry->adapter_nr = adap->nr;
+		    __entry->addr = addr;
+		    __entry->flags = flags;
+		    __entry->read_write = read_write;
+		    __entry->command = command;
+		    __entry->protocol = protocol;
+		    __entry->res = res;
+			   ),
+	    TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d",
+		      __entry->adapter_nr,
+		      __entry->addr,
+		      __entry->flags,
+		      __entry->command,
+		      __print_symbolic(__entry->protocol,
+				       { I2C_SMBUS_QUICK,		"QUICK"	},
+				       { I2C_SMBUS_BYTE,		"BYTE"	},
+				       { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				       { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				       { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				       { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				       { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				       { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				       { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
+		      __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd",
+		      __entry->res
+		      ));
+
+#endif /* _TRACE_SMBUS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
GitLab


From 5bf4fa7daea6d5257357b613d0bb81c68e2d1af2 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 23 May 2017 11:50:58 +0200
Subject: [PATCH 0134/1958] i2c: break out OF support into separate file

Also removes some ifdeffery.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/Makefile        |   1 +
 drivers/i2c/i2c-core-base.c | 265 +---------------------------------
 drivers/i2c/i2c-core-of.c   | 276 ++++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-core.h      |   8 ++
 4 files changed, 286 insertions(+), 264 deletions(-)
 create mode 100644 drivers/i2c/i2c-core-of.c

diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index a6a90fe2db887..189e0e6476f0a 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2c-core.o
 i2c-core-objs 			:= i2c-core-base.o i2c-core-smbus.o
 i2c-core-$(CONFIG_I2C_SLAVE) 	+= i2c-core-slave.o
+i2c-core-$(CONFIG_OF) 		+= i2c-core-of.o
 
 obj-$(CONFIG_I2C_SMBUS)		+= i2c-smbus.o
 obj-$(CONFIG_I2C_CHARDEV)	+= i2c-dev.o
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 70fc4624c69c2..461451da10650 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -16,9 +16,6 @@
 /* With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi>.
    Mux support by Rodolfo Giometti <giometti@enneenne.com> and
    Michael Lawnick <michael.lawnick.ext@nsn.com>
-   OF support is copyright (c) 2008 Jochen Friedrich <jochen@scram.de>
-   (based on a previous patch from Jon Smirl <jonsmirl@gmail.com>) and
-   (c) 2013  Wolfram Sang <wsa@the-dreams.de>
    I2C ACPI code Copyright (C) 2014 Intel Corp
    Author: Lan Tianyu <tianyu.lan@intel.com>
  */
@@ -1191,7 +1188,7 @@ static unsigned short i2c_encode_flags_to_addr(struct i2c_client *client)
 
 /* This is a permissive address validity check, I2C address map constraints
  * are purposely not enforced, except for the general call address. */
-static int i2c_check_addr_validity(unsigned addr, unsigned short flags)
+int i2c_check_addr_validity(unsigned addr, unsigned short flags)
 {
 	if (flags & I2C_CLIENT_TEN) {
 		/* 10-bit address, all values are valid */
@@ -1760,210 +1757,6 @@ static void i2c_scan_static_board_info(struct i2c_adapter *adapter)
 	up_read(&__i2c_board_lock);
 }
 
-/* OF support code */
-
-#if IS_ENABLED(CONFIG_OF)
-static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
-						 struct device_node *node)
-{
-	struct i2c_client *result;
-	struct i2c_board_info info = {};
-	struct dev_archdata dev_ad = {};
-	const __be32 *addr_be;
-	u32 addr;
-	int len;
-
-	dev_dbg(&adap->dev, "of_i2c: register %s\n", node->full_name);
-
-	if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) {
-		dev_err(&adap->dev, "of_i2c: modalias failure on %s\n",
-			node->full_name);
-		return ERR_PTR(-EINVAL);
-	}
-
-	addr_be = of_get_property(node, "reg", &len);
-	if (!addr_be || (len < sizeof(*addr_be))) {
-		dev_err(&adap->dev, "of_i2c: invalid reg on %s\n",
-			node->full_name);
-		return ERR_PTR(-EINVAL);
-	}
-
-	addr = be32_to_cpup(addr_be);
-	if (addr & I2C_TEN_BIT_ADDRESS) {
-		addr &= ~I2C_TEN_BIT_ADDRESS;
-		info.flags |= I2C_CLIENT_TEN;
-	}
-
-	if (addr & I2C_OWN_SLAVE_ADDRESS) {
-		addr &= ~I2C_OWN_SLAVE_ADDRESS;
-		info.flags |= I2C_CLIENT_SLAVE;
-	}
-
-	if (i2c_check_addr_validity(addr, info.flags)) {
-		dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n",
-			addr, node->full_name);
-		return ERR_PTR(-EINVAL);
-	}
-
-	info.addr = addr;
-	info.of_node = of_node_get(node);
-	info.archdata = &dev_ad;
-
-	if (of_property_read_bool(node, "host-notify"))
-		info.flags |= I2C_CLIENT_HOST_NOTIFY;
-
-	if (of_get_property(node, "wakeup-source", NULL))
-		info.flags |= I2C_CLIENT_WAKE;
-
-	result = i2c_new_device(adap, &info);
-	if (result == NULL) {
-		dev_err(&adap->dev, "of_i2c: Failure registering %s\n",
-			node->full_name);
-		of_node_put(node);
-		return ERR_PTR(-EINVAL);
-	}
-	return result;
-}
-
-static void of_i2c_register_devices(struct i2c_adapter *adap)
-{
-	struct device_node *bus, *node;
-	struct i2c_client *client;
-
-	/* Only register child devices if the adapter has a node pointer set */
-	if (!adap->dev.of_node)
-		return;
-
-	dev_dbg(&adap->dev, "of_i2c: walking child nodes\n");
-
-	bus = of_get_child_by_name(adap->dev.of_node, "i2c-bus");
-	if (!bus)
-		bus = of_node_get(adap->dev.of_node);
-
-	for_each_available_child_of_node(bus, node) {
-		if (of_node_test_and_set_flag(node, OF_POPULATED))
-			continue;
-
-		client = of_i2c_register_device(adap, node);
-		if (IS_ERR(client)) {
-			dev_warn(&adap->dev,
-				 "Failed to create I2C device for %s\n",
-				 node->full_name);
-			of_node_clear_flag(node, OF_POPULATED);
-		}
-	}
-
-	of_node_put(bus);
-}
-
-static int of_dev_node_match(struct device *dev, void *data)
-{
-	return dev->of_node == data;
-}
-
-/* must call put_device() when done with returned i2c_client device */
-struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
-{
-	struct device *dev;
-	struct i2c_client *client;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
-	if (!dev)
-		return NULL;
-
-	client = i2c_verify_client(dev);
-	if (!client)
-		put_device(dev);
-
-	return client;
-}
-EXPORT_SYMBOL(of_find_i2c_device_by_node);
-
-/* must call put_device() when done with returned i2c_adapter device */
-struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
-{
-	struct device *dev;
-	struct i2c_adapter *adapter;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
-	if (!dev)
-		return NULL;
-
-	adapter = i2c_verify_adapter(dev);
-	if (!adapter)
-		put_device(dev);
-
-	return adapter;
-}
-EXPORT_SYMBOL(of_find_i2c_adapter_by_node);
-
-/* must call i2c_put_adapter() when done with returned i2c_adapter device */
-struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node)
-{
-	struct i2c_adapter *adapter;
-
-	adapter = of_find_i2c_adapter_by_node(node);
-	if (!adapter)
-		return NULL;
-
-	if (!try_module_get(adapter->owner)) {
-		put_device(&adapter->dev);
-		adapter = NULL;
-	}
-
-	return adapter;
-}
-EXPORT_SYMBOL(of_get_i2c_adapter_by_node);
-
-static const struct of_device_id*
-i2c_of_match_device_sysfs(const struct of_device_id *matches,
-				  struct i2c_client *client)
-{
-	const char *name;
-
-	for (; matches->compatible[0]; matches++) {
-		/*
-		 * Adding devices through the i2c sysfs interface provides us
-		 * a string to match which may be compatible with the device
-		 * tree compatible strings, however with no actual of_node the
-		 * of_match_device() will not match
-		 */
-		if (sysfs_streq(client->name, matches->compatible))
-			return matches;
-
-		name = strchr(matches->compatible, ',');
-		if (!name)
-			name = matches->compatible;
-		else
-			name++;
-
-		if (sysfs_streq(client->name, name))
-			return matches;
-	}
-
-	return NULL;
-}
-
-const struct of_device_id
-*i2c_of_match_device(const struct of_device_id *matches,
-		     struct i2c_client *client)
-{
-	const struct of_device_id *match;
-
-	if (!(client && matches))
-		return NULL;
-
-	match = of_match_device(matches, &client->dev);
-	if (match)
-		return match;
-
-	return i2c_of_match_device_sysfs(matches, client);
-}
-EXPORT_SYMBOL_GPL(i2c_of_match_device);
-#else
-static void of_i2c_register_devices(struct i2c_adapter *adap) { }
-#endif /* CONFIG_OF */
-
 static int i2c_do_add_adapter(struct i2c_driver *driver,
 			      struct i2c_adapter *adap)
 {
@@ -2558,62 +2351,6 @@ void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg)
 }
 EXPORT_SYMBOL(i2c_clients_command);
 
-#if IS_ENABLED(CONFIG_OF_DYNAMIC)
-static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
-			 void *arg)
-{
-	struct of_reconfig_data *rd = arg;
-	struct i2c_adapter *adap;
-	struct i2c_client *client;
-
-	switch (of_reconfig_get_state_change(action, rd)) {
-	case OF_RECONFIG_CHANGE_ADD:
-		adap = of_find_i2c_adapter_by_node(rd->dn->parent);
-		if (adap == NULL)
-			return NOTIFY_OK;	/* not for us */
-
-		if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) {
-			put_device(&adap->dev);
-			return NOTIFY_OK;
-		}
-
-		client = of_i2c_register_device(adap, rd->dn);
-		put_device(&adap->dev);
-
-		if (IS_ERR(client)) {
-			dev_err(&adap->dev, "failed to create client for '%s'\n",
-				 rd->dn->full_name);
-			of_node_clear_flag(rd->dn, OF_POPULATED);
-			return notifier_from_errno(PTR_ERR(client));
-		}
-		break;
-	case OF_RECONFIG_CHANGE_REMOVE:
-		/* already depopulated? */
-		if (!of_node_check_flag(rd->dn, OF_POPULATED))
-			return NOTIFY_OK;
-
-		/* find our device by node */
-		client = of_find_i2c_device_by_node(rd->dn);
-		if (client == NULL)
-			return NOTIFY_OK;	/* no? not meant for us */
-
-		/* unregister takes one ref away */
-		i2c_unregister_device(client);
-
-		/* and put the reference of the find */
-		put_device(&client->dev);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-static struct notifier_block i2c_of_notifier = {
-	.notifier_call = of_i2c_notify,
-};
-#else
-extern struct notifier_block i2c_of_notifier;
-#endif /* CONFIG_OF_DYNAMIC */
-
 static int __init i2c_init(void)
 {
 	int retval;
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
new file mode 100644
index 0000000000000..ccf82fdbcd8e7
--- /dev/null
+++ b/drivers/i2c/i2c-core-of.c
@@ -0,0 +1,276 @@
+/*
+ * Linux I2C core OF support code
+ *
+ * Copyright (C) 2008 Jochen Friedrich <jochen@scram.de>
+ * based on a previous patch from Jon Smirl <jonsmirl@gmail.com>
+ *
+ * Copyright (C) 2013 Wolfram Sang <wsa@the-dreams.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <dt-bindings/i2c/i2c.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "i2c-core.h"
+
+static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
+						 struct device_node *node)
+{
+	struct i2c_client *result;
+	struct i2c_board_info info = {};
+	struct dev_archdata dev_ad = {};
+	const __be32 *addr_be;
+	u32 addr;
+	int len;
+
+	dev_dbg(&adap->dev, "of_i2c: register %s\n", node->full_name);
+
+	if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) {
+		dev_err(&adap->dev, "of_i2c: modalias failure on %s\n",
+			node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	addr_be = of_get_property(node, "reg", &len);
+	if (!addr_be || (len < sizeof(*addr_be))) {
+		dev_err(&adap->dev, "of_i2c: invalid reg on %s\n",
+			node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	addr = be32_to_cpup(addr_be);
+	if (addr & I2C_TEN_BIT_ADDRESS) {
+		addr &= ~I2C_TEN_BIT_ADDRESS;
+		info.flags |= I2C_CLIENT_TEN;
+	}
+
+	if (addr & I2C_OWN_SLAVE_ADDRESS) {
+		addr &= ~I2C_OWN_SLAVE_ADDRESS;
+		info.flags |= I2C_CLIENT_SLAVE;
+	}
+
+	if (i2c_check_addr_validity(addr, info.flags)) {
+		dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n",
+			addr, node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	info.addr = addr;
+	info.of_node = of_node_get(node);
+	info.archdata = &dev_ad;
+
+	if (of_property_read_bool(node, "host-notify"))
+		info.flags |= I2C_CLIENT_HOST_NOTIFY;
+
+	if (of_get_property(node, "wakeup-source", NULL))
+		info.flags |= I2C_CLIENT_WAKE;
+
+	result = i2c_new_device(adap, &info);
+	if (result == NULL) {
+		dev_err(&adap->dev, "of_i2c: Failure registering %s\n",
+			node->full_name);
+		of_node_put(node);
+		return ERR_PTR(-EINVAL);
+	}
+	return result;
+}
+
+void of_i2c_register_devices(struct i2c_adapter *adap)
+{
+	struct device_node *bus, *node;
+	struct i2c_client *client;
+
+	/* Only register child devices if the adapter has a node pointer set */
+	if (!adap->dev.of_node)
+		return;
+
+	dev_dbg(&adap->dev, "of_i2c: walking child nodes\n");
+
+	bus = of_get_child_by_name(adap->dev.of_node, "i2c-bus");
+	if (!bus)
+		bus = of_node_get(adap->dev.of_node);
+
+	for_each_available_child_of_node(bus, node) {
+		if (of_node_test_and_set_flag(node, OF_POPULATED))
+			continue;
+
+		client = of_i2c_register_device(adap, node);
+		if (IS_ERR(client)) {
+			dev_warn(&adap->dev,
+				 "Failed to create I2C device for %s\n",
+				 node->full_name);
+			of_node_clear_flag(node, OF_POPULATED);
+		}
+	}
+
+	of_node_put(bus);
+}
+
+static int of_dev_node_match(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+/* must call put_device() when done with returned i2c_client device */
+struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
+{
+	struct device *dev;
+	struct i2c_client *client;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
+	if (!dev)
+		return NULL;
+
+	client = i2c_verify_client(dev);
+	if (!client)
+		put_device(dev);
+
+	return client;
+}
+EXPORT_SYMBOL(of_find_i2c_device_by_node);
+
+/* must call put_device() when done with returned i2c_adapter device */
+struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
+{
+	struct device *dev;
+	struct i2c_adapter *adapter;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
+	if (!dev)
+		return NULL;
+
+	adapter = i2c_verify_adapter(dev);
+	if (!adapter)
+		put_device(dev);
+
+	return adapter;
+}
+EXPORT_SYMBOL(of_find_i2c_adapter_by_node);
+
+/* must call i2c_put_adapter() when done with returned i2c_adapter device */
+struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node)
+{
+	struct i2c_adapter *adapter;
+
+	adapter = of_find_i2c_adapter_by_node(node);
+	if (!adapter)
+		return NULL;
+
+	if (!try_module_get(adapter->owner)) {
+		put_device(&adapter->dev);
+		adapter = NULL;
+	}
+
+	return adapter;
+}
+EXPORT_SYMBOL(of_get_i2c_adapter_by_node);
+
+static const struct of_device_id*
+i2c_of_match_device_sysfs(const struct of_device_id *matches,
+				  struct i2c_client *client)
+{
+	const char *name;
+
+	for (; matches->compatible[0]; matches++) {
+		/*
+		 * Adding devices through the i2c sysfs interface provides us
+		 * a string to match which may be compatible with the device
+		 * tree compatible strings, however with no actual of_node the
+		 * of_match_device() will not match
+		 */
+		if (sysfs_streq(client->name, matches->compatible))
+			return matches;
+
+		name = strchr(matches->compatible, ',');
+		if (!name)
+			name = matches->compatible;
+		else
+			name++;
+
+		if (sysfs_streq(client->name, name))
+			return matches;
+	}
+
+	return NULL;
+}
+
+const struct of_device_id
+*i2c_of_match_device(const struct of_device_id *matches,
+		     struct i2c_client *client)
+{
+	const struct of_device_id *match;
+
+	if (!(client && matches))
+		return NULL;
+
+	match = of_match_device(matches, &client->dev);
+	if (match)
+		return match;
+
+	return i2c_of_match_device_sysfs(matches, client);
+}
+EXPORT_SYMBOL_GPL(i2c_of_match_device);
+
+#if IS_ENABLED(CONFIG_OF_DYNAMIC)
+static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
+			 void *arg)
+{
+	struct of_reconfig_data *rd = arg;
+	struct i2c_adapter *adap;
+	struct i2c_client *client;
+
+	switch (of_reconfig_get_state_change(action, rd)) {
+	case OF_RECONFIG_CHANGE_ADD:
+		adap = of_find_i2c_adapter_by_node(rd->dn->parent);
+		if (adap == NULL)
+			return NOTIFY_OK;	/* not for us */
+
+		if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) {
+			put_device(&adap->dev);
+			return NOTIFY_OK;
+		}
+
+		client = of_i2c_register_device(adap, rd->dn);
+		put_device(&adap->dev);
+
+		if (IS_ERR(client)) {
+			dev_err(&adap->dev, "failed to create client for '%s'\n",
+				 rd->dn->full_name);
+			of_node_clear_flag(rd->dn, OF_POPULATED);
+			return notifier_from_errno(PTR_ERR(client));
+		}
+		break;
+	case OF_RECONFIG_CHANGE_REMOVE:
+		/* already depopulated? */
+		if (!of_node_check_flag(rd->dn, OF_POPULATED))
+			return NOTIFY_OK;
+
+		/* find our device by node */
+		client = of_find_i2c_device_by_node(rd->dn);
+		if (client == NULL)
+			return NOTIFY_OK;	/* no? not meant for us */
+
+		/* unregister takes one ref away */
+		i2c_unregister_device(client);
+
+		/* and put the reference of the find */
+		put_device(&client->dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+struct notifier_block i2c_of_notifier = {
+	.notifier_call = of_i2c_notify,
+};
+#endif /* CONFIG_OF_DYNAMIC */
diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h
index 77c22b03ff951..22151c88e8859 100644
--- a/drivers/i2c/i2c-core.h
+++ b/drivers/i2c/i2c-core.h
@@ -27,4 +27,12 @@ extern struct rw_semaphore	__i2c_board_lock;
 extern struct list_head	__i2c_board_list;
 extern int		__i2c_first_dynamic_bus_num;
 
+int i2c_check_addr_validity(unsigned addr, unsigned short flags);
 int i2c_check_7bit_addr_validity_strict(unsigned short addr);
+
+#ifdef CONFIG_OF
+void of_i2c_register_devices(struct i2c_adapter *adap);
+#else
+static inline void of_i2c_register_devices(struct i2c_adapter *adap) { }
+#endif
+extern struct notifier_block i2c_of_notifier;
-- 
GitLab


From 53f8f7c5cf145d639ebd6d13cfdf2e3e9764add3 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 23 May 2017 16:22:23 +0200
Subject: [PATCH 0135/1958] i2c: break out ACPI support into separate file

Removes some ifdeffery. Also add the new file to the relevant
MAINTAINERS section.

Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS                 |   1 +
 drivers/i2c/Makefile        |   1 +
 drivers/i2c/i2c-core-acpi.c | 653 ++++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-core-base.c | 648 -----------------------------------
 drivers/i2c/i2c-core.h      |  15 +
 5 files changed, 670 insertions(+), 648 deletions(-)
 create mode 100644 drivers/i2c/i2c-core-acpi.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 053c3bdd1fe51..34b0324d53c5c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6273,6 +6273,7 @@ M:	Mika Westerberg <mika.westerberg@linux.intel.com>
 L:	linux-i2c@vger.kernel.org
 L:	linux-acpi@vger.kernel.org
 S:	Maintained
+F:	drivers/i2c/i2c-core-acpi.c
 
 I2C-TAOS-EVM DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index 189e0e6476f0a..7bb65a4369e1e 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -5,6 +5,7 @@
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2c-core.o
 i2c-core-objs 			:= i2c-core-base.o i2c-core-smbus.o
+i2c-core-$(CONFIG_ACPI)		+= i2c-core-acpi.o
 i2c-core-$(CONFIG_I2C_SLAVE) 	+= i2c-core-slave.o
 i2c-core-$(CONFIG_OF) 		+= i2c-core-of.o
 
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
new file mode 100644
index 0000000000000..052005579ed62
--- /dev/null
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -0,0 +1,653 @@
+/*
+ * Linux I2C core ACPI support code
+ *
+ * Copyright (C) 2014 Intel Corp, Author: Lan Tianyu <tianyu.lan@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "i2c-core.h"
+
+struct i2c_acpi_handler_data {
+	struct acpi_connection_info info;
+	struct i2c_adapter *adapter;
+};
+
+struct gsb_buffer {
+	u8	status;
+	u8	len;
+	union {
+		u16	wdata;
+		u8	bdata;
+		u8	data[0];
+	};
+} __packed;
+
+struct i2c_acpi_lookup {
+	struct i2c_board_info *info;
+	acpi_handle adapter_handle;
+	acpi_handle device_handle;
+	acpi_handle search_handle;
+	int n;
+	int index;
+	u32 speed;
+	u32 min_speed;
+};
+
+static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data)
+{
+	struct i2c_acpi_lookup *lookup = data;
+	struct i2c_board_info *info = lookup->info;
+	struct acpi_resource_i2c_serialbus *sb;
+	acpi_status status;
+
+	if (info->addr || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+		return 1;
+
+	sb = &ares->data.i2c_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C)
+		return 1;
+
+	if (lookup->index != -1 && lookup->n++ != lookup->index)
+		return 1;
+
+	status = acpi_get_handle(lookup->device_handle,
+				 sb->resource_source.string_ptr,
+				 &lookup->adapter_handle);
+	if (!ACPI_SUCCESS(status))
+		return 1;
+
+	info->addr = sb->slave_address;
+	lookup->speed = sb->connection_speed;
+	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+		info->flags |= I2C_CLIENT_TEN;
+
+	return 1;
+}
+
+static int i2c_acpi_do_lookup(struct acpi_device *adev,
+			      struct i2c_acpi_lookup *lookup)
+{
+	struct i2c_board_info *info = lookup->info;
+	struct list_head resource_list;
+	int ret;
+
+	if (acpi_bus_get_status(adev) || !adev->status.present ||
+	    acpi_device_enumerated(adev))
+		return -EINVAL;
+
+	memset(info, 0, sizeof(*info));
+	lookup->device_handle = acpi_device_handle(adev);
+
+	/* Look up for I2cSerialBus resource */
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     i2c_acpi_fill_info, lookup);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info->addr)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int i2c_acpi_get_info(struct acpi_device *adev,
+			     struct i2c_board_info *info,
+			     struct i2c_adapter *adapter,
+			     acpi_handle *adapter_handle)
+{
+	struct list_head resource_list;
+	struct resource_entry *entry;
+	struct i2c_acpi_lookup lookup;
+	int ret;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.info = info;
+	lookup.index = -1;
+
+	ret = i2c_acpi_do_lookup(adev, &lookup);
+	if (ret)
+		return ret;
+
+	if (adapter) {
+		/* The adapter must match the one in I2cSerialBus() connector */
+		if (ACPI_HANDLE(&adapter->dev) != lookup.adapter_handle)
+			return -ENODEV;
+	} else {
+		struct acpi_device *adapter_adev;
+
+		/* The adapter must be present */
+		if (acpi_bus_get_device(lookup.adapter_handle, &adapter_adev))
+			return -ENODEV;
+		if (acpi_bus_get_status(adapter_adev) ||
+		    !adapter_adev->status.present)
+			return -ENODEV;
+	}
+
+	info->fwnode = acpi_fwnode_handle(adev);
+	if (adapter_handle)
+		*adapter_handle = lookup.adapter_handle;
+
+	/* Then fill IRQ number if any */
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (ret < 0)
+		return -EINVAL;
+
+	resource_list_for_each_entry(entry, &resource_list) {
+		if (resource_type(entry->res) == IORESOURCE_IRQ) {
+			info->irq = entry->res->start;
+			break;
+		}
+	}
+
+	acpi_dev_free_resource_list(&resource_list);
+
+	acpi_set_modalias(adev, dev_name(&adev->dev), info->type,
+			  sizeof(info->type));
+
+	return 0;
+}
+
+static void i2c_acpi_register_device(struct i2c_adapter *adapter,
+				     struct acpi_device *adev,
+				     struct i2c_board_info *info)
+{
+	adev->power.flags.ignore_parent = true;
+	acpi_device_set_enumerated(adev);
+
+	if (!i2c_new_device(adapter, info)) {
+		adev->power.flags.ignore_parent = false;
+		dev_err(&adapter->dev,
+			"failed to add I2C device %s from ACPI\n",
+			dev_name(&adev->dev));
+	}
+}
+
+static acpi_status i2c_acpi_add_device(acpi_handle handle, u32 level,
+				       void *data, void **return_value)
+{
+	struct i2c_adapter *adapter = data;
+	struct acpi_device *adev;
+	struct i2c_board_info info;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+
+	if (i2c_acpi_get_info(adev, &info, adapter, NULL))
+		return AE_OK;
+
+	i2c_acpi_register_device(adapter, adev, &info);
+
+	return AE_OK;
+}
+
+#define I2C_ACPI_MAX_SCAN_DEPTH 32
+
+/**
+ * i2c_acpi_register_devices - enumerate I2C slave devices behind adapter
+ * @adap: pointer to adapter
+ *
+ * Enumerate all I2C slave devices behind this adapter by walking the ACPI
+ * namespace. When a device is found it will be added to the Linux device
+ * model and bound to the corresponding ACPI handle.
+ */
+void i2c_acpi_register_devices(struct i2c_adapter *adap)
+{
+	acpi_status status;
+
+	if (!has_acpi_companion(&adap->dev))
+		return;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+				     I2C_ACPI_MAX_SCAN_DEPTH,
+				     i2c_acpi_add_device, NULL,
+				     adap, NULL);
+	if (ACPI_FAILURE(status))
+		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
+}
+
+static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
+					   void *data, void **return_value)
+{
+	struct i2c_acpi_lookup *lookup = data;
+	struct acpi_device *adev;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+
+	if (i2c_acpi_do_lookup(adev, lookup))
+		return AE_OK;
+
+	if (lookup->search_handle != lookup->adapter_handle)
+		return AE_OK;
+
+	if (lookup->speed <= lookup->min_speed)
+		lookup->min_speed = lookup->speed;
+
+	return AE_OK;
+}
+
+/**
+ * i2c_acpi_find_bus_speed - find I2C bus speed from ACPI
+ * @dev: The device owning the bus
+ *
+ * Find the I2C bus speed by walking the ACPI namespace for all I2C slaves
+ * devices connected to this bus and use the speed of slowest device.
+ *
+ * Returns the speed in Hz or zero
+ */
+u32 i2c_acpi_find_bus_speed(struct device *dev)
+{
+	struct i2c_acpi_lookup lookup;
+	struct i2c_board_info dummy;
+	acpi_status status;
+
+	if (!has_acpi_companion(dev))
+		return 0;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.search_handle = ACPI_HANDLE(dev);
+	lookup.min_speed = UINT_MAX;
+	lookup.info = &dummy;
+	lookup.index = -1;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+				     I2C_ACPI_MAX_SCAN_DEPTH,
+				     i2c_acpi_lookup_speed, NULL,
+				     &lookup, NULL);
+
+	if (ACPI_FAILURE(status)) {
+		dev_warn(dev, "unable to find I2C bus speed from ACPI\n");
+		return 0;
+	}
+
+	return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0;
+}
+EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed);
+
+static int i2c_acpi_match_adapter(struct device *dev, void *data)
+{
+	struct i2c_adapter *adapter = i2c_verify_adapter(dev);
+
+	if (!adapter)
+		return 0;
+
+	return ACPI_HANDLE(dev) == (acpi_handle)data;
+}
+
+static int i2c_acpi_match_device(struct device *dev, void *data)
+{
+	return ACPI_COMPANION(dev) == data;
+}
+
+static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
+{
+	struct device *dev;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, handle,
+			      i2c_acpi_match_adapter);
+	return dev ? i2c_verify_adapter(dev) : NULL;
+}
+
+static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
+{
+	struct device *dev;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_match_device);
+	return dev ? i2c_verify_client(dev) : NULL;
+}
+
+static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
+			   void *arg)
+{
+	struct acpi_device *adev = arg;
+	struct i2c_board_info info;
+	acpi_handle adapter_handle;
+	struct i2c_adapter *adapter;
+	struct i2c_client *client;
+
+	switch (value) {
+	case ACPI_RECONFIG_DEVICE_ADD:
+		if (i2c_acpi_get_info(adev, &info, NULL, &adapter_handle))
+			break;
+
+		adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
+		if (!adapter)
+			break;
+
+		i2c_acpi_register_device(adapter, adev, &info);
+		break;
+	case ACPI_RECONFIG_DEVICE_REMOVE:
+		if (!acpi_device_enumerated(adev))
+			break;
+
+		client = i2c_acpi_find_client_by_adev(adev);
+		if (!client)
+			break;
+
+		i2c_unregister_device(client);
+		put_device(&client->dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+struct notifier_block i2c_acpi_notifier = {
+	.notifier_call = i2c_acpi_notify,
+};
+
+/**
+ * i2c_acpi_new_device - Create i2c-client for the Nth I2cSerialBus resource
+ * @dev:     Device owning the ACPI resources to get the client from
+ * @index:   Index of ACPI resource to get
+ * @info:    describes the I2C device; note this is modified (addr gets set)
+ * Context: can sleep
+ *
+ * By default the i2c subsys creates an i2c-client for the first I2cSerialBus
+ * resource of an acpi_device, but some acpi_devices have multiple I2cSerialBus
+ * resources, in that case this function can be used to create an i2c-client
+ * for other I2cSerialBus resources in the Current Resource Settings table.
+ *
+ * Also see i2c_new_device, which this function calls to create the i2c-client.
+ *
+ * Returns a pointer to the new i2c-client, or NULL if the adapter is not found.
+ */
+struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
+				       struct i2c_board_info *info)
+{
+	struct i2c_acpi_lookup lookup;
+	struct i2c_adapter *adapter;
+	struct acpi_device *adev;
+	LIST_HEAD(resource_list);
+	int ret;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return NULL;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.info = info;
+	lookup.device_handle = acpi_device_handle(adev);
+	lookup.index = index;
+
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     i2c_acpi_fill_info, &lookup);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info->addr)
+		return NULL;
+
+	adapter = i2c_acpi_find_adapter_by_handle(lookup.adapter_handle);
+	if (!adapter)
+		return NULL;
+
+	return i2c_new_device(adapter, info);
+}
+EXPORT_SYMBOL_GPL(i2c_acpi_new_device);
+
+#ifdef CONFIG_ACPI_I2C_OPREGION
+static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[2];
+	int ret;
+	u8 *buffer;
+
+	buffer = kzalloc(data_len, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = 1;
+	msgs[0].buf = &cmd;
+
+	msgs[1].addr = client->addr;
+	msgs[1].flags = client->flags | I2C_M_RD;
+	msgs[1].len = data_len;
+	msgs[1].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c read failed\n");
+	else
+		memcpy(data, buffer, data_len);
+
+	kfree(buffer);
+	return ret;
+}
+
+static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[1];
+	u8 *buffer;
+	int ret = AE_OK;
+
+	buffer = kzalloc(data_len + 1, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	buffer[0] = cmd;
+	memcpy(buffer + 1, data, data_len);
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = data_len + 1;
+	msgs[0].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c write failed\n");
+
+	kfree(buffer);
+	return ret;
+}
+
+static acpi_status
+i2c_acpi_space_handler(u32 function, acpi_physical_address command,
+			u32 bits, u64 *value64,
+			void *handler_context, void *region_context)
+{
+	struct gsb_buffer *gsb = (struct gsb_buffer *)value64;
+	struct i2c_acpi_handler_data *data = handler_context;
+	struct acpi_connection_info *info = &data->info;
+	struct acpi_resource_i2c_serialbus *sb;
+	struct i2c_adapter *adapter = data->adapter;
+	struct i2c_client *client;
+	struct acpi_resource *ares;
+	u32 accessor_type = function >> 16;
+	u8 action = function & ACPI_IO_MASK;
+	acpi_status ret;
+	int status;
+
+	ret = acpi_buffer_to_resource(info->connection, info->length, &ares);
+	if (ACPI_FAILURE(ret))
+		return ret;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client) {
+		ret = AE_NO_MEMORY;
+		goto err;
+	}
+
+	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	sb = &ares->data.i2c_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	client->adapter = adapter;
+	client->addr = sb->slave_address;
+
+	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+		client->flags |= I2C_CLIENT_TEN;
+
+	switch (accessor_type) {
+	case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte(client);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte(client, gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BYTE:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte_data(client, command);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte_data(client, command,
+					gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_WORD:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_word_data(client, command);
+			if (status >= 0) {
+				gsb->wdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_word_data(client, command,
+					gsb->wdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BLOCK:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_block_data(client, command,
+					gsb->data);
+			if (status >= 0) {
+				gsb->len = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_block_data(client, command,
+					gsb->len, gsb->data);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE:
+		if (action == ACPI_READ) {
+			status = acpi_gsb_i2c_read_bytes(client, command,
+					gsb->data, info->access_length);
+			if (status > 0)
+				status = 0;
+		} else {
+			status = acpi_gsb_i2c_write_bytes(client, command,
+					gsb->data, info->access_length);
+		}
+		break;
+
+	default:
+		dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n",
+			 accessor_type, client->addr);
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	gsb->status = status;
+
+ err:
+	kfree(client);
+	ACPI_FREE(ares);
+	return ret;
+}
+
+
+int i2c_acpi_install_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle;
+	struct i2c_acpi_handler_data *data;
+	acpi_status status;
+
+	if (!adapter->dev.parent)
+		return -ENODEV;
+
+	handle = ACPI_HANDLE(adapter->dev.parent);
+
+	if (!handle)
+		return -ENODEV;
+
+	data = kzalloc(sizeof(struct i2c_acpi_handler_data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->adapter = adapter;
+	status = acpi_bus_attach_private_data(handle, (void *)data);
+	if (ACPI_FAILURE(status)) {
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	status = acpi_install_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&i2c_acpi_space_handler,
+				NULL,
+				data);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&adapter->dev, "Error installing i2c space handler\n");
+		acpi_bus_detach_private_data(handle);
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	acpi_walk_dep_device_list(handle);
+	return 0;
+}
+
+void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle;
+	struct i2c_acpi_handler_data *data;
+	acpi_status status;
+
+	if (!adapter->dev.parent)
+		return;
+
+	handle = ACPI_HANDLE(adapter->dev.parent);
+
+	if (!handle)
+		return;
+
+	acpi_remove_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&i2c_acpi_space_handler);
+
+	status = acpi_bus_get_private_data(handle, (void **)&data);
+	if (ACPI_SUCCESS(status))
+		kfree(data);
+
+	acpi_bus_detach_private_data(handle);
+}
+#endif /* CONFIG_ACPI_I2C_OPREGION */
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 461451da10650..ac7b95e4cda75 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -16,8 +16,6 @@
 /* With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi>.
    Mux support by Rodolfo Giometti <giometti@enneenne.com> and
    Michael Lawnick <michael.lawnick.ext@nsn.com>
-   I2C ACPI code Copyright (C) 2014 Intel Corp
-   Author: Lan Tianyu <tianyu.lan@intel.com>
  */
 
 #define pr_fmt(fmt) "i2c-core: " fmt
@@ -83,652 +81,6 @@ void i2c_transfer_trace_unreg(void)
 	static_key_slow_dec(&i2c_trace_msg);
 }
 
-#if defined(CONFIG_ACPI)
-struct i2c_acpi_handler_data {
-	struct acpi_connection_info info;
-	struct i2c_adapter *adapter;
-};
-
-struct gsb_buffer {
-	u8	status;
-	u8	len;
-	union {
-		u16	wdata;
-		u8	bdata;
-		u8	data[0];
-	};
-} __packed;
-
-struct i2c_acpi_lookup {
-	struct i2c_board_info *info;
-	acpi_handle adapter_handle;
-	acpi_handle device_handle;
-	acpi_handle search_handle;
-	int n;
-	int index;
-	u32 speed;
-	u32 min_speed;
-};
-
-static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data)
-{
-	struct i2c_acpi_lookup *lookup = data;
-	struct i2c_board_info *info = lookup->info;
-	struct acpi_resource_i2c_serialbus *sb;
-	acpi_status status;
-
-	if (info->addr || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
-		return 1;
-
-	sb = &ares->data.i2c_serial_bus;
-	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C)
-		return 1;
-
-	if (lookup->index != -1 && lookup->n++ != lookup->index)
-		return 1;
-
-	status = acpi_get_handle(lookup->device_handle,
-				 sb->resource_source.string_ptr,
-				 &lookup->adapter_handle);
-	if (!ACPI_SUCCESS(status))
-		return 1;
-
-	info->addr = sb->slave_address;
-	lookup->speed = sb->connection_speed;
-	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
-		info->flags |= I2C_CLIENT_TEN;
-
-	return 1;
-}
-
-static int i2c_acpi_do_lookup(struct acpi_device *adev,
-			      struct i2c_acpi_lookup *lookup)
-{
-	struct i2c_board_info *info = lookup->info;
-	struct list_head resource_list;
-	int ret;
-
-	if (acpi_bus_get_status(adev) || !adev->status.present ||
-	    acpi_device_enumerated(adev))
-		return -EINVAL;
-
-	memset(info, 0, sizeof(*info));
-	lookup->device_handle = acpi_device_handle(adev);
-
-	/* Look up for I2cSerialBus resource */
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     i2c_acpi_fill_info, lookup);
-	acpi_dev_free_resource_list(&resource_list);
-
-	if (ret < 0 || !info->addr)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int i2c_acpi_get_info(struct acpi_device *adev,
-			     struct i2c_board_info *info,
-			     struct i2c_adapter *adapter,
-			     acpi_handle *adapter_handle)
-{
-	struct list_head resource_list;
-	struct resource_entry *entry;
-	struct i2c_acpi_lookup lookup;
-	int ret;
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.info = info;
-	lookup.index = -1;
-
-	ret = i2c_acpi_do_lookup(adev, &lookup);
-	if (ret)
-		return ret;
-
-	if (adapter) {
-		/* The adapter must match the one in I2cSerialBus() connector */
-		if (ACPI_HANDLE(&adapter->dev) != lookup.adapter_handle)
-			return -ENODEV;
-	} else {
-		struct acpi_device *adapter_adev;
-
-		/* The adapter must be present */
-		if (acpi_bus_get_device(lookup.adapter_handle, &adapter_adev))
-			return -ENODEV;
-		if (acpi_bus_get_status(adapter_adev) ||
-		    !adapter_adev->status.present)
-			return -ENODEV;
-	}
-
-	info->fwnode = acpi_fwnode_handle(adev);
-	if (adapter_handle)
-		*adapter_handle = lookup.adapter_handle;
-
-	/* Then fill IRQ number if any */
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
-	if (ret < 0)
-		return -EINVAL;
-
-	resource_list_for_each_entry(entry, &resource_list) {
-		if (resource_type(entry->res) == IORESOURCE_IRQ) {
-			info->irq = entry->res->start;
-			break;
-		}
-	}
-
-	acpi_dev_free_resource_list(&resource_list);
-
-	acpi_set_modalias(adev, dev_name(&adev->dev), info->type,
-			  sizeof(info->type));
-
-	return 0;
-}
-
-static void i2c_acpi_register_device(struct i2c_adapter *adapter,
-				     struct acpi_device *adev,
-				     struct i2c_board_info *info)
-{
-	adev->power.flags.ignore_parent = true;
-	acpi_device_set_enumerated(adev);
-
-	if (!i2c_new_device(adapter, info)) {
-		adev->power.flags.ignore_parent = false;
-		dev_err(&adapter->dev,
-			"failed to add I2C device %s from ACPI\n",
-			dev_name(&adev->dev));
-	}
-}
-
-static acpi_status i2c_acpi_add_device(acpi_handle handle, u32 level,
-				       void *data, void **return_value)
-{
-	struct i2c_adapter *adapter = data;
-	struct acpi_device *adev;
-	struct i2c_board_info info;
-
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-
-	if (i2c_acpi_get_info(adev, &info, adapter, NULL))
-		return AE_OK;
-
-	i2c_acpi_register_device(adapter, adev, &info);
-
-	return AE_OK;
-}
-
-#define I2C_ACPI_MAX_SCAN_DEPTH 32
-
-/**
- * i2c_acpi_register_devices - enumerate I2C slave devices behind adapter
- * @adap: pointer to adapter
- *
- * Enumerate all I2C slave devices behind this adapter by walking the ACPI
- * namespace. When a device is found it will be added to the Linux device
- * model and bound to the corresponding ACPI handle.
- */
-static void i2c_acpi_register_devices(struct i2c_adapter *adap)
-{
-	acpi_status status;
-
-	if (!has_acpi_companion(&adap->dev))
-		return;
-
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-				     I2C_ACPI_MAX_SCAN_DEPTH,
-				     i2c_acpi_add_device, NULL,
-				     adap, NULL);
-	if (ACPI_FAILURE(status))
-		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
-}
-
-static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
-					   void *data, void **return_value)
-{
-	struct i2c_acpi_lookup *lookup = data;
-	struct acpi_device *adev;
-
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-
-	if (i2c_acpi_do_lookup(adev, lookup))
-		return AE_OK;
-
-	if (lookup->search_handle != lookup->adapter_handle)
-		return AE_OK;
-
-	if (lookup->speed <= lookup->min_speed)
-		lookup->min_speed = lookup->speed;
-
-	return AE_OK;
-}
-
-/**
- * i2c_acpi_find_bus_speed - find I2C bus speed from ACPI
- * @dev: The device owning the bus
- *
- * Find the I2C bus speed by walking the ACPI namespace for all I2C slaves
- * devices connected to this bus and use the speed of slowest device.
- *
- * Returns the speed in Hz or zero
- */
-u32 i2c_acpi_find_bus_speed(struct device *dev)
-{
-	struct i2c_acpi_lookup lookup;
-	struct i2c_board_info dummy;
-	acpi_status status;
-
-	if (!has_acpi_companion(dev))
-		return 0;
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.search_handle = ACPI_HANDLE(dev);
-	lookup.min_speed = UINT_MAX;
-	lookup.info = &dummy;
-	lookup.index = -1;
-
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-				     I2C_ACPI_MAX_SCAN_DEPTH,
-				     i2c_acpi_lookup_speed, NULL,
-				     &lookup, NULL);
-
-	if (ACPI_FAILURE(status)) {
-		dev_warn(dev, "unable to find I2C bus speed from ACPI\n");
-		return 0;
-	}
-
-	return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0;
-}
-EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed);
-
-static int i2c_acpi_match_adapter(struct device *dev, void *data)
-{
-	struct i2c_adapter *adapter = i2c_verify_adapter(dev);
-
-	if (!adapter)
-		return 0;
-
-	return ACPI_HANDLE(dev) == (acpi_handle)data;
-}
-
-static int i2c_acpi_match_device(struct device *dev, void *data)
-{
-	return ACPI_COMPANION(dev) == data;
-}
-
-static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
-{
-	struct device *dev;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, handle,
-			      i2c_acpi_match_adapter);
-	return dev ? i2c_verify_adapter(dev) : NULL;
-}
-
-static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
-{
-	struct device *dev;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_match_device);
-	return dev ? i2c_verify_client(dev) : NULL;
-}
-
-static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
-			   void *arg)
-{
-	struct acpi_device *adev = arg;
-	struct i2c_board_info info;
-	acpi_handle adapter_handle;
-	struct i2c_adapter *adapter;
-	struct i2c_client *client;
-
-	switch (value) {
-	case ACPI_RECONFIG_DEVICE_ADD:
-		if (i2c_acpi_get_info(adev, &info, NULL, &adapter_handle))
-			break;
-
-		adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
-		if (!adapter)
-			break;
-
-		i2c_acpi_register_device(adapter, adev, &info);
-		break;
-	case ACPI_RECONFIG_DEVICE_REMOVE:
-		if (!acpi_device_enumerated(adev))
-			break;
-
-		client = i2c_acpi_find_client_by_adev(adev);
-		if (!client)
-			break;
-
-		i2c_unregister_device(client);
-		put_device(&client->dev);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block i2c_acpi_notifier = {
-	.notifier_call = i2c_acpi_notify,
-};
-
-/**
- * i2c_acpi_new_device - Create i2c-client for the Nth I2cSerialBus resource
- * @dev:     Device owning the ACPI resources to get the client from
- * @index:   Index of ACPI resource to get
- * @info:    describes the I2C device; note this is modified (addr gets set)
- * Context: can sleep
- *
- * By default the i2c subsys creates an i2c-client for the first I2cSerialBus
- * resource of an acpi_device, but some acpi_devices have multiple I2cSerialBus
- * resources, in that case this function can be used to create an i2c-client
- * for other I2cSerialBus resources in the Current Resource Settings table.
- *
- * Also see i2c_new_device, which this function calls to create the i2c-client.
- *
- * Returns a pointer to the new i2c-client, or NULL if the adapter is not found.
- */
-struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
-				       struct i2c_board_info *info)
-{
-	struct i2c_acpi_lookup lookup;
-	struct i2c_adapter *adapter;
-	struct acpi_device *adev;
-	LIST_HEAD(resource_list);
-	int ret;
-
-	adev = ACPI_COMPANION(dev);
-	if (!adev)
-		return NULL;
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.info = info;
-	lookup.device_handle = acpi_device_handle(adev);
-	lookup.index = index;
-
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     i2c_acpi_fill_info, &lookup);
-	acpi_dev_free_resource_list(&resource_list);
-
-	if (ret < 0 || !info->addr)
-		return NULL;
-
-	adapter = i2c_acpi_find_adapter_by_handle(lookup.adapter_handle);
-	if (!adapter)
-		return NULL;
-
-	return i2c_new_device(adapter, info);
-}
-EXPORT_SYMBOL_GPL(i2c_acpi_new_device);
-#else /* CONFIG_ACPI */
-static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { }
-extern struct notifier_block i2c_acpi_notifier;
-#endif /* CONFIG_ACPI */
-
-#ifdef CONFIG_ACPI_I2C_OPREGION
-static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
-		u8 cmd, u8 *data, u8 data_len)
-{
-
-	struct i2c_msg msgs[2];
-	int ret;
-	u8 *buffer;
-
-	buffer = kzalloc(data_len, GFP_KERNEL);
-	if (!buffer)
-		return AE_NO_MEMORY;
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = client->flags;
-	msgs[0].len = 1;
-	msgs[0].buf = &cmd;
-
-	msgs[1].addr = client->addr;
-	msgs[1].flags = client->flags | I2C_M_RD;
-	msgs[1].len = data_len;
-	msgs[1].buf = buffer;
-
-	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-	if (ret < 0)
-		dev_err(&client->adapter->dev, "i2c read failed\n");
-	else
-		memcpy(data, buffer, data_len);
-
-	kfree(buffer);
-	return ret;
-}
-
-static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
-		u8 cmd, u8 *data, u8 data_len)
-{
-
-	struct i2c_msg msgs[1];
-	u8 *buffer;
-	int ret = AE_OK;
-
-	buffer = kzalloc(data_len + 1, GFP_KERNEL);
-	if (!buffer)
-		return AE_NO_MEMORY;
-
-	buffer[0] = cmd;
-	memcpy(buffer + 1, data, data_len);
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = client->flags;
-	msgs[0].len = data_len + 1;
-	msgs[0].buf = buffer;
-
-	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-	if (ret < 0)
-		dev_err(&client->adapter->dev, "i2c write failed\n");
-
-	kfree(buffer);
-	return ret;
-}
-
-static acpi_status
-i2c_acpi_space_handler(u32 function, acpi_physical_address command,
-			u32 bits, u64 *value64,
-			void *handler_context, void *region_context)
-{
-	struct gsb_buffer *gsb = (struct gsb_buffer *)value64;
-	struct i2c_acpi_handler_data *data = handler_context;
-	struct acpi_connection_info *info = &data->info;
-	struct acpi_resource_i2c_serialbus *sb;
-	struct i2c_adapter *adapter = data->adapter;
-	struct i2c_client *client;
-	struct acpi_resource *ares;
-	u32 accessor_type = function >> 16;
-	u8 action = function & ACPI_IO_MASK;
-	acpi_status ret;
-	int status;
-
-	ret = acpi_buffer_to_resource(info->connection, info->length, &ares);
-	if (ACPI_FAILURE(ret))
-		return ret;
-
-	client = kzalloc(sizeof(*client), GFP_KERNEL);
-	if (!client) {
-		ret = AE_NO_MEMORY;
-		goto err;
-	}
-
-	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	sb = &ares->data.i2c_serial_bus;
-	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	client->adapter = adapter;
-	client->addr = sb->slave_address;
-
-	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
-		client->flags |= I2C_CLIENT_TEN;
-
-	switch (accessor_type) {
-	case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_byte(client);
-			if (status >= 0) {
-				gsb->bdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_byte(client, gsb->bdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_BYTE:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_byte_data(client, command);
-			if (status >= 0) {
-				gsb->bdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_byte_data(client, command,
-					gsb->bdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_WORD:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_word_data(client, command);
-			if (status >= 0) {
-				gsb->wdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_word_data(client, command,
-					gsb->wdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_BLOCK:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_block_data(client, command,
-					gsb->data);
-			if (status >= 0) {
-				gsb->len = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_block_data(client, command,
-					gsb->len, gsb->data);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE:
-		if (action == ACPI_READ) {
-			status = acpi_gsb_i2c_read_bytes(client, command,
-					gsb->data, info->access_length);
-			if (status > 0)
-				status = 0;
-		} else {
-			status = acpi_gsb_i2c_write_bytes(client, command,
-					gsb->data, info->access_length);
-		}
-		break;
-
-	default:
-		dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n",
-			 accessor_type, client->addr);
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	gsb->status = status;
-
- err:
-	kfree(client);
-	ACPI_FREE(ares);
-	return ret;
-}
-
-
-static int i2c_acpi_install_space_handler(struct i2c_adapter *adapter)
-{
-	acpi_handle handle;
-	struct i2c_acpi_handler_data *data;
-	acpi_status status;
-
-	if (!adapter->dev.parent)
-		return -ENODEV;
-
-	handle = ACPI_HANDLE(adapter->dev.parent);
-
-	if (!handle)
-		return -ENODEV;
-
-	data = kzalloc(sizeof(struct i2c_acpi_handler_data),
-			    GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->adapter = adapter;
-	status = acpi_bus_attach_private_data(handle, (void *)data);
-	if (ACPI_FAILURE(status)) {
-		kfree(data);
-		return -ENOMEM;
-	}
-
-	status = acpi_install_address_space_handler(handle,
-				ACPI_ADR_SPACE_GSBUS,
-				&i2c_acpi_space_handler,
-				NULL,
-				data);
-	if (ACPI_FAILURE(status)) {
-		dev_err(&adapter->dev, "Error installing i2c space handler\n");
-		acpi_bus_detach_private_data(handle);
-		kfree(data);
-		return -ENOMEM;
-	}
-
-	acpi_walk_dep_device_list(handle);
-	return 0;
-}
-
-static void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter)
-{
-	acpi_handle handle;
-	struct i2c_acpi_handler_data *data;
-	acpi_status status;
-
-	if (!adapter->dev.parent)
-		return;
-
-	handle = ACPI_HANDLE(adapter->dev.parent);
-
-	if (!handle)
-		return;
-
-	acpi_remove_address_space_handler(handle,
-				ACPI_ADR_SPACE_GSBUS,
-				&i2c_acpi_space_handler);
-
-	status = acpi_bus_get_private_data(handle, (void **)&data);
-	if (ACPI_SUCCESS(status))
-		kfree(data);
-
-	acpi_bus_detach_private_data(handle);
-}
-#else /* CONFIG_ACPI_I2C_OPREGION */
-static inline void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter)
-{ }
-
-static inline int i2c_acpi_install_space_handler(struct i2c_adapter *adapter)
-{ return 0; }
-#endif /* CONFIG_ACPI_I2C_OPREGION */
-
-/* ------------------------------------------------------------------------- */
-
 const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
 						const struct i2c_client *client)
 {
diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h
index 22151c88e8859..3b63f5e5b89cb 100644
--- a/drivers/i2c/i2c-core.h
+++ b/drivers/i2c/i2c-core.h
@@ -30,6 +30,21 @@ extern int		__i2c_first_dynamic_bus_num;
 int i2c_check_addr_validity(unsigned addr, unsigned short flags);
 int i2c_check_7bit_addr_validity_strict(unsigned short addr);
 
+#ifdef CONFIG_ACPI
+void i2c_acpi_register_devices(struct i2c_adapter *adap);
+#else /* CONFIG_ACPI */
+static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { }
+#endif /* CONFIG_ACPI */
+extern struct notifier_block i2c_acpi_notifier;
+
+#ifdef CONFIG_ACPI_I2C_OPREGION
+int i2c_acpi_install_space_handler(struct i2c_adapter *adapter);
+void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter);
+#else /* CONFIG_ACPI_I2C_OPREGION */
+static inline int i2c_acpi_install_space_handler(struct i2c_adapter *adapter) { return 0; }
+static inline void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter) { }
+#endif /* CONFIG_ACPI_I2C_OPREGION */
+
 #ifdef CONFIG_OF
 void of_i2c_register_devices(struct i2c_adapter *adap);
 #else
-- 
GitLab


From 984b292333ea873cfa1eb5ae17344b40a43e26c3 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 25 May 2017 22:55:42 +0200
Subject: [PATCH 0136/1958] docs: i2c: dev-interface: adapt to new filenames of
 the i2c core

The I2C core files were renamed, adapt the textfile to it.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/dev-interface | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface
index bcf919d8625ce..5ff19447ac442 100644
--- a/Documentation/i2c/dev-interface
+++ b/Documentation/i2c/dev-interface
@@ -191,7 +191,7 @@ checking on future transactions.)
 4* Other ioctl() calls are converted to in-kernel function calls by
 i2c-dev. Examples include I2C_FUNCS, which queries the I2C adapter
 functionality using i2c.h:i2c_get_functionality(), and I2C_SMBUS, which
-performs an SMBus transaction using i2c-core.c:i2c_smbus_xfer().
+performs an SMBus transaction using i2c-core-smbus.c:i2c_smbus_xfer().
 
 The i2c-dev driver is responsible for checking all the parameters that
 come from user-space for validity. After this point, there is no
@@ -200,13 +200,13 @@ and calls that would have been performed by kernel I2C chip drivers
 directly. This means that I2C bus drivers don't need to implement
 anything special to support access from user-space.
 
-5* These i2c-core.c/i2c.h functions are wrappers to the actual
-implementation of your I2C bus driver. Each adapter must declare
-callback functions implementing these standard calls.
-i2c.h:i2c_get_functionality() calls i2c_adapter.algo->functionality(),
-while i2c-core.c:i2c_smbus_xfer() calls either
+5* These i2c.h functions are wrappers to the actual implementation of
+your I2C bus driver. Each adapter must declare callback functions
+implementing these standard calls. i2c.h:i2c_get_functionality() calls
+i2c_adapter.algo->functionality(), while
+i2c-core-smbus.c:i2c_smbus_xfer() calls either
 adapter.algo->smbus_xfer() if it is implemented, or if not,
-i2c-core.c:i2c_smbus_xfer_emulated() which in turn calls
+i2c-core-smbus.c:i2c_smbus_xfer_emulated() which in turn calls
 i2c_adapter.algo->master_xfer().
 
 After your I2C bus driver has processed these requests, execution runs
-- 
GitLab


From 16210d0692d69512d77d0610cef0eaa0d783d1ba Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 23 May 2017 22:22:34 +0200
Subject: [PATCH 0137/1958] i2c: remove unneeded includes from core

They seem like cruft to me. I couldn't find any evidence that something
included from there is actually used.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index ac7b95e4cda75..78135c1deaab5 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -21,7 +21,6 @@
 #define pr_fmt(fmt) "i2c-core: " fmt
 
 #include <dt-bindings/i2c/i2c.h>
-#include <linux/uaccess.h>
 #include <linux/acpi.h>
 #include <linux/clk/clk-conf.h>
 #include <linux/completion.h>
@@ -29,7 +28,6 @@
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
-#include <linux/hardirq.h>
 #include <linux/i2c.h>
 #include <linux/idr.h>
 #include <linux/init.h>
-- 
GitLab


From 61e3d0f79d6eff1928222ec940b7982b99904857 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 23 May 2017 19:23:11 +0200
Subject: [PATCH 0138/1958] i2c: reformat core-base file header

Finally, apply modern comment rules to the file header. The old style
looked very non-Linuxish and challenged my eyes for some time now. I
also added my own copyright for the period of me being the maintainer.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 43 +++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 78135c1deaab5..c89dac7fd2e7b 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1,21 +1,21 @@
-/* i2c-core.c - a device driver for the iic-bus interface		     */
-/* ------------------------------------------------------------------------- */
-/*   Copyright (C) 1995-99 Simon G. Vogl
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.			     */
-/* ------------------------------------------------------------------------- */
-
-/* With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi>.
-   Mux support by Rodolfo Giometti <giometti@enneenne.com> and
-   Michael Lawnick <michael.lawnick.ext@nsn.com>
+/*
+ * Linux I2C core
+ *
+ * Copyright (C) 1995-99 Simon G. Vogl
+ *   With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi>
+ *   Mux support by Rodolfo Giometti <giometti@enneenne.com> and
+ *   Michael Lawnick <michael.lawnick.ext@nsn.com>
+ *
+ * Copyright (C) 2013-2017 Wolfram Sang <wsa@the-dreams.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  */
 
 #define pr_fmt(fmt) "i2c-core: " fmt
@@ -57,9 +57,10 @@
 #define I2C_ADDR_7BITS_MAX	0x77
 #define I2C_ADDR_7BITS_COUNT	(I2C_ADDR_7BITS_MAX + 1)
 
-/* core_lock protects i2c_adapter_idr, and guarantees
-   that device detection, deletion of detected devices, and attach_adapter
-   calls are serialized */
+/*
+ * core_lock protects i2c_adapter_idr, and guarantees that device detection,
+ * deletion of detected devices, and attach_adapter calls are serialized
+ */
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
 
-- 
GitLab


From 2d7b56378d32b0cf006f8944cbba4046df45dd25 Mon Sep 17 00:00:00 2001
From: Caesar Wang <wxt@rock-chips.com>
Date: Wed, 31 May 2017 10:14:23 +0800
Subject: [PATCH 0139/1958] drm/rockchip: gem: add the lacks lock and trivial
 changes

As the allocation and free buffer that need to add mutex lock for drm mm,
but it lacks the locking on error path in rockchip_gem_iommu_map().
Also, the trivial changes like The comment should be  placed in the
kerneldoc and unused blank line.

Signed-off-by: Caesar Wang <wxt@rock-chips.com>
Reviewed-by: Mark Yao <mark.yao@rock-chips.com>
Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1496196863-25738-1-git-send-email-wxt@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 2 +-
 drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index a48fcce3f5f65..4a7a3107d8740 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -45,13 +45,13 @@ struct rockchip_crtc_state {
  *
  * @crtc: array of enabled CRTCs, used to map from "pipe" to drm_crtc.
  * @num_pipe: number of pipes for this device.
+ * @mm_lock: protect drm_mm on multi-threads.
  */
 struct rockchip_drm_private {
 	struct drm_fb_helper fbdev_helper;
 	struct drm_gem_object *fbdev_bo;
 	struct drm_atomic_state *state;
 	struct iommu_domain *domain;
-	/* protect drm_mm on multi-threads */
 	struct mutex mm_lock;
 	struct drm_mm mm;
 	struct list_head psr_list;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index df9e57064f198..b74ac717e56a5 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -29,12 +29,11 @@ static int rockchip_gem_iommu_map(struct rockchip_gem_object *rk_obj)
 	ssize_t ret;
 
 	mutex_lock(&private->mm_lock);
-
 	ret = drm_mm_insert_node_generic(&private->mm, &rk_obj->mm,
 					 rk_obj->base.size, PAGE_SIZE,
 					 0, 0);
-
 	mutex_unlock(&private->mm_lock);
+
 	if (ret < 0) {
 		DRM_ERROR("out of I/O virtual memory: %zd\n", ret);
 		return ret;
@@ -56,7 +55,9 @@ static int rockchip_gem_iommu_map(struct rockchip_gem_object *rk_obj)
 	return 0;
 
 err_remove_node:
+	mutex_lock(&private->mm_lock);
 	drm_mm_remove_node(&rk_obj->mm);
+	mutex_unlock(&private->mm_lock);
 
 	return ret;
 }
-- 
GitLab


From 9748e1d87573c94191442d6bd0307f523e5cd8b8 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:45 +0200
Subject: [PATCH 0140/1958] mtd: nand: add support for Micron on-die ECC

Now that the core NAND subsystem has support for on-die ECC, this commit
brings the necessary code to support on-die ECC on Micron NANDs.

In micron_nand_init(), we detect if the Micron NAND chip supports on-die
ECC mode, by checking a number of conditions:

 - It must be an ONFI NAND
 - It must be a SLC NAND

 - Enabling *and* disabling on-die ECC must work

 - The on-die ECC must be correcting 4 bits per 512 bytes of data. Some
   Micron NAND chips have an on-die ECC able to correct 8 bits per 512
   bytes of data, but they work slightly differently and therefore we
   don't support them in this patch.

Then, if the on-die ECC cannot be disabled (some Micron NAND have on-die
ECC forcefully enabled), we bail out, as we don't support such
NANDs. Indeed, the implementation of raw_read()/raw_write() make the
assumption that on-die ECC can be disabled. Support for Micron NANDs
with on-die ECC forcefully enabled can easily be added, but in the
absence of such HW for testing, we preferred to simply bail out.

If the on-die ECC is supported, and requested in the Device Tree, then
it is indeed enabled, by using custom implementations of the
->read_page(), ->read_page_raw(), ->write_page() and ->write_page_raw()
operation to properly handle the on-die ECC.

In the non-raw functions, we need to enable the internal ECC engine
before issuing the NAND_CMD_READ0 or NAND_CMD_SEQIN commands, which is
why we set the NAND_ECC_CUSTOM_PAGE_ACCESS option at initialization
time (it asks the NAND core to let the NAND driver issue those
commands).

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_micron.c | 216 +++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h       |   2 +
 2 files changed, 218 insertions(+)

diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c
index 8770110692519..9993f8ead1e2f 100644
--- a/drivers/mtd/nand/nand_micron.c
+++ b/drivers/mtd/nand/nand_micron.c
@@ -17,6 +17,12 @@
 
 #include <linux/mtd/nand.h>
 
+/*
+ * Special Micron status bit that indicates when the block has been
+ * corrected by on-die ECC and should be rewritten
+ */
+#define NAND_STATUS_WRITE_RECOMMENDED	BIT(3)
+
 struct nand_onfi_vendor_micron {
 	u8 two_plane_read;
 	u8 read_cache;
@@ -66,9 +72,191 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
 	return 0;
 }
 
+static int micron_nand_on_die_ooblayout_ecc(struct mtd_info *mtd, int section,
+					    struct mtd_oob_region *oobregion)
+{
+	if (section >= 4)
+		return -ERANGE;
+
+	oobregion->offset = (section * 16) + 8;
+	oobregion->length = 8;
+
+	return 0;
+}
+
+static int micron_nand_on_die_ooblayout_free(struct mtd_info *mtd, int section,
+					     struct mtd_oob_region *oobregion)
+{
+	if (section >= 4)
+		return -ERANGE;
+
+	oobregion->offset = (section * 16) + 2;
+	oobregion->length = 6;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops micron_nand_on_die_ooblayout_ops = {
+	.ecc = micron_nand_on_die_ooblayout_ecc,
+	.free = micron_nand_on_die_ooblayout_free,
+};
+
+static int micron_nand_on_die_ecc_setup(struct nand_chip *chip, bool enable)
+{
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
+
+	if (enable)
+		feature[0] |= ONFI_FEATURE_ON_DIE_ECC_EN;
+
+	return chip->onfi_set_features(nand_to_mtd(chip), chip,
+				       ONFI_FEATURE_ON_DIE_ECC, feature);
+}
+
+static int
+micron_nand_read_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
+				 uint8_t *buf, int oob_required,
+				 int page)
+{
+	int status;
+	int max_bitflips = 0;
+
+	micron_nand_on_die_ecc_setup(chip, true);
+
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+	status = chip->read_byte(mtd);
+	if (status & NAND_STATUS_FAIL)
+		mtd->ecc_stats.failed++;
+	/*
+	 * The internal ECC doesn't tell us the number of bitflips
+	 * that have been corrected, but tells us if it recommends to
+	 * rewrite the block. If it's the case, then we pretend we had
+	 * a number of bitflips equal to the ECC strength, which will
+	 * hint the NAND core to rewrite the block.
+	 */
+	else if (status & NAND_STATUS_WRITE_RECOMMENDED)
+		max_bitflips = chip->ecc.strength;
+
+	chip->cmdfunc(mtd, NAND_CMD_READ0, -1, -1);
+
+	nand_read_page_raw(mtd, chip, buf, oob_required, page);
+
+	micron_nand_on_die_ecc_setup(chip, false);
+
+	return max_bitflips;
+}
+
+static int
+micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
+				  const uint8_t *buf, int oob_required,
+				  int page)
+{
+	micron_nand_on_die_ecc_setup(chip, true);
+
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	nand_write_page_raw(mtd, chip, buf, oob_required, page);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	micron_nand_on_die_ecc_setup(chip, false);
+
+	return 0;
+}
+
+static int
+micron_nand_read_page_raw_on_die_ecc(struct mtd_info *mtd,
+				     struct nand_chip *chip,
+				     uint8_t *buf, int oob_required,
+				     int page)
+{
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	nand_read_page_raw(mtd, chip, buf, oob_required, page);
+
+	return 0;
+}
+
+static int
+micron_nand_write_page_raw_on_die_ecc(struct mtd_info *mtd,
+				      struct nand_chip *chip,
+				      const uint8_t *buf, int oob_required,
+				      int page)
+{
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	nand_write_page_raw(mtd, chip, buf, oob_required, page);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	return 0;
+}
+
+enum {
+	/* The NAND flash doesn't support on-die ECC */
+	MICRON_ON_DIE_UNSUPPORTED,
+
+	/*
+	 * The NAND flash supports on-die ECC and it can be
+	 * enabled/disabled by a set features command.
+	 */
+	MICRON_ON_DIE_SUPPORTED,
+
+	/*
+	 * The NAND flash supports on-die ECC, and it cannot be
+	 * disabled.
+	 */
+	MICRON_ON_DIE_MANDATORY,
+};
+
+/*
+ * Try to detect if the NAND support on-die ECC. To do this, we enable
+ * the feature, and read back if it has been enabled as expected. We
+ * also check if it can be disabled, because some Micron NANDs do not
+ * allow disabling the on-die ECC and we don't support such NANDs for
+ * now.
+ *
+ * This function also has the side effect of disabling on-die ECC if
+ * it had been left enabled by the firmware/bootloader.
+ */
+static int micron_supports_on_die_ecc(struct nand_chip *chip)
+{
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
+	int ret;
+
+	if (chip->onfi_version == 0)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	if (chip->bits_per_cell != 1)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	ret = micron_nand_on_die_ecc_setup(chip, true);
+	if (ret)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	chip->onfi_get_features(nand_to_mtd(chip), chip,
+				ONFI_FEATURE_ON_DIE_ECC, feature);
+	if ((feature[0] & ONFI_FEATURE_ON_DIE_ECC_EN) == 0)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	ret = micron_nand_on_die_ecc_setup(chip, false);
+	if (ret)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	chip->onfi_get_features(nand_to_mtd(chip), chip,
+				ONFI_FEATURE_ON_DIE_ECC, feature);
+	if (feature[0] & ONFI_FEATURE_ON_DIE_ECC_EN)
+		return MICRON_ON_DIE_MANDATORY;
+
+	/*
+	 * Some Micron NANDs have an on-die ECC of 4/512, some other
+	 * 8/512. We only support the former.
+	 */
+	if (chip->onfi_params.ecc_bits != 4)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	return MICRON_ON_DIE_SUPPORTED;
+}
+
 static int micron_nand_init(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ondie;
 	int ret;
 
 	ret = micron_nand_onfi_init(chip);
@@ -78,6 +266,34 @@ static int micron_nand_init(struct nand_chip *chip)
 	if (mtd->writesize == 2048)
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 
+	ondie = micron_supports_on_die_ecc(chip);
+
+	if (ondie == MICRON_ON_DIE_MANDATORY) {
+		pr_err("On-die ECC forcefully enabled, not supported\n");
+		return -EINVAL;
+	}
+
+	if (chip->ecc.mode == NAND_ECC_ON_DIE) {
+		if (ondie == MICRON_ON_DIE_UNSUPPORTED) {
+			pr_err("On-die ECC selected but not supported\n");
+			return -EINVAL;
+		}
+
+		chip->ecc.options = NAND_ECC_CUSTOM_PAGE_ACCESS;
+		chip->ecc.bytes = 8;
+		chip->ecc.size = 512;
+		chip->ecc.strength = 4;
+		chip->ecc.algo = NAND_ECC_BCH;
+		chip->ecc.read_page = micron_nand_read_page_on_die_ecc;
+		chip->ecc.write_page = micron_nand_write_page_on_die_ecc;
+		chip->ecc.read_page_raw =
+			micron_nand_read_page_raw_on_die_ecc;
+		chip->ecc.write_page_raw =
+			micron_nand_write_page_raw_on_die_ecc;
+
+		mtd_set_ooblayout(mtd, &micron_nand_on_die_ooblayout_ops);
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 28f7dd9177e92..893d0ce08030d 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -258,6 +258,8 @@ struct nand_chip;
 
 /* Vendor-specific feature address (Micron) */
 #define ONFI_FEATURE_ADDR_READ_RETRY	0x89
+#define ONFI_FEATURE_ON_DIE_ECC		0x90
+#define   ONFI_FEATURE_ON_DIE_ECC_EN	BIT(3)
 
 /* ONFI subfeature parameters length */
 #define ONFI_SUBFEATURE_PARAM_LEN	4
-- 
GitLab


From 838ff7b333263abc9e7e026bb225ed66511f450f Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Sat, 29 Apr 2017 11:06:46 +0200
Subject: [PATCH 0141/1958] mtd: nand: fsmc_nand: handle on-die ECC case

This commit adjusts the fsmc_nand driver so that it accepts the
NAND_ECC_ON_DIE case. It simply does nothing in this case, since both
the ECC operations and OOB layout will be defined by the NAND chip code
rather than by the NAND controller code.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index f58c912fdf3bd..de57554b8c4f1 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -1055,6 +1055,9 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 				break;
 			}
 
+		case NAND_ECC_ON_DIE:
+			break;
+
 		default:
 			dev_err(&pdev->dev, "Unsupported ECC mode!\n");
 			goto err_probe;
-- 
GitLab


From 086567f12e1188c57e061a53825a5eff5a7923a0 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Fri, 21 Apr 2017 12:51:07 +0200
Subject: [PATCH 0142/1958] mtd: nand: Optimize checking of erased buffers

If we see ~0UL in flash, there's no need for hweight, and no need to
check number of bitflips. So this should be net win.

Signed-off-by: Pavel Machek <pavel@denx.de>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index ed08e3946727e..79d98c9fa8a75 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1421,7 +1421,10 @@ static int nand_check_erased_buf(void *buf, int len, int bitflips_threshold)
 
 	for (; len >= sizeof(long);
 	     len -= sizeof(long), bitmap += sizeof(long)) {
-		weight = hweight_long(*((unsigned long *)bitmap));
+		unsigned long d = *((unsigned long *)bitmap);
+		if (d == ~0UL)
+			continue;
+		weight = hweight_long(d);
 		bitflips += BITS_PER_LONG - weight;
 		if (unlikely(bitflips > bitflips_threshold))
 			return -EBADMSG;
-- 
GitLab


From 6b7ee72149a4a6963e361c5324cea2961f17989a Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Fri, 21 Apr 2017 18:23:34 -0700
Subject: [PATCH 0143/1958] mtd: nand: gpmi: unify clock handling

Add device specific list of clocks required, and handle all clocks
in a single for loop. This avoids further code duplication when
adding i.MX 7 support.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 41 +++++++++++---------------
 drivers/mtd/nand/gpmi-nand/gpmi-nand.h |  2 ++
 2 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index d52139635b67c..c8bbf5da2ab8b 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -82,6 +82,10 @@ static int gpmi_ooblayout_free(struct mtd_info *mtd, int section,
 	return 0;
 }
 
+static const char * const gpmi_clks_for_mx2x[] = {
+	"gpmi_io",
+};
+
 static const struct mtd_ooblayout_ops gpmi_ooblayout_ops = {
 	.ecc = gpmi_ooblayout_ecc,
 	.free = gpmi_ooblayout_free,
@@ -91,24 +95,36 @@ static const struct gpmi_devdata gpmi_devdata_imx23 = {
 	.type = IS_MX23,
 	.bch_max_ecc_strength = 20,
 	.max_chain_delay = 16,
+	.clks = gpmi_clks_for_mx2x,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx2x),
 };
 
 static const struct gpmi_devdata gpmi_devdata_imx28 = {
 	.type = IS_MX28,
 	.bch_max_ecc_strength = 20,
 	.max_chain_delay = 16,
+	.clks = gpmi_clks_for_mx2x,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx2x),
+};
+
+static const char * const gpmi_clks_for_mx6[] = {
+	"gpmi_io", "gpmi_apb", "gpmi_bch", "gpmi_bch_apb", "per1_bch",
 };
 
 static const struct gpmi_devdata gpmi_devdata_imx6q = {
 	.type = IS_MX6Q,
 	.bch_max_ecc_strength = 40,
 	.max_chain_delay = 12,
+	.clks = gpmi_clks_for_mx6,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx6),
 };
 
 static const struct gpmi_devdata gpmi_devdata_imx6sx = {
 	.type = IS_MX6SX,
 	.bch_max_ecc_strength = 62,
 	.max_chain_delay = 12,
+	.clks = gpmi_clks_for_mx6,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx6),
 };
 
 static irqreturn_t bch_irq(int irq, void *cookie)
@@ -599,35 +615,14 @@ static int acquire_dma_channels(struct gpmi_nand_data *this)
 	return -EINVAL;
 }
 
-static char *extra_clks_for_mx6q[GPMI_CLK_MAX] = {
-	"gpmi_apb", "gpmi_bch", "gpmi_bch_apb", "per1_bch",
-};
-
 static int gpmi_get_clks(struct gpmi_nand_data *this)
 {
 	struct resources *r = &this->resources;
-	char **extra_clks = NULL;
 	struct clk *clk;
 	int err, i;
 
-	/* The main clock is stored in the first. */
-	r->clock[0] = devm_clk_get(this->dev, "gpmi_io");
-	if (IS_ERR(r->clock[0])) {
-		err = PTR_ERR(r->clock[0]);
-		goto err_clock;
-	}
-
-	/* Get extra clocks */
-	if (GPMI_IS_MX6(this))
-		extra_clks = extra_clks_for_mx6q;
-	if (!extra_clks)
-		return 0;
-
-	for (i = 1; i < GPMI_CLK_MAX; i++) {
-		if (extra_clks[i - 1] == NULL)
-			break;
-
-		clk = devm_clk_get(this->dev, extra_clks[i - 1]);
+	for (i = 0; i < this->devdata->clks_count; i++) {
+		clk = devm_clk_get(this->dev, this->devdata->clks[i]);
 		if (IS_ERR(clk)) {
 			err = PTR_ERR(clk);
 			goto err_clock;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 4e49a1f5fa27a..8879c4eff25e1 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -130,6 +130,8 @@ struct gpmi_devdata {
 	enum gpmi_type type;
 	int bch_max_ecc_strength;
 	int max_chain_delay; /* See the async EDO mode */
+	const char * const *clks;
+	const int clks_count;
 };
 
 struct gpmi_nand_data {
-- 
GitLab


From b4af694f1c51f035f70fe964e4859bd9dbaa52b8 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Fri, 21 Apr 2017 18:23:35 -0700
Subject: [PATCH 0144/1958] mtd: nand: gpmi: add i.MX 7 SoC support

Add support for i.MX 7 SoC. The i.MX 7 has a slightly different
clock architecture requiring only two clocks to be referenced.
The IP is slightly different compared to i.MX 6, but currently none
of this differences are in use, therefore reuse GPMI_IS_MX6.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 15 +++++++++++++++
 drivers/mtd/nand/gpmi-nand/gpmi-nand.h |  7 +++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index c8bbf5da2ab8b..9b777be633a9d 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -127,6 +127,18 @@ static const struct gpmi_devdata gpmi_devdata_imx6sx = {
 	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx6),
 };
 
+static const char * const gpmi_clks_for_mx7d[] = {
+	"gpmi_io", "gpmi_bch_apb",
+};
+
+static const struct gpmi_devdata gpmi_devdata_imx7d = {
+	.type = IS_MX7D,
+	.bch_max_ecc_strength = 62,
+	.max_chain_delay = 12,
+	.clks = gpmi_clks_for_mx7d,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx7d),
+};
+
 static irqreturn_t bch_irq(int irq, void *cookie)
 {
 	struct gpmi_nand_data *this = cookie;
@@ -2071,6 +2083,9 @@ static const struct of_device_id gpmi_nand_id_table[] = {
 	}, {
 		.compatible = "fsl,imx6sx-gpmi-nand",
 		.data = &gpmi_devdata_imx6sx,
+	}, {
+		.compatible = "fsl,imx7d-gpmi-nand",
+		.data = &gpmi_devdata_imx7d,
 	}, {}
 };
 MODULE_DEVICE_TABLE(of, gpmi_nand_id_table);
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 8879c4eff25e1..e88a45a62ab6a 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -123,7 +123,8 @@ enum gpmi_type {
 	IS_MX23,
 	IS_MX28,
 	IS_MX6Q,
-	IS_MX6SX
+	IS_MX6SX,
+	IS_MX7D,
 };
 
 struct gpmi_devdata {
@@ -307,6 +308,8 @@ void gpmi_copy_bits(u8 *dst, size_t dst_bit_off,
 #define GPMI_IS_MX28(x)		((x)->devdata->type == IS_MX28)
 #define GPMI_IS_MX6Q(x)		((x)->devdata->type == IS_MX6Q)
 #define GPMI_IS_MX6SX(x)	((x)->devdata->type == IS_MX6SX)
+#define GPMI_IS_MX7D(x)		((x)->devdata->type == IS_MX7D)
 
-#define GPMI_IS_MX6(x)		(GPMI_IS_MX6Q(x) || GPMI_IS_MX6SX(x))
+#define GPMI_IS_MX6(x)		(GPMI_IS_MX6Q(x) || GPMI_IS_MX6SX(x) || \
+				 GPMI_IS_MX7D(x))
 #endif
-- 
GitLab


From d7e578c8118113789b7abd2977e208c64d6f8465 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Fri, 21 Apr 2017 18:23:36 -0700
Subject: [PATCH 0145/1958] mtd: gpmi: document current clock requirements

The clock requirements are completely missing, add the clocks
currently required by the driver.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 .../devicetree/bindings/mtd/gpmi-nand.txt          | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.txt b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
index d02acaff3c35e..b289ef3c1b7e4 100644
--- a/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
@@ -4,7 +4,12 @@ The GPMI nand controller provides an interface to control the
 NAND flash chips.
 
 Required properties:
-  - compatible : should be "fsl,<chip>-gpmi-nand"
+  - compatible : should be "fsl,<chip>-gpmi-nand", chip can be:
+    * imx23
+    * imx28
+    * imx6q
+    * imx6sx
+    * imx7d
   - reg : should contain registers location and length for gpmi and bch.
   - reg-names: Should contain the reg names "gpmi-nand" and "bch"
   - interrupts : BCH interrupt number.
@@ -13,6 +18,13 @@ Required properties:
     and GPMI DMA channel ID.
     Refer to dma.txt and fsl-mxs-dma.txt for details.
   - dma-names: Must be "rx-tx".
+  - clocks : clocks phandle and clock specifier corresponding to each clock
+    specified in clock-names.
+  - clock-names : The "gpmi_io" clock is always required. Which clocks are
+    exactly required depends on chip:
+    * imx23/imx28 : "gpmi_io"
+    * imx6q/sx : "gpmi_io", "gpmi_apb", "gpmi_bch", "gpmi_bch_apb", "per1_bch"
+    * imx7d : "gpmi_io", "gpmi_bch_apb"
 
 Optional properties:
   - nand-on-flash-bbt: boolean to enable on flash bbt option if not
-- 
GitLab


From 4d02423e9afe6c46142ce98bbcaf5167316dbfbf Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 10 Apr 2017 10:35:17 +0200
Subject: [PATCH 0146/1958] mtd: nand: gpmi: Fix gpmi_nand_init() error path

The GPMI driver is wrongly assuming that nand_release() can safely be
called on an uninitialized/unregistered NAND device.

Add a new err_nand_cleanup label in the error path and only execute if
nand_scan_tail() succeeded.

Note that we now call nand_cleanup() instead of nand_release()
(nand_release() is actually grouping the mtd_device_unregister() and
nand_cleanup() in one call) because there's no point in trying to
unregister a device that has never been registered.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Acked-by: Han Xu <han.xu@nxp.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 9b777be633a9d..f7ac81e44ce71 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -2055,18 +2055,20 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 
 	ret = nand_boot_init(this);
 	if (ret)
-		goto err_out;
+		goto err_nand_cleanup;
 	ret = chip->scan_bbt(mtd);
 	if (ret)
-		goto err_out;
+		goto err_nand_cleanup;
 
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret)
-		goto err_out;
+		goto err_nand_cleanup;
 	return 0;
 
+err_nand_cleanup:
+	nand_cleanup(chip);
 err_out:
-	gpmi_nand_exit(this);
+	gpmi_free_dma_buffer(this);
 	return ret;
 }
 
-- 
GitLab


From ebb528d97830731b5fce27a72191e4fb94a4e66d Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 10 Apr 2017 10:35:18 +0200
Subject: [PATCH 0147/1958] mtd: nand: gpmi: Kill gpmi_nand_exit()

The only user of gpmi_nand_exit() is gpmi_nand_remove(). Move its content
to the caller.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Acked-by: Han Xu <han.xu@nxp.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index f7ac81e44ce71..50f8d4a1b9832 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1936,12 +1936,6 @@ static int gpmi_set_geometry(struct gpmi_nand_data *this)
 	return gpmi_alloc_dma_buffer(this);
 }
 
-static void gpmi_nand_exit(struct gpmi_nand_data *this)
-{
-	nand_release(nand_to_mtd(&this->nand));
-	gpmi_free_dma_buffer(this);
-}
-
 static int gpmi_init_last(struct gpmi_nand_data *this)
 {
 	struct nand_chip *chip = &this->nand;
@@ -2141,7 +2135,8 @@ static int gpmi_nand_remove(struct platform_device *pdev)
 {
 	struct gpmi_nand_data *this = platform_get_drvdata(pdev);
 
-	gpmi_nand_exit(this);
+	nand_release(nand_to_mtd(&this->nand));
+	gpmi_free_dma_buffer(this);
 	release_resources(this);
 	return 0;
 }
-- 
GitLab


From 104e442a67cfba4d0cc982384761befb917fb6a1 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 16 Mar 2017 09:35:58 +0100
Subject: [PATCH 0148/1958] mtd: nand: Pass the CS line to
 ->setup_data_interface()

Some NAND controllers can assign different NAND timings to different
CS lines. Pass the CS line information to ->setup_data_interface() so
that the NAND controller driver knows which CS line is concerned by
the setup_data_interface() request.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsmc_nand.c  |  7 +++----
 drivers/mtd/nand/mxc_nand.c   | 12 +++++-------
 drivers/mtd/nand/nand_base.c  | 22 +++++++++++++---------
 drivers/mtd/nand/s3c2410.c    |  5 ++---
 drivers/mtd/nand/sunxi_nand.c |  7 +++----
 drivers/mtd/nand/tango_nand.c |  7 +++----
 include/linux/mtd/nand.h      | 12 ++++++++----
 7 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index de57554b8c4f1..9d8b051d31870 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -388,9 +388,8 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host,
 	return 0;
 }
 
-static int fsmc_setup_data_interface(struct mtd_info *mtd,
-				     const struct nand_data_interface *conf,
-				     bool check_only)
+static int fsmc_setup_data_interface(struct mtd_info *mtd, int csline,
+				     const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct fsmc_nand_data *host = nand_get_controller_data(nand);
@@ -406,7 +405,7 @@ static int fsmc_setup_data_interface(struct mtd_info *mtd,
 	if (ret)
 		return ret;
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	fsmc_nand_setup(host, &tims);
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 61ca020c52729..a764d5ca7536b 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -152,9 +152,8 @@ struct mxc_nand_devtype_data {
 	void (*select_chip)(struct mtd_info *mtd, int chip);
 	int (*correct_data)(struct mtd_info *mtd, u_char *dat,
 			u_char *read_ecc, u_char *calc_ecc);
-	int (*setup_data_interface)(struct mtd_info *mtd,
-				    const struct nand_data_interface *conf,
-				    bool check_only);
+	int (*setup_data_interface)(struct mtd_info *mtd, int csline,
+				    const struct nand_data_interface *conf);
 
 	/*
 	 * On i.MX21 the CONFIG2:INT bit cannot be read if interrupts are masked
@@ -1015,9 +1014,8 @@ static void preset_v1(struct mtd_info *mtd)
 	writew(0x4, NFC_V1_V2_WRPROT);
 }
 
-static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
@@ -1075,7 +1073,7 @@ static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd,
 		return -EINVAL;
 	}
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	ret = clk_set_rate(host->clk, rate);
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 79d98c9fa8a75..3317acf0ce04d 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1042,12 +1042,13 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 /**
  * nand_reset_data_interface - Reset data interface and timings
  * @chip: The NAND chip
+ * @chipnr: Internal die id
  *
  * Reset the Data interface and timings to ONFI mode 0.
  *
  * Returns 0 for success or negative error code otherwise.
  */
-static int nand_reset_data_interface(struct nand_chip *chip)
+static int nand_reset_data_interface(struct nand_chip *chip, int chipnr)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_data_interface *conf;
@@ -1071,7 +1072,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
 	 */
 
 	conf = nand_get_default_data_interface();
-	ret = chip->setup_data_interface(mtd, conf, false);
+	ret = chip->setup_data_interface(mtd, chipnr, conf);
 	if (ret)
 		pr_err("Failed to configure data interface to SDR timing mode 0\n");
 
@@ -1081,6 +1082,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
 /**
  * nand_setup_data_interface - Setup the best data interface and timings
  * @chip: The NAND chip
+ * @chipnr: Internal die id
  *
  * Find and configure the best data interface and NAND timings supported by
  * the chip and the driver.
@@ -1090,7 +1092,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
  *
  * Returns 0 for success or negative error code otherwise.
  */
-static int nand_setup_data_interface(struct nand_chip *chip)
+static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
@@ -1114,7 +1116,7 @@ static int nand_setup_data_interface(struct nand_chip *chip)
 			goto err;
 	}
 
-	ret = chip->setup_data_interface(mtd, chip->data_interface, false);
+	ret = chip->setup_data_interface(mtd, chipnr, chip->data_interface);
 err:
 	return ret;
 }
@@ -1165,8 +1167,10 @@ static int nand_init_data_interface(struct nand_chip *chip)
 		if (ret)
 			continue;
 
-		ret = chip->setup_data_interface(mtd, chip->data_interface,
-						 true);
+		/* Pass -1 to only */
+		ret = chip->setup_data_interface(mtd,
+						 NAND_DATA_IFACE_CHECK_ONLY,
+						 chip->data_interface);
 		if (!ret) {
 			chip->onfi_timing_mode_default = mode;
 			break;
@@ -1193,7 +1197,7 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	ret = nand_reset_data_interface(chip);
+	ret = nand_reset_data_interface(chip, chipnr);
 	if (ret)
 		return ret;
 
@@ -1206,7 +1210,7 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 	chip->select_chip(mtd, -1);
 
 	chip->select_chip(mtd, chipnr);
-	ret = nand_setup_data_interface(chip);
+	ret = nand_setup_data_interface(chip, chipnr);
 	chip->select_chip(mtd, -1);
 	if (ret)
 		return ret;
@@ -4396,7 +4400,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	 * For the other dies, nand_reset() will automatically switch to the
 	 * best mode for us.
 	 */
-	ret = nand_setup_data_interface(chip);
+	ret = nand_setup_data_interface(chip, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index f0b030d44f71f..9e0c849607b9c 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -812,9 +812,8 @@ static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info,
 	return -ENODEV;
 }
 
-static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	struct s3c2410_platform_nand *pdata = info->platform;
diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 118a26fff3685..9c2dbe352c439 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -1592,9 +1592,8 @@ static int _sunxi_nand_lookup_timing(const s32 *lut, int lut_size, u32 duration,
 #define sunxi_nand_lookup_timing(l, p, c) \
 			_sunxi_nand_lookup_timing(l, ARRAY_SIZE(l), p, c)
 
-static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *chip = to_sunxi_nand(nand);
@@ -1707,7 +1706,7 @@ static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd,
 		return tRHW;
 	}
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	/*
diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 05b6e10652033..85e0d97593e84 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -476,9 +476,8 @@ static u32 to_ticks(int kHz, int ps)
 	return DIV_ROUND_UP_ULL((u64)kHz * ps, NSEC_PER_SEC);
 }
 
-static int tango_set_timings(struct mtd_info *mtd,
-			     const struct nand_data_interface *conf,
-			     bool check_only)
+static int tango_set_timings(struct mtd_info *mtd, int csline,
+			     const struct nand_data_interface *conf)
 {
 	const struct nand_sdr_timings *sdr = nand_get_sdr_timings(conf);
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -490,7 +489,7 @@ static int tango_set_timings(struct mtd_info *mtd,
 	if (IS_ERR(sdr))
 		return PTR_ERR(sdr);
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	Trdy = to_ticks(kHz, sdr->tCEA_max - sdr->tREA_max);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 893d0ce08030d..9de3686e738c1 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -107,6 +107,8 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 #define NAND_STATUS_READY	0x40
 #define NAND_STATUS_WP		0x80
 
+#define NAND_DATA_IFACE_CHECK_ONLY	-1
+
 /*
  * Constants for ECC_MODES
  */
@@ -818,7 +820,10 @@ struct nand_manufacturer_ops {
  * @read_retries:	[INTERN] the number of read retry modes supported
  * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
  * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
- * @setup_data_interface: [OPTIONAL] setup the data interface and timing
+ * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If
+ *			  chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this
+ *			  means the configuration should not be applied but
+ *			  only checked.
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
  *			lookup.
@@ -862,9 +867,8 @@ struct nand_chip {
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode);
-	int (*setup_data_interface)(struct mtd_info *mtd,
-				    const struct nand_data_interface *conf,
-				    bool check_only);
+	int (*setup_data_interface)(struct mtd_info *mtd, int chipnr,
+				    const struct nand_data_interface *conf);
 
 
 	int chip_delay;
-- 
GitLab


From f9ce2eddf1769a2cf93e87332c55063537e69119 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 16 Mar 2017 09:35:59 +0100
Subject: [PATCH 0149/1958] mtd: nand: atmel: Add ->setup_data_interface()
 hooks

The NAND controller IP can adapt the NAND controller timings dynamically.
Implement the ->setup_data_interface() hook to support this feature.

Note that it's not supported on at91rm9200 because this SoC has a
completely different SMC block, which is not supported yet.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/Kconfig                 |   1 +
 drivers/mtd/nand/atmel/nand-controller.c | 329 ++++++++++++++++++++++-
 2 files changed, 328 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 0bd2319d3035c..dbfa72d61d5aa 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -308,6 +308,7 @@ config MTD_NAND_CS553X
 config MTD_NAND_ATMEL
 	tristate "Support for NAND Flash / SmartMedia on AT91"
 	depends on ARCH_AT91
+	select MFD_ATMEL_SMC
 	help
 	  Enables support for NAND Flash / Smart Media Card interface
 	  on Atmel AT91 processors.
diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index 3b24468961473..ee32909f7943d 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -57,6 +57,7 @@
 #include <linux/interrupt.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/atmel-matrix.h>
+#include <linux/mfd/syscon/atmel-smc.h>
 #include <linux/module.h>
 #include <linux/mtd/nand.h>
 #include <linux/of_address.h>
@@ -151,6 +152,8 @@ struct atmel_nand_cs {
 		void __iomem *virt;
 		dma_addr_t dma;
 	} io;
+
+	struct atmel_smc_cs_conf smcconf;
 };
 
 struct atmel_nand {
@@ -196,6 +199,8 @@ struct atmel_nand_controller_ops {
 	void (*nand_init)(struct atmel_nand_controller *nc,
 			  struct atmel_nand *nand);
 	int (*ecc_init)(struct atmel_nand *nand);
+	int (*setup_data_interface)(struct atmel_nand *nand, int csline,
+				    const struct nand_data_interface *conf);
 };
 
 struct atmel_nand_controller_caps {
@@ -1175,6 +1180,295 @@ static int atmel_hsmc_nand_ecc_init(struct atmel_nand *nand)
 	return 0;
 }
 
+static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand,
+					const struct nand_data_interface *conf,
+					struct atmel_smc_cs_conf *smcconf)
+{
+	u32 ncycles, totalcycles, timeps, mckperiodps;
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(nand->base.controller);
+
+	/* DDR interface not supported. */
+	if (conf->type != NAND_SDR_IFACE)
+		return -ENOTSUPP;
+
+	/*
+	 * tRC < 30ns implies EDO mode. This controller does not support this
+	 * mode.
+	 */
+	if (conf->timings.sdr.tRC_min < 30)
+		return -ENOTSUPP;
+
+	atmel_smc_cs_conf_init(smcconf);
+
+	mckperiodps = NSEC_PER_SEC / clk_get_rate(nc->mck);
+	mckperiodps *= 1000;
+
+	/*
+	 * Set write pulse timing. This one is easy to extract:
+	 *
+	 * NWE_PULSE = tWP
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tWP_min, mckperiodps);
+	totalcycles = ncycles;
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NWE_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * The write setup timing depends on the operation done on the NAND.
+	 * All operations goes through the same data bus, but the operation
+	 * type depends on the address we are writing to (ALE/CLE address
+	 * lines).
+	 * Since we have no way to differentiate the different operations at
+	 * the SMC level, we must consider the worst case (the biggest setup
+	 * time among all operation types):
+	 *
+	 * NWE_SETUP = max(tCLS, tCS, tALS, tDS) - NWE_PULSE
+	 */
+	timeps = max3(conf->timings.sdr.tCLS_min, conf->timings.sdr.tCS_min,
+		      conf->timings.sdr.tALS_min);
+	timeps = max(timeps, conf->timings.sdr.tDS_min);
+	ncycles = DIV_ROUND_UP(timeps, mckperiodps);
+	ncycles = ncycles > totalcycles ? ncycles - totalcycles : 0;
+	totalcycles += ncycles;
+	ret = atmel_smc_cs_conf_set_setup(smcconf, ATMEL_SMC_NWE_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * As for the write setup timing, the write hold timing depends on the
+	 * operation done on the NAND:
+	 *
+	 * NWE_HOLD = max(tCLH, tCH, tALH, tDH, tWH)
+	 */
+	timeps = max3(conf->timings.sdr.tCLH_min, conf->timings.sdr.tCH_min,
+		      conf->timings.sdr.tALH_min);
+	timeps = max3(timeps, conf->timings.sdr.tDH_min,
+		      conf->timings.sdr.tWH_min);
+	ncycles = DIV_ROUND_UP(timeps, mckperiodps);
+	totalcycles += ncycles;
+
+	/*
+	 * The write cycle timing is directly matching tWC, but is also
+	 * dependent on the other timings on the setup and hold timings we
+	 * calculated earlier, which gives:
+	 *
+	 * NWE_CYCLE = max(tWC, NWE_SETUP + NWE_PULSE + NWE_HOLD)
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tWC_min, mckperiodps);
+	ncycles = max(totalcycles, ncycles);
+	ret = atmel_smc_cs_conf_set_cycle(smcconf, ATMEL_SMC_NWE_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * We don't want the CS line to be toggled between each byte/word
+	 * transfer to the NAND. The only way to guarantee that is to have the
+	 * NCS_{WR,RD}_{SETUP,HOLD} timings set to 0, which in turn means:
+	 *
+	 * NCS_WR_PULSE = NWE_CYCLE
+	 */
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NCS_WR_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * As for the write setup timing, the read hold timing depends on the
+	 * operation done on the NAND:
+	 *
+	 * NRD_HOLD = max(tREH, tRHOH)
+	 */
+	timeps = max(conf->timings.sdr.tREH_min, conf->timings.sdr.tRHOH_min);
+	ncycles = DIV_ROUND_UP(timeps, mckperiodps);
+	totalcycles = ncycles;
+
+	/*
+	 * TDF = tRHZ - NRD_HOLD
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRHZ_max, mckperiodps);
+	ncycles -= totalcycles;
+
+	/*
+	 * In ONFI 4.0 specs, tRHZ has been increased to support EDO NANDs and
+	 * we might end up with a config that does not fit in the TDF field.
+	 * Just take the max value in this case and hope that the NAND is more
+	 * tolerant than advertised.
+	 */
+	if (ncycles > ATMEL_SMC_MODE_TDF_MAX)
+		ncycles = ATMEL_SMC_MODE_TDF_MAX;
+	else if (ncycles < ATMEL_SMC_MODE_TDF_MIN)
+		ncycles = ATMEL_SMC_MODE_TDF_MIN;
+
+	smcconf->mode |= ATMEL_SMC_MODE_TDF(ncycles) |
+			 ATMEL_SMC_MODE_TDFMODE_OPTIMIZED;
+
+	/*
+	 * Read pulse timing directly matches tRP:
+	 *
+	 * NRD_PULSE = tRP
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRP_min, mckperiodps);
+	totalcycles += ncycles;
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NRD_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * The write cycle timing is directly matching tWC, but is also
+	 * dependent on the setup and hold timings we calculated earlier,
+	 * which gives:
+	 *
+	 * NRD_CYCLE = max(tRC, NRD_PULSE + NRD_HOLD)
+	 *
+	 * NRD_SETUP is always 0.
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRC_min, mckperiodps);
+	ncycles = max(totalcycles, ncycles);
+	ret = atmel_smc_cs_conf_set_cycle(smcconf, ATMEL_SMC_NRD_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * We don't want the CS line to be toggled between each byte/word
+	 * transfer from the NAND. The only way to guarantee that is to have
+	 * the NCS_{WR,RD}_{SETUP,HOLD} timings set to 0, which in turn means:
+	 *
+	 * NCS_RD_PULSE = NRD_CYCLE
+	 */
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NCS_RD_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/* Txxx timings are directly matching tXXX ones. */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tCLR_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TCLR_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tADL_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TADL_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tAR_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TAR_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRR_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TRR_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tWB_max, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TWB_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	/* Attach the CS line to the NFC logic. */
+	smcconf->timings |= ATMEL_HSMC_TIMINGS_NFSEL;
+
+	/* Set the appropriate data bus width. */
+	if (nand->base.options & NAND_BUSWIDTH_16)
+		smcconf->mode |= ATMEL_SMC_MODE_DBW_16;
+
+	/* Operate in NRD/NWE READ/WRITEMODE. */
+	smcconf->mode |= ATMEL_SMC_MODE_READMODE_NRD |
+			 ATMEL_SMC_MODE_WRITEMODE_NWE;
+
+	return 0;
+}
+
+static int atmel_smc_nand_setup_data_interface(struct atmel_nand *nand,
+					int csline,
+					const struct nand_data_interface *conf)
+{
+	struct atmel_nand_controller *nc;
+	struct atmel_smc_cs_conf smcconf;
+	struct atmel_nand_cs *cs;
+	int ret;
+
+	nc = to_nand_controller(nand->base.controller);
+
+	ret = atmel_smc_nand_prepare_smcconf(nand, conf, &smcconf);
+	if (ret)
+		return ret;
+
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	cs = &nand->cs[csline];
+	cs->smcconf = smcconf;
+	atmel_smc_cs_conf_apply(nc->smc, cs->id, &cs->smcconf);
+
+	return 0;
+}
+
+static int atmel_hsmc_nand_setup_data_interface(struct atmel_nand *nand,
+					int csline,
+					const struct nand_data_interface *conf)
+{
+	struct atmel_nand_controller *nc;
+	struct atmel_smc_cs_conf smcconf;
+	struct atmel_nand_cs *cs;
+	int ret;
+
+	nc = to_nand_controller(nand->base.controller);
+
+	ret = atmel_smc_nand_prepare_smcconf(nand, conf, &smcconf);
+	if (ret)
+		return ret;
+
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	cs = &nand->cs[csline];
+	cs->smcconf = smcconf;
+
+	if (cs->rb.type == ATMEL_NAND_NATIVE_RB)
+		cs->smcconf.timings |= ATMEL_HSMC_TIMINGS_RBNSEL(cs->rb.id);
+
+	atmel_hsmc_cs_conf_apply(nc->smc, cs->id, &cs->smcconf);
+
+	return 0;
+}
+
+static int atmel_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(nand->base.controller);
+
+	if (csline >= nand->numcs ||
+	    (csline < 0 && csline != NAND_DATA_IFACE_CHECK_ONLY))
+		return -EINVAL;
+
+	return nc->caps->ops->setup_data_interface(nand, csline, conf);
+}
+
 static void atmel_nand_init(struct atmel_nand_controller *nc,
 			    struct atmel_nand *nand)
 {
@@ -1192,6 +1486,9 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
 	chip->write_buf = atmel_nand_write_buf;
 	chip->select_chip = atmel_nand_select_chip;
 
+	if (nc->mck && nc->caps->ops->setup_data_interface)
+		chip->setup_data_interface = atmel_nand_setup_data_interface;
+
 	/* Some NANDs require a longer delay than the default one (20us). */
 	chip->chip_delay = 40;
 
@@ -1677,6 +1974,12 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
 	if (nc->caps->legacy_of_bindings)
 		return 0;
 
+	nc->mck = of_clk_get(dev->parent->of_node, 0);
+	if (IS_ERR(nc->mck)) {
+		dev_err(dev, "Failed to retrieve MCK clk\n");
+		return PTR_ERR(nc->mck);
+	}
+
 	np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0);
 	if (!np) {
 		dev_err(dev, "Missing or invalid atmel,smc property\n");
@@ -1983,6 +2286,7 @@ static const struct atmel_nand_controller_ops atmel_hsmc_nc_ops = {
 	.remove = atmel_hsmc_nand_controller_remove,
 	.ecc_init = atmel_hsmc_nand_ecc_init,
 	.nand_init = atmel_hsmc_nand_init,
+	.setup_data_interface = atmel_hsmc_nand_setup_data_interface,
 };
 
 static const struct atmel_nand_controller_caps atmel_sama5_nc_caps = {
@@ -2037,7 +2341,14 @@ atmel_smc_nand_controller_remove(struct atmel_nand_controller *nc)
 	return 0;
 }
 
-static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
+/*
+ * The SMC reg layout of at91rm9200 is completely different which prevents us
+ * from re-using atmel_smc_nand_setup_data_interface() for the
+ * ->setup_data_interface() hook.
+ * At this point, there's no support for the at91rm9200 SMC IP, so we leave
+ * ->setup_data_interface() unassigned.
+ */
+static const struct atmel_nand_controller_ops at91rm9200_nc_ops = {
 	.probe = atmel_smc_nand_controller_probe,
 	.remove = atmel_smc_nand_controller_remove,
 	.ecc_init = atmel_nand_ecc_init,
@@ -2045,6 +2356,20 @@ static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
 };
 
 static const struct atmel_nand_controller_caps atmel_rm9200_nc_caps = {
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
+	.ops = &at91rm9200_nc_ops,
+};
+
+static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
+	.probe = atmel_smc_nand_controller_probe,
+	.remove = atmel_smc_nand_controller_remove,
+	.ecc_init = atmel_nand_ecc_init,
+	.nand_init = atmel_smc_nand_init,
+	.setup_data_interface = atmel_smc_nand_setup_data_interface,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9260_nc_caps = {
 	.ale_offs = BIT(21),
 	.cle_offs = BIT(22),
 	.ops = &atmel_smc_nc_ops,
@@ -2093,7 +2418,7 @@ static const struct of_device_id atmel_nand_controller_of_ids[] = {
 	},
 	{
 		.compatible = "atmel,at91sam9260-nand-controller",
-		.data = &atmel_rm9200_nc_caps,
+		.data = &atmel_sam9260_nc_caps,
 	},
 	{
 		.compatible = "atmel,at91sam9261-nand-controller",
-- 
GitLab


From 6e532afaca8ed41107fa0a551d7b2db99a869540 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 16 Mar 2017 09:36:00 +0100
Subject: [PATCH 0150/1958] mtd: nand: atmel: Add PM ops

Provide a ->resume() hook to make sure the NAND timings are correctly
restored by resetting all chips connected to the controller.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/nand-controller.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index ee32909f7943d..846c555e470b4 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -2506,6 +2506,24 @@ static int atmel_nand_controller_remove(struct platform_device *pdev)
 	return nc->caps->ops->remove(nc);
 }
 
+static int atmel_nand_controller_resume(struct device *dev)
+{
+	struct atmel_nand_controller *nc = dev_get_drvdata(dev);
+	struct atmel_nand *nand;
+
+	list_for_each_entry(nand, &nc->chips, node) {
+		int i;
+
+		for (i = 0; i < nand->numcs; i++)
+			nand_reset(&nand->base, i);
+	}
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(atmel_nand_controller_pm_ops, NULL,
+			 atmel_nand_controller_resume);
+
 static struct platform_driver atmel_nand_controller_driver = {
 	.driver = {
 		.name = "atmel-nand-controller",
-- 
GitLab


From 0b4773fd1649e0d418275557723a7ef54f769dc9 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 16 May 2017 00:17:41 +0200
Subject: [PATCH 0151/1958] mtd: nand: Drop unused cached programming support

Cached programming is always skipped, so drop the associated code until
we decide to really support it.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 38 ++++++++++++------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 3317acf0ce04d..561f70e05c883 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2724,7 +2724,7 @@ static int nand_write_page_syndrome(struct mtd_info *mtd,
  */
 static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 		uint32_t offset, int data_len, const uint8_t *buf,
-		int oob_required, int page, int cached, int raw)
+		int oob_required, int page, int raw)
 {
 	int status, subpage;
 
@@ -2750,31 +2750,19 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (status < 0)
 		return status;
 
+	if (nand_standard_page_accessors(&chip->ecc))
+		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
 	/*
-	 * Cached progamming disabled for now. Not sure if it's worth the
-	 * trouble. The speed gain is not very impressive. (2.3->2.6Mib/s).
+	 * See if operation failed and additional status checks are
+	 * available.
 	 */
-	cached = 0;
-
-	if (!cached || !NAND_HAS_CACHEPROG(chip)) {
-
-		if (nand_standard_page_accessors(&chip->ecc))
-			chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-		status = chip->waitfunc(mtd, chip);
-		/*
-		 * See if operation failed and additional status checks are
-		 * available.
-		 */
-		if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-			status = chip->errstat(mtd, chip, FL_WRITING, status,
-					       page);
+	if ((status & NAND_STATUS_FAIL) && (chip->errstat))
+		status = chip->errstat(mtd, chip, FL_WRITING, status,
+				       page);
 
-		if (status & NAND_STATUS_FAIL)
-			return -EIO;
-	} else {
-		chip->cmdfunc(mtd, NAND_CMD_CACHEDPROG, -1, -1);
-		status = chip->waitfunc(mtd, chip);
-	}
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
 
 	return 0;
 }
@@ -2881,7 +2869,6 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 
 	while (1) {
 		int bytes = mtd->writesize;
-		int cached = writelen > bytes && page != blockmask;
 		uint8_t *wbuf = buf;
 		int use_bufpoi;
 		int part_pagewr = (column || writelen < mtd->writesize);
@@ -2899,7 +2886,6 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		if (use_bufpoi) {
 			pr_debug("%s: using write bounce buffer for buf@%p\n",
 					 __func__, buf);
-			cached = 0;
 			if (part_pagewr)
 				bytes = min_t(int, bytes - column, writelen);
 			chip->pagebuf = -1;
@@ -2918,7 +2904,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		}
 
 		ret = nand_write_page(mtd, chip, column, bytes, wbuf,
-				      oob_required, page, cached,
+				      oob_required, page,
 				      (ops->mode == MTD_OPS_RAW));
 		if (ret)
 			break;
-- 
GitLab


From 7d135bcced20be2b50128432c5426a7278ec4f6d Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sat, 6 May 2017 18:03:33 +0200
Subject: [PATCH 0152/1958] mtd: nand: Drop the ->errstat() hook

The ->errstat() hook is no longer implemented NAND controller drivers.
Get rid of it before someone starts abusing it.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 16 ----------------
 include/linux/mtd/nand.h     |  5 -----
 2 files changed, 21 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 561f70e05c883..df613125795b5 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2753,14 +2753,6 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (nand_standard_page_accessors(&chip->ecc))
 		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 	status = chip->waitfunc(mtd, chip);
-	/*
-	 * See if operation failed and additional status checks are
-	 * available.
-	 */
-	if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-		status = chip->errstat(mtd, chip, FL_WRITING, status,
-				       page);
-
 	if (status & NAND_STATUS_FAIL)
 		return -EIO;
 
@@ -3220,14 +3212,6 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 
 		status = chip->erase(mtd, page & chip->pagemask);
 
-		/*
-		 * See if operation failed and additional status checks are
-		 * available
-		 */
-		if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-			status = chip->errstat(mtd, chip, FL_ERASING,
-					       status, page);
-
 		/* See if block erase succeeded */
 		if (status & NAND_STATUS_FAIL) {
 			pr_debug("%s: failed erase, page 0x%08x\n",
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9de3686e738c1..8b3607bde1b5d 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -834,9 +834,6 @@ struct nand_manufacturer_ops {
  *			structure which is shared among multiple independent
  *			devices.
  * @priv:		[OPTIONAL] pointer to private chip data
- * @errstat:		[OPTIONAL] hardware specific function to perform
- *			additional error status checks (determine if errors are
- *			correctable).
  * @manufacturer:	[INTERN] Contains manufacturer information
  */
 
@@ -860,8 +857,6 @@ struct nand_chip {
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
 	int (*erase)(struct mtd_info *mtd, int page);
 	int (*scan_bbt)(struct mtd_info *mtd);
-	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
-			int status, int page);
 	int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
GitLab


From 2de85e73360104d3582363dcebdcdd7dc20431be Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 16 May 2017 00:23:45 +0200
Subject: [PATCH 0153/1958] mtd: nand: sunxi: Actually use DMA for subpage
 reads

ecc->read_subpage is set to sunxi_nfc_hw_ecc_read_subpage_dma when
->dmac != NULL, but is then unconditionally overwritten in the common
init path.

Remove this extra assignment to allow usage of the DMA operation when
possible.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/sunxi_nand.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 9c2dbe352c439..9a46d1db9211f 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -1921,7 +1921,6 @@ static int sunxi_nand_hw_ecc_ctrl_init(struct mtd_info *mtd,
 	ecc->write_subpage = sunxi_nfc_hw_ecc_write_subpage;
 	ecc->read_oob_raw = nand_read_oob_std;
 	ecc->write_oob_raw = nand_write_oob_std;
-	ecc->read_subpage = sunxi_nfc_hw_ecc_read_subpage;
 
 	return 0;
 }
-- 
GitLab


From df5586d7bf6cce421ed48e9b2a88a8bdaa4fd9d0 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 16 May 2017 00:23:46 +0200
Subject: [PATCH 0154/1958] mtd: nand: sunxi: Remove unneeded
 ->cmdfunc(NAND_CMD_READ0, 0, page)

The core already sends the NAND_CMD_READ0 for us. Duplicating this call
in the driver is useless and introduces a perf penalty.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/sunxi_nand.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 9a46d1db9211f..d0b6f8f9f297a 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -1301,7 +1301,6 @@ static int sunxi_nfc_hw_ecc_read_subpage(struct mtd_info *mtd,
 
 	sunxi_nfc_hw_ecc_enable(mtd);
 
-	chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page);
 	for (i = data_offs / ecc->size;
 	     i < DIV_ROUND_UP(data_offs + readlen, ecc->size); i++) {
 		int data_off = i * ecc->size;
-- 
GitLab


From a186493237a9d8559997c2f97c33c4716d602fd2 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 17 May 2017 10:47:50 +0200
Subject: [PATCH 0155/1958] mtd: nand: tango: Fix incorrect use of SEQIN
 command

SEQIN is supposed to be used when one wants to start programming a page.
What we want here is just to change the column within the page, which is
done with the RNDIN command.

Fixes: 6956e2385a16 ("mtd: nand: add tango NAND flash controller support")
Cc: stable@vger.kernel.org
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
---
 drivers/mtd/nand/tango_nand.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 85e0d97593e84..201979f0c1bdc 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -332,7 +332,7 @@ static void aux_write(struct nand_chip *chip, const u8 **buf, int len, int *pos)
 
 	if (!*buf) {
 		/* skip over "len" bytes */
-		chip->cmdfunc(mtd, NAND_CMD_SEQIN, *pos, -1);
+		chip->cmdfunc(mtd, NAND_CMD_RNDIN, *pos, -1);
 	} else {
 		tango_write_buf(mtd, *buf, len);
 		*buf += len;
-- 
GitLab


From 41145649f4acb30249b636b945053db50c9331c5 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 16 May 2017 18:27:49 +0200
Subject: [PATCH 0156/1958] mtd: nand: Wait for PAGEPROG to finish in drivers
 setting NAND_ECC_CUSTOM_PAGE_ACCESS

Drivers setting NAND_ECC_CUSTOM_PAGE_ACCESS are supposed to handle the
full read/write page sequence, and waiting for a page to actually be
programmed is part of this write-page sequence.
This is also what is done in ->write_oob_xxx() hooks, so let's do that in
->write_page_xxx() as well to make it consistent.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/nand-controller.c |  6 +++++-
 drivers/mtd/nand/nand_base.c             | 10 ++++++----
 drivers/mtd/nand/nand_micron.c           | 10 ++++++++--
 drivers/mtd/nand/tango_nand.c            | 13 ++++++++++++-
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index 846c555e470b4..6055831c953ff 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -917,7 +917,7 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_hsmc_nand_controller *nc;
-	int ret;
+	int ret, status;
 
 	nc = to_hsmc_nand_controller(chip->controller);
 
@@ -959,6 +959,10 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 		dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n",
 			ret);
 
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
 	return ret;
 }
 
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index df613125795b5..4b4fea4a3d1f3 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2750,11 +2750,13 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (status < 0)
 		return status;
 
-	if (nand_standard_page_accessors(&chip->ecc))
+	if (nand_standard_page_accessors(&chip->ecc)) {
 		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-	status = chip->waitfunc(mtd, chip);
-	if (status & NAND_STATUS_FAIL)
-		return -EIO;
+
+		status = chip->waitfunc(mtd, chip);
+		if (status & NAND_STATUS_FAIL)
+			return -EIO;
+	}
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c
index 9993f8ead1e2f..c30ab60f8e1bb 100644
--- a/drivers/mtd/nand/nand_micron.c
+++ b/drivers/mtd/nand/nand_micron.c
@@ -151,15 +151,18 @@ micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
 				  const uint8_t *buf, int oob_required,
 				  int page)
 {
+	int status;
+
 	micron_nand_on_die_ecc_setup(chip, true);
 
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
 	nand_write_page_raw(mtd, chip, buf, oob_required, page);
 	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
 
 	micron_nand_on_die_ecc_setup(chip, false);
 
-	return 0;
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
 }
 
 static int
@@ -180,11 +183,14 @@ micron_nand_write_page_raw_on_die_ecc(struct mtd_info *mtd,
 				      const uint8_t *buf, int oob_required,
 				      int page)
 {
+	int status;
+
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
 	nand_write_page_raw(mtd, chip, buf, oob_required, page);
 	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
 
-	return 0;
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
 }
 
 enum {
diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 201979f0c1bdc..dddc4fa7beb62 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -295,7 +295,7 @@ static int tango_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    const u8 *buf, int oob_required, int page)
 {
 	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
-	int err, len = mtd->writesize;
+	int err, status, len = mtd->writesize;
 
 	/* Calling tango_write_oob() would send PAGEPROG twice */
 	if (oob_required)
@@ -306,6 +306,10 @@ static int tango_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (err)
 		return err;
 
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
 	return 0;
 }
 
@@ -423,9 +427,16 @@ static int tango_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 static int tango_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 				const u8 *buf, int oob_required, int page)
 {
+	int status;
+
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page);
 	raw_write(chip, buf, chip->oob_poi);
 	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
 	return 0;
 }
 
-- 
GitLab


From 2165c4a1f71a04c7ea6493f18740a3afd4e47e4f Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 16 May 2017 18:35:45 +0200
Subject: [PATCH 0157/1958] mtd: nand: Support 'EXIT GET STATUS' command in
 nand_command[_lp]()

READ0 is sometimes used to exit GET STATUS mode. When this is the case
no address cycles are requested, and we can use this information to
detect that READSTART should not be issued after READ0 or that we
shouldn't wait for the chip to be ready.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 4b4fea4a3d1f3..58a97e30b3f4e 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -753,6 +753,16 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 		return;
 
 		/* This applies to read commands */
+	case NAND_CMD_READ0:
+		/*
+		 * READ0 is sometimes used to exit GET STATUS mode. When this
+		 * is the case no address cycles are requested, and we can use
+		 * this information to detect that we should not wait for the
+		 * device to be ready.
+		 */
+		if (column == -1 && page_addr == -1)
+			return;
+
 	default:
 		/*
 		 * If we don't have access to the busy pin, we apply the given
@@ -887,6 +897,15 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 		return;
 
 	case NAND_CMD_READ0:
+		/*
+		 * READ0 is sometimes used to exit GET STATUS mode. When this
+		 * is the case no address cycles are requested, and we can use
+		 * this information to detect that READSTART should not be
+		 * issued.
+		 */
+		if (column == -1 && page_addr == -1)
+			return;
+
 		chip->cmd_ctrl(mtd, NAND_CMD_READSTART,
 			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
 		chip->cmd_ctrl(mtd, NAND_CMD_NONE,
-- 
GitLab


From 79e0348c4e24fd1affdcf055e0269755580e0fcc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 25 May 2017 13:50:20 +0900
Subject: [PATCH 0158/1958] mtd: nand: check ecc->total sanity in
 nand_scan_tail

Drivers are supposed to set correct ecc->{size,strength,bytes} before
calling nand_scan_tail(), but it does not complain about ecc->total
bigger than oobsize.

In this case, chip->scan_bbt() crashes due to memory corruption, but
it is hard to debug.  It would be kind to fail it earlier with a clear
message.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 58a97e30b3f4e..2404bb046b69a 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4789,6 +4789,11 @@ int nand_scan_tail(struct mtd_info *mtd)
 		goto err_free;
 	}
 	ecc->total = ecc->steps * ecc->bytes;
+	if (ecc->total > mtd->oobsize) {
+		WARN(1, "Total number of ECC bytes exceeded oobsize\n");
+		ret = -EINVAL;
+		goto err_free;
+	}
 
 	/*
 	 * The number of bytes available for a client to place data into
-- 
GitLab


From 05b6c2313e91ac65915bde5e55bc6f194bfe22f3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 31 May 2017 10:19:26 +0200
Subject: [PATCH 0159/1958] mtd: nand: atmel: mark resume function
 __maybe_unused

The newly added suspend/resume support causes a harmless warning:

drivers/mtd/nand/atmel/nand-controller.c:2513:12: error: 'atmel_nand_controller_resume' defined but not used [-Werror=unused-function]

This shuts up the warning with a __maybe_unused annotation.

Fixes: b107007a7114 ("mtd: nand: atmel: Add PM ops")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/nand-controller.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index 6055831c953ff..d24e67b951671 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -2510,7 +2510,7 @@ static int atmel_nand_controller_remove(struct platform_device *pdev)
 	return nc->caps->ops->remove(nc);
 }
 
-static int atmel_nand_controller_resume(struct device *dev)
+static __maybe_unused int atmel_nand_controller_resume(struct device *dev)
 {
 	struct atmel_nand_controller *nc = dev_get_drvdata(dev);
 	struct atmel_nand *nand;
-- 
GitLab


From 2968698ba03187b09e84fbc709f0b5d19ddd94ff Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Wed, 31 May 2017 16:26:38 +0800
Subject: [PATCH 0160/1958] mtd: nand: mediatek: update DT bindings

Add MT2712 NAND Flash Controller dt bindings documentation.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 Documentation/devicetree/bindings/mtd/mtk-nand.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/mtk-nand.txt b/Documentation/devicetree/bindings/mtd/mtk-nand.txt
index 069c192ed5c2f..dbf9e054c11c0 100644
--- a/Documentation/devicetree/bindings/mtd/mtk-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/mtk-nand.txt
@@ -12,7 +12,8 @@ tree nodes.
 
 The first part of NFC is NAND Controller Interface (NFI) HW.
 Required NFI properties:
-- compatible:			Should be "mediatek,mtxxxx-nfc".
+- compatible:			Should be one of "mediatek,mt2701-nfc",
+				"mediatek,mt2712-nfc".
 - reg:				Base physical address and size of NFI.
 - interrupts:			Interrupts of NFI.
 - clocks:			NFI required clocks.
@@ -141,7 +142,7 @@ Example:
 ==============
 
 Required BCH properties:
-- compatible:	Should be "mediatek,mtxxxx-ecc".
+- compatible:	Should be one of "mediatek,mt2701-ecc", "mediatek,mt2712-ecc".
 - reg:		Base physical address and size of ECC.
 - interrupts:	Interrupts of ECC.
 - clocks:	ECC required clocks.
-- 
GitLab


From 582212ceb9bcd9ef0808497a8e066c4eef442e19 Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Wed, 31 May 2017 16:26:39 +0800
Subject: [PATCH 0161/1958] mtd: nand: mediatek: refine register NFI_PAGEFMT
 setting

The register NFI_PAGEFMT is always 32 bits length, so it is better to
do register program using writel() compare with writew().

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_nand.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index 6c517c682939d..c28f4b7025bfc 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c
@@ -408,7 +408,7 @@ static int mtk_nfc_hw_runtime_config(struct mtd_info *mtd)
 
 	fmt |= mtk_nand->fdm.reg_size << PAGEFMT_FDM_SHIFT;
 	fmt |= mtk_nand->fdm.ecc_size << PAGEFMT_FDM_ECC_SHIFT;
-	nfi_writew(nfc, fmt, NFI_PAGEFMT);
+	nfi_writel(nfc, fmt, NFI_PAGEFMT);
 
 	nfc->ecc_cfg.strength = chip->ecc.strength;
 	nfc->ecc_cfg.len = chip->ecc.size + mtk_nand->fdm.ecc_size;
@@ -1009,7 +1009,7 @@ static inline void mtk_nfc_hw_init(struct mtk_nfc *nfc)
 	 * 0  : poll the status of the busy/ready signal after [7:4]*16 cycles.
 	 */
 	nfi_writew(nfc, 0xf1, NFI_CNRNB);
-	nfi_writew(nfc, PAGEFMT_8K_16K, NFI_PAGEFMT);
+	nfi_writel(nfc, PAGEFMT_8K_16K, NFI_PAGEFMT);
 
 	mtk_nfc_hw_reset(nfc);
 
-- 
GitLab


From 7ec4a37c5d71f0a0bfeb1346d4e832a090ca292d Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Wed, 31 May 2017 16:26:40 +0800
Subject: [PATCH 0162/1958] mtd: nand: mediatek: add support for different MTK
 NAND FLASH Controller IP

ECC strength and spare size supported may be different among MTK NAND
FLASH Controller IPs.

This patch contains changes as following:
(1) add new struct mtk_nfc_caps to support different spare size.
(2) add new struct mtk_ecc_caps to support different ecc strength.
(3) remove ECC_CNFG_xBIT define, use a for loop to do ecc strength config.
(4) remove PAGEFMT_SPARE_ define, use a for loop to do spare format config.
(5) malloc ecc->eccdata buffer according to max ecc strength of this IP.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_ecc.c  | 181 +++++++++++++++---------------------
 drivers/mtd/nand/mtk_ecc.h  |   2 +-
 drivers/mtd/nand/mtk_nand.c | 169 +++++++++++++++------------------
 3 files changed, 153 insertions(+), 199 deletions(-)

diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index dbf256217b3eb..ae513038b34f5 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -33,26 +33,6 @@
 
 #define ECC_ENCCON		(0x00)
 #define ECC_ENCCNFG		(0x04)
-#define		ECC_CNFG_4BIT		(0)
-#define		ECC_CNFG_6BIT		(1)
-#define		ECC_CNFG_8BIT		(2)
-#define		ECC_CNFG_10BIT		(3)
-#define		ECC_CNFG_12BIT		(4)
-#define		ECC_CNFG_14BIT		(5)
-#define		ECC_CNFG_16BIT		(6)
-#define		ECC_CNFG_18BIT		(7)
-#define		ECC_CNFG_20BIT		(8)
-#define		ECC_CNFG_22BIT		(9)
-#define		ECC_CNFG_24BIT		(0xa)
-#define		ECC_CNFG_28BIT		(0xb)
-#define		ECC_CNFG_32BIT		(0xc)
-#define		ECC_CNFG_36BIT		(0xd)
-#define		ECC_CNFG_40BIT		(0xe)
-#define		ECC_CNFG_44BIT		(0xf)
-#define		ECC_CNFG_48BIT		(0x10)
-#define		ECC_CNFG_52BIT		(0x11)
-#define		ECC_CNFG_56BIT		(0x12)
-#define		ECC_CNFG_60BIT		(0x13)
 #define		ECC_MODE_SHIFT		(5)
 #define		ECC_MS_SHIFT		(16)
 #define ECC_ENCDIADDR		(0x08)
@@ -66,7 +46,6 @@
 #define		DEC_CNFG_CORRECT	(0x3 << 12)
 #define ECC_DECIDLE		(0x10C)
 #define ECC_DECENUM0		(0x114)
-#define		ERR_MASK		(0x3f)
 #define ECC_DECDONE		(0x124)
 #define ECC_DECIRQ_EN		(0x200)
 #define ECC_DECIRQ_STA		(0x204)
@@ -78,8 +57,15 @@
 #define ECC_IRQ_REG(op)		((op) == ECC_ENCODE ? \
 					ECC_ENCIRQ_EN : ECC_DECIRQ_EN)
 
+struct mtk_ecc_caps {
+	u32 err_mask;
+	const u8 *ecc_strength;
+	u8 num_ecc_strength;
+};
+
 struct mtk_ecc {
 	struct device *dev;
+	const struct mtk_ecc_caps *caps;
 	void __iomem *regs;
 	struct clk *clk;
 
@@ -87,7 +73,13 @@ struct mtk_ecc {
 	struct mutex lock;
 	u32 sectors;
 
-	u8 eccdata[112];
+	u8 *eccdata;
+};
+
+/* ecc strength that mt2701 supports */
+static const u8 ecc_strength_mt2701[] = {
+	4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36,
+	40, 44, 48, 52, 56, 60
 };
 
 static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc,
@@ -136,77 +128,24 @@ static irqreturn_t mtk_ecc_irq(int irq, void *id)
 	return IRQ_HANDLED;
 }
 
-static void mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
+static int mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 {
-	u32 ecc_bit = ECC_CNFG_4BIT, dec_sz, enc_sz;
-	u32 reg;
-
-	switch (config->strength) {
-	case 4:
-		ecc_bit = ECC_CNFG_4BIT;
-		break;
-	case 6:
-		ecc_bit = ECC_CNFG_6BIT;
-		break;
-	case 8:
-		ecc_bit = ECC_CNFG_8BIT;
-		break;
-	case 10:
-		ecc_bit = ECC_CNFG_10BIT;
-		break;
-	case 12:
-		ecc_bit = ECC_CNFG_12BIT;
-		break;
-	case 14:
-		ecc_bit = ECC_CNFG_14BIT;
-		break;
-	case 16:
-		ecc_bit = ECC_CNFG_16BIT;
-		break;
-	case 18:
-		ecc_bit = ECC_CNFG_18BIT;
-		break;
-	case 20:
-		ecc_bit = ECC_CNFG_20BIT;
-		break;
-	case 22:
-		ecc_bit = ECC_CNFG_22BIT;
-		break;
-	case 24:
-		ecc_bit = ECC_CNFG_24BIT;
-		break;
-	case 28:
-		ecc_bit = ECC_CNFG_28BIT;
-		break;
-	case 32:
-		ecc_bit = ECC_CNFG_32BIT;
-		break;
-	case 36:
-		ecc_bit = ECC_CNFG_36BIT;
-		break;
-	case 40:
-		ecc_bit = ECC_CNFG_40BIT;
-		break;
-	case 44:
-		ecc_bit = ECC_CNFG_44BIT;
-		break;
-	case 48:
-		ecc_bit = ECC_CNFG_48BIT;
-		break;
-	case 52:
-		ecc_bit = ECC_CNFG_52BIT;
-		break;
-	case 56:
-		ecc_bit = ECC_CNFG_56BIT;
-		break;
-	case 60:
-		ecc_bit = ECC_CNFG_60BIT;
-		break;
-	default:
-		dev_err(ecc->dev, "invalid strength %d, default to 4 bits\n",
+	u32 ecc_bit, dec_sz, enc_sz;
+	u32 reg, i;
+
+	for (i = 0; i < ecc->caps->num_ecc_strength; i++) {
+		if (ecc->caps->ecc_strength[i] == config->strength)
+			break;
+	}
+
+	if (i == ecc->caps->num_ecc_strength) {
+		dev_err(ecc->dev, "invalid ecc strength %d\n",
 			config->strength);
+		return -EINVAL;
 	}
 
+	ecc_bit = i;
+
 	if (config->op == ECC_ENCODE) {
 		/* configure ECC encoder (in bits) */
 		enc_sz = config->len << 3;
@@ -232,6 +171,8 @@ static void mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 		if (config->sectors)
 			ecc->sectors = 1 << (config->sectors - 1);
 	}
+
+	return 0;
 }
 
 void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats,
@@ -247,8 +188,8 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats,
 		offset = (i >> 2) << 2;
 		err = readl(ecc->regs + ECC_DECENUM0 + offset);
 		err = err >> ((i % 4) * 8);
-		err &= ERR_MASK;
-		if (err == ERR_MASK) {
+		err &= ecc->caps->err_mask;
+		if (err == ecc->caps->err_mask) {
 			/* uncorrectable errors */
 			stats->failed++;
 			continue;
@@ -322,7 +263,11 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 	}
 
 	mtk_ecc_wait_idle(ecc, op);
-	mtk_ecc_config(ecc, config);
+
+	ret = mtk_ecc_config(ecc, config);
+	if (ret)
+		return ret;
+
 	writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op));
 
 	init_completion(&ecc->done);
@@ -409,37 +354,66 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config,
 }
 EXPORT_SYMBOL(mtk_ecc_encode);
 
-void mtk_ecc_adjust_strength(u32 *p)
+void mtk_ecc_adjust_strength(struct mtk_ecc *ecc, u32 *p)
 {
-	u32 ecc[] = {4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36,
-			40, 44, 48, 52, 56, 60};
+	const u8 *ecc_strength = ecc->caps->ecc_strength;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(ecc); i++) {
-		if (*p <= ecc[i]) {
+	for (i = 0; i < ecc->caps->num_ecc_strength; i++) {
+		if (*p <= ecc_strength[i]) {
 			if (!i)
-				*p = ecc[i];
-			else if (*p != ecc[i])
-				*p = ecc[i - 1];
+				*p = ecc_strength[i];
+			else if (*p != ecc_strength[i])
+				*p = ecc_strength[i - 1];
 			return;
 		}
 	}
 
-	*p = ecc[ARRAY_SIZE(ecc) - 1];
+	*p = ecc_strength[ecc->caps->num_ecc_strength - 1];
 }
 EXPORT_SYMBOL(mtk_ecc_adjust_strength);
 
+static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
+	.err_mask = 0x3f,
+	.ecc_strength = ecc_strength_mt2701,
+	.num_ecc_strength = 20,
+};
+
+static const struct of_device_id mtk_ecc_dt_match[] = {
+	{
+		.compatible = "mediatek,mt2701-ecc",
+		.data = &mtk_ecc_caps_mt2701,
+	},
+	{},
+};
+
 static int mtk_ecc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_ecc *ecc;
 	struct resource *res;
+	const struct of_device_id *of_ecc_id = NULL;
+	u32 max_eccdata_size;
 	int irq, ret;
 
 	ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL);
 	if (!ecc)
 		return -ENOMEM;
 
+	of_ecc_id = of_match_device(mtk_ecc_dt_match, &pdev->dev);
+	if (!of_ecc_id)
+		return -ENODEV;
+
+	ecc->caps = of_ecc_id->data;
+
+	max_eccdata_size = ecc->caps->num_ecc_strength - 1;
+	max_eccdata_size = ecc->caps->ecc_strength[max_eccdata_size];
+	max_eccdata_size = (max_eccdata_size * ECC_PARITY_BITS + 7) >> 3;
+	max_eccdata_size = round_up(max_eccdata_size, 4);
+	ecc->eccdata = devm_kzalloc(dev, max_eccdata_size, GFP_KERNEL);
+	if (!ecc->eccdata)
+		return -ENOMEM;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ecc->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(ecc->regs)) {
@@ -508,11 +482,6 @@ static int mtk_ecc_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(mtk_ecc_pm_ops, mtk_ecc_suspend, mtk_ecc_resume);
 #endif
 
-static const struct of_device_id mtk_ecc_dt_match[] = {
-	{ .compatible = "mediatek,mt2701-ecc" },
-	{},
-};
-
 MODULE_DEVICE_TABLE(of, mtk_ecc_dt_match);
 
 static struct platform_driver mtk_ecc_driver = {
diff --git a/drivers/mtd/nand/mtk_ecc.h b/drivers/mtd/nand/mtk_ecc.h
index cbeba5cd1c139..d245c14f1b802 100644
--- a/drivers/mtd/nand/mtk_ecc.h
+++ b/drivers/mtd/nand/mtk_ecc.h
@@ -42,7 +42,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *, struct mtk_ecc_stats *, int);
 int mtk_ecc_wait_done(struct mtk_ecc *, enum mtk_ecc_operation);
 int mtk_ecc_enable(struct mtk_ecc *, struct mtk_ecc_config *);
 void mtk_ecc_disable(struct mtk_ecc *);
-void mtk_ecc_adjust_strength(u32 *);
+void mtk_ecc_adjust_strength(struct mtk_ecc *ecc, u32 *p);
 
 struct mtk_ecc *of_mtk_ecc_get(struct device_node *);
 void mtk_ecc_release(struct mtk_ecc *);
diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index c28f4b7025bfc..e428669f41f1e 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/iopoll.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include "mtk_ecc.h"
 
 /* NAND controller register definition */
@@ -38,23 +39,6 @@
 #define NFI_PAGEFMT		(0x04)
 #define		PAGEFMT_FDM_ECC_SHIFT	(12)
 #define		PAGEFMT_FDM_SHIFT	(8)
-#define		PAGEFMT_SPARE_16	(0)
-#define		PAGEFMT_SPARE_26	(1)
-#define		PAGEFMT_SPARE_27	(2)
-#define		PAGEFMT_SPARE_28	(3)
-#define		PAGEFMT_SPARE_32	(4)
-#define		PAGEFMT_SPARE_36	(5)
-#define		PAGEFMT_SPARE_40	(6)
-#define		PAGEFMT_SPARE_44	(7)
-#define		PAGEFMT_SPARE_48	(8)
-#define		PAGEFMT_SPARE_49	(9)
-#define		PAGEFMT_SPARE_50	(0xa)
-#define		PAGEFMT_SPARE_51	(0xb)
-#define		PAGEFMT_SPARE_52	(0xc)
-#define		PAGEFMT_SPARE_62	(0xd)
-#define		PAGEFMT_SPARE_63	(0xe)
-#define		PAGEFMT_SPARE_64	(0xf)
-#define		PAGEFMT_SPARE_SHIFT	(4)
 #define		PAGEFMT_SEC_SEL_512	BIT(2)
 #define		PAGEFMT_512_2K		(0)
 #define		PAGEFMT_2K_4K		(1)
@@ -115,6 +99,13 @@
 #define MTK_RESET_TIMEOUT	(1000000)
 #define MTK_MAX_SECTOR		(16)
 #define MTK_NAND_MAX_NSELS	(2)
+#define MTK_NFC_MIN_SPARE	(16)
+
+struct mtk_nfc_caps {
+	const u8 *spare_size;
+	u8 num_spare_size;
+	u8 pageformat_spare_shift;
+};
 
 struct mtk_nfc_bad_mark_ctl {
 	void (*bm_swap)(struct mtd_info *, u8 *buf, int raw);
@@ -155,6 +146,7 @@ struct mtk_nfc {
 	struct mtk_ecc *ecc;
 
 	struct device *dev;
+	const struct mtk_nfc_caps *caps;
 	void __iomem *regs;
 
 	struct completion done;
@@ -163,6 +155,15 @@ struct mtk_nfc {
 	u8 *buffer;
 };
 
+/*
+ * supported spare size of each IP.
+ * order should be the same with the spare size bitfiled defination of
+ * register NFI_PAGEFMT.
+ */
+static const u8 spare_size_mt2701[] = {
+	16, 26, 27, 28, 32, 36, 40, 44,	48, 49, 50, 51, 52, 62, 63, 64
+};
+
 static inline struct mtk_nfc_nand_chip *to_mtk_nand(struct nand_chip *nand)
 {
 	return container_of(nand, struct mtk_nfc_nand_chip, nand);
@@ -308,7 +309,7 @@ static int mtk_nfc_hw_runtime_config(struct mtd_info *mtd)
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip);
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
-	u32 fmt, spare;
+	u32 fmt, spare, i;
 
 	if (!mtd->writesize)
 		return 0;
@@ -352,60 +353,18 @@ static int mtk_nfc_hw_runtime_config(struct mtd_info *mtd)
 	if (chip->ecc.size == 1024)
 		spare >>= 1;
 
-	switch (spare) {
-	case 16:
-		fmt |= (PAGEFMT_SPARE_16 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 26:
-		fmt |= (PAGEFMT_SPARE_26 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 27:
-		fmt |= (PAGEFMT_SPARE_27 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 28:
-		fmt |= (PAGEFMT_SPARE_28 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 32:
-		fmt |= (PAGEFMT_SPARE_32 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 36:
-		fmt |= (PAGEFMT_SPARE_36 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 40:
-		fmt |= (PAGEFMT_SPARE_40 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 44:
-		fmt |= (PAGEFMT_SPARE_44 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 48:
-		fmt |= (PAGEFMT_SPARE_48 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 49:
-		fmt |= (PAGEFMT_SPARE_49 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 50:
-		fmt |= (PAGEFMT_SPARE_50 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 51:
-		fmt |= (PAGEFMT_SPARE_51 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 52:
-		fmt |= (PAGEFMT_SPARE_52 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 62:
-		fmt |= (PAGEFMT_SPARE_62 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 63:
-		fmt |= (PAGEFMT_SPARE_63 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 64:
-		fmt |= (PAGEFMT_SPARE_64 << PAGEFMT_SPARE_SHIFT);
-		break;
-	default:
-		dev_err(nfc->dev, "invalid spare per sector %d\n", spare);
+	for (i = 0; i < nfc->caps->num_spare_size; i++) {
+		if (nfc->caps->spare_size[i] == spare)
+			break;
+	}
+
+	if (i == nfc->caps->num_spare_size) {
+		dev_err(nfc->dev, "invalid spare size %d\n", spare);
 		return -EINVAL;
 	}
 
+	fmt |= i << nfc->caps->pageformat_spare_shift;
+
 	fmt |= mtk_nand->fdm.reg_size << PAGEFMT_FDM_SHIFT;
 	fmt |= mtk_nand->fdm.ecc_size << PAGEFMT_FDM_ECC_SHIFT;
 	nfi_writel(nfc, fmt, NFI_PAGEFMT);
@@ -1131,12 +1090,12 @@ static void mtk_nfc_set_bad_mark_ctl(struct mtk_nfc_bad_mark_ctl *bm_ctl,
 	}
 }
 
-static void mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd)
+static int mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
-	u32 spare[] = {16, 26, 27, 28, 32, 36, 40, 44,
-			48, 49, 50, 51, 52, 62, 63, 64};
-	u32 eccsteps, i;
+	struct mtk_nfc *nfc = nand_get_controller_data(nand);
+	const u8 *spare = nfc->caps->spare_size;
+	u32 eccsteps, i, closest_spare = 0;
 
 	eccsteps = mtd->writesize / nand->ecc.size;
 	*sps = mtd->oobsize / eccsteps;
@@ -1144,28 +1103,31 @@ static void mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd)
 	if (nand->ecc.size == 1024)
 		*sps >>= 1;
 
-	for (i = 0; i < ARRAY_SIZE(spare); i++) {
-		if (*sps <= spare[i]) {
-			if (!i)
-				*sps = spare[i];
-			else if (*sps != spare[i])
-				*sps = spare[i - 1];
-			break;
+	if (*sps < MTK_NFC_MIN_SPARE)
+		return -EINVAL;
+
+	for (i = 0; i < nfc->caps->num_spare_size; i++) {
+		if (*sps >= spare[i] && spare[i] >= spare[closest_spare]) {
+			closest_spare = i;
+			if (*sps == spare[i])
+				break;
 		}
 	}
 
-	if (i >= ARRAY_SIZE(spare))
-		*sps = spare[ARRAY_SIZE(spare) - 1];
+	*sps = spare[closest_spare];
 
 	if (nand->ecc.size == 1024)
 		*sps <<= 1;
+
+	return 0;
 }
 
 static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct mtk_nfc *nfc = nand_get_controller_data(nand);
 	u32 spare;
-	int free;
+	int free, ret;
 
 	/* support only ecc hw mode */
 	if (nand->ecc.mode != NAND_ECC_HW) {
@@ -1194,7 +1156,9 @@ static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd)
 			nand->ecc.size = 1024;
 		}
 
-		mtk_nfc_set_spare_per_sector(&spare, mtd);
+		ret = mtk_nfc_set_spare_per_sector(&spare, mtd);
+		if (ret)
+			return ret;
 
 		/* calculate oob bytes except ecc parity data */
 		free = ((nand->ecc.strength * ECC_PARITY_BITS) + 7) >> 3;
@@ -1214,7 +1178,7 @@ static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd)
 		}
 	}
 
-	mtk_ecc_adjust_strength(&nand->ecc.strength);
+	mtk_ecc_adjust_strength(nfc->ecc, &nand->ecc.strength);
 
 	dev_info(dev, "eccsize %d eccstrength %d\n",
 		 nand->ecc.size, nand->ecc.strength);
@@ -1312,7 +1276,10 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 		return -EINVAL;
 	}
 
-	mtk_nfc_set_spare_per_sector(&chip->spare_per_sector, mtd);
+	ret = mtk_nfc_set_spare_per_sector(&chip->spare_per_sector, mtd);
+	if (ret)
+		return ret;
+
 	mtk_nfc_set_fdm(&chip->fdm, mtd);
 	mtk_nfc_set_bad_mark_ctl(&chip->bad_mark, mtd);
 
@@ -1354,12 +1321,28 @@ static int mtk_nfc_nand_chips_init(struct device *dev, struct mtk_nfc *nfc)
 	return 0;
 }
 
+static const struct mtk_nfc_caps mtk_nfc_caps_mt2701 = {
+	.spare_size = spare_size_mt2701,
+	.num_spare_size = 16,
+	.pageformat_spare_shift = 4,
+};
+
+static const struct of_device_id mtk_nfc_id_table[] = {
+	{
+		.compatible = "mediatek,mt2701-nfc",
+		.data = &mtk_nfc_caps_mt2701,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, mtk_nfc_id_table);
+
 static int mtk_nfc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
 	struct mtk_nfc *nfc;
 	struct resource *res;
+	const struct of_device_id *of_nfc_id = NULL;
 	int ret, irq;
 
 	nfc = devm_kzalloc(dev, sizeof(*nfc), GFP_KERNEL);
@@ -1423,6 +1406,14 @@ static int mtk_nfc_probe(struct platform_device *pdev)
 		goto clk_disable;
 	}
 
+	of_nfc_id = of_match_device(mtk_nfc_id_table, &pdev->dev);
+	if (!of_nfc_id) {
+		ret = -ENODEV;
+		goto clk_disable;
+	}
+
+	nfc->caps = of_nfc_id->data;
+
 	platform_set_drvdata(pdev, nfc);
 
 	ret = mtk_nfc_nand_chips_init(dev, nfc);
@@ -1503,12 +1494,6 @@ static int mtk_nfc_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(mtk_nfc_pm_ops, mtk_nfc_suspend, mtk_nfc_resume);
 #endif
 
-static const struct of_device_id mtk_nfc_id_table[] = {
-	{ .compatible = "mediatek,mt2701-nfc" },
-	{}
-};
-MODULE_DEVICE_TABLE(of, mtk_nfc_id_table);
-
 static struct platform_driver mtk_nfc_driver = {
 	.probe  = mtk_nfc_probe,
 	.remove = mtk_nfc_remove,
-- 
GitLab


From 30ee809e980b07f467fca3e7ee16e8d034cf41af Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Wed, 31 May 2017 16:26:41 +0800
Subject: [PATCH 0163/1958] mtd: nand: mediatek: add support for MT2712 NAND
 FLASH Controller

MT2712 NAND FLASH Controller is similar to MT2701 except those following:
(1) MT2712 supports up to 148B spare size per 1KB size sector (the same
    with 74B spare size per 512B size sector). There are three new spare
    format: 61, 67, 74.
(2) MT2712 supports up to 80 bit ecc strength. There are three new ecc
    strength level: 68, 72, 80.
(3) MT2712 ECC encode parity data register's start offset is 0x300, and
    different with 0x10 of MT2701.
(4) MT2712 improves ecc irq function. When ECC works in ECC_NFI_MODE,
    MT2701 will generate ecc irq number the same with ecc steps during
    page read. However, MT2712 can only generate one ecc irq.

Changes of this patch are:
(1) add two new variables named pg_irq_sel, encode_parity_reg0 in struct
    mtk_ecc_caps.
(2) add new bitfield ECC_PG_IRQ_SEL for register ECC_IRQ_REG.
(3) add ecc strength array of mt2712.
(4) add spare size array of mt2712.
(5) add mt2712 nfc and ecc device compatiable and data.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_ecc.c  | 39 +++++++++++++++++++++++++++++++++----
 drivers/mtd/nand/mtk_nand.c | 14 +++++++++++++
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index ae513038b34f5..4958121cb8275 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -28,6 +28,7 @@
 
 #define ECC_IDLE_MASK		BIT(0)
 #define ECC_IRQ_EN		BIT(0)
+#define ECC_PG_IRQ_SEL		BIT(1)
 #define ECC_OP_ENABLE		(1)
 #define ECC_OP_DISABLE		(0)
 
@@ -37,7 +38,6 @@
 #define		ECC_MS_SHIFT		(16)
 #define ECC_ENCDIADDR		(0x08)
 #define ECC_ENCIDLE		(0x0C)
-#define ECC_ENCPAR(x)		(0x10 + (x) * sizeof(u32))
 #define ECC_ENCIRQ_EN		(0x80)
 #define ECC_ENCIRQ_STA		(0x84)
 #define ECC_DECCON		(0x100)
@@ -61,6 +61,8 @@ struct mtk_ecc_caps {
 	u32 err_mask;
 	const u8 *ecc_strength;
 	u8 num_ecc_strength;
+	u32 encode_parity_reg0;
+	int pg_irq_sel;
 };
 
 struct mtk_ecc {
@@ -76,12 +78,17 @@ struct mtk_ecc {
 	u8 *eccdata;
 };
 
-/* ecc strength that mt2701 supports */
+/* ecc strength that each IP supports */
 static const u8 ecc_strength_mt2701[] = {
 	4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36,
 	40, 44, 48, 52, 56, 60
 };
 
+static const u8 ecc_strength_mt2712[] = {
+	4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36,
+	40, 44, 48, 52, 56, 60, 68, 72, 80
+};
+
 static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc,
 				     enum mtk_ecc_operation op)
 {
@@ -254,6 +261,7 @@ EXPORT_SYMBOL(of_mtk_ecc_get);
 int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 {
 	enum mtk_ecc_operation op = config->op;
+	u16 reg_val;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ecc->lock);
@@ -271,7 +279,15 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 	writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op));
 
 	init_completion(&ecc->done);
-	writew(ECC_IRQ_EN, ecc->regs + ECC_IRQ_REG(op));
+	reg_val = ECC_IRQ_EN;
+	/*
+	 * For ECC_NFI_MODE, if ecc->caps->pg_irq_sel is 1, then it
+	 * means this chip can only generate one ecc irq during page
+	 * read / write. If is 0, generate one ecc irq each ecc step.
+	 */
+	if ((ecc->caps->pg_irq_sel) && (config->mode == ECC_NFI_MODE))
+		reg_val |= ECC_PG_IRQ_SEL;
+	writew(reg_val, ecc->regs + ECC_IRQ_REG(op));
 
 	return 0;
 }
@@ -341,7 +357,9 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config,
 	len = (config->strength * ECC_PARITY_BITS + 7) >> 3;
 
 	/* write the parity bytes generated by the ECC back to temp buffer */
-	__ioread32_copy(ecc->eccdata, ecc->regs + ECC_ENCPAR(0), round_up(len, 4));
+	__ioread32_copy(ecc->eccdata,
+			ecc->regs + ecc->caps->encode_parity_reg0,
+			round_up(len, 4));
 
 	/* copy into possibly unaligned OOB region with actual length */
 	memcpy(data + bytes, ecc->eccdata, len);
@@ -377,12 +395,25 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
 	.err_mask = 0x3f,
 	.ecc_strength = ecc_strength_mt2701,
 	.num_ecc_strength = 20,
+	.encode_parity_reg0 = 0x10,
+	.pg_irq_sel = 0,
+};
+
+static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
+	.err_mask = 0x7f,
+	.ecc_strength = ecc_strength_mt2712,
+	.num_ecc_strength = 23,
+	.encode_parity_reg0 = 0x300,
+	.pg_irq_sel = 1,
 };
 
 static const struct of_device_id mtk_ecc_dt_match[] = {
 	{
 		.compatible = "mediatek,mt2701-ecc",
 		.data = &mtk_ecc_caps_mt2701,
+	}, {
+		.compatible = "mediatek,mt2712-ecc",
+		.data = &mtk_ecc_caps_mt2712,
 	},
 	{},
 };
diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index e428669f41f1e..4d9ee278b9a07 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c
@@ -164,6 +164,11 @@ static const u8 spare_size_mt2701[] = {
 	16, 26, 27, 28, 32, 36, 40, 44,	48, 49, 50, 51, 52, 62, 63, 64
 };
 
+static const u8 spare_size_mt2712[] = {
+	16, 26, 27, 28, 32, 36, 40, 44, 48, 49, 50, 51, 52, 62, 61, 63, 64, 67,
+	74
+};
+
 static inline struct mtk_nfc_nand_chip *to_mtk_nand(struct nand_chip *nand)
 {
 	return container_of(nand, struct mtk_nfc_nand_chip, nand);
@@ -1327,10 +1332,19 @@ static const struct mtk_nfc_caps mtk_nfc_caps_mt2701 = {
 	.pageformat_spare_shift = 4,
 };
 
+static const struct mtk_nfc_caps mtk_nfc_caps_mt2712 = {
+	.spare_size = spare_size_mt2712,
+	.num_spare_size = 19,
+	.pageformat_spare_shift = 16,
+};
+
 static const struct of_device_id mtk_nfc_id_table[] = {
 	{
 		.compatible = "mediatek,mt2701-nfc",
 		.data = &mtk_nfc_caps_mt2701,
+	}, {
+		.compatible = "mediatek,mt2712-nfc",
+		.data = &mtk_nfc_caps_mt2712,
 	},
 	{}
 };
-- 
GitLab


From 4db4d35ebda390b5287d758fdd51b26c24fbc26b Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Thu, 25 May 2017 11:49:12 +1200
Subject: [PATCH 0164/1958] mtd: mchp23k256: Add OF device ID table

This allows registering of this device via a Device Tree.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 .../bindings/mtd/microchip,mchp23k256.txt      | 18 ++++++++++++++++++
 drivers/mtd/devices/mchp23k256.c               |  8 ++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt

diff --git a/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt b/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt
new file mode 100644
index 0000000000000..25e5ad38b0f0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt
@@ -0,0 +1,18 @@
+* MTD SPI driver for Microchip 23K256 (and similar) serial SRAM
+
+Required properties:
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+  representing partitions.
+- compatible : Must be "microchip,mchp23k256"
+- reg : Chip-Select number
+- spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
+
+Example:
+
+	spi-sram@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "microchip,mchp23k256";
+		reg = <0>;
+		spi-max-frequency = <20000000>;
+	};
diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
index e237db9f1bdbd..9d8306a158333 100644
--- a/drivers/mtd/devices/mchp23k256.c
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -19,6 +19,7 @@
 #include <linux/sizes.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
+#include <linux/of_device.h>
 
 struct mchp23k256_flash {
 	struct spi_device	*spi;
@@ -166,9 +167,16 @@ static int mchp23k256_remove(struct spi_device *spi)
 	return mtd_device_unregister(&flash->mtd);
 }
 
+static const struct of_device_id mchp23k256_of_table[] = {
+	{ .compatible = "microchip,mchp23k256" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mchp23k256_of_table);
+
 static struct spi_driver mchp23k256_driver = {
 	.driver = {
 		.name	= "mchp23k256",
+		.of_match_table = of_match_ptr(mchp23k256_of_table),
 	},
 	.probe		= mchp23k256_probe,
 	.remove		= mchp23k256_remove,
-- 
GitLab


From 44225c9c79ae288b5f1668dc5dda7ba9f87995f1 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Thu, 25 May 2017 11:49:13 +1200
Subject: [PATCH 0165/1958] mtd: mchp23k256: switch to mtd_device_register()

Use mtd_device_register() instead of mtd_device_parse_register() to
eliminate two unused parameters.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mchp23k256.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
index 9d8306a158333..2542f5b8b63fc 100644
--- a/drivers/mtd/devices/mchp23k256.c
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -151,9 +151,8 @@ static int mchp23k256_probe(struct spi_device *spi)
 	flash->mtd._read	= mchp23k256_read;
 	flash->mtd._write	= mchp23k256_write;
 
-	err = mtd_device_parse_register(&flash->mtd, NULL, NULL,
-					data ? data->parts : NULL,
-					data ? data->nr_parts : 0);
+	err = mtd_device_register(&flash->mtd, data ? data->parts : NULL,
+				  data ? data->nr_parts : 0);
 	if (err)
 		return err;
 
-- 
GitLab


From d140bd42b2ced34ce31e9e4384526ce159feabf1 Mon Sep 17 00:00:00 2001
From: Hoan Tran <hotran@apm.com>
Date: Mon, 24 Apr 2017 11:00:25 -0700
Subject: [PATCH 0166/1958] i2c: xgene-slimpro: Use a single function to send
 command message

This patch refactors the code to use a single message function to
send command message.

Signed-off-by: Hoan Tran <hotran@apm.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-xgene-slimpro.c | 67 ++++++++++----------------
 1 file changed, 26 insertions(+), 41 deletions(-)

diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
index 6ba6c83ca8f1b..4e2257850d2cd 100644
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
@@ -144,49 +144,52 @@ static int start_i2c_msg_xfer(struct slimpro_i2c_dev *ctx)
 	return 0;
 }
 
-static int slimpro_i2c_rd(struct slimpro_i2c_dev *ctx, u32 chip,
-			  u32 addr, u32 addrlen, u32 protocol,
-			  u32 readlen, u32 *data)
+static int slimpro_i2c_send_msg(struct slimpro_i2c_dev *ctx,
+				u32 *msg,
+				u32 *data)
 {
-	u32 msg[3];
 	int rc;
 
-	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip,
-					SLIMPRO_IIC_READ, protocol, addrlen, readlen);
-	msg[1] = SLIMPRO_IIC_ENCODE_ADDR(addr);
-	msg[2] = 0;
 	ctx->resp_msg = data;
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
+
+	rc = mbox_send_message(ctx->mbox_chan, msg);
 	if (rc < 0)
 		goto err;
 
 	rc = start_i2c_msg_xfer(ctx);
+
 err:
 	ctx->resp_msg = NULL;
+
 	return rc;
 }
 
+static int slimpro_i2c_rd(struct slimpro_i2c_dev *ctx, u32 chip,
+			  u32 addr, u32 addrlen, u32 protocol,
+			  u32 readlen, u32 *data)
+{
+	u32 msg[3];
+
+	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip,
+					SLIMPRO_IIC_READ, protocol, addrlen, readlen);
+	msg[1] = SLIMPRO_IIC_ENCODE_ADDR(addr);
+	msg[2] = 0;
+
+	return slimpro_i2c_send_msg(ctx, msg, data);
+}
+
 static int slimpro_i2c_wr(struct slimpro_i2c_dev *ctx, u32 chip,
 			  u32 addr, u32 addrlen, u32 protocol, u32 writelen,
 			  u32 data)
 {
 	u32 msg[3];
-	int rc;
 
 	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip,
 					SLIMPRO_IIC_WRITE, protocol, addrlen, writelen);
 	msg[1] = SLIMPRO_IIC_ENCODE_ADDR(addr);
 	msg[2] = data;
-	ctx->resp_msg = msg;
-
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
-	if (rc < 0)
-		goto err;
 
-	rc = start_i2c_msg_xfer(ctx);
-err:
-	ctx->resp_msg = NULL;
-	return rc;
+	return slimpro_i2c_send_msg(ctx, msg, msg);
 }
 
 static int slimpro_i2c_blkrd(struct slimpro_i2c_dev *ctx, u32 chip, u32 addr,
@@ -201,8 +204,7 @@ static int slimpro_i2c_blkrd(struct slimpro_i2c_dev *ctx, u32 chip, u32 addr,
 	if (dma_mapping_error(ctx->dev, paddr)) {
 		dev_err(&ctx->adapter.dev, "Error in mapping dma buffer %p\n",
 			ctx->dma_buffer);
-		rc = -ENOMEM;
-		goto err;
+		return -ENOMEM;
 	}
 
 	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip, SLIMPRO_IIC_READ,
@@ -212,21 +214,13 @@ static int slimpro_i2c_blkrd(struct slimpro_i2c_dev *ctx, u32 chip, u32 addr,
 		 SLIMPRO_IIC_ENCODE_UPPER_BUFADDR(paddr) |
 		 SLIMPRO_IIC_ENCODE_ADDR(addr);
 	msg[2] = (u32)paddr;
-	ctx->resp_msg = msg;
 
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
-	if (rc < 0)
-		goto err_unmap;
-
-	rc = start_i2c_msg_xfer(ctx);
+	rc = slimpro_i2c_send_msg(ctx, msg, msg);
 
 	/* Copy to destination */
 	memcpy(data, ctx->dma_buffer, readlen);
 
-err_unmap:
 	dma_unmap_single(ctx->dev, paddr, readlen, DMA_FROM_DEVICE);
-err:
-	ctx->resp_msg = NULL;
 	return rc;
 }
 
@@ -244,8 +238,7 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip,
 	if (dma_mapping_error(ctx->dev, paddr)) {
 		dev_err(&ctx->adapter.dev, "Error in mapping dma buffer %p\n",
 			ctx->dma_buffer);
-		rc = -ENOMEM;
-		goto err;
+		return -ENOMEM;
 	}
 
 	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip, SLIMPRO_IIC_WRITE,
@@ -254,21 +247,13 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip,
 		 SLIMPRO_IIC_ENCODE_UPPER_BUFADDR(paddr) |
 		 SLIMPRO_IIC_ENCODE_ADDR(addr);
 	msg[2] = (u32)paddr;
-	ctx->resp_msg = msg;
 
 	if (ctx->mbox_client.tx_block)
 		reinit_completion(&ctx->rd_complete);
 
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
-	if (rc < 0)
-		goto err_unmap;
-
-	rc = start_i2c_msg_xfer(ctx);
+	rc = slimpro_i2c_send_msg(ctx, msg, msg);
 
-err_unmap:
 	dma_unmap_single(ctx->dev, paddr, writelen, DMA_TO_DEVICE);
-err:
-	ctx->resp_msg = NULL;
 	return rc;
 }
 
-- 
GitLab


From df5da47fe722e36055b97134e6bb9df58c12495c Mon Sep 17 00:00:00 2001
From: Hoan Tran <hotran@apm.com>
Date: Mon, 24 Apr 2017 11:00:26 -0700
Subject: [PATCH 0167/1958] i2c: xgene-slimpro: Add ACPI support by using PCC
 mailbox

This patch adds ACPI support by using PCC mailbox communication
interface.

Signed-off-by: Hoan Tran <hotran@apm.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-xgene-slimpro.c | 174 +++++++++++++++++++++++--
 1 file changed, 161 insertions(+), 13 deletions(-)

diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
index 4e2257850d2cd..8e5c8f28bc8bf 100644
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
@@ -22,6 +22,7 @@
  * using the APM X-Gene SLIMpro mailbox driver.
  *
  */
+#include <acpi/pcc.h>
 #include <linux/acpi.h>
 #include <linux/dma-mapping.h>
 #include <linux/i2c.h>
@@ -89,6 +90,8 @@
 	((addrlen << SLIMPRO_IIC_ADDRLEN_SHIFT) & SLIMPRO_IIC_ADDRLEN_MASK) | \
 	((datalen << SLIMPRO_IIC_DATALEN_SHIFT) & SLIMPRO_IIC_DATALEN_MASK))
 
+#define SLIMPRO_MSG_TYPE(v)             (((v) & 0xF0000000) >> 28)
+
 /*
  * Encode for upper address for block data
  */
@@ -99,19 +102,47 @@
 								& 0x3FF00000))
 #define SLIMPRO_IIC_ENCODE_ADDR(a)			((a) & 0x000FFFFF)
 
+#define SLIMPRO_IIC_MSG_DWORD_COUNT			3
+
+/* PCC related defines */
+#define PCC_SIGNATURE			0x50424300
+#define PCC_STS_CMD_COMPLETE		BIT(0)
+#define PCC_STS_SCI_DOORBELL		BIT(1)
+#define PCC_STS_ERR			BIT(2)
+#define PCC_STS_PLAT_NOTIFY		BIT(3)
+#define PCC_CMD_GENERATE_DB_INT		BIT(15)
+
 struct slimpro_i2c_dev {
 	struct i2c_adapter adapter;
 	struct device *dev;
 	struct mbox_chan *mbox_chan;
 	struct mbox_client mbox_client;
+	int mbox_idx;
 	struct completion rd_complete;
 	u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1]; /* dma_buffer[0] is used for length */
 	u32 *resp_msg;
+	phys_addr_t comm_base_addr;
+	void *pcc_comm_addr;
 };
 
 #define to_slimpro_i2c_dev(cl)	\
 		container_of(cl, struct slimpro_i2c_dev, mbox_client)
 
+/*
+ * This function tests and clears a bitmask then returns its old value
+ */
+static u16 xgene_word_tst_and_clr(u16 *addr, u16 mask)
+{
+	u16 ret, val;
+
+	val = le16_to_cpu(READ_ONCE(*addr));
+	ret = val & mask;
+	val &= ~mask;
+	WRITE_ONCE(*addr, cpu_to_le16(val));
+
+	return ret;
+}
+
 static void slimpro_i2c_rx_cb(struct mbox_client *cl, void *mssg)
 {
 	struct slimpro_i2c_dev *ctx = to_slimpro_i2c_dev(cl);
@@ -129,9 +160,53 @@ static void slimpro_i2c_rx_cb(struct mbox_client *cl, void *mssg)
 		complete(&ctx->rd_complete);
 }
 
+static void slimpro_i2c_pcc_rx_cb(struct mbox_client *cl, void *msg)
+{
+	struct slimpro_i2c_dev *ctx = to_slimpro_i2c_dev(cl);
+	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
+
+	/* Check if platform sends interrupt */
+	if (!xgene_word_tst_and_clr(&generic_comm_base->status,
+				    PCC_STS_SCI_DOORBELL))
+		return;
+
+	if (xgene_word_tst_and_clr(&generic_comm_base->status,
+				   PCC_STS_CMD_COMPLETE)) {
+		msg = generic_comm_base + 1;
+
+		/* Response message msg[1] contains the return value. */
+		if (ctx->resp_msg)
+			*ctx->resp_msg = ((u32 *)msg)[1];
+
+		complete(&ctx->rd_complete);
+	}
+}
+
+static void slimpro_i2c_pcc_tx_prepare(struct slimpro_i2c_dev *ctx, u32 *msg)
+{
+	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
+	u32 *ptr = (void *)(generic_comm_base + 1);
+	u16 status;
+	int i;
+
+	WRITE_ONCE(generic_comm_base->signature,
+		   cpu_to_le32(PCC_SIGNATURE | ctx->mbox_idx));
+
+	WRITE_ONCE(generic_comm_base->command,
+		   cpu_to_le16(SLIMPRO_MSG_TYPE(msg[0]) | PCC_CMD_GENERATE_DB_INT));
+
+	status = le16_to_cpu(READ_ONCE(generic_comm_base->status));
+	status &= ~PCC_STS_CMD_COMPLETE;
+	WRITE_ONCE(generic_comm_base->status, cpu_to_le16(status));
+
+	/* Copy the message to the PCC comm space */
+	for (i = 0; i < SLIMPRO_IIC_MSG_DWORD_COUNT; i++)
+		WRITE_ONCE(ptr[i], cpu_to_le32(msg[i]));
+}
+
 static int start_i2c_msg_xfer(struct slimpro_i2c_dev *ctx)
 {
-	if (ctx->mbox_client.tx_block) {
+	if (ctx->mbox_client.tx_block || !acpi_disabled) {
 		if (!wait_for_completion_timeout(&ctx->rd_complete,
 						 msecs_to_jiffies(MAILBOX_OP_TIMEOUT)))
 			return -ETIMEDOUT;
@@ -152,6 +227,11 @@ static int slimpro_i2c_send_msg(struct slimpro_i2c_dev *ctx,
 
 	ctx->resp_msg = data;
 
+	if (!acpi_disabled) {
+		reinit_completion(&ctx->rd_complete);
+		slimpro_i2c_pcc_tx_prepare(ctx, msg);
+	}
+
 	rc = mbox_send_message(ctx->mbox_chan, msg);
 	if (rc < 0)
 		goto err;
@@ -159,6 +239,9 @@ static int slimpro_i2c_send_msg(struct slimpro_i2c_dev *ctx,
 	rc = start_i2c_msg_xfer(ctx);
 
 err:
+	if (!acpi_disabled)
+		mbox_chan_txdone(ctx->mbox_chan, 0);
+
 	ctx->resp_msg = NULL;
 
 	return rc;
@@ -379,17 +462,73 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev)
 
 	/* Request mailbox channel */
 	cl->dev = &pdev->dev;
-	cl->rx_callback = slimpro_i2c_rx_cb;
-	cl->tx_block = true;
 	init_completion(&ctx->rd_complete);
 	cl->tx_tout = MAILBOX_OP_TIMEOUT;
 	cl->knows_txdone = false;
-	ctx->mbox_chan = mbox_request_channel(cl, MAILBOX_I2C_INDEX);
-	if (IS_ERR(ctx->mbox_chan)) {
-		dev_err(&pdev->dev, "i2c mailbox channel request failed\n");
-		return PTR_ERR(ctx->mbox_chan);
-	}
+	if (acpi_disabled) {
+		cl->tx_block = true;
+		cl->rx_callback = slimpro_i2c_rx_cb;
+		ctx->mbox_chan = mbox_request_channel(cl, MAILBOX_I2C_INDEX);
+		if (IS_ERR(ctx->mbox_chan)) {
+			dev_err(&pdev->dev, "i2c mailbox channel request failed\n");
+			return PTR_ERR(ctx->mbox_chan);
+		}
+	} else {
+		struct acpi_pcct_hw_reduced *cppc_ss;
+
+		if (device_property_read_u32(&pdev->dev, "pcc-channel",
+					     &ctx->mbox_idx))
+			ctx->mbox_idx = MAILBOX_I2C_INDEX;
+
+		cl->tx_block = false;
+		cl->rx_callback = slimpro_i2c_pcc_rx_cb;
+		ctx->mbox_chan = pcc_mbox_request_channel(cl, ctx->mbox_idx);
+		if (IS_ERR(ctx->mbox_chan)) {
+			dev_err(&pdev->dev, "PCC mailbox channel request failed\n");
+			return PTR_ERR(ctx->mbox_chan);
+		}
+
+		/*
+		 * The PCC mailbox controller driver should
+		 * have parsed the PCCT (global table of all
+		 * PCC channels) and stored pointers to the
+		 * subspace communication region in con_priv.
+		 */
+		cppc_ss = ctx->mbox_chan->con_priv;
+		if (!cppc_ss) {
+			dev_err(&pdev->dev, "PPC subspace not found\n");
+			rc = -ENOENT;
+			goto mbox_err;
+		}
+
+		if (!ctx->mbox_chan->mbox->txdone_irq) {
+			dev_err(&pdev->dev, "PCC IRQ not supported\n");
+			rc = -ENOENT;
+			goto mbox_err;
+		}
 
+		/*
+		 * This is the shared communication region
+		 * for the OS and Platform to communicate over.
+		 */
+		ctx->comm_base_addr = cppc_ss->base_address;
+		if (ctx->comm_base_addr) {
+			ctx->pcc_comm_addr = memremap(ctx->comm_base_addr,
+						      cppc_ss->length,
+						      MEMREMAP_WB);
+		} else {
+			dev_err(&pdev->dev, "Failed to get PCC comm region\n");
+			rc = -ENOENT;
+			goto mbox_err;
+		}
+
+		if (!ctx->pcc_comm_addr) {
+			dev_err(&pdev->dev,
+				"Failed to ioremap PCC comm region\n");
+			rc = -ENOMEM;
+			goto mbox_err;
+		}
+	}
 	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (rc)
 		dev_warn(&pdev->dev, "Unable to set dma mask\n");
@@ -404,13 +543,19 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev)
 	ACPI_COMPANION_SET(&adapter->dev, ACPI_COMPANION(&pdev->dev));
 	i2c_set_adapdata(adapter, ctx);
 	rc = i2c_add_adapter(adapter);
-	if (rc) {
-		mbox_free_channel(ctx->mbox_chan);
-		return rc;
-	}
+	if (rc)
+		goto mbox_err;
 
 	dev_info(&pdev->dev, "Mailbox I2C Adapter registered\n");
 	return 0;
+
+mbox_err:
+	if (acpi_disabled)
+		mbox_free_channel(ctx->mbox_chan);
+	else
+		pcc_mbox_free_channel(ctx->mbox_chan);
+
+	return rc;
 }
 
 static int xgene_slimpro_i2c_remove(struct platform_device *pdev)
@@ -419,7 +564,10 @@ static int xgene_slimpro_i2c_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&ctx->adapter);
 
-	mbox_free_channel(ctx->mbox_chan);
+	if (acpi_disabled)
+		mbox_free_channel(ctx->mbox_chan);
+	else
+		pcc_mbox_free_channel(ctx->mbox_chan);
 
 	return 0;
 }
-- 
GitLab


From 6c42778780c40c7db0ee2bb56436cae86e4c1ba4 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 16 May 2017 13:21:05 +0200
Subject: [PATCH 0168/1958] i2c: stub: use pr_fmt

Instead of hard coding "i2c-stub:", let's use the pr_fmt mechanism to
achieve the same more easily. This makes it easier to stay consistent
when adding new messages. Also, remove an unneeded OOM message while we
are here.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-stub.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/i2c/i2c-stub.c b/drivers/i2c/i2c-stub.c
index 06af583d51015..8b0900147f96f 100644
--- a/drivers/i2c/i2c-stub.c
+++ b/drivers/i2c/i2c-stub.c
@@ -15,7 +15,8 @@
     GNU General Public License for more details.
 */
 
-#define DEBUG 1
+#define DEBUG
+#define pr_fmt(fmt) "i2c-stub: " fmt
 
 #include <linux/errno.h>
 #include <linux/i2c.h>
@@ -342,7 +343,7 @@ static int __init i2c_stub_allocate_banks(int i)
 	if (!chip->bank_words)
 		return -ENOMEM;
 
-	pr_debug("i2c-stub: Allocated %u banks of %u words each (registers 0x%02x to 0x%02x)\n",
+	pr_debug("Allocated %u banks of %u words each (registers 0x%02x to 0x%02x)\n",
 		 chip->bank_mask, chip->bank_size, chip->bank_start,
 		 chip->bank_end);
 
@@ -363,28 +364,27 @@ static int __init i2c_stub_init(void)
 	int i, ret;
 
 	if (!chip_addr[0]) {
-		pr_err("i2c-stub: Please specify a chip address\n");
+		pr_err("Please specify a chip address\n");
 		return -ENODEV;
 	}
 
 	for (i = 0; i < MAX_CHIPS && chip_addr[i]; i++) {
 		if (chip_addr[i] < 0x03 || chip_addr[i] > 0x77) {
-			pr_err("i2c-stub: Invalid chip address 0x%02x\n",
+			pr_err("Invalid chip address 0x%02x\n",
 			       chip_addr[i]);
 			return -EINVAL;
 		}
 
-		pr_info("i2c-stub: Virtual chip at 0x%02x\n", chip_addr[i]);
+		pr_info("Virtual chip at 0x%02x\n", chip_addr[i]);
 	}
 
 	/* Allocate memory for all chips at once */
 	stub_chips_nr = i;
 	stub_chips = kcalloc(stub_chips_nr, sizeof(struct stub_chip),
 			     GFP_KERNEL);
-	if (!stub_chips) {
-		pr_err("i2c-stub: Out of memory\n");
+	if (!stub_chips)
 		return -ENOMEM;
-	}
+
 	for (i = 0; i < stub_chips_nr; i++) {
 		INIT_LIST_HEAD(&stub_chips[i].smbus_blocks);
 
-- 
GitLab


From d2f31c49cf7cfe8f02b70614ae56a39b0c1d8a75 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 18 May 2017 23:11:39 +0200
Subject: [PATCH 0169/1958] i2c: sh_mobile: remove platform_data

No platform currently upstream makes use of this platform_data anymore.
The ones that did are converted to DT meanwhile. So, remove it. The old
platforms likely don't have the 'clks_per_cnt' feature, otherwise it
would have been implemented by now. And in the unlikely case they need
to setup a different bus speed, we should rather go for a generic i2c
platform data just for that.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-sh_mobile.c |  8 --------
 include/linux/i2c/i2c-sh_mobile.h  | 11 -----------
 2 files changed, 19 deletions(-)
 delete mode 100644 include/linux/i2c/i2c-sh_mobile.h

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 3d75593487454..d5e39eccae9b9 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -24,7 +24,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
-#include <linux/i2c/i2c-sh_mobile.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -879,7 +878,6 @@ static int sh_mobile_i2c_hook_irqs(struct platform_device *dev, struct sh_mobile
 
 static int sh_mobile_i2c_probe(struct platform_device *dev)
 {
-	struct i2c_sh_mobile_platform_data *pdata = dev_get_platdata(&dev->dev);
 	struct sh_mobile_i2c_data *pd;
 	struct i2c_adapter *adap;
 	struct resource *res;
@@ -910,7 +908,6 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 	if (IS_ERR(pd->reg))
 		return PTR_ERR(pd->reg);
 
-	/* Use platform data bus speed or STANDARD_MODE */
 	ret = of_property_read_u32(dev->dev.of_node, "clock-frequency", &bus_speed);
 	pd->bus_speed = ret ? STANDARD_MODE : bus_speed;
 
@@ -929,11 +926,6 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 			if (config->setup)
 				config->setup(pd);
 		}
-	} else {
-		if (pdata && pdata->bus_speed)
-			pd->bus_speed = pdata->bus_speed;
-		if (pdata && pdata->clks_per_count)
-			pd->clks_per_count = pdata->clks_per_count;
 	}
 
 	/* The IIC blocks on SH-Mobile ARM processors
diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
deleted file mode 100644
index 06e3089795fbe..0000000000000
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __I2C_SH_MOBILE_H__
-#define __I2C_SH_MOBILE_H__
-
-#include <linux/platform_device.h>
-
-struct i2c_sh_mobile_platform_data {
-	unsigned long bus_speed;
-	unsigned int clks_per_count;
-};
-
-#endif /* __I2C_SH_MOBILE_H__ */
-- 
GitLab


From 90b84c057414cbcca53337c64afc348541989d7d Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 18 May 2017 23:11:40 +0200
Subject: [PATCH 0170/1958] i2c: sh_mobile: drop needless check for of_node

After removal of platform_data support, we can simplify OF handling.
of_match_device() evaluates to NULL if !CONFIG_OF or if there is no node
pointer for that device, so we can remove the check for the node ptr.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-sh_mobile.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index d5e39eccae9b9..2e097d97d258b 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -881,6 +881,7 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 	struct sh_mobile_i2c_data *pd;
 	struct i2c_adapter *adap;
 	struct resource *res;
+	const struct of_device_id *match;
 	int ret;
 	u32 bus_speed;
 
@@ -910,22 +911,16 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 
 	ret = of_property_read_u32(dev->dev.of_node, "clock-frequency", &bus_speed);
 	pd->bus_speed = ret ? STANDARD_MODE : bus_speed;
-
 	pd->clks_per_count = 1;
 
-	if (dev->dev.of_node) {
-		const struct of_device_id *match;
-
-		match = of_match_device(sh_mobile_i2c_dt_ids, &dev->dev);
-		if (match) {
-			const struct sh_mobile_dt_config *config;
+	match = of_match_device(sh_mobile_i2c_dt_ids, &dev->dev);
+	if (match) {
+		const struct sh_mobile_dt_config *config = match->data;
 
-			config = match->data;
-			pd->clks_per_count = config->clks_per_count;
+		pd->clks_per_count = config->clks_per_count;
 
-			if (config->setup)
-				config->setup(pd);
-		}
+		if (config->setup)
+			config->setup(pd);
 	}
 
 	/* The IIC blocks on SH-Mobile ARM processors
-- 
GitLab


From f9831bfec7414d8f54f064b6b21de0685f107a47 Mon Sep 17 00:00:00 2001
From: Michael Thalmeier <michael.thalmeier@hale.at>
Date: Wed, 31 May 2017 11:40:03 +0200
Subject: [PATCH 0171/1958] i2c: mxs: change error printing to debug for
 mxs_i2c_pio_wait_xfer_end

Instead of printing errors after mxs_i2c_pio_wait_xfer_end returns with
an error code just print a debug message.

NAKs and timeouts can occur in this situation normally, so do not treat
them as errors.

Signed-off-by: Michael Thalmeier <michael.thalmeier@hale.at>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-mxs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index 5738556b6aac2..d4e8f1954f23f 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -419,7 +419,7 @@ static int mxs_i2c_pio_setup_xfer(struct i2c_adapter *adap,
 
 		ret = mxs_i2c_pio_wait_xfer_end(i2c);
 		if (ret) {
-			dev_err(i2c->dev,
+			dev_dbg(i2c->dev,
 				"PIO: Failed to send SELECT command!\n");
 			goto cleanup;
 		}
@@ -431,7 +431,7 @@ static int mxs_i2c_pio_setup_xfer(struct i2c_adapter *adap,
 
 		ret = mxs_i2c_pio_wait_xfer_end(i2c);
 		if (ret) {
-			dev_err(i2c->dev,
+			dev_dbg(i2c->dev,
 				"PIO: Failed to send READ command!\n");
 			goto cleanup;
 		}
@@ -528,7 +528,7 @@ static int mxs_i2c_pio_setup_xfer(struct i2c_adapter *adap,
 			/* Wait for the end of the transfer. */
 			ret = mxs_i2c_pio_wait_xfer_end(i2c);
 			if (ret) {
-				dev_err(i2c->dev,
+				dev_dbg(i2c->dev,
 					"PIO: Failed to finish WRITE cmd!\n");
 				break;
 			}
-- 
GitLab


From 77a73f3caece06d1d17268f43ec39c5779e6ce24 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 2 Jun 2017 13:57:03 +0200
Subject: [PATCH 0172/1958] rtc: sysfs: make name uniform

The name sysfs attribute is not useful in its current form because of all
the drivers:
 - 3 are using the feature correctly
 - 2 are clearly misusing it
 - 60 are using driver.name, either directly or indirectly
 - 46 are using pdev->name
 - 8 are using client->name
 - 31 are using a variation of driver.name (addition or removal of rtc-,
   -rtc, _rtc, rtc_)

Make it uniform and use the driver name and the device name.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-sysfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
index 1218d5d4224dd..e364550eb9a7f 100644
--- a/drivers/rtc/rtc-sysfs.c
+++ b/drivers/rtc/rtc-sysfs.c
@@ -27,7 +27,8 @@
 static ssize_t
 name_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%s\n", to_rtc_device(dev)->name);
+	return sprintf(buf, "%s %s\n", dev_driver_string(dev->parent),
+		       dev_name(dev->parent));
 }
 static DEVICE_ATTR_RO(name);
 
-- 
GitLab


From 9f8606533b961b06c258cf4804e3379aa14c5951 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 2 Jun 2017 14:01:37 +0200
Subject: [PATCH 0173/1958] rtc: dev: remove rtc->name from debug message

rtc->name is superfluous here because the rtc is already registered at that
point and its name has already been printed.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index e81a8711fea74..794bc4fa49371 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -464,7 +464,7 @@ void rtc_dev_prepare(struct rtc_device *rtc)
 		return;
 
 	if (rtc->id >= RTC_DEV_MAX) {
-		dev_dbg(&rtc->dev, "%s: too many RTC devices\n", rtc->name);
+		dev_dbg(&rtc->dev, "too many RTC devices\n");
 		return;
 	}
 
-- 
GitLab


From f09e706992bd031f0d94b77e40074e7fea5c89a9 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 2 Jun 2017 14:03:39 +0200
Subject: [PATCH 0174/1958] rtc: pcf8563: avoid using rtc->name

pcf8563->rtc->name is a copy of pcf8563_driver.driver.name, use it instead

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-pcf8563.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 1227ceab61eec..cea6ea4df970f 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -606,7 +606,7 @@ static int pcf8563_probe(struct i2c_client *client,
 		err = devm_request_threaded_irq(&client->dev, client->irq,
 				NULL, pcf8563_irq,
 				IRQF_SHARED|IRQF_ONESHOT|IRQF_TRIGGER_FALLING,
-				pcf8563->rtc->name, client);
+				pcf8563_driver.driver.name, client);
 		if (err) {
 			dev_err(&client->dev, "unable to request IRQ %d\n",
 								client->irq);
-- 
GitLab


From 4b9e2a0c0591d2789a43414b8299466800873aea Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 2 Jun 2017 14:13:21 +0200
Subject: [PATCH 0175/1958] rtc: ds1307: avoid using rtc-name

ds1307->rtc->name is a copy of ds1307->name, use it instead.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index a264938500afd..16ace2fb0c0a8 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -1567,7 +1567,7 @@ static int ds1307_probe(struct i2c_client *client,
 		err = devm_request_threaded_irq(ds1307->dev,
 						ds1307->irq, NULL, irq_handler,
 						IRQF_SHARED | IRQF_ONESHOT,
-						ds1307->rtc->name, ds1307);
+						ds1307->name, ds1307);
 		if (err) {
 			client->irq = 0;
 			device_set_wakeup_capable(ds1307->dev, false);
-- 
GitLab


From 5c82a6ae0242416cfead597bb2b42aa3481a0ba7 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Fri, 2 Jun 2017 14:15:15 +0200
Subject: [PATCH 0176/1958] rtc: remove rtc_device.name

rtc->name is only used in messages were it is superfluous. Remove it
completely from the structure.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/class.c | 7 +++----
 include/linux/rtc.h | 1 -
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 5fb439897fe1a..543c64cd3df02 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -224,7 +224,6 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	rtc->pie_timer.function = rtc_pie_update_irq;
 	rtc->pie_enabled = 0;
 
-	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
 	dev_set_name(&rtc->dev, "rtc%d", id);
 
 	/* Check to see if there is an ALARM already set in hw */
@@ -238,20 +237,20 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
 	if (err) {
 		dev_warn(&rtc->dev, "%s: failed to add char device %d:%d\n",
-			 rtc->name, MAJOR(rtc->dev.devt), rtc->id);
+			 name, MAJOR(rtc->dev.devt), rtc->id);
 
 		/* This will free both memory and the ID */
 		put_device(&rtc->dev);
 		goto exit;
 	} else {
-		dev_dbg(&rtc->dev, "%s: dev (%d:%d)\n", rtc->name,
+		dev_dbg(&rtc->dev, "%s: dev (%d:%d)\n", name,
 			MAJOR(rtc->dev.devt), rtc->id);
 	}
 
 	rtc_proc_add_device(rtc);
 
 	dev_info(dev, "rtc core: registered %s as %s\n",
-			rtc->name, dev_name(&rtc->dev));
+			name, dev_name(&rtc->dev));
 
 	return rtc;
 
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b693adac853b2..d354f56e0cf54 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -116,7 +116,6 @@ struct rtc_device {
 	struct module *owner;
 
 	int id;
-	char name[RTC_DEVICE_NAME_SIZE];
 
 	const struct rtc_class_ops *ops;
 	struct mutex ops_lock;
-- 
GitLab


From 39bd56df7b2dae80d4099c64ad776775a3876ed5 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 2 Mar 2017 18:31:13 +0100
Subject: [PATCH 0177/1958] watchodg: sama5d4: simplify probe

Because the only way to use the driver is to have a device tree enabling
it, pdev->dev.of_node will never be NULL. Remove the unnecessary check.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/sama5d4_wdt.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index 362fd229786df..d710014f3b7d5 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -228,15 +228,13 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 
 	wdt->reg_base = regs;
 
-	if (pdev->dev.of_node) {
-		irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
-		if (!irq)
-			dev_warn(&pdev->dev, "failed to get IRQ from DT\n");
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (!irq)
+		dev_warn(&pdev->dev, "failed to get IRQ from DT\n");
 
-		ret = of_sama5d4_wdt_init(pdev->dev.of_node, wdt);
-		if (ret)
-			return ret;
-	}
+	ret = of_sama5d4_wdt_init(pdev->dev.of_node, wdt);
+	if (ret)
+		return ret;
 
 	if ((wdt->mr & AT91_WDT_WDFIEN) && irq) {
 		ret = devm_request_irq(&pdev->dev, irq, sama5d4_wdt_irq_handler,
-- 
GitLab


From 5dca80f63eaf18eca2ba3ebf61056feb66103951 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 2 Mar 2017 18:31:14 +0100
Subject: [PATCH 0178/1958] watchdog: sama5d4: Add comment explaining what
 happens on resume

Because suspending to RAM may lose the register values, they are restored
on resume. This is currently done unconditionally because there is
currently no way to know (from the driver) whether they have really been
lost or are still valid. Writing MR also pings the watchdog and this may
not be what is expected so add a comment explaining why it happens.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/sama5d4_wdt.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index d710014f3b7d5..0ae947c3d7bcb 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -300,6 +300,11 @@ static int sama5d4_wdt_resume(struct device *dev)
 {
 	struct sama5d4_wdt *wdt = dev_get_drvdata(dev);
 
+	/*
+	 * FIXME: writing MR also pings the watchdog which may not be desired.
+	 * This should only be done when the registers are lost on suspend but
+	 * there is no way to get this information right now.
+	 */
 	sama5d4_wdt_init(wdt);
 
 	return 0;
-- 
GitLab


From aea24187f65e8adb00b2be949cd809fcb2aa241c Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Sat, 4 Mar 2017 17:37:35 -0500
Subject: [PATCH 0179/1958] watchdog: add rza_wdt driver

Adds a watchdog timer driver for the Renesas RZ/A Series SoCs. A reset
handler is also included since a WDT overflow is the only method for
restarting an RZ/A SoC.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig   |   8 ++
 drivers/watchdog/Makefile  |   1 +
 drivers/watchdog/rza_wdt.c | 199 +++++++++++++++++++++++++++++++++++++
 3 files changed, 208 insertions(+)
 create mode 100644 drivers/watchdog/rza_wdt.c

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 8b9049dac0948..1fdb5a365413b 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -721,6 +721,14 @@ config RENESAS_WDT
 	  This driver adds watchdog support for the integrated watchdogs in the
 	  Renesas R-Car and other SH-Mobile SoCs (usually named RWDT or SWDT).
 
+config RENESAS_RZAWDT
+	tristate "Renesas RZ/A WDT Watchdog"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	select WATCHDOG_CORE
+	help
+	  This driver adds watchdog support for the integrated watchdogs in the
+	  Renesas RZ/A SoCs. These watchdogs can be used to reset a system.
+
 config ASPEED_WATCHDOG
 	tristate "Aspeed 2400 watchdog support"
 	depends on ARCH_ASPEED || COMPILE_TEST
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index a2126e2a99ae8..e23fb4b51878b 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_LPC18XX_WATCHDOG) += lpc18xx_wdt.o
 obj-$(CONFIG_BCM7038_WDT) += bcm7038_wdt.o
 obj-$(CONFIG_ATLAS7_WATCHDOG) += atlas7_wdt.o
 obj-$(CONFIG_RENESAS_WDT) += renesas_wdt.o
+obj-$(CONFIG_RENESAS_RZAWDT) += rza_wdt.o
 obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
 obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
 
diff --git a/drivers/watchdog/rza_wdt.c b/drivers/watchdog/rza_wdt.c
new file mode 100644
index 0000000000000..e618218d23742
--- /dev/null
+++ b/drivers/watchdog/rza_wdt.c
@@ -0,0 +1,199 @@
+/*
+ * Renesas RZ/A Series WDT Driver
+ *
+ * Copyright (C) 2017 Renesas Electronics America, Inc.
+ * Copyright (C) 2017 Chris Brandt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+#define DEFAULT_TIMEOUT		30
+
+/* Watchdog Timer Registers */
+#define WTCSR			0
+#define WTCSR_MAGIC		0xA500
+#define WTSCR_WT		BIT(6)
+#define WTSCR_TME		BIT(5)
+#define WTSCR_CKS(i)		(i)
+
+#define WTCNT			2
+#define WTCNT_MAGIC		0x5A00
+
+#define WRCSR			4
+#define WRCSR_MAGIC		0x5A00
+#define WRCSR_RSTE		BIT(6)
+#define WRCSR_CLEAR_WOVF	0xA500	/* special value */
+
+struct rza_wdt {
+	struct watchdog_device wdev;
+	void __iomem *base;
+	struct clk *clk;
+};
+
+static int rza_wdt_start(struct watchdog_device *wdev)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	/* Stop timer */
+	writew(WTCSR_MAGIC | 0, priv->base + WTCSR);
+
+	/* Must dummy read WRCSR:WOVF at least once before clearing */
+	readb(priv->base + WRCSR);
+	writew(WRCSR_CLEAR_WOVF, priv->base + WRCSR);
+
+	/*
+	 * Start timer with slowest clock source and reset option enabled.
+	 */
+	writew(WRCSR_MAGIC | WRCSR_RSTE, priv->base + WRCSR);
+	writew(WTCNT_MAGIC | 0, priv->base + WTCNT);
+	writew(WTCSR_MAGIC | WTSCR_WT | WTSCR_TME | WTSCR_CKS(7),
+	       priv->base + WTCSR);
+
+	return 0;
+}
+
+static int rza_wdt_stop(struct watchdog_device *wdev)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	writew(WTCSR_MAGIC | 0, priv->base + WTCSR);
+
+	return 0;
+}
+
+static int rza_wdt_ping(struct watchdog_device *wdev)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	writew(WTCNT_MAGIC | 0, priv->base + WTCNT);
+
+	return 0;
+}
+
+static int rza_wdt_restart(struct watchdog_device *wdev, unsigned long action,
+			    void *data)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	/* Stop timer */
+	writew(WTCSR_MAGIC | 0, priv->base + WTCSR);
+
+	/* Must dummy read WRCSR:WOVF at least once before clearing */
+	readb(priv->base + WRCSR);
+	writew(WRCSR_CLEAR_WOVF, priv->base + WRCSR);
+
+	/*
+	 * Start timer with fastest clock source and only 1 clock left before
+	 * overflow with reset option enabled.
+	 */
+	writew(WRCSR_MAGIC | WRCSR_RSTE, priv->base + WRCSR);
+	writew(WTCNT_MAGIC | 255, priv->base + WTCNT);
+	writew(WTCSR_MAGIC | WTSCR_WT | WTSCR_TME, priv->base + WTCSR);
+
+	/*
+	 * Actually make sure the above sequence hits hardware before sleeping.
+	 */
+	wmb();
+
+	/* Wait for WDT overflow (reset) */
+	udelay(20);
+
+	return 0;
+}
+
+static const struct watchdog_info rza_wdt_ident = {
+	.options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+	.identity = "Renesas RZ/A WDT Watchdog",
+};
+
+static const struct watchdog_ops rza_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = rza_wdt_start,
+	.stop = rza_wdt_stop,
+	.ping = rza_wdt_ping,
+	.restart = rza_wdt_restart,
+};
+
+static int rza_wdt_probe(struct platform_device *pdev)
+{
+	struct rza_wdt *priv;
+	struct resource *res;
+	unsigned long rate;
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+
+	rate = clk_get_rate(priv->clk);
+	if (rate < 16384) {
+		dev_err(&pdev->dev, "invalid clock rate (%ld)\n", rate);
+		return -ENOENT;
+	}
+
+	/* Assume slowest clock rate possible (CKS=7) */
+	rate /= 16384;
+
+	priv->wdev.info = &rza_wdt_ident,
+	priv->wdev.ops = &rza_wdt_ops,
+	priv->wdev.parent = &pdev->dev;
+
+	/*
+	 * Since the max possible timeout of our 8-bit count register is less
+	 * than a second, we must use max_hw_heartbeat_ms.
+	 */
+	priv->wdev.max_hw_heartbeat_ms = (1000 * U8_MAX) / rate;
+	dev_dbg(&pdev->dev, "max hw timeout of %dms\n",
+		 priv->wdev.max_hw_heartbeat_ms);
+
+	priv->wdev.min_timeout = 1;
+	priv->wdev.timeout = DEFAULT_TIMEOUT;
+
+	watchdog_init_timeout(&priv->wdev, 0, &pdev->dev);
+	watchdog_set_drvdata(&priv->wdev, priv);
+
+	ret = devm_watchdog_register_device(&pdev->dev, &priv->wdev);
+	if (ret)
+		dev_err(&pdev->dev, "Cannot register watchdog device\n");
+
+	return ret;
+}
+
+static const struct of_device_id rza_wdt_of_match[] = {
+	{ .compatible = "renesas,rza-wdt", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rza_wdt_of_match);
+
+static struct platform_driver rza_wdt_driver = {
+	.probe = rza_wdt_probe,
+	.driver = {
+		.name = "rza_wdt",
+		.of_match_table = rza_wdt_of_match,
+	},
+};
+
+module_platform_driver(rza_wdt_driver);
+
+MODULE_DESCRIPTION("Renesas RZ/A WDT Driver");
+MODULE_AUTHOR("Chris Brandt <chris.brandt@renesas.com>");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 0ab1380804ad709ced066eb10568110987e42570 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Sat, 4 Mar 2017 17:37:36 -0500
Subject: [PATCH 0180/1958] watchdog: renesas-wdt: add support for rza

Describe the WDT hardware in the RZ/A series.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 Documentation/devicetree/bindings/watchdog/renesas-wdt.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
index da24e31334177..9e306afbbd49e 100644
--- a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
@@ -2,10 +2,11 @@ Renesas Watchdog Timer (WDT) Controller
 
 Required properties:
 - compatible : Should be "renesas,<soctype>-wdt", and
-	       "renesas,rcar-gen3-wdt" as fallback.
+	       "renesas,rcar-gen3-wdt" or "renesas,rza-wdt" as fallback.
 	       Examples with soctypes are:
 	         - "renesas,r8a7795-wdt" (R-Car H3)
 	         - "renesas,r8a7796-wdt" (R-Car M3-W)
+	         - "renesas,r7s72100-wdt" (RZ/A1)
 
   When compatible with the generic version, nodes must list the SoC-specific
   version corresponding to the platform first, followed by the generic
@@ -17,6 +18,7 @@ Required properties:
 Optional properties:
 - timeout-sec : Contains the watchdog timeout in seconds
 - power-domains : the power domain the WDT belongs to
+- interrupts: Some WDTs have an interrupt when used in interval timer mode
 
 Examples:
 
-- 
GitLab


From 954351e8707bcdf6091cc55dab4e9e2453de6655 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sat, 11 Mar 2017 00:22:17 +0200
Subject: [PATCH 0181/1958] watchdog: intel-mid_wdt: Keep watchdog running

Firmware followed by bootloader leaves watchdog running.

Keep it running in the driver.

User will not need any additional options to reboot in case of panic
during boot.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/intel-mid_wdt.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
index 45e4d02221b58..72c108a12c19d 100644
--- a/drivers/watchdog/intel-mid_wdt.c
+++ b/drivers/watchdog/intel-mid_wdt.c
@@ -147,8 +147,21 @@ static int mid_wdt_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	/* Make sure the watchdog is not running */
-	wdt_stop(wdt_dev);
+	/*
+	 * The firmware followed by U-Boot leaves the watchdog running
+	 * with the default threshold which may vary. When we get here
+	 * we should make a decision to prevent any side effects before
+	 * user space daemon will take care of it. The best option,
+	 * taking into consideration that there is no way to read values
+	 * back from hardware, is to enforce watchdog being run with
+	 * deterministic values.
+	 */
+	ret = wdt_start(wdt_dev);
+	if (ret)
+		return ret;
+
+	/* Make sure the watchdog is serviced */
+	set_bit(WDOG_HW_RUNNING, &wdt_dev->status);
 
 	ret = devm_watchdog_register_device(&pdev->dev, wdt_dev);
 	if (ret) {
-- 
GitLab


From 58415efe96670b858eb7b5ab066881ae3f1dad25 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Mon, 13 Mar 2017 21:07:24 +0200
Subject: [PATCH 0182/1958] watchdog: s3c2410: Constify local structures

Structures watchdog_device, watchdog_ops and s3c2410_wdt_variant are not
modified so they can be made const to increase code safeness.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/s3c2410_wdt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 6ed97596ca806..52b66bcdd1ef1 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -131,7 +131,7 @@ struct s3c2410_wdt {
 	unsigned long		wtdat_save;
 	struct watchdog_device	wdt_device;
 	struct notifier_block	freq_transition;
-	struct s3c2410_wdt_variant *drv_data;
+	const struct s3c2410_wdt_variant *drv_data;
 	struct regmap *pmureg;
 };
 
@@ -401,7 +401,7 @@ static const struct watchdog_ops s3c2410wdt_ops = {
 	.restart = s3c2410wdt_restart,
 };
 
-static struct watchdog_device s3c2410_wdd = {
+static const struct watchdog_device s3c2410_wdd = {
 	.info = &s3c2410_wdt_ident,
 	.ops = &s3c2410wdt_ops,
 	.timeout = S3C2410_WATCHDOG_DEFAULT_TIME,
@@ -507,7 +507,7 @@ static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt)
 	return 0;
 }
 
-static inline struct s3c2410_wdt_variant *
+static inline const struct s3c2410_wdt_variant *
 s3c2410_get_wdt_drv_data(struct platform_device *pdev)
 {
 	if (pdev->dev.of_node) {
-- 
GitLab


From a9a02c46624ad112eb07ca26b6a025c1358ed3ad Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Mon, 13 Mar 2017 21:07:25 +0200
Subject: [PATCH 0183/1958] watchdog: s3c2410: Simplify getting driver data

Simplify the flow in helper function for getting the driver data by
using of_device_get_match_data() and only one if() branch.

The code should be equivalent.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/s3c2410_wdt.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 52b66bcdd1ef1..0532dc93e6000 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -37,6 +37,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/delay.h>
@@ -510,14 +511,16 @@ static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt)
 static inline const struct s3c2410_wdt_variant *
 s3c2410_get_wdt_drv_data(struct platform_device *pdev)
 {
-	if (pdev->dev.of_node) {
-		const struct of_device_id *match;
-		match = of_match_node(s3c2410_wdt_match, pdev->dev.of_node);
-		return (struct s3c2410_wdt_variant *)match->data;
-	} else {
-		return (struct s3c2410_wdt_variant *)
-			platform_get_device_id(pdev)->driver_data;
+	const struct s3c2410_wdt_variant *variant;
+
+	variant = of_device_get_match_data(&pdev->dev);
+	if (!variant) {
+		/* Device matched by platform_device_id */
+		variant = (struct s3c2410_wdt_variant *)
+			   platform_get_device_id(pdev)->driver_data;
 	}
+
+	return variant;
 }
 
 static int s3c2410wdt_probe(struct platform_device *pdev)
-- 
GitLab


From 08497f22b15affcf08fb2d1173caa73e7ad54df7 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Mon, 13 Mar 2017 21:07:26 +0200
Subject: [PATCH 0184/1958] watchdog: s3c2410: Minor code cleanup

Cleanup the code from minor readability issues (no functional changes):
1. Fix checkpatch: ERROR: Do not include the paragraph about writing to
   the Free Software Foundation's mailing address.
2. Fix checkpatch: WARNING: quoted string split across lines
3. Fix chechpatch: WARNING: Prefer 'unsigned int' to bare use of
   'unsigned'
4. Use 'dev' consistently in probe function instead of mixing dev with
   &pdev->dev.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/s3c2410_wdt.c | 35 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 0532dc93e6000..adaa43543f0a7 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -1,5 +1,4 @@
-/* linux/drivers/char/watchdog/s3c2410_wdt.c
- *
+/*
  * Copyright (c) 2004 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
@@ -17,11 +16,7 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
+ */
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -95,8 +90,7 @@ MODULE_PARM_DESC(tmr_atboot,
 			__MODULE_STRING(S3C2410_WATCHDOG_ATBOOT));
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 			__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, "
-			"0 to reboot (default 0)");
+MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, 0 to reboot (default 0)");
 
 /**
  * struct s3c2410_wdt_variant - Per-variant config data
@@ -311,7 +305,8 @@ static inline int s3c2410wdt_is_running(struct s3c2410_wdt *wdt)
 	return readl(wdt->reg_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE;
 }
 
-static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeout)
+static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd,
+				    unsigned int timeout)
 {
 	struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
 	unsigned long freq = clk_get_rate(wdt->clock);
@@ -525,7 +520,7 @@ s3c2410_get_wdt_drv_data(struct platform_device *pdev)
 
 static int s3c2410wdt_probe(struct platform_device *pdev)
 {
-	struct device *dev;
+	struct device *dev = &pdev->dev;
 	struct s3c2410_wdt *wdt;
 	struct resource *wdt_mem;
 	struct resource *wdt_irq;
@@ -533,13 +528,11 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	int started = 0;
 	int ret;
 
-	dev = &pdev->dev;
-
 	wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
 	if (!wdt)
 		return -ENOMEM;
 
-	wdt->dev = &pdev->dev;
+	wdt->dev = dev;
 	spin_lock_init(&wdt->lock);
 	wdt->wdt_device = s3c2410_wdd;
 
@@ -595,7 +588,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	/* see if we can actually set the requested timer margin, and if
 	 * not, try the default value */
 
-	watchdog_init_timeout(&wdt->wdt_device, tmr_margin, &pdev->dev);
+	watchdog_init_timeout(&wdt->wdt_device, tmr_margin, dev);
 	ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
 					wdt->wdt_device.timeout);
 	if (ret) {
@@ -604,11 +597,10 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 
 		if (started == 0)
 			dev_info(dev,
-			   "tmr_margin value out of range, default %d used\n",
-			       S3C2410_WATCHDOG_DEFAULT_TIME);
+				 "tmr_margin value out of range, default %d used\n",
+				 S3C2410_WATCHDOG_DEFAULT_TIME);
 		else
-			dev_info(dev, "default timer value is out of range, "
-							"cannot start\n");
+			dev_info(dev, "default timer value is out of range, cannot start\n");
 	}
 
 	ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0,
@@ -622,7 +614,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	watchdog_set_restart_priority(&wdt->wdt_device, 128);
 
 	wdt->wdt_device.bootstatus = s3c2410wdt_get_bootstatus(wdt);
-	wdt->wdt_device.parent = &pdev->dev;
+	wdt->wdt_device.parent = dev;
 
 	ret = watchdog_register_device(&wdt->wdt_device);
 	if (ret) {
@@ -757,7 +749,6 @@ static struct platform_driver s3c2410wdt_driver = {
 
 module_platform_driver(s3c2410wdt_driver);
 
-MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>, "
-	      "Dimitry Andric <dimitry.andric@tomtom.com>");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>, Dimitry Andric <dimitry.andric@tomtom.com>");
 MODULE_DESCRIPTION("S3C2410 Watchdog Device Driver");
 MODULE_LICENSE("GPL");
-- 
GitLab


From ecd94a41debfd21961c9f59f639dce26f2c779f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 29 Mar 2017 17:34:24 +0200
Subject: [PATCH 0185/1958] watchdog: orion: make license info match the file
 header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The header says: This file is licensed under the terms of the GNU
General Public License version 2. The right identifier for
MODULE_LICENSE is "GPL v2" then.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/orion_wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index 39be4dd8035ed..83af7d6cc37cc 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -651,5 +651,5 @@ module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:orion_wdt");
-- 
GitLab


From 03bca15833f2865b11835b7f5bfa594d1aaacecc Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 28 Feb 2016 13:12:23 -0800
Subject: [PATCH 0186/1958] watchdog: gpio: Convert to use infrastructure
 triggered keepalives

The watchdog infrastructure now supports handling watchdog keepalive
if the watchdog is running while the watchdog device is closed.
The infrastructure now also supports generating additional heartbeats
if the maximum hardware timeout is smaller than or close to the
configured timeout. Convert the driver to use this infrastructure.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/gpio_wdt.c | 73 +++++++------------------------------
 1 file changed, 13 insertions(+), 60 deletions(-)

diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c
index 93457cabc1786..cb66c2f99ff15 100644
--- a/drivers/watchdog/gpio_wdt.c
+++ b/drivers/watchdog/gpio_wdt.c
@@ -18,7 +18,6 @@
 
 #define SOFT_TIMEOUT_MIN	1
 #define SOFT_TIMEOUT_DEF	60
-#define SOFT_TIMEOUT_MAX	0xffff
 
 enum {
 	HW_ALGO_TOGGLE,
@@ -30,11 +29,7 @@ struct gpio_wdt_priv {
 	bool			active_low;
 	bool			state;
 	bool			always_running;
-	bool			armed;
 	unsigned int		hw_algo;
-	unsigned int		hw_margin;
-	unsigned long		last_jiffies;
-	struct timer_list	timer;
 	struct watchdog_device	wdd;
 };
 
@@ -47,21 +42,10 @@ static void gpio_wdt_disable(struct gpio_wdt_priv *priv)
 		gpio_direction_input(priv->gpio);
 }
 
-static void gpio_wdt_hwping(unsigned long data)
+static int gpio_wdt_ping(struct watchdog_device *wdd)
 {
-	struct watchdog_device *wdd = (struct watchdog_device *)data;
 	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-	if (priv->armed && time_after(jiffies, priv->last_jiffies +
-				      msecs_to_jiffies(wdd->timeout * 1000))) {
-		dev_crit(wdd->parent,
-			 "Timer expired. System will reboot soon!\n");
-		return;
-	}
-
-	/* Restart timer */
-	mod_timer(&priv->timer, jiffies + priv->hw_margin);
-
 	switch (priv->hw_algo) {
 	case HW_ALGO_TOGGLE:
 		/* Toggle output pin */
@@ -75,55 +59,33 @@ static void gpio_wdt_hwping(unsigned long data)
 		gpio_set_value_cansleep(priv->gpio, priv->active_low);
 		break;
 	}
-}
-
-static void gpio_wdt_start_impl(struct gpio_wdt_priv *priv)
-{
-	priv->state = priv->active_low;
-	gpio_direction_output(priv->gpio, priv->state);
-	priv->last_jiffies = jiffies;
-	gpio_wdt_hwping((unsigned long)&priv->wdd);
+	return 0;
 }
 
 static int gpio_wdt_start(struct watchdog_device *wdd)
 {
 	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-	gpio_wdt_start_impl(priv);
-	priv->armed = true;
+	priv->state = priv->active_low;
+	gpio_direction_output(priv->gpio, priv->state);
 
-	return 0;
+	set_bit(WDOG_HW_RUNNING, &wdd->status);
+
+	return gpio_wdt_ping(wdd);
 }
 
 static int gpio_wdt_stop(struct watchdog_device *wdd)
 {
 	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-	priv->armed = false;
 	if (!priv->always_running) {
-		mod_timer(&priv->timer, 0);
 		gpio_wdt_disable(priv);
+		clear_bit(WDOG_HW_RUNNING, &wdd->status);
 	}
 
 	return 0;
 }
 
-static int gpio_wdt_ping(struct watchdog_device *wdd)
-{
-	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
-
-	priv->last_jiffies = jiffies;
-
-	return 0;
-}
-
-static int gpio_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
-{
-	wdd->timeout = t;
-
-	return gpio_wdt_ping(wdd);
-}
-
 static const struct watchdog_info gpio_wdt_ident = {
 	.options	= WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING |
 			  WDIOF_SETTIMEOUT,
@@ -135,7 +97,6 @@ static const struct watchdog_ops gpio_wdt_ops = {
 	.start		= gpio_wdt_start,
 	.stop		= gpio_wdt_stop,
 	.ping		= gpio_wdt_ping,
-	.set_timeout	= gpio_wdt_set_timeout,
 };
 
 static int gpio_wdt_probe(struct platform_device *pdev)
@@ -185,9 +146,6 @@ static int gpio_wdt_probe(struct platform_device *pdev)
 	if (hw_margin < 2 || hw_margin > 65535)
 		return -EINVAL;
 
-	/* Use safe value (1/2 of real timeout) */
-	priv->hw_margin = msecs_to_jiffies(hw_margin / 2);
-
 	priv->always_running = of_property_read_bool(pdev->dev.of_node,
 						     "always-running");
 
@@ -196,31 +154,26 @@ static int gpio_wdt_probe(struct platform_device *pdev)
 	priv->wdd.info		= &gpio_wdt_ident;
 	priv->wdd.ops		= &gpio_wdt_ops;
 	priv->wdd.min_timeout	= SOFT_TIMEOUT_MIN;
-	priv->wdd.max_timeout	= SOFT_TIMEOUT_MAX;
+	priv->wdd.max_hw_heartbeat_ms = hw_margin;
 	priv->wdd.parent	= &pdev->dev;
 
 	if (watchdog_init_timeout(&priv->wdd, 0, &pdev->dev) < 0)
 		priv->wdd.timeout = SOFT_TIMEOUT_DEF;
 
-	setup_timer(&priv->timer, gpio_wdt_hwping, (unsigned long)&priv->wdd);
-
 	watchdog_stop_on_reboot(&priv->wdd);
 
-	ret = watchdog_register_device(&priv->wdd);
-	if (ret)
-		return ret;
-
 	if (priv->always_running)
-		gpio_wdt_start_impl(priv);
+		gpio_wdt_start(&priv->wdd);
 
-	return 0;
+	ret = watchdog_register_device(&priv->wdd);
+
+	return ret;
 }
 
 static int gpio_wdt_remove(struct platform_device *pdev)
 {
 	struct gpio_wdt_priv *priv = platform_get_drvdata(pdev);
 
-	del_timer_sync(&priv->timer);
 	watchdog_unregister_device(&priv->wdd);
 
 	return 0;
-- 
GitLab


From f66872b5be147b32806dc98d808ee784fe7965c7 Mon Sep 17 00:00:00 2001
From: Steve Twiss <stwiss.opensource@diasemi.com>
Date: Tue, 28 Mar 2017 15:43:30 +0100
Subject: [PATCH 0187/1958] Documentation: devicetree: watchdog: da9062/61
 watchdog timer binding

Add binding information for DA9062 and DA9061 watchdog.

Example bindings for both DA9062 and DA9061 devices are added. For
the DA9061 device, a fallback compatible line is added as a valid
combination of compatible strings.

The original binding for DA9062 (only) used to reside inside the
Documentation/devicetree/bindings/mfd/da9062.txt MFD document.
The da9062-watchdog section was deleted in that file and replaced
with a link to the new DA9061/62 binding information stored in this
patch.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Steve Twiss <stwiss.opensource@diasemi.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 .../bindings/watchdog/da9062-wdt.txt          | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/watchdog/da9062-wdt.txt

diff --git a/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt b/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt
new file mode 100644
index 0000000000000..b935b526d2f37
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt
@@ -0,0 +1,23 @@
+* Dialog Semiconductor DA9062/61 Watchdog Timer
+
+Required properties:
+
+- compatible: should be one of the following valid compatible string lines:
+	"dlg,da9061-watchdog", "dlg,da9062-watchdog"
+	"dlg,da9062-watchdog"
+
+Example: DA9062
+
+	pmic0: da9062@58 {
+		watchdog {
+			compatible = "dlg,da9062-watchdog";
+		};
+	};
+
+Example: DA9061 using a fall-back compatible for the DA9062 watchdog driver
+
+	pmic0: da9061@58 {
+		watchdog {
+			compatible = "dlg,da9061-watchdog", "dlg,da9062-watchdog";
+		};
+	};
-- 
GitLab


From 5df11427b2d20ac1f4fdd1bcb6319743a4b6797e Mon Sep 17 00:00:00 2001
From: Yannick Fertre <yannick.fertre@st.com>
Date: Thu, 6 Apr 2017 14:19:24 +0200
Subject: [PATCH 0188/1958] dt-bindings: watchdog: Document STM32 IWDG bindings

This adds documentation of device tree bindings for the STM32 IWDG
(Independent WatchDoG).

Signed-off-by: Yannick Fertre <yannick.fertre@st.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 .../bindings/watchdog/st,stm32-iwdg.txt       | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt

diff --git a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt
new file mode 100644
index 0000000000000..cc13b10a3f823
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt
@@ -0,0 +1,19 @@
+STM32 Independent WatchDoG (IWDG)
+---------------------------------
+
+Required properties:
+- compatible: "st,stm32-iwdg"
+- reg: physical base address and length of the registers set for the device
+- clocks: must contain a single entry describing the clock input
+
+Optional Properties:
+- timeout-sec: Watchdog timeout value in seconds.
+
+Example:
+
+iwdg: watchdog@40003000 {
+	compatible = "st,stm32-iwdg";
+	reg = <0x40003000 0x400>;
+	clocks = <&clk_lsi>;
+	timeout-sec = <32>;
+};
-- 
GitLab


From 4332d113c66a6ecb3702cb8f265adbbe654f9d5f Mon Sep 17 00:00:00 2001
From: Yannick Fertre <yannick.fertre@st.com>
Date: Thu, 6 Apr 2017 14:19:25 +0200
Subject: [PATCH 0189/1958] watchdog: Add STM32 IWDG driver

This patch adds IWDG (Independent WatchDoG) support for STM32 platform.

Signed-off-by: Yannick FERTRE <yannick.fertre@st.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig      |  12 ++
 drivers/watchdog/Makefile     |   1 +
 drivers/watchdog/stm32_iwdg.c | 253 ++++++++++++++++++++++++++++++++++
 3 files changed, 266 insertions(+)
 create mode 100644 drivers/watchdog/stm32_iwdg.c

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 1fdb5a365413b..124e9b87ab6bd 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -752,6 +752,18 @@ config ZX2967_WATCHDOG
 	  To compile this driver as a module, choose M here: the
 	  module will be called zx2967_wdt.
 
+config STM32_WATCHDOG
+	tristate "STM32 Independent WatchDoG (IWDG) support"
+	depends on ARCH_STM32
+	select WATCHDOG_CORE
+	default y
+	help
+	  Say Y here to include support for the watchdog timer
+	  in stm32 SoCs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called stm32_iwdg.
+
 # AVR32 Architecture
 
 config AT32AP700X_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index e23fb4b51878b..3aafd997917a4 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_RENESAS_WDT) += renesas_wdt.o
 obj-$(CONFIG_RENESAS_RZAWDT) += rza_wdt.o
 obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
 obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
+obj-$(CONFIG_STM32_WATCHDOG) += stm32_iwdg.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c
new file mode 100644
index 0000000000000..6c501b7dba295
--- /dev/null
+++ b/drivers/watchdog/stm32_iwdg.c
@@ -0,0 +1,253 @@
+/*
+ * Driver for STM32 Independent Watchdog
+ *
+ * Copyright (C) Yannick Fertre 2017
+ * Author: Yannick Fertre <yannick.fertre@st.com>
+ *
+ * This driver is based on tegra_wdt.c
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+/* IWDG registers */
+#define IWDG_KR		0x00 /* Key register */
+#define IWDG_PR		0x04 /* Prescaler Register */
+#define IWDG_RLR	0x08 /* ReLoad Register */
+#define IWDG_SR		0x0C /* Status Register */
+#define IWDG_WINR	0x10 /* Windows Register */
+
+/* IWDG_KR register bit mask */
+#define KR_KEY_RELOAD	0xAAAA /* reload counter enable */
+#define KR_KEY_ENABLE	0xCCCC /* peripheral enable */
+#define KR_KEY_EWA	0x5555 /* write access enable */
+#define KR_KEY_DWA	0x0000 /* write access disable */
+
+/* IWDG_PR register bit values */
+#define PR_4		0x00 /* prescaler set to 4 */
+#define PR_8		0x01 /* prescaler set to 8 */
+#define PR_16		0x02 /* prescaler set to 16 */
+#define PR_32		0x03 /* prescaler set to 32 */
+#define PR_64		0x04 /* prescaler set to 64 */
+#define PR_128		0x05 /* prescaler set to 128 */
+#define PR_256		0x06 /* prescaler set to 256 */
+
+/* IWDG_RLR register values */
+#define RLR_MIN		0x07C /* min value supported by reload register */
+#define RLR_MAX		0xFFF /* max value supported by reload register */
+
+/* IWDG_SR register bit mask */
+#define FLAG_PVU	BIT(0) /* Watchdog prescaler value update */
+#define FLAG_RVU	BIT(1) /* Watchdog counter reload value update */
+
+/* set timeout to 100000 us */
+#define TIMEOUT_US	100000
+#define SLEEP_US	1000
+
+struct stm32_iwdg {
+	struct watchdog_device	wdd;
+	void __iomem		*regs;
+	struct clk		*clk;
+	unsigned int		rate;
+};
+
+static inline u32 reg_read(void __iomem *base, u32 reg)
+{
+	return readl_relaxed(base + reg);
+}
+
+static inline void reg_write(void __iomem *base, u32 reg, u32 val)
+{
+	writel_relaxed(val, base + reg);
+}
+
+static int stm32_iwdg_start(struct watchdog_device *wdd)
+{
+	struct stm32_iwdg *wdt = watchdog_get_drvdata(wdd);
+	u32 val = FLAG_PVU | FLAG_RVU;
+	u32 reload;
+	int ret;
+
+	dev_dbg(wdd->parent, "%s\n", __func__);
+
+	/* prescaler fixed to 256 */
+	reload = clamp_t(unsigned int, ((wdd->timeout * wdt->rate) / 256) - 1,
+			 RLR_MIN, RLR_MAX);
+
+	/* enable write access */
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_EWA);
+
+	/* set prescaler & reload registers */
+	reg_write(wdt->regs, IWDG_PR, PR_256); /* prescaler fix to 256 */
+	reg_write(wdt->regs, IWDG_RLR, reload);
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_ENABLE);
+
+	/* wait for the registers to be updated (max 100ms) */
+	ret = readl_relaxed_poll_timeout(wdt->regs + IWDG_SR, val,
+					 !(val & (FLAG_PVU | FLAG_RVU)),
+					 SLEEP_US, TIMEOUT_US);
+	if (ret) {
+		dev_err(wdd->parent,
+			"Fail to set prescaler or reload registers\n");
+		return ret;
+	}
+
+	/* reload watchdog */
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_RELOAD);
+
+	return 0;
+}
+
+static int stm32_iwdg_ping(struct watchdog_device *wdd)
+{
+	struct stm32_iwdg *wdt = watchdog_get_drvdata(wdd);
+
+	dev_dbg(wdd->parent, "%s\n", __func__);
+
+	/* reload watchdog */
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_RELOAD);
+
+	return 0;
+}
+
+static int stm32_iwdg_set_timeout(struct watchdog_device *wdd,
+				  unsigned int timeout)
+{
+	dev_dbg(wdd->parent, "%s timeout: %d sec\n", __func__, timeout);
+
+	wdd->timeout = timeout;
+
+	if (watchdog_active(wdd))
+		return stm32_iwdg_start(wdd);
+
+	return 0;
+}
+
+static const struct watchdog_info stm32_iwdg_info = {
+	.options	= WDIOF_SETTIMEOUT |
+			  WDIOF_MAGICCLOSE |
+			  WDIOF_KEEPALIVEPING,
+	.identity	= "STM32 Independent Watchdog",
+};
+
+static struct watchdog_ops stm32_iwdg_ops = {
+	.owner		= THIS_MODULE,
+	.start		= stm32_iwdg_start,
+	.ping		= stm32_iwdg_ping,
+	.set_timeout	= stm32_iwdg_set_timeout,
+};
+
+static int stm32_iwdg_probe(struct platform_device *pdev)
+{
+	struct watchdog_device *wdd;
+	struct stm32_iwdg *wdt;
+	struct resource *res;
+	void __iomem *regs;
+	struct clk *clk;
+	int ret;
+
+	/* This is the timer base. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(regs)) {
+		dev_err(&pdev->dev, "Could not get resource\n");
+		return PTR_ERR(regs);
+	}
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "Unable to get clock\n");
+		return PTR_ERR(clk);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to prepare clock %p\n", clk);
+		return ret;
+	}
+
+	/*
+	 * Allocate our watchdog driver data, which has the
+	 * struct watchdog_device nested within it.
+	 */
+	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+	if (!wdt) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Initialize struct stm32_iwdg. */
+	wdt->regs = regs;
+	wdt->clk = clk;
+	wdt->rate = clk_get_rate(clk);
+
+	/* Initialize struct watchdog_device. */
+	wdd = &wdt->wdd;
+	wdd->info = &stm32_iwdg_info;
+	wdd->ops = &stm32_iwdg_ops;
+	wdd->min_timeout = ((RLR_MIN + 1) * 256) / wdt->rate;
+	wdd->max_hw_heartbeat_ms = ((RLR_MAX + 1) * 256 * 1000) / wdt->rate;
+	wdd->parent = &pdev->dev;
+
+	watchdog_set_drvdata(wdd, wdt);
+	watchdog_set_nowayout(wdd, WATCHDOG_NOWAYOUT);
+
+	ret = watchdog_init_timeout(wdd, 0, &pdev->dev);
+	if (ret)
+		dev_warn(&pdev->dev,
+			 "unable to set timeout value, using default\n");
+
+	ret = watchdog_register_device(wdd);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register watchdog device\n");
+		goto err;
+	}
+
+	platform_set_drvdata(pdev, wdt);
+
+	return 0;
+err:
+	clk_disable_unprepare(clk);
+
+	return ret;
+}
+
+static int stm32_iwdg_remove(struct platform_device *pdev)
+{
+	struct stm32_iwdg *wdt = platform_get_drvdata(pdev);
+
+	watchdog_unregister_device(&wdt->wdd);
+	clk_disable_unprepare(wdt->clk);
+
+	return 0;
+}
+
+static const struct of_device_id stm32_iwdg_of_match[] = {
+	{ .compatible = "st,stm32-iwdg" },
+	{ /* end node */ }
+};
+MODULE_DEVICE_TABLE(of, stm32_iwdg_of_match);
+
+static struct platform_driver stm32_iwdg_driver = {
+	.probe		= stm32_iwdg_probe,
+	.remove		= stm32_iwdg_remove,
+	.driver = {
+		.name	= "iwdg",
+		.of_match_table = stm32_iwdg_of_match,
+	},
+};
+module_platform_driver(stm32_iwdg_driver);
+
+MODULE_AUTHOR("Yannick Fertre <yannick.fertre@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 Independent Watchdog Driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 166fbcf88fdafa02f784ec25ac64745c716b2de0 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
Date: Mon, 17 Apr 2017 22:37:05 +0200
Subject: [PATCH 0190/1958] watchdog: f71808e_wdt: Add F71868 support

This adds support for watchdog part of Fintek F71868 Super I/O chip to
f71808e_wdt driver.

The F71868 chip is, in general, very similar to a F71869, however it has
slightly different set of available reset pulse widths.

Tested on MSI A55M-P33 motherboard.

Signed-off-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig       |  7 ++++---
 drivers/watchdog/f71808e_wdt.c | 27 ++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 124e9b87ab6bd..2696493c808b6 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -849,11 +849,12 @@ config EBC_C384_WDT
 	  the timeout module parameter.
 
 config F71808E_WDT
-	tristate "Fintek F71808E, F71862FG, F71869, F71882FG and F71889FG Watchdog"
+	tristate "Fintek F718xx, F818xx Super I/O Watchdog"
 	depends on X86
 	help
-	  This is the driver for the hardware watchdog on the Fintek
-	  F71808E, F71862FG, F71869, F71882FG and F71889FG Super I/O controllers.
+	  This is the driver for the hardware watchdog on the Fintek F71808E,
+	  F71862FG, F71868, F71869, F71882FG, F71889FG, F81865 and F81866
+	  Super I/O controllers.
 
 	  You can compile this driver directly into the kernel, or use
 	  it as a module.  The module will be called f71808e_wdt.
diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index 1b7e9169072f2..8658dba217681 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -57,6 +57,7 @@
 #define SIO_F71808_ID		0x0901	/* Chipset ID */
 #define SIO_F71858_ID		0x0507	/* Chipset ID */
 #define SIO_F71862_ID		0x0601	/* Chipset ID */
+#define SIO_F71868_ID		0x1106	/* Chipset ID */
 #define SIO_F71869_ID		0x0814	/* Chipset ID */
 #define SIO_F71869A_ID		0x1007	/* Chipset ID */
 #define SIO_F71882_ID		0x0541	/* Chipset ID */
@@ -101,7 +102,7 @@ MODULE_PARM_DESC(timeout,
 static unsigned int pulse_width = WATCHDOG_PULSE_WIDTH;
 module_param(pulse_width, uint, 0);
 MODULE_PARM_DESC(pulse_width,
-	"Watchdog signal pulse width. 0(=level), 1 ms, 25 ms, 125 ms or 5000 ms"
+	"Watchdog signal pulse width. 0(=level), 1, 25, 30, 125, 150, 5000 or 6000 ms"
 			" (default=" __MODULE_STRING(WATCHDOG_PULSE_WIDTH) ")");
 
 static unsigned int f71862fg_pin = WATCHDOG_F71862FG_PIN;
@@ -119,13 +120,14 @@ module_param(start_withtimeout, uint, 0);
 MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with"
 	" given initial timeout. Zero (default) disables this feature.");
 
-enum chips { f71808fg, f71858fg, f71862fg, f71869, f71882fg, f71889fg, f81865,
-	     f81866};
+enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg,
+	     f81865, f81866};
 
 static const char *f71808e_names[] = {
 	"f71808fg",
 	"f71858fg",
 	"f71862fg",
+	"f71868",
 	"f71869",
 	"f71882fg",
 	"f71889fg",
@@ -252,16 +254,23 @@ static int watchdog_set_timeout(int timeout)
 static int watchdog_set_pulse_width(unsigned int pw)
 {
 	int err = 0;
+	unsigned int t1 = 25, t2 = 125, t3 = 5000;
+
+	if (watchdog.type == f71868) {
+		t1 = 30;
+		t2 = 150;
+		t3 = 6000;
+	}
 
 	mutex_lock(&watchdog.lock);
 
-	if        (pw <=    1) {
+	if        (pw <=  1) {
 		watchdog.pulse_val = 0;
-	} else if (pw <=   25) {
+	} else if (pw <= t1) {
 		watchdog.pulse_val = 1;
-	} else if (pw <=  125) {
+	} else if (pw <= t2) {
 		watchdog.pulse_val = 2;
-	} else if (pw <= 5000) {
+	} else if (pw <= t3) {
 		watchdog.pulse_val = 3;
 	} else {
 		pr_err("pulse width out of range\n");
@@ -354,6 +363,7 @@ static int watchdog_start(void)
 			goto exit_superio;
 		break;
 
+	case f71868:
 	case f71869:
 		/* GPIO14 --> WDTRST# */
 		superio_clear_bit(watchdog.sioaddr, SIO_REG_MFUNCT1, 4);
@@ -792,6 +802,9 @@ static int __init f71808e_find(int sioaddr)
 		watchdog.type = f71862fg;
 		err = f71862fg_pin_configure(0); /* validate module parameter */
 		break;
+	case SIO_F71868_ID:
+		watchdog.type = f71868;
+		break;
 	case SIO_F71869_ID:
 	case SIO_F71869A_ID:
 		watchdog.type = f71869;
-- 
GitLab


From 2501b015313fe2fa40ed11fa4dd1748e09b7c773 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Date: Fri, 12 May 2017 14:05:32 +0200
Subject: [PATCH 0191/1958] watchdog: core: add option to avoid early handling
 of watchdog

On some systems its desirable to have watchdog reboot the system
when it does not come up fast enough. This adds a kernel parameter
to disable the auto-update of watchdog before userspace takes over
and a kernel option to set the default. The info messages were
added to shorten error searching on misconfigured systems.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig        | 11 +++++++++++
 drivers/watchdog/watchdog_dev.c | 19 ++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 2696493c808b6..e9da196279149 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -46,6 +46,17 @@ config WATCHDOG_NOWAYOUT
 	  get killed. If you say Y here, the watchdog cannot be stopped once
 	  it has been started.
 
+config WATCHDOG_HANDLE_BOOT_ENABLED
+	bool "Update boot-enabled watchdog until userspace takes over"
+	default y
+	help
+	  The default watchdog behaviour (which you get if you say Y here) is
+	  to ping watchdog devices that were enabled before the driver has
+	  been loaded until control is taken over from userspace using the
+	  /dev/watchdog file. If you say N here, the kernel will not update
+	  the watchdog on its own. Thus if your userspace does not start fast
+	  enough your device will reboot.
+
 config WATCHDOG_SYSFS
 	bool "Read different watchdog information through sysfs"
 	help
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index d5d2bbd8f4285..4bc7ab60b12c4 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -80,6 +80,9 @@ static struct watchdog_core_data *old_wd_data;
 
 static struct workqueue_struct *watchdog_wq;
 
+static bool handle_boot_enabled =
+	IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED);
+
 static inline bool watchdog_need_worker(struct watchdog_device *wdd)
 {
 	/* All variables in milli-seconds */
@@ -956,9 +959,14 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
 	 * and schedule an immediate ping.
 	 */
 	if (watchdog_hw_running(wdd)) {
-		__module_get(wdd->ops->owner);
-		kref_get(&wd_data->kref);
-		queue_delayed_work(watchdog_wq, &wd_data->work, 0);
+		if (handle_boot_enabled) {
+			__module_get(wdd->ops->owner);
+			kref_get(&wd_data->kref);
+			queue_delayed_work(watchdog_wq, &wd_data->work, 0);
+		} else {
+			pr_info("watchdog%d running and kernel based pre-userspace handler disabled\n",
+					wdd->id);
+		}
 	}
 
 	return 0;
@@ -1106,3 +1114,8 @@ void __exit watchdog_dev_exit(void)
 	class_unregister(&watchdog_class);
 	destroy_workqueue(watchdog_wq);
 }
+
+module_param(handle_boot_enabled, bool, 0444);
+MODULE_PARM_DESC(handle_boot_enabled,
+	"Watchdog core auto-updates boot enabled watchdogs before userspace takes over (default="
+	__MODULE_STRING(IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED)) ")");
-- 
GitLab


From 3a9aedb282acf1499a31834c457335e4535e4a1d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 29 May 2017 16:21:31 -0700
Subject: [PATCH 0192/1958] watchdog: w83627hf: Add support for NCT6793D and
 NCT6795D

Both NCT6793D and NCT6795D are compatible to NCT6792D.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/w83627hf_wdt.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index 98fd186c68788..d9ba0496713c4 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c
@@ -49,7 +49,8 @@ static int cr_wdt_csr;		/* WDT control & status register */
 
 enum chips { w83627hf, w83627s, w83697hf, w83697ug, w83637hf, w83627thf,
 	     w83687thf, w83627ehf, w83627dhg, w83627uhg, w83667hg, w83627dhg_p,
-	     w83667hg_b, nct6775, nct6776, nct6779, nct6791, nct6792, nct6102 };
+	     w83667hg_b, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
+	     nct6795, nct6102 };
 
 static int timeout;			/* in seconds */
 module_param(timeout, int, 0);
@@ -97,6 +98,8 @@ MODULE_PARM_DESC(early_disable, "Disable watchdog at boot time (default=0)");
 #define NCT6779_ID		0xc5
 #define NCT6791_ID		0xc8
 #define NCT6792_ID		0xc9
+#define NCT6793_ID		0xd1
+#define NCT6795_ID		0xd3
 
 #define W83627HF_WDT_TIMEOUT	0xf6
 #define W83697HF_WDT_TIMEOUT	0xf4
@@ -204,6 +207,8 @@ static int w83627hf_init(struct watchdog_device *wdog, enum chips chip)
 	case nct6779:
 	case nct6791:
 	case nct6792:
+	case nct6793:
+	case nct6795:
 	case nct6102:
 		/*
 		 * These chips have a fixed WDTO# output pin (W83627UHG),
@@ -396,6 +401,12 @@ static int wdt_find(int addr)
 	case NCT6792_ID:
 		ret = nct6792;
 		break;
+	case NCT6793_ID:
+		ret = nct6793;
+		break;
+	case NCT6795_ID:
+		ret = nct6795;
+		break;
 	case NCT6102_ID:
 		ret = nct6102;
 		cr_wdt_timeout = NCT6102D_WDT_TIMEOUT;
@@ -437,6 +448,8 @@ static int __init wdt_init(void)
 		"NCT6779",
 		"NCT6791",
 		"NCT6792",
+		"NCT6793",
+		"NCT6795",
 		"NCT6102",
 	};
 
-- 
GitLab


From d334a5637dfb53f7d07017afc1e491903b482ef8 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 5 Jun 2017 14:52:12 -0500
Subject: [PATCH 0193/1958] iommu/amd: Reduce amount of MMIO when submitting
 commands

As newer, higher speed devices are developed, perf data shows that the
amount of MMIO that is performed when submitting commands to the IOMMU
causes performance issues. Currently, the command submission path reads
the command buffer head and tail pointers and then writes the tail
pointer once the command is ready.

The tail pointer is only ever updated by the driver so it can be tracked
by the driver without having to read it from the hardware.

The head pointer is updated by the hardware, but can be read
opportunistically. Reading the head pointer only when it appears that
there might not be room in the command buffer and then re-checking the
available space reduces the number of times the head pointer has to be
read.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c       | 35 +++++++++++++++++++++------------
 drivers/iommu/amd_iommu_init.c  |  2 ++
 drivers/iommu/amd_iommu_types.h |  2 ++
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d7748955184b5..d81c895ff4f43 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -874,19 +874,20 @@ static int wait_on_sem(volatile u64 *sem)
 }
 
 static void copy_cmd_to_buffer(struct amd_iommu *iommu,
-			       struct iommu_cmd *cmd,
-			       u32 tail)
+			       struct iommu_cmd *cmd)
 {
 	u8 *target;
 
-	target = iommu->cmd_buf + tail;
-	tail   = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+	target = iommu->cmd_buf + iommu->cmd_buf_tail;
+
+	iommu->cmd_buf_tail += sizeof(*cmd);
+	iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
 
 	/* Copy command to buffer */
 	memcpy(target, cmd, sizeof(*cmd));
 
 	/* Tell the IOMMU about it */
-	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 }
 
 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
@@ -1044,23 +1045,31 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 				      struct iommu_cmd *cmd,
 				      bool sync)
 {
-	u32 left, tail, head, next_tail;
+	bool read_head = true;
+	u32 left, next_tail;
 
+	next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 again:
-
-	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-	next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
-	left      = (head - next_tail) % CMD_BUFFER_SIZE;
+	left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
 
 	if (left <= 0x20) {
 		struct iommu_cmd sync_cmd;
 		int ret;
 
+		if (read_head) {
+			/* Update head and recheck remaining space */
+			iommu->cmd_buf_head = readl(iommu->mmio_base +
+						    MMIO_CMD_HEAD_OFFSET);
+			read_head = false;
+			goto again;
+		}
+
+		read_head = true;
+
 		iommu->cmd_sem = 0;
 
 		build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
-		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
+		copy_cmd_to_buffer(iommu, &sync_cmd);
 
 		if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
 			return ret;
@@ -1068,7 +1077,7 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 		goto again;
 	}
 
-	copy_cmd_to_buffer(iommu, cmd, tail);
+	copy_cmd_to_buffer(iommu, cmd);
 
 	/* We need to sync now to make sure all commands are processed */
 	iommu->need_sync = sync;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5a11328f4d985..3fa7e3b35507d 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -588,6 +588,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 
 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	iommu->cmd_buf_head = 0;
+	iommu->cmd_buf_tail = 0;
 
 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 }
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 4de8f4160bb81..6960d7db2fabe 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -516,6 +516,8 @@ struct amd_iommu {
 
 	/* command buffer virtual address */
 	u8 *cmd_buf;
+	u32 cmd_buf_head;
+	u32 cmd_buf_tail;
 
 	/* event buffer virtual address */
 	u8 *evt_buf;
-- 
GitLab


From 23e967e17c58779b38f69f8d41d727f59440d36a Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 5 Jun 2017 14:52:26 -0500
Subject: [PATCH 0194/1958] iommu/amd: Reduce delay waiting for command buffer
 space

Currently if there is no room to add a command to the command buffer, the
driver performs a "completion wait" which only returns when all commands
on the queue have been processed. There is no need to wait for the entire
command queue to be executed before adding the next command.

Update the driver to perform the same udelay() loop that the "completion
wait" performs, but instead re-read the head pointer to determine if
sufficient space is available.  The very first time it is found that there
is no space available, the udelay() will be skipped to immediately perform
the opportunistic read of the head pointer. If it is still found that
there is not sufficient space, then the udelay() will be performed.

Signed-off-by: Leo Duran <leo.duran@amd.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d81c895ff4f43..1efbef7f3b61a 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1045,7 +1045,7 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 				      struct iommu_cmd *cmd,
 				      bool sync)
 {
-	bool read_head = true;
+	unsigned int count = 0;
 	u32 left, next_tail;
 
 	next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
@@ -1053,33 +1053,26 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 	left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
 
 	if (left <= 0x20) {
-		struct iommu_cmd sync_cmd;
-		int ret;
-
-		if (read_head) {
-			/* Update head and recheck remaining space */
-			iommu->cmd_buf_head = readl(iommu->mmio_base +
-						    MMIO_CMD_HEAD_OFFSET);
-			read_head = false;
-			goto again;
-		}
-
-		read_head = true;
-
-		iommu->cmd_sem = 0;
+		/* Skip udelay() the first time around */
+		if (count++) {
+			if (count == LOOP_TIMEOUT) {
+				pr_err("AMD-Vi: Command buffer timeout\n");
+				return -EIO;
+			}
 
-		build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
-		copy_cmd_to_buffer(iommu, &sync_cmd);
+			udelay(1);
+		}
 
-		if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
-			return ret;
+		/* Update head and recheck remaining space */
+		iommu->cmd_buf_head = readl(iommu->mmio_base +
+					    MMIO_CMD_HEAD_OFFSET);
 
 		goto again;
 	}
 
 	copy_cmd_to_buffer(iommu, cmd);
 
-	/* We need to sync now to make sure all commands are processed */
+	/* Do we need to make sure all commands are processed? */
 	iommu->need_sync = sync;
 
 	return 0;
-- 
GitLab


From 460c26d05bf7357a4d5b41b3d7a97727f5bdffe1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 2 Jun 2017 14:28:01 +0200
Subject: [PATCH 0195/1958] iommu/amd: Rip out old queue flushing code

The queue flushing is pretty inefficient when it flushes the
queues for all cpus at once. Further it flushes all domains
from all IOMMUs for all CPUs, which is overkill as well.

Rip it out to make room for something more efficient.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 143 ++------------------------------------
 1 file changed, 6 insertions(+), 137 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 1efbef7f3b61a..ec9da25c06a14 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -89,25 +89,6 @@ LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
 LIST_HEAD(acpihid_map);
 
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
-	unsigned long iova_pfn;
-	unsigned long pages;
-	struct dma_ops_domain *dma_dom;
-};
-
-struct flush_queue {
-	spinlock_t lock;
-	unsigned next;
-	struct flush_queue_entry *entries;
-};
-
-static DEFINE_PER_CPU(struct flush_queue, flush_queue);
-
-static atomic_t queue_timer_on;
-static struct timer_list queue_timer;
-
 /*
  * Domain for untranslated devices - only allocated
  * if iommu=pt passed on kernel cmd line.
@@ -2253,92 +2234,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
  *
  *****************************************************************************/
 
-static void __queue_flush(struct flush_queue *queue)
-{
-	struct protection_domain *domain;
-	unsigned long flags;
-	int idx;
-
-	/* First flush TLB of all known domains */
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-	list_for_each_entry(domain, &amd_iommu_pd_list, list)
-		domain_flush_tlb(domain);
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-
-	/* Wait until flushes have completed */
-	domain_flush_complete(NULL);
-
-	for (idx = 0; idx < queue->next; ++idx) {
-		struct flush_queue_entry *entry;
-
-		entry = queue->entries + idx;
-
-		free_iova_fast(&entry->dma_dom->iovad,
-				entry->iova_pfn,
-				entry->pages);
-
-		/* Not really necessary, just to make sure we catch any bugs */
-		entry->dma_dom = NULL;
-	}
-
-	queue->next = 0;
-}
-
-static void queue_flush_all(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-		unsigned long flags;
-
-		queue = per_cpu_ptr(&flush_queue, cpu);
-		spin_lock_irqsave(&queue->lock, flags);
-		if (queue->next > 0)
-			__queue_flush(queue);
-		spin_unlock_irqrestore(&queue->lock, flags);
-	}
-}
-
-static void queue_flush_timeout(unsigned long unsused)
-{
-	atomic_set(&queue_timer_on, 0);
-	queue_flush_all();
-}
-
-static void queue_add(struct dma_ops_domain *dma_dom,
-		      unsigned long address, unsigned long pages)
-{
-	struct flush_queue_entry *entry;
-	struct flush_queue *queue;
-	unsigned long flags;
-	int idx;
-
-	pages     = __roundup_pow_of_two(pages);
-	address >>= PAGE_SHIFT;
-
-	queue = get_cpu_ptr(&flush_queue);
-	spin_lock_irqsave(&queue->lock, flags);
-
-	if (queue->next == FLUSH_QUEUE_SIZE)
-		__queue_flush(queue);
-
-	idx   = queue->next++;
-	entry = queue->entries + idx;
-
-	entry->iova_pfn = address;
-	entry->pages    = pages;
-	entry->dma_dom  = dma_dom;
-
-	spin_unlock_irqrestore(&queue->lock, flags);
-
-	if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
-		mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
-
-	put_cpu_ptr(&flush_queue);
-}
-
-
 /*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
@@ -2490,7 +2385,10 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 		domain_flush_tlb(&dma_dom->domain);
 		domain_flush_complete(&dma_dom->domain);
 	} else {
-		queue_add(dma_dom, dma_addr, pages);
+		/* Keep the if() around, we need it later again */
+		dma_ops_free_iova(dma_dom, dma_addr, pages);
+		domain_flush_tlb(&dma_dom->domain);
+		domain_flush_complete(&dma_dom->domain);
 	}
 }
 
@@ -2825,7 +2723,7 @@ static int init_reserved_iova_ranges(void)
 
 int __init amd_iommu_init_api(void)
 {
-	int ret, cpu, err = 0;
+	int ret, err = 0;
 
 	ret = iova_cache_get();
 	if (ret)
@@ -2835,18 +2733,6 @@ int __init amd_iommu_init_api(void)
 	if (ret)
 		return ret;
 
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-		queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
-					 sizeof(*queue->entries),
-					 GFP_KERNEL);
-		if (!queue->entries)
-			goto out_put_iova;
-
-		spin_lock_init(&queue->lock);
-	}
-
 	err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
 	if (err)
 		return err;
@@ -2858,23 +2744,12 @@ int __init amd_iommu_init_api(void)
 	err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
 	if (err)
 		return err;
-	return 0;
-
-out_put_iova:
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
 
-		kfree(queue->entries);
-	}
-
-	return -ENOMEM;
+	return 0;
 }
 
 int __init amd_iommu_init_dma_ops(void)
 {
-	setup_timer(&queue_timer, queue_flush_timeout, 0);
-	atomic_set(&queue_timer_on, 0);
-
 	swiotlb        = iommu_pass_through ? 1 : 0;
 	iommu_detected = 1;
 
@@ -3030,12 +2905,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
 
 	switch (dom->type) {
 	case IOMMU_DOMAIN_DMA:
-		/*
-		 * First make sure the domain is no longer referenced from the
-		 * flush queue
-		 */
-		queue_flush_all();
-
 		/* Now release the domain */
 		dma_dom = to_dma_ops_domain(domain);
 		dma_ops_domain_free(dma_dom);
-- 
GitLab


From d4241a276119bf404e6c0e23f06f84b84c4ecfc0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 2 Jun 2017 14:55:56 +0200
Subject: [PATCH 0196/1958] iommu/amd: Add per-domain flush-queue data
 structures

Make the flush-queue per dma-ops domain and add code
allocate and free the flush-queues;

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 69 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index ec9da25c06a14..2418fcc28fbe6 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -136,6 +136,18 @@ static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+	unsigned long iova_pfn;
+	unsigned long pages;
+};
+
+struct flush_queue {
+	struct flush_queue_entry *entries;
+	unsigned head, tail;
+};
+
 /*
  * Data container for a dma_ops specific protection domain
  */
@@ -145,6 +157,8 @@ struct dma_ops_domain {
 
 	/* IOVA RB-Tree */
 	struct iova_domain iovad;
+
+	struct flush_queue __percpu *flush_queue;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1742,6 +1756,56 @@ static void free_gcr3_table(struct protection_domain *domain)
 	free_page((unsigned long)domain->gcr3_tbl);
 }
 
+static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		kfree(queue->entries);
+	}
+
+	free_percpu(dom->flush_queue);
+
+	dom->flush_queue = NULL;
+}
+
+static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
+{
+	int cpu;
+
+	dom->flush_queue = alloc_percpu(struct flush_queue);
+	if (!dom->flush_queue)
+		return -ENOMEM;
+
+	/* First make sure everything is cleared */
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		queue->head    = 0;
+		queue->tail    = 0;
+		queue->entries = NULL;
+	}
+
+	/* Now start doing the allocation */
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
+					 GFP_KERNEL);
+		if (!queue->entries) {
+			dma_ops_domain_free_flush_queue(dom);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1753,6 +1817,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
 	del_domain_from_list(&dom->domain);
 
+	dma_ops_domain_free_flush_queue(dom);
+
 	put_iova_domain(&dom->iovad);
 
 	free_pagetable(&dom->domain);
@@ -1791,6 +1857,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	/* Initialize reserved ranges */
 	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
+	if (dma_ops_domain_alloc_flush_queue(dma_dom))
+		goto free_dma_dom;
+
 	add_domain_to_list(&dma_dom->domain);
 
 	return dma_dom;
-- 
GitLab


From fd62190a67d6bdf9b93dea056adfcd7fd29b0f92 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 2 Jun 2017 15:37:26 +0200
Subject: [PATCH 0197/1958] iommu/amd: Make use of the per-domain flush queue

Fill the flush-queue on unmap and only flush the IOMMU and
device TLBs when a per-cpu queue gets full.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 60 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2418fcc28fbe6..9fafc30268656 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1806,6 +1806,61 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
 	return 0;
 }
 
+static inline bool queue_ring_full(struct flush_queue *queue)
+{
+	return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
+}
+
+#define queue_ring_for_each(i, q) \
+	for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
+
+static void queue_release(struct dma_ops_domain *dom,
+			  struct flush_queue *queue)
+{
+	unsigned i;
+
+	queue_ring_for_each(i, queue)
+		free_iova_fast(&dom->iovad,
+			       queue->entries[i].iova_pfn,
+			       queue->entries[i].pages);
+
+	queue->head = queue->tail = 0;
+}
+
+static inline unsigned queue_ring_add(struct flush_queue *queue)
+{
+	unsigned idx = queue->tail;
+
+	queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
+
+	return idx;
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+		      unsigned long address, unsigned long pages)
+{
+	struct flush_queue *queue;
+	int idx;
+
+	pages     = __roundup_pow_of_two(pages);
+	address >>= PAGE_SHIFT;
+
+	queue = get_cpu_ptr(dom->flush_queue);
+
+	if (queue_ring_full(queue)) {
+		domain_flush_tlb(&dom->domain);
+		domain_flush_complete(&dom->domain);
+		queue_release(dom, queue);
+	}
+
+	idx = queue_ring_add(queue);
+
+	queue->entries[idx].iova_pfn = address;
+	queue->entries[idx].pages    = pages;
+
+	put_cpu_ptr(dom->flush_queue);
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -2454,10 +2509,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 		domain_flush_tlb(&dma_dom->domain);
 		domain_flush_complete(&dma_dom->domain);
 	} else {
-		/* Keep the if() around, we need it later again */
-		dma_ops_free_iova(dma_dom, dma_addr, pages);
-		domain_flush_tlb(&dma_dom->domain);
-		domain_flush_complete(&dma_dom->domain);
+		queue_add(dma_dom, dma_addr, pages);
 	}
 }
 
-- 
GitLab


From e241f8e76c152e000d481fc8334d41d22c013fe8 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 2 Jun 2017 15:44:57 +0200
Subject: [PATCH 0198/1958] iommu/amd: Add locking to per-domain flush-queue

With locking we can safely access the flush-queues of other
cpus.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 9fafc30268656..9a06acc8cc9dd 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -146,6 +146,7 @@ struct flush_queue_entry {
 struct flush_queue {
 	struct flush_queue_entry *entries;
 	unsigned head, tail;
+	spinlock_t lock;
 };
 
 /*
@@ -1801,6 +1802,8 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
 			dma_ops_domain_free_flush_queue(dom);
 			return -ENOMEM;
 		}
+
+		spin_lock_init(&queue->lock);
 	}
 
 	return 0;
@@ -1808,6 +1811,8 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
 
 static inline bool queue_ring_full(struct flush_queue *queue)
 {
+	assert_spin_locked(&queue->lock);
+
 	return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
 }
 
@@ -1819,6 +1824,8 @@ static void queue_release(struct dma_ops_domain *dom,
 {
 	unsigned i;
 
+	assert_spin_locked(&queue->lock);
+
 	queue_ring_for_each(i, queue)
 		free_iova_fast(&dom->iovad,
 			       queue->entries[i].iova_pfn,
@@ -1831,6 +1838,7 @@ static inline unsigned queue_ring_add(struct flush_queue *queue)
 {
 	unsigned idx = queue->tail;
 
+	assert_spin_locked(&queue->lock);
 	queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
 
 	return idx;
@@ -1840,12 +1848,14 @@ static void queue_add(struct dma_ops_domain *dom,
 		      unsigned long address, unsigned long pages)
 {
 	struct flush_queue *queue;
+	unsigned long flags;
 	int idx;
 
 	pages     = __roundup_pow_of_two(pages);
 	address >>= PAGE_SHIFT;
 
 	queue = get_cpu_ptr(dom->flush_queue);
+	spin_lock_irqsave(&queue->lock, flags);
 
 	if (queue_ring_full(queue)) {
 		domain_flush_tlb(&dom->domain);
@@ -1858,6 +1868,7 @@ static void queue_add(struct dma_ops_domain *dom,
 	queue->entries[idx].iova_pfn = address;
 	queue->entries[idx].pages    = pages;
 
+	spin_unlock_irqrestore(&queue->lock, flags);
 	put_cpu_ptr(dom->flush_queue);
 }
 
-- 
GitLab


From a6e3f6f030396c0576c729fd8ca4bfb654d35bfe Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 2 Jun 2017 16:01:53 +0200
Subject: [PATCH 0199/1958] iommu/amd: Add flush counters to struct
 dma_ops_domain

The counters are increased every time the TLB for a given
domain is flushed. We also store the current value of that
counter into newly added entries of the flush-queue, so that
we can tell whether this entry is already flushed.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 52 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 9a06acc8cc9dd..795208bd39bd1 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -141,6 +141,7 @@ static void detach_device(struct device *dev);
 struct flush_queue_entry {
 	unsigned long iova_pfn;
 	unsigned long pages;
+	u64 counter; /* Flush counter when this entry was added to the queue */
 };
 
 struct flush_queue {
@@ -160,6 +161,27 @@ struct dma_ops_domain {
 	struct iova_domain iovad;
 
 	struct flush_queue __percpu *flush_queue;
+
+	/*
+	 * We need two counter here to be race-free wrt. IOTLB flushing and
+	 * adding entries to the flush queue.
+	 *
+	 * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
+	 * New entries added to the flush ring-buffer get their 'counter' value
+	 * from here. This way we can make sure that entries added to the queue
+	 * (or other per-cpu queues of the same domain) while the TLB is about
+	 * to be flushed are not considered to be flushed already.
+	 */
+	atomic64_t flush_start_cnt;
+
+	/*
+	 * The flush_finish_cnt is incremented when an IOTLB flush is complete.
+	 * This value is always smaller than flush_start_cnt. The queue_add
+	 * function frees all IOVAs that have a counter value smaller than
+	 * flush_finish_cnt. This makes sure that we only free IOVAs that are
+	 * flushed out of the IOTLB of the domain.
+	 */
+	atomic64_t flush_finish_cnt;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1777,6 +1799,9 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
 {
 	int cpu;
 
+	atomic64_set(&dom->flush_start_cnt,  0);
+	atomic64_set(&dom->flush_finish_cnt, 0);
+
 	dom->flush_queue = alloc_percpu(struct flush_queue);
 	if (!dom->flush_queue)
 		return -ENOMEM;
@@ -1844,22 +1869,48 @@ static inline unsigned queue_ring_add(struct flush_queue *queue)
 	return idx;
 }
 
+static inline void queue_ring_remove_head(struct flush_queue *queue)
+{
+	assert_spin_locked(&queue->lock);
+	queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
+}
+
 static void queue_add(struct dma_ops_domain *dom,
 		      unsigned long address, unsigned long pages)
 {
 	struct flush_queue *queue;
 	unsigned long flags;
+	u64 counter;
 	int idx;
 
 	pages     = __roundup_pow_of_two(pages);
 	address >>= PAGE_SHIFT;
 
+	counter = atomic64_read(&dom->flush_finish_cnt);
+
 	queue = get_cpu_ptr(dom->flush_queue);
 	spin_lock_irqsave(&queue->lock, flags);
 
+	queue_ring_for_each(idx, queue) {
+		/*
+		 * This assumes that counter values in the ring-buffer are
+		 * monotonously rising.
+		 */
+		if (queue->entries[idx].counter >= counter)
+			break;
+
+		free_iova_fast(&dom->iovad,
+			       queue->entries[idx].iova_pfn,
+			       queue->entries[idx].pages);
+
+		queue_ring_remove_head(queue);
+	}
+
 	if (queue_ring_full(queue)) {
+		atomic64_inc(&dom->flush_start_cnt);
 		domain_flush_tlb(&dom->domain);
 		domain_flush_complete(&dom->domain);
+		atomic64_inc(&dom->flush_finish_cnt);
 		queue_release(dom, queue);
 	}
 
@@ -1867,6 +1918,7 @@ static void queue_add(struct dma_ops_domain *dom,
 
 	queue->entries[idx].iova_pfn = address;
 	queue->entries[idx].pages    = pages;
+	queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
 
 	spin_unlock_irqrestore(&queue->lock, flags);
 	put_cpu_ptr(dom->flush_queue);
-- 
GitLab


From fca6af6a5976dfa00182232f666b4f789c98bd0c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 2 Jun 2017 18:13:37 +0200
Subject: [PATCH 0200/1958] iommu/amd: Add per-domain timer to flush per-cpu
 queues

Add a timer to each dma_ops domain so that we flush unused
IOTLB entries regularily, even if the queues don't get full
all the time.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 84 +++++++++++++++++++++++++++++++--------
 1 file changed, 67 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 795208bd39bd1..00c1796e07bf7 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -182,6 +182,13 @@ struct dma_ops_domain {
 	 * flushed out of the IOTLB of the domain.
 	 */
 	atomic64_t flush_finish_cnt;
+
+	/*
+	 * Timer to make sure we don't keep IOVAs around unflushed
+	 * for too long
+	 */
+	struct timer_list flush_timer;
+	atomic_t flush_timer_on;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1834,6 +1841,14 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
 	return 0;
 }
 
+static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
+{
+	atomic64_inc(&dom->flush_start_cnt);
+	domain_flush_tlb(&dom->domain);
+	domain_flush_complete(&dom->domain);
+	atomic64_inc(&dom->flush_finish_cnt);
+}
+
 static inline bool queue_ring_full(struct flush_queue *queue)
 {
 	assert_spin_locked(&queue->lock);
@@ -1875,22 +1890,12 @@ static inline void queue_ring_remove_head(struct flush_queue *queue)
 	queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
 }
 
-static void queue_add(struct dma_ops_domain *dom,
-		      unsigned long address, unsigned long pages)
+static void queue_ring_free_flushed(struct dma_ops_domain *dom,
+				    struct flush_queue *queue)
 {
-	struct flush_queue *queue;
-	unsigned long flags;
-	u64 counter;
+	u64 counter = atomic64_read(&dom->flush_finish_cnt);
 	int idx;
 
-	pages     = __roundup_pow_of_two(pages);
-	address >>= PAGE_SHIFT;
-
-	counter = atomic64_read(&dom->flush_finish_cnt);
-
-	queue = get_cpu_ptr(dom->flush_queue);
-	spin_lock_irqsave(&queue->lock, flags);
-
 	queue_ring_for_each(idx, queue) {
 		/*
 		 * This assumes that counter values in the ring-buffer are
@@ -1905,12 +1910,25 @@ static void queue_add(struct dma_ops_domain *dom,
 
 		queue_ring_remove_head(queue);
 	}
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+		      unsigned long address, unsigned long pages)
+{
+	struct flush_queue *queue;
+	unsigned long flags;
+	int idx;
+
+	pages     = __roundup_pow_of_two(pages);
+	address >>= PAGE_SHIFT;
+
+	queue = get_cpu_ptr(dom->flush_queue);
+	spin_lock_irqsave(&queue->lock, flags);
+
+	queue_ring_free_flushed(dom, queue);
 
 	if (queue_ring_full(queue)) {
-		atomic64_inc(&dom->flush_start_cnt);
-		domain_flush_tlb(&dom->domain);
-		domain_flush_complete(&dom->domain);
-		atomic64_inc(&dom->flush_finish_cnt);
+		dma_ops_domain_flush_tlb(dom);
 		queue_release(dom, queue);
 	}
 
@@ -1921,9 +1939,33 @@ static void queue_add(struct dma_ops_domain *dom,
 	queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
 
 	spin_unlock_irqrestore(&queue->lock, flags);
+
+	if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
+		mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
+
 	put_cpu_ptr(dom->flush_queue);
 }
 
+static void queue_flush_timeout(unsigned long data)
+{
+	struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
+	int cpu;
+
+	atomic_set(&dom->flush_timer_on, 0);
+
+	dma_ops_domain_flush_tlb(dom);
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+		unsigned long flags;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		spin_lock_irqsave(&queue->lock, flags);
+		queue_ring_free_flushed(dom, queue);
+		spin_unlock_irqrestore(&queue->lock, flags);
+	}
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1935,6 +1977,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
 	del_domain_from_list(&dom->domain);
 
+	if (timer_pending(&dom->flush_timer))
+		del_timer(&dom->flush_timer);
+
 	dma_ops_domain_free_flush_queue(dom);
 
 	put_iova_domain(&dom->iovad);
@@ -1978,6 +2023,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	if (dma_ops_domain_alloc_flush_queue(dma_dom))
 		goto free_dma_dom;
 
+	setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
+		    (unsigned long)dma_dom);
+
+	atomic_set(&dma_dom->flush_timer_on, 0);
+
 	add_domain_to_list(&dma_dom->domain);
 
 	return dma_dom;
-- 
GitLab


From ac3b708ad49f77b44faa104057783f161491e9e4 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 7 Jun 2017 14:38:15 +0200
Subject: [PATCH 0201/1958] iommu/amd: Remove queue_release() function

We can use queue_ring_free_flushed() instead, so remove this
redundancy.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 00c1796e07bf7..6498f5baa7ea5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1859,21 +1859,6 @@ static inline bool queue_ring_full(struct flush_queue *queue)
 #define queue_ring_for_each(i, q) \
 	for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
 
-static void queue_release(struct dma_ops_domain *dom,
-			  struct flush_queue *queue)
-{
-	unsigned i;
-
-	assert_spin_locked(&queue->lock);
-
-	queue_ring_for_each(i, queue)
-		free_iova_fast(&dom->iovad,
-			       queue->entries[i].iova_pfn,
-			       queue->entries[i].pages);
-
-	queue->head = queue->tail = 0;
-}
-
 static inline unsigned queue_ring_add(struct flush_queue *queue)
 {
 	unsigned idx = queue->tail;
@@ -1925,12 +1910,15 @@ static void queue_add(struct dma_ops_domain *dom,
 	queue = get_cpu_ptr(dom->flush_queue);
 	spin_lock_irqsave(&queue->lock, flags);
 
-	queue_ring_free_flushed(dom, queue);
-
-	if (queue_ring_full(queue)) {
+	/*
+	 * When ring-queue is full, flush the entries from the IOTLB so
+	 * that we can free all entries with queue_ring_free_flushed()
+	 * below.
+	 */
+	if (queue_ring_full(queue))
 		dma_ops_domain_flush_tlb(dom);
-		queue_release(dom, queue);
-	}
+
+	queue_ring_free_flushed(dom, queue);
 
 	idx = queue_ring_add(queue);
 
-- 
GitLab


From 2f071ff175ea221710d583222b55f915d90901f0 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Fri, 2 Jun 2017 15:21:18 +1200
Subject: [PATCH 0202/1958] mtd: mchp23k256: add partitioning support

Setting the of_node for the mtd device allows the generic mtd code to
setup the partitions.

[Editorial note (Brian): patch still pending on fixing up the "aligned
to eraseblock" partition sanity check, given that this SRAM has no
eraseblocks.]

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/mchp23k256.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
index 2542f5b8b63fc..3e5feb4546440 100644
--- a/drivers/mtd/devices/mchp23k256.c
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -143,6 +143,7 @@ static int mchp23k256_probe(struct spi_device *spi)
 
 	data = dev_get_platdata(&spi->dev);
 
+	mtd_set_of_node(&flash->mtd, spi->dev.of_node);
 	flash->mtd.dev.parent	= &spi->dev;
 	flash->mtd.type		= MTD_RAM;
 	flash->mtd.flags	= MTD_CAP_RAM;
-- 
GitLab


From 4379075a870b8de43a9ecd5b46884953234fc669 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Fri, 2 Jun 2017 15:21:19 +1200
Subject: [PATCH 0203/1958] mtd: mchp23k256: Add support for mchp23lcv1024

The mchp23lcv1024 is similar to the mchp23k256, the differences (from a
software point of view) are the capacity of the chip and the size of the
addresses used.

There is no way to detect the specific chip so we must be told via a
Device Tree or default to mchp23k256 when device tree is not used.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 .../bindings/mtd/microchip,mchp23k256.txt     |  2 +-
 drivers/mtd/devices/mchp23k256.c              | 66 ++++++++++++++++---
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt b/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt
index 25e5ad38b0f0d..7328eb92a03c3 100644
--- a/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt
+++ b/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt
@@ -3,7 +3,7 @@
 Required properties:
 - #address-cells, #size-cells : Must be present if the device has sub-nodes
   representing partitions.
-- compatible : Must be "microchip,mchp23k256"
+- compatible : Must be one of "microchip,mchp23k256" or "microchip,mchp23lcv1024"
 - reg : Chip-Select number
 - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
 
diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
index 3e5feb4546440..8956b7dcc984f 100644
--- a/drivers/mtd/devices/mchp23k256.c
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -21,10 +21,18 @@
 #include <linux/spi/spi.h>
 #include <linux/of_device.h>
 
+#define MAX_CMD_SIZE		4
+
+struct mchp23_caps {
+	u8 addr_width;
+	unsigned int size;
+};
+
 struct mchp23k256_flash {
 	struct spi_device	*spi;
 	struct mutex		lock;
 	struct mtd_info		mtd;
+	const struct mchp23_caps	*caps;
 };
 
 #define MCHP23K256_CMD_WRITE_STATUS	0x01
@@ -34,22 +42,40 @@ struct mchp23k256_flash {
 
 #define to_mchp23k256_flash(x) container_of(x, struct mchp23k256_flash, mtd)
 
+static void mchp23k256_addr2cmd(struct mchp23k256_flash *flash,
+				unsigned int addr, u8 *cmd)
+{
+	int i;
+
+	/*
+	 * Address is sent in big endian (MSB first) and we skip
+	 * the first entry of the cmd array which contains the cmd
+	 * opcode.
+	 */
+	for (i = flash->caps->addr_width; i > 0; i--, addr >>= 8)
+		cmd[i] = addr;
+}
+
+static int mchp23k256_cmdsz(struct mchp23k256_flash *flash)
+{
+	return 1 + flash->caps->addr_width;
+}
+
 static int mchp23k256_write(struct mtd_info *mtd, loff_t to, size_t len,
 			    size_t *retlen, const unsigned char *buf)
 {
 	struct mchp23k256_flash *flash = to_mchp23k256_flash(mtd);
 	struct spi_transfer transfer[2] = {};
 	struct spi_message message;
-	unsigned char command[3];
+	unsigned char command[MAX_CMD_SIZE];
 
 	spi_message_init(&message);
 
 	command[0] = MCHP23K256_CMD_WRITE;
-	command[1] = to >> 8;
-	command[2] = to;
+	mchp23k256_addr2cmd(flash, to, command);
 
 	transfer[0].tx_buf = command;
-	transfer[0].len = sizeof(command);
+	transfer[0].len = mchp23k256_cmdsz(flash);
 	spi_message_add_tail(&transfer[0], &message);
 
 	transfer[1].tx_buf = buf;
@@ -73,17 +99,16 @@ static int mchp23k256_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct mchp23k256_flash *flash = to_mchp23k256_flash(mtd);
 	struct spi_transfer transfer[2] = {};
 	struct spi_message message;
-	unsigned char command[3];
+	unsigned char command[MAX_CMD_SIZE];
 
 	spi_message_init(&message);
 
 	memset(&transfer, 0, sizeof(transfer));
 	command[0] = MCHP23K256_CMD_READ;
-	command[1] = from >> 8;
-	command[2] = from;
+	mchp23k256_addr2cmd(flash, from, command);
 
 	transfer[0].tx_buf = command;
-	transfer[0].len = sizeof(command);
+	transfer[0].len = mchp23k256_cmdsz(flash);
 	spi_message_add_tail(&transfer[0], &message);
 
 	transfer[1].rx_buf = buf;
@@ -123,6 +148,16 @@ static int mchp23k256_set_mode(struct spi_device *spi)
 	return spi_sync(spi, &message);
 }
 
+static const struct mchp23_caps mchp23k256_caps = {
+	.size = SZ_32K,
+	.addr_width = 2,
+};
+
+static const struct mchp23_caps mchp23lcv1024_caps = {
+	.size = SZ_128K,
+	.addr_width = 3,
+};
+
 static int mchp23k256_probe(struct spi_device *spi)
 {
 	struct mchp23k256_flash *flash;
@@ -143,12 +178,16 @@ static int mchp23k256_probe(struct spi_device *spi)
 
 	data = dev_get_platdata(&spi->dev);
 
+	flash->caps = of_device_get_match_data(&spi->dev);
+	if (!flash->caps)
+		flash->caps = &mchp23k256_caps;
+
 	mtd_set_of_node(&flash->mtd, spi->dev.of_node);
 	flash->mtd.dev.parent	= &spi->dev;
 	flash->mtd.type		= MTD_RAM;
 	flash->mtd.flags	= MTD_CAP_RAM;
 	flash->mtd.writesize	= 1;
-	flash->mtd.size		= SZ_32K;
+	flash->mtd.size		= flash->caps->size;
 	flash->mtd._read	= mchp23k256_read;
 	flash->mtd._write	= mchp23k256_write;
 
@@ -168,7 +207,14 @@ static int mchp23k256_remove(struct spi_device *spi)
 }
 
 static const struct of_device_id mchp23k256_of_table[] = {
-	{ .compatible = "microchip,mchp23k256" },
+	{
+		.compatible = "microchip,mchp23k256",
+		.data = &mchp23k256_caps,
+	},
+	{
+		.compatible = "microchip,mchp23lcv1024",
+		.data = &mchp23lcv1024_caps,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, mchp23k256_of_table);
-- 
GitLab


From c46adf0976eacadd60920da5b8bf8e27299183bd Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Thu, 1 Jun 2017 17:01:19 +0800
Subject: [PATCH 0204/1958] mtd: subpagetest: fix wrong written check in
 function write_eraseblock2

Write size in function write_eraseblock2 is subpgsize * k.
It is wrong to check whether written is equal to subpgsize after each
mtd_write.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/tests/subpagetest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/tests/subpagetest.c b/drivers/mtd/tests/subpagetest.c
index aecc6ce5a9e1c..fa2519ad2435e 100644
--- a/drivers/mtd/tests/subpagetest.c
+++ b/drivers/mtd/tests/subpagetest.c
@@ -102,7 +102,7 @@ static int write_eraseblock2(int ebnum)
 		if (unlikely(err || written != subpgsize * k)) {
 			pr_err("error: write failed at %#llx\n",
 			       (long long)addr);
-			if (written != subpgsize) {
+			if (written != subpgsize * k) {
 				pr_err("  write size: %#x\n",
 				       subpgsize * k);
 				pr_err("  written: %#08zx\n",
-- 
GitLab


From fb418240ecbd721a1237da592c075993c1709955 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Tue, 9 May 2017 16:46:04 -0500
Subject: [PATCH 0205/1958] target: remove dead code

Local variable _ret_ is assigned to a constant value and it is never
updated again. Remove this variable and the dead code it guards.

Addresses-Coverity-ID: 140761
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Reviewed-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_rd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 20253d04103f6..d12967690054d 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -554,7 +554,7 @@ static ssize_t rd_set_configfs_dev_params(struct se_device *dev,
 	struct rd_dev *rd_dev = RD_DEV(dev);
 	char *orig, *ptr, *opts;
 	substring_t args[MAX_OPT_ARGS];
-	int ret = 0, arg, token;
+	int arg, token;
 
 	opts = kstrdup(page, GFP_KERNEL);
 	if (!opts)
@@ -589,7 +589,7 @@ static ssize_t rd_set_configfs_dev_params(struct se_device *dev,
 	}
 
 	kfree(orig);
-	return (!ret) ? count : ret;
+	return count;
 }
 
 static ssize_t rd_show_configfs_dev_params(struct se_device *dev, char *b)
-- 
GitLab


From 12bdcbd539c6327c09da0503c674733cb2d82cb5 Mon Sep 17 00:00:00 2001
From: Byungchul Park <byungchul.park@lge.com>
Date: Fri, 12 May 2017 09:42:56 +0900
Subject: [PATCH 0206/1958] vhost/scsi: Don't reinvent the wheel but use
 existing llist API

Although llist provides proper APIs, they are not used. Make them used.

Signed-off-by: Byungchul Park <byungchul.park@lge.com>
Acked-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/vhost/scsi.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index fd6c8b66f06fd..679f8960db4b7 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -496,14 +496,12 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
 	struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
 					vs_event_work);
 	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
-	struct vhost_scsi_evt *evt;
+	struct vhost_scsi_evt *evt, *t;
 	struct llist_node *llnode;
 
 	mutex_lock(&vq->mutex);
 	llnode = llist_del_all(&vs->vs_event_list);
-	while (llnode) {
-		evt = llist_entry(llnode, struct vhost_scsi_evt, list);
-		llnode = llist_next(llnode);
+	llist_for_each_entry_safe(evt, t, llnode, list) {
 		vhost_scsi_do_evt_work(vs, evt);
 		vhost_scsi_free_evt(vs, evt);
 	}
@@ -529,10 +527,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 
 	bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
 	llnode = llist_del_all(&vs->vs_completion_list);
-	while (llnode) {
-		cmd = llist_entry(llnode, struct vhost_scsi_cmd,
-				     tvc_completion_list);
-		llnode = llist_next(llnode);
+	llist_for_each_entry(cmd, llnode, tvc_completion_list) {
 		se_cmd = &cmd->tvc_se_cmd;
 
 		pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
-- 
GitLab


From 464fd6419c68bc6b1697e02f46b6d3dd57dfed28 Mon Sep 17 00:00:00 2001
From: Michael Cyr <mikecyr@us.ibm.com>
Date: Tue, 16 May 2017 17:49:21 -0500
Subject: [PATCH 0207/1958] ibmvscsis: Enable Logical Partition Migration
 Support

Changes to support a new mechanism from phyp to better synchronize the
logical partition migration (LPM) of the client partition.
This includes a new VIOCTL to register that we support this new
functionality, and 2 new Transport Event types, and finally another
new VIOCTL to let phyp know once we're ready for the Suspend.

Signed-off-by: Michael Cyr <mikecyr@us.ibm.com>
Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 148 +++++++++++++++++++++--
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h |  25 +++-
 drivers/scsi/ibmvscsi_tgt/libsrp.h       |   5 +-
 3 files changed, 162 insertions(+), 16 deletions(-)

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index abf6026645dd2..35710524d059d 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -155,6 +155,9 @@ static long ibmvscsis_unregister_command_q(struct scsi_info *vscsi)
 		qrc = h_free_crq(vscsi->dds.unit_id);
 		switch (qrc) {
 		case H_SUCCESS:
+			spin_lock_bh(&vscsi->intr_lock);
+			vscsi->flags &= ~PREP_FOR_SUSPEND_FLAGS;
+			spin_unlock_bh(&vscsi->intr_lock);
 			break;
 
 		case H_HARDWARE:
@@ -422,6 +425,9 @@ static void ibmvscsis_disconnect(struct work_struct *work)
 	new_state = vscsi->new_state;
 	vscsi->new_state = 0;
 
+	vscsi->flags |= DISCONNECT_SCHEDULED;
+	vscsi->flags &= ~SCHEDULE_DISCONNECT;
+
 	pr_debug("disconnect: flags 0x%x, state 0x%hx\n", vscsi->flags,
 		 vscsi->state);
 
@@ -802,6 +808,13 @@ static long ibmvscsis_establish_new_q(struct scsi_info *vscsi)
 	long rc = ADAPT_SUCCESS;
 	uint format;
 
+	rc = h_vioctl(vscsi->dds.unit_id, H_ENABLE_PREPARE_FOR_SUSPEND, 30000,
+		      0, 0, 0, 0);
+	if (rc == H_SUCCESS)
+		vscsi->flags |= PREP_FOR_SUSPEND_ENABLED;
+	else if (rc != H_NOT_FOUND)
+		pr_err("Error from Enable Prepare for Suspend: %ld\n", rc);
+
 	vscsi->flags &= PRESERVE_FLAG_FIELDS;
 	vscsi->rsp_q_timer.timer_pops = 0;
 	vscsi->debit = 0;
@@ -950,6 +963,63 @@ static void ibmvscsis_free_cmd_resources(struct scsi_info *vscsi,
 	}
 }
 
+/**
+ * ibmvscsis_ready_for_suspend() - Helper function to call VIOCTL
+ * @vscsi:	Pointer to our adapter structure
+ * @idle:	Indicates whether we were called from adapter_idle.  This
+ *		is important to know if we need to do a disconnect, since if
+ *		we're called from adapter_idle, we're still processing the
+ *		current disconnect, so we can't just call post_disconnect.
+ *
+ * This function is called when the adapter is idle when phyp has sent
+ * us a Prepare for Suspend Transport Event.
+ *
+ * EXECUTION ENVIRONMENT:
+ *	Process or interrupt environment called with interrupt lock held
+ */
+static long ibmvscsis_ready_for_suspend(struct scsi_info *vscsi, bool idle)
+{
+	long rc = 0;
+	struct viosrp_crq *crq;
+
+	/* See if there is a Resume event in the queue */
+	crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index;
+
+	pr_debug("ready_suspend: flags 0x%x, state 0x%hx crq_valid:%x\n",
+		 vscsi->flags, vscsi->state, (int)crq->valid);
+
+	if (!(vscsi->flags & PREP_FOR_SUSPEND_ABORTED) && !(crq->valid)) {
+		rc = h_vioctl(vscsi->dds.unit_id, H_READY_FOR_SUSPEND, 0, 0, 0,
+			      0, 0);
+		if (rc) {
+			pr_err("Ready for Suspend Vioctl failed: %ld\n", rc);
+			rc = 0;
+		}
+	} else if (((vscsi->flags & PREP_FOR_SUSPEND_OVERWRITE) &&
+		    (vscsi->flags & PREP_FOR_SUSPEND_ABORTED)) ||
+		   ((crq->valid) && ((crq->valid != VALID_TRANS_EVENT) ||
+				     (crq->format != RESUME_FROM_SUSP)))) {
+		if (idle) {
+			vscsi->state = ERR_DISCONNECT_RECONNECT;
+			ibmvscsis_reset_queue(vscsi);
+			rc = -1;
+		} else if (vscsi->state == CONNECTED) {
+			ibmvscsis_post_disconnect(vscsi,
+						  ERR_DISCONNECT_RECONNECT, 0);
+		}
+
+		vscsi->flags &= ~PREP_FOR_SUSPEND_OVERWRITE;
+
+		if ((crq->valid) && ((crq->valid != VALID_TRANS_EVENT) ||
+				     (crq->format != RESUME_FROM_SUSP)))
+			pr_err("Invalid element in CRQ after Prepare for Suspend");
+	}
+
+	vscsi->flags &= ~(PREP_FOR_SUSPEND_PENDING | PREP_FOR_SUSPEND_ABORTED);
+
+	return rc;
+}
+
 /**
  * ibmvscsis_trans_event() - Handle a Transport Event
  * @vscsi:	Pointer to our adapter structure
@@ -974,18 +1044,8 @@ static long ibmvscsis_trans_event(struct scsi_info *vscsi,
 	case PARTNER_FAILED:
 	case PARTNER_DEREGISTER:
 		ibmvscsis_delete_client_info(vscsi, true);
-		break;
-
-	default:
-		rc = ERROR;
-		dev_err(&vscsi->dev, "trans_event: invalid format %d\n",
-			(uint)crq->format);
-		ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT,
-					  RESPONSE_Q_DOWN);
-		break;
-	}
-
-	if (rc == ADAPT_SUCCESS) {
+		if (crq->format == MIGRATED)
+			vscsi->flags &= ~PREP_FOR_SUSPEND_OVERWRITE;
 		switch (vscsi->state) {
 		case NO_QUEUE:
 		case ERR_DISCONNECTED:
@@ -1034,6 +1094,60 @@ static long ibmvscsis_trans_event(struct scsi_info *vscsi,
 			vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT);
 			break;
 		}
+		break;
+
+	case PREPARE_FOR_SUSPEND:
+		pr_debug("Prep for Suspend, crq status = 0x%x\n",
+			 (int)crq->status);
+		switch (vscsi->state) {
+		case ERR_DISCONNECTED:
+		case WAIT_CONNECTION:
+		case CONNECTED:
+			ibmvscsis_ready_for_suspend(vscsi, false);
+			break;
+		case SRP_PROCESSING:
+			vscsi->resume_state = vscsi->state;
+			vscsi->flags |= PREP_FOR_SUSPEND_PENDING;
+			if (crq->status == CRQ_ENTRY_OVERWRITTEN)
+				vscsi->flags |= PREP_FOR_SUSPEND_OVERWRITE;
+			ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, 0);
+			break;
+		case NO_QUEUE:
+		case UNDEFINED:
+		case UNCONFIGURING:
+		case WAIT_ENABLED:
+		case ERR_DISCONNECT:
+		case ERR_DISCONNECT_RECONNECT:
+		case WAIT_IDLE:
+			pr_err("Invalid state for Prepare for Suspend Trans Event: 0x%x\n",
+			       vscsi->state);
+			break;
+		}
+		break;
+
+	case RESUME_FROM_SUSP:
+		pr_debug("Resume from Suspend, crq status = 0x%x\n",
+			 (int)crq->status);
+		if (vscsi->flags & PREP_FOR_SUSPEND_PENDING) {
+			vscsi->flags |= PREP_FOR_SUSPEND_ABORTED;
+		} else {
+			if ((crq->status == CRQ_ENTRY_OVERWRITTEN) ||
+			    (vscsi->flags & PREP_FOR_SUSPEND_OVERWRITE)) {
+				ibmvscsis_post_disconnect(vscsi,
+							  ERR_DISCONNECT_RECONNECT,
+							  0);
+				vscsi->flags &= ~PREP_FOR_SUSPEND_OVERWRITE;
+			}
+		}
+		break;
+
+	default:
+		rc = ERROR;
+		dev_err(&vscsi->dev, "trans_event: invalid format %d\n",
+			(uint)crq->format);
+		ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT,
+					  RESPONSE_Q_DOWN);
+		break;
 	}
 
 	rc = vscsi->flags & SCHEDULE_DISCONNECT;
@@ -1201,6 +1315,7 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi)
 static void ibmvscsis_adapter_idle(struct scsi_info *vscsi)
 {
 	int free_qs = false;
+	long rc = 0;
 
 	pr_debug("adapter_idle: flags 0x%x, state 0x%hx\n", vscsi->flags,
 		 vscsi->state);
@@ -1240,7 +1355,14 @@ static void ibmvscsis_adapter_idle(struct scsi_info *vscsi)
 		vscsi->rsp_q_timer.timer_pops = 0;
 		vscsi->debit = 0;
 		vscsi->credit = 0;
-		if (vscsi->flags & TRANS_EVENT) {
+		if (vscsi->flags & PREP_FOR_SUSPEND_PENDING) {
+			vscsi->state = vscsi->resume_state;
+			vscsi->resume_state = 0;
+			rc = ibmvscsis_ready_for_suspend(vscsi, true);
+			vscsi->flags &= ~DISCONNECT_SCHEDULED;
+			if (rc)
+				break;
+		} else if (vscsi->flags & TRANS_EVENT) {
 			vscsi->state = WAIT_CONNECTION;
 			vscsi->flags &= PRESERVE_FLAG_FIELDS;
 		} else {
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
index b4391a8de4569..cc96c27311345 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
@@ -262,6 +262,14 @@ struct scsi_info {
 #define DISCONNECT_SCHEDULED          0x00800
 	/* remove function is sleeping */
 #define CFG_SLEEPING                  0x01000
+	/* Register for Prepare for Suspend Transport Events */
+#define PREP_FOR_SUSPEND_ENABLED      0x02000
+	/* Prepare for Suspend event sent */
+#define PREP_FOR_SUSPEND_PENDING      0x04000
+	/* Resume from Suspend event sent */
+#define PREP_FOR_SUSPEND_ABORTED      0x08000
+	/* Prepare for Suspend event overwrote another CRQ entry */
+#define PREP_FOR_SUSPEND_OVERWRITE    0x10000
 	u32 flags;
 	/* adapter lock */
 	spinlock_t intr_lock;
@@ -272,6 +280,7 @@ struct scsi_info {
 	/* used in crq, to tag what iu the response is for */
 	u64  empty_iu_tag;
 	uint new_state;
+	uint resume_state;
 	/* control block for the response queue timer */
 	struct timer_cb rsp_q_timer;
 	/* keep last client to enable proper accounting */
@@ -324,8 +333,13 @@ struct scsi_info {
 #define TARGET_STOP(VSCSI) (long)(((VSCSI)->state & DONT_PROCESS_STATE) | \
 				  ((VSCSI)->flags & BLOCK))
 
+#define PREP_FOR_SUSPEND_FLAGS  (PREP_FOR_SUSPEND_ENABLED | \
+				 PREP_FOR_SUSPEND_PENDING | \
+				 PREP_FOR_SUSPEND_ABORTED | \
+				 PREP_FOR_SUSPEND_OVERWRITE)
+
 /* flag bit that are not reset during disconnect */
-#define PRESERVE_FLAG_FIELDS 0
+#define PRESERVE_FLAG_FIELDS (PREP_FOR_SUSPEND_FLAGS)
 
 #define vio_iu(IUE) ((union viosrp_iu *)((IUE)->sbuf->buf))
 
@@ -333,8 +347,15 @@ struct scsi_info {
 #define WRITE_CMD(cdb)	(((cdb)[0] & 0x1F) == 0xA)
 
 #ifndef H_GET_PARTNER_INFO
-#define H_GET_PARTNER_INFO      0x0000000000000008LL
+#define H_GET_PARTNER_INFO              0x0000000000000008LL
+#endif
+#ifndef H_ENABLE_PREPARE_FOR_SUSPEND
+#define H_ENABLE_PREPARE_FOR_SUSPEND    0x000000000000001DLL
 #endif
+#ifndef H_READY_FOR_SUSPEND
+#define H_READY_FOR_SUSPEND             0x000000000000001ELL
+#endif
+
 
 #define h_copy_rdma(l, sa, sb, da, db) \
 		plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db)
diff --git a/drivers/scsi/ibmvscsi_tgt/libsrp.h b/drivers/scsi/ibmvscsi_tgt/libsrp.h
index 4696f331453ea..9fec55b363224 100644
--- a/drivers/scsi/ibmvscsi_tgt/libsrp.h
+++ b/drivers/scsi/ibmvscsi_tgt/libsrp.h
@@ -30,10 +30,13 @@ enum srp_trans_event {
 	UNUSED_FORMAT = 0,
 	PARTNER_FAILED = 1,
 	PARTNER_DEREGISTER = 2,
-	MIGRATED = 6
+	MIGRATED = 6,
+	PREPARE_FOR_SUSPEND = 9,
+	RESUME_FROM_SUSP = 0xA
 };
 
 enum srp_status {
+	CRQ_ENTRY_OVERWRITTEN = 0x20,
 	HEADER_DESCRIPTOR = 0xF1,
 	PING = 0xF5,
 	PING_RESPONSE = 0xF6
-- 
GitLab


From 2237498f0b5c74768f688ebaf16eab2c708d5fdb Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 11 Apr 2017 22:21:47 -0700
Subject: [PATCH 0208/1958] target/iblock: Convert WRITE_SAME to
 blkdev_issue_zeroout

The people who are actively using iblock_execute_write_same_direct() are
doing so in the context of ESX VAAI BlockZero, together with
EXTENDED_COPY and COMPARE_AND_WRITE primitives.

In practice though I've not seen any users of IBLOCK WRITE_SAME for
anything other than VAAI BlockZero, so just using blkdev_issue_zeroout()
when available, and falling back to iblock_execute_write_same() if the
WRITE_SAME buffer contains anything other than zeros should be OK.

(Hook up max_write_zeroes_sectors to signal LBPRZ feature bit in
 target_configure_unmap_from_queue - nab)

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Jens Axboe <axboe@fb.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c |  2 +-
 drivers/target/target_core_iblock.c | 44 ++++++++++++++++++-----------
 2 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 8add07f387f9d..a5762e601fa19 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -851,7 +851,7 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
 	attrib->unmap_granularity = q->limits.discard_granularity / block_size;
 	attrib->unmap_granularity_alignment = q->limits.discard_alignment /
 								block_size;
-	attrib->unmap_zeroes_data = 0;
+	attrib->unmap_zeroes_data = (q->limits.max_write_zeroes_sectors);
 	return true;
 }
 EXPORT_SYMBOL(target_configure_unmap_from_queue);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bb069ebe4aa6c..b2044133d747c 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -86,6 +86,7 @@ static int iblock_configure_device(struct se_device *dev)
 	struct block_device *bd = NULL;
 	struct blk_integrity *bi;
 	fmode_t mode;
+	unsigned int max_write_zeroes_sectors;
 	int ret = -ENOMEM;
 
 	if (!(ib_dev->ibd_flags & IBDF_HAS_UDEV_PATH)) {
@@ -129,7 +130,11 @@ static int iblock_configure_device(struct se_device *dev)
 	 * Enable write same emulation for IBLOCK and use 0xFFFF as
 	 * the smaller WRITE_SAME(10) only has a two-byte block count.
 	 */
-	dev->dev_attrib.max_write_same_len = 0xFFFF;
+	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bd);
+	if (max_write_zeroes_sectors)
+		dev->dev_attrib.max_write_same_len = max_write_zeroes_sectors;
+	else
+		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
 	if (blk_queue_nonrot(q))
 		dev->dev_attrib.is_nonrot = 1;
@@ -415,28 +420,31 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
 }
 
 static sense_reason_t
-iblock_execute_write_same_direct(struct block_device *bdev, struct se_cmd *cmd)
+iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
 	struct scatterlist *sg = &cmd->t_data_sg[0];
-	struct page *page = NULL;
-	int ret;
+	unsigned char *buf, zero = 0x00, *p = &zero;
+	int rc, ret;
 
-	if (sg->offset) {
-		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			return TCM_OUT_OF_RESOURCES;
-		sg_copy_to_buffer(sg, cmd->t_data_nents, page_address(page),
-				  dev->dev_attrib.block_size);
-	}
+	buf = kmap(sg_page(sg)) + sg->offset;
+	if (!buf)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	/*
+	 * Fall back to block_execute_write_same() slow-path if
+	 * incoming WRITE_SAME payload does not contain zeros.
+	 */
+	rc = memcmp(buf, p, cmd->data_length);
+	kunmap(sg_page(sg));
+
+	if (rc)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	ret = blkdev_issue_write_same(bdev,
+	ret = blkdev_issue_zeroout(bdev,
 				target_to_linux_sector(dev, cmd->t_task_lba),
 				target_to_linux_sector(dev,
 					sbc_get_write_same_sectors(cmd)),
-				GFP_KERNEL, page ? page : sg_page(sg));
-	if (page)
-		__free_page(page);
+				GFP_KERNEL, false);
 	if (ret)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
@@ -472,8 +480,10 @@ iblock_execute_write_same(struct se_cmd *cmd)
 		return TCM_INVALID_CDB_FIELD;
 	}
 
-	if (bdev_write_same(bdev))
-		return iblock_execute_write_same_direct(bdev, cmd);
+	if (bdev_write_zeroes_sectors(bdev)) {
+		if (!iblock_execute_zero_out(bdev, cmd))
+			return 0;
+	}
 
 	ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL);
 	if (!ibr)
-- 
GitLab


From c17cd24959cdb12c855dc61e20c36fa25f21f3d3 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 1 Jun 2017 03:10:53 -0700
Subject: [PATCH 0209/1958] target/configfs: Kill se_device->dev_link_magic

Instead of using a hardcoded magic value in se_device when verifying
a target config_item symlink source during target_fabric_port_link(),
go ahead and use target_core_dev_item_ops directly instead.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_configfs.c        |  6 +++++-
 drivers/target/target_core_device.c          |  1 -
 drivers/target/target_core_fabric_configfs.c | 12 +++++++-----
 include/target/target_core_base.h            |  2 --
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 0326607e5ab82..9b8abd55c21cc 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -2236,7 +2236,11 @@ static void target_core_dev_release(struct config_item *item)
 	target_free_device(dev);
 }
 
-static struct configfs_item_operations target_core_dev_item_ops = {
+/*
+ * Used in target_core_fabric_configfs.c to verify valid se_device symlink
+ * within target_fabric_port_link()
+ */
+struct configfs_item_operations target_core_dev_item_ops = {
 	.release		= target_core_dev_release,
 };
 
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index a5762e601fa19..e4771cec108cc 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -756,7 +756,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 	if (!dev)
 		return NULL;
 
-	dev->dev_link_magic = SE_DEV_LINK_MAGIC;
 	dev->se_hba = hba;
 	dev->transport = hba->backend->ops;
 	dev->prot_length = sizeof(struct t10_pi_tuple);
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index d1e6cab8e3d3f..2cbaecd1669d8 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -620,6 +620,8 @@ static struct configfs_attribute *target_fabric_port_attrs[] = {
 	NULL,
 };
 
+extern struct configfs_item_operations target_core_dev_item_ops;
+
 static int target_fabric_port_link(
 	struct config_item *lun_ci,
 	struct config_item *se_dev_ci)
@@ -628,16 +630,16 @@ static int target_fabric_port_link(
 	struct se_lun *lun = container_of(to_config_group(lun_ci),
 				struct se_lun, lun_group);
 	struct se_portal_group *se_tpg;
-	struct se_device *dev =
-		container_of(to_config_group(se_dev_ci), struct se_device, dev_group);
+	struct se_device *dev;
 	struct target_fabric_configfs *tf;
 	int ret;
 
-	if (dev->dev_link_magic != SE_DEV_LINK_MAGIC) {
-		pr_err("Bad dev->dev_link_magic, not a valid se_dev_ci pointer:"
-			" %p to struct se_device: %p\n", se_dev_ci, dev);
+	if (!se_dev_ci->ci_type ||
+	    se_dev_ci->ci_type->ct_item_ops != &target_core_dev_item_ops) {
+		pr_err("Bad se_dev_ci, not a valid se_dev_ci pointer: %p\n", se_dev_ci);
 		return -EFAULT;
 	}
+	dev = container_of(to_config_group(se_dev_ci), struct se_device, dev_group);
 
 	if (!(dev->dev_flags & DF_CONFIGURED)) {
 		pr_err("se_device not configured yet, cannot port link\n");
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 0c1dce2ac6f02..c3c14d053122d 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -746,8 +746,6 @@ struct se_dev_stat_grps {
 };
 
 struct se_device {
-#define SE_DEV_LINK_MAGIC			0xfeeddeef
-	u32			dev_link_magic;
 	/* RELATIVE TARGET PORT IDENTIFER Counter */
 	u16			dev_rpti_counter;
 	/* Used for SAM Task Attribute ordering */
-- 
GitLab


From 9ae0e9ade56f23765366d2cfad24e65f28df977d Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 1 Jun 2017 03:11:18 -0700
Subject: [PATCH 0210/1958] target/configfs: Kill se_lun->lun_link_magic

Instead of using a hardcoded magic value in se_lun when verifying
a target config_item symlink source during target_fabric_mappedlun_link(),
go ahead and use target_fabric_port_item_ops directly instead.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_fabric_configfs.c | 13 ++++++++-----
 drivers/target/target_core_tpg.c             |  1 -
 include/target/target_core_base.h            |  2 --
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 2cbaecd1669d8..e9e917cc64419 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -65,6 +65,8 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf)
 	pr_debug("Setup generic %s\n", __stringify(_name));		\
 }
 
+static struct configfs_item_operations target_fabric_port_item_ops;
+
 /* Start of tfc_tpg_mappedlun_cit */
 
 static int target_fabric_mappedlun_link(
@@ -72,19 +74,20 @@ static int target_fabric_mappedlun_link(
 	struct config_item *lun_ci)
 {
 	struct se_dev_entry *deve;
-	struct se_lun *lun = container_of(to_config_group(lun_ci),
-			struct se_lun, lun_group);
+	struct se_lun *lun;
 	struct se_lun_acl *lacl = container_of(to_config_group(lun_acl_ci),
 			struct se_lun_acl, se_lun_group);
 	struct se_portal_group *se_tpg;
 	struct config_item *nacl_ci, *tpg_ci, *tpg_ci_s, *wwn_ci, *wwn_ci_s;
 	bool lun_access_ro;
 
-	if (lun->lun_link_magic != SE_LUN_LINK_MAGIC) {
-		pr_err("Bad lun->lun_link_magic, not a valid lun_ci pointer:"
-			" %p to struct lun: %p\n", lun_ci, lun);
+	if (!lun_ci->ci_type ||
+	    lun_ci->ci_type->ct_item_ops != &target_fabric_port_item_ops) {
+		pr_err("Bad lun_ci, not a valid lun_ci pointer: %p\n", lun_ci);
 		return -EFAULT;
 	}
+	lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group);
+
 	/*
 	 * Ensure that the source port exists
 	 */
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 310d9e55c6eb7..36913734c6bc5 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -576,7 +576,6 @@ struct se_lun *core_tpg_alloc_lun(
 		return ERR_PTR(-ENOMEM);
 	}
 	lun->unpacked_lun = unpacked_lun;
-	lun->lun_link_magic = SE_LUN_LINK_MAGIC;
 	atomic_set(&lun->lun_acl_count, 0);
 	init_completion(&lun->lun_ref_comp);
 	init_completion(&lun->lun_shutdown_comp);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index c3c14d053122d..47d9f381209fc 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -701,8 +701,6 @@ struct scsi_port_stats {
 
 struct se_lun {
 	u64			unpacked_lun;
-#define SE_LUN_LINK_MAGIC			0xffff7771
-	u32			lun_link_magic;
 	bool			lun_shutdown;
 	bool			lun_access_ro;
 	u32			lun_index;
-- 
GitLab


From eceb4459df4a5b6127ae479280168847b9780b25 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 1 Jun 2017 20:21:33 -0700
Subject: [PATCH 0211/1958] iscsi-target: Avoid holding ->tpg_state_lock during
 param update

As originally reported by Jia-Ju, iscsit_tpg_enable_portal_group()
holds iscsi_portal_group->tpg_state_lock while updating AUTHMETHOD
via iscsi_update_param_value(), which performs a GFP_KERNEL
allocation.

However, since iscsit_tpg_enable_portal_group() is already protected
by iscsit_get_tpg() -> iscsi_portal_group->tpg_access_lock in it's
parent caller, ->tpg_state_lock only needs to be held when setting
TPG_STATE_ACTIVE.

Reported-by: Jia-Ju Bai <baijiaju1990@163.com>
Reviewed-by: Jia-Ju Bai <baijiaju1990@163.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_tpg.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index 2e7e08dbda480..abaabbaebd881 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -311,11 +311,9 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 	struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
 	int ret;
 
-	spin_lock(&tpg->tpg_state_lock);
 	if (tpg->tpg_state == TPG_STATE_ACTIVE) {
 		pr_err("iSCSI target portal group: %hu is already"
 			" active, ignoring request.\n", tpg->tpgt);
-		spin_unlock(&tpg->tpg_state_lock);
 		return -EINVAL;
 	}
 	/*
@@ -324,10 +322,8 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 	 * is enforced (as per default), and remove the NONE option.
 	 */
 	param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
-	if (!param) {
-		spin_unlock(&tpg->tpg_state_lock);
+	if (!param)
 		return -EINVAL;
-	}
 
 	if (tpg->tpg_attrib.authentication) {
 		if (!strcmp(param->value, NONE)) {
@@ -341,6 +337,7 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 			goto err;
 	}
 
+	spin_lock(&tpg->tpg_state_lock);
 	tpg->tpg_state = TPG_STATE_ACTIVE;
 	spin_unlock(&tpg->tpg_state_lock);
 
@@ -353,7 +350,6 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 	return 0;
 
 err:
-	spin_unlock(&tpg->tpg_state_lock);
 	return ret;
 }
 
-- 
GitLab


From 1adc9105bbf8831d31f764ca8d1b99696e5dedba Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Fri, 9 Jun 2017 16:35:18 -0500
Subject: [PATCH 0212/1958] ipmi: Use the proper default value for register
 size in ACPI

It's the proper value, so there's no effect, but just to be proper,
use the right value.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_si_intf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index b2b618f066e02..b89078bff1c16 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1942,7 +1942,7 @@ static int hotmod_handler(const char *val, struct kernel_param *kp)
 				info->io.regspacing = DEFAULT_REGSPACING;
 			info->io.regsize = regsize;
 			if (!info->io.regsize)
-				info->io.regsize = DEFAULT_REGSPACING;
+				info->io.regsize = DEFAULT_REGSIZE;
 			info->io.regshift = regshift;
 			info->irq = irq;
 			if (info->irq)
@@ -2036,7 +2036,7 @@ static int hardcode_find_bmc(void)
 			info->io.regspacing = DEFAULT_REGSPACING;
 		info->io.regsize = regsizes[i];
 		if (!info->io.regsize)
-			info->io.regsize = DEFAULT_REGSPACING;
+			info->io.regsize = DEFAULT_REGSIZE;
 		info->io.regshift = regshifts[i];
 		info->irq = irqs[i];
 		if (info->irq)
@@ -2395,7 +2395,7 @@ static void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
 	info->io.regspacing = ipmi_data->offset;
 	if (!info->io.regspacing)
 		info->io.regspacing = DEFAULT_REGSPACING;
-	info->io.regsize = DEFAULT_REGSPACING;
+	info->io.regsize = DEFAULT_REGSIZE;
 	info->io.regshift = 0;
 
 	info->slave_addr = ipmi_data->slave_addr;
@@ -2761,7 +2761,7 @@ static int acpi_ipmi_probe(struct platform_device *dev)
 			info->io.regspacing =
 				res_second->start - info->io.addr_data;
 	}
-	info->io.regsize = DEFAULT_REGSPACING;
+	info->io.regsize = DEFAULT_REGSIZE;
 	info->io.regshift = 0;
 
 	/* If _GPE exists, use it; otherwise use standard interrupts */
-- 
GitLab


From d45e5316e6a5071dbfe44bae04a89a3ba75e1ea9 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sat, 10 Jun 2017 12:09:05 +0200
Subject: [PATCH 0213/1958] mtd: nand: fsl_ifc: fix handing of bit flips in
 erased pages

If we see unrecoverable ECC error, we need to count number of bitflips
from all-ones and report correctable/uncorrectable according to
that. Otherwise we report ECC failed on erased flash with single bit error.

Signed-off-by: Pavel Machek <pavel@denx.de>
Reported-by: Darwin Dingel <Darwin.Dingel@alliedtelesis.co.nz>
Acked-by: Darwin Dingel <Darwin.Dingel@alliedtelesis.co.nz>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsl_ifc_nand.c | 77 ++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 35 deletions(-)

diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index 89e14daeaba69..d1c4538f870f9 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -171,34 +171,6 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob)
 		ifc_nand_ctrl->index += mtd->writesize;
 }
 
-static int is_blank(struct mtd_info *mtd, unsigned int bufnum)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
-	u8 __iomem *addr = priv->vbase + bufnum * (mtd->writesize * 2);
-	u32 __iomem *mainarea = (u32 __iomem *)addr;
-	u8 __iomem *oob = addr + mtd->writesize;
-	struct mtd_oob_region oobregion = { };
-	int i, section = 0;
-
-	for (i = 0; i < mtd->writesize / 4; i++) {
-		if (__raw_readl(&mainarea[i]) != 0xffffffff)
-			return 0;
-	}
-
-	mtd_ooblayout_ecc(mtd, section++, &oobregion);
-	while (oobregion.length) {
-		for (i = 0; i < oobregion.length; i++) {
-			if (__raw_readb(&oob[oobregion.offset + i]) != 0xff)
-				return 0;
-		}
-
-		mtd_ooblayout_ecc(mtd, section++, &oobregion);
-	}
-
-	return 1;
-}
-
 /* returns nonzero if entire page is blank */
 static int check_read_ecc(struct mtd_info *mtd, struct fsl_ifc_ctrl *ctrl,
 			  u32 *eccstat, unsigned int bufnum)
@@ -274,16 +246,14 @@ static void fsl_ifc_run_command(struct mtd_info *mtd)
 			if (errors == 15) {
 				/*
 				 * Uncorrectable error.
-				 * OK only if the whole page is blank.
+				 * We'll check for blank pages later.
 				 *
 				 * We disable ECCER reporting due to...
 				 * erratum IFC-A002770 -- so report it now if we
 				 * see an uncorrectable error in ECCSTAT.
 				 */
-				if (!is_blank(mtd, bufnum))
-					ctrl->nand_stat |=
-						IFC_NAND_EVTER_STAT_ECCER;
-				break;
+				ctrl->nand_stat |= IFC_NAND_EVTER_STAT_ECCER;
+				continue;
 			}
 
 			mtd->ecc_stats.corrected += errors;
@@ -678,6 +648,39 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip)
 	return nand_fsr | NAND_STATUS_WP;
 }
 
+/*
+ * The controller does not check for bitflips in erased pages,
+ * therefore software must check instead.
+ */
+static int check_erased_page(struct nand_chip *chip, u8 *buf)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 *ecc = chip->oob_poi;
+	const int ecc_size = chip->ecc.bytes;
+	const int pkt_size = chip->ecc.size;
+	int i, res, bitflips = 0;
+	struct mtd_oob_region oobregion = { };
+
+	mtd_ooblayout_ecc(mtd, 0, &oobregion);
+	ecc += oobregion.offset;
+
+	for (i = 0; i < chip->ecc.steps; ++i) {
+		res = nand_check_erased_ecc_chunk(buf, pkt_size, ecc, ecc_size,
+						  NULL, 0,
+						  chip->ecc.strength);
+		if (res < 0)
+			mtd->ecc_stats.failed++;
+		else
+			mtd->ecc_stats.corrected += res;
+
+		bitflips = max(res, bitflips);
+		buf += pkt_size;
+		ecc += ecc_size;
+	}
+
+	return bitflips;
+}
+
 static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			     uint8_t *buf, int oob_required, int page)
 {
@@ -689,8 +692,12 @@ static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (oob_required)
 		fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
 
-	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_ECCER)
-		dev_err(priv->dev, "NAND Flash ECC Uncorrectable Error\n");
+	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_ECCER) {
+		if (!oob_required)
+			fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+		return check_erased_page(chip, buf);
+	}
 
 	if (ctrl->nand_stat != IFC_NAND_EVTER_STAT_OPC)
 		mtd->ecc_stats.failed++;
-- 
GitLab


From 3762a33b007b63e058eb600eccf0bcd097d386f5 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 1 Jun 2017 16:28:15 +0530
Subject: [PATCH 0214/1958] mtd: nand: orion: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/orion_nand.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index f8e463a97b9ee..209170ed2b764 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -166,7 +166,11 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 		}
 	}
 
-	clk_prepare_enable(info->clk);
+	ret = clk_prepare_enable(info->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare clock!\n");
+		return ret;
+	}
 
 	ret = nand_scan(mtd, 1);
 	if (ret)
-- 
GitLab


From d816f6b6375901a5090018961c5ab631784538ea Mon Sep 17 00:00:00 2001
From: Matthias Lange <matthias.lange@kernkonzept.com>
Date: Mon, 5 Jun 2017 11:33:51 +0200
Subject: [PATCH 0215/1958] mtd: nand: gpmi: Fix typo in data structure name

This makes it easier to grep.

Signed-off-by: Matthias Lange <matthias.lange@kernkonzept.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-lib.c  | 4 ++--
 drivers/mtd/nand/gpmi-nand/gpmi-nand.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 141bd70a49c2c..57ff0518bbdaa 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -26,7 +26,7 @@
 #include "gpmi-regs.h"
 #include "bch-regs.h"
 
-static struct timing_threshod timing_default_threshold = {
+static struct timing_threshold timing_default_threshold = {
 	.max_data_setup_cycles       = (BM_GPMI_TIMING0_DATA_SETUP >>
 						BP_GPMI_TIMING0_DATA_SETUP),
 	.internal_data_setup_in_ns   = 0,
@@ -329,7 +329,7 @@ static unsigned int ns_to_cycles(unsigned int time,
 static int gpmi_nfc_compute_hardware_timing(struct gpmi_nand_data *this,
 					struct gpmi_nfc_hardware_timing *hw)
 {
-	struct timing_threshod *nfc = &timing_default_threshold;
+	struct timing_threshold *nfc = &timing_default_threshold;
 	struct resources *r = &this->resources;
 	struct nand_chip *nand = &this->nand;
 	struct nand_timing target = this->timing;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index e88a45a62ab6a..9df0ad64e7e06 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -234,7 +234,7 @@ struct gpmi_nfc_hardware_timing {
 };
 
 /**
- * struct timing_threshod - Timing threshold
+ * struct timing_threshold - Timing threshold
  * @max_data_setup_cycles:       The maximum number of data setup cycles that
  *                               can be expressed in the hardware.
  * @internal_data_setup_in_ns:   The time, in ns, that the NFC hardware requires
@@ -256,7 +256,7 @@ struct gpmi_nfc_hardware_timing {
  *                               progress, this is the clock frequency during
  *                               the most recent I/O transaction.
  */
-struct timing_threshod {
+struct timing_threshold {
 	const unsigned int      max_chip_count;
 	const unsigned int      max_data_setup_cycles;
 	const unsigned int      internal_data_setup_in_ns;
-- 
GitLab


From f82c3232d1906bc8754e0b33ce39d09d08cb60c3 Mon Sep 17 00:00:00 2001
From: Matthias Lange <matthias.lange@kernkonzept.com>
Date: Mon, 5 Jun 2017 11:34:38 +0200
Subject: [PATCH 0216/1958] mtd: nand: gpmi: fix typo in comment

Signed-off-by: Matthias Lange <matthias.lange@kernkonzept.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 57ff0518bbdaa..97787246af41d 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -932,7 +932,7 @@ static int enable_edo_mode(struct gpmi_nand_data *this, int mode)
 
 	nand->select_chip(mtd, 0);
 
-	/* [1] send SET FEATURE commond to NAND */
+	/* [1] send SET FEATURE command to NAND */
 	feature[0] = mode;
 	ret = nand->onfi_set_features(mtd, nand,
 				ONFI_FEATURE_ADDR_TIMING_MODE, feature);
-- 
GitLab


From 2b8c92b4e7265e271cd070a1a5dd1ab2766e516b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 6 Jun 2017 08:21:40 +0900
Subject: [PATCH 0217/1958] mtd: nand: denali_dt: clean up resource ioremap

No need to use two struct resource pointers.  Just reuse one.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali_dt.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index df9ef36cc2ce3..b48430fe3cd44 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -49,7 +49,7 @@ MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
 static int denali_dt_probe(struct platform_device *pdev)
 {
-	struct resource *denali_reg, *nand_data;
+	struct resource *res;
 	struct denali_dt *dt;
 	const struct denali_dt_data *data;
 	struct denali_nand_info *denali;
@@ -74,15 +74,13 @@ static int denali_dt_probe(struct platform_device *pdev)
 		return denali->irq;
 	}
 
-	denali_reg = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						  "denali_reg");
-	denali->flash_reg = devm_ioremap_resource(&pdev->dev, denali_reg);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "denali_reg");
+	denali->flash_reg = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(denali->flash_reg))
 		return PTR_ERR(denali->flash_reg);
 
-	nand_data = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						 "nand_data");
-	denali->flash_mem = devm_ioremap_resource(&pdev->dev, nand_data);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
+	denali->flash_mem = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(denali->flash_mem))
 		return PTR_ERR(denali->flash_mem);
 
-- 
GitLab


From df8b97024e6d28e98066696619e2084bbda4e40e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 6 Jun 2017 08:21:41 +0900
Subject: [PATCH 0218/1958] mtd: nand: denali: use BIT() and GENMASK() for
 register macros

Use BIT() and GENMASK() for register field macros.  This will make
it easier to compare the macros with the register description in the
Denali User's Guide.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.h | 244 +++++++++++++++++++-------------------
 1 file changed, 119 insertions(+), 125 deletions(-)

diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index ec004850652a7..37833535a7a38 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -24,273 +24,267 @@
 #include <linux/mtd/nand.h>
 
 #define DEVICE_RESET				0x0
-#define     DEVICE_RESET__BANK0				0x0001
-#define     DEVICE_RESET__BANK1				0x0002
-#define     DEVICE_RESET__BANK2				0x0004
-#define     DEVICE_RESET__BANK3				0x0008
+#define     DEVICE_RESET__BANK(bank)			BIT(bank)
 
 #define TRANSFER_SPARE_REG			0x10
-#define     TRANSFER_SPARE_REG__FLAG			0x0001
+#define     TRANSFER_SPARE_REG__FLAG			BIT(0)
 
 #define LOAD_WAIT_CNT				0x20
-#define     LOAD_WAIT_CNT__VALUE			0xffff
+#define     LOAD_WAIT_CNT__VALUE			GENMASK(15, 0)
 
 #define PROGRAM_WAIT_CNT			0x30
-#define     PROGRAM_WAIT_CNT__VALUE			0xffff
+#define     PROGRAM_WAIT_CNT__VALUE			GENMASK(15, 0)
 
 #define ERASE_WAIT_CNT				0x40
-#define     ERASE_WAIT_CNT__VALUE			0xffff
+#define     ERASE_WAIT_CNT__VALUE			GENMASK(15, 0)
 
 #define INT_MON_CYCCNT				0x50
-#define     INT_MON_CYCCNT__VALUE			0xffff
+#define     INT_MON_CYCCNT__VALUE			GENMASK(15, 0)
 
 #define RB_PIN_ENABLED				0x60
-#define     RB_PIN_ENABLED__BANK0			0x0001
-#define     RB_PIN_ENABLED__BANK1			0x0002
-#define     RB_PIN_ENABLED__BANK2			0x0004
-#define     RB_PIN_ENABLED__BANK3			0x0008
+#define     RB_PIN_ENABLED__BANK(bank)			BIT(bank)
 
 #define MULTIPLANE_OPERATION			0x70
-#define     MULTIPLANE_OPERATION__FLAG			0x0001
+#define     MULTIPLANE_OPERATION__FLAG			BIT(0)
 
 #define MULTIPLANE_READ_ENABLE			0x80
-#define     MULTIPLANE_READ_ENABLE__FLAG		0x0001
+#define     MULTIPLANE_READ_ENABLE__FLAG		BIT(0)
 
 #define COPYBACK_DISABLE			0x90
-#define     COPYBACK_DISABLE__FLAG			0x0001
+#define     COPYBACK_DISABLE__FLAG			BIT(0)
 
 #define CACHE_WRITE_ENABLE			0xa0
-#define     CACHE_WRITE_ENABLE__FLAG			0x0001
+#define     CACHE_WRITE_ENABLE__FLAG			BIT(0)
 
 #define CACHE_READ_ENABLE			0xb0
-#define     CACHE_READ_ENABLE__FLAG			0x0001
+#define     CACHE_READ_ENABLE__FLAG			BIT(0)
 
 #define PREFETCH_MODE				0xc0
-#define     PREFETCH_MODE__PREFETCH_EN			0x0001
-#define     PREFETCH_MODE__PREFETCH_BURST_LENGTH	0xfff0
+#define     PREFETCH_MODE__PREFETCH_EN			BIT(0)
+#define     PREFETCH_MODE__PREFETCH_BURST_LENGTH	GENMASK(15, 4)
 
 #define CHIP_ENABLE_DONT_CARE			0xd0
-#define     CHIP_EN_DONT_CARE__FLAG			0x01
+#define     CHIP_EN_DONT_CARE__FLAG			BIT(0)
 
 #define ECC_ENABLE				0xe0
-#define     ECC_ENABLE__FLAG				0x0001
+#define     ECC_ENABLE__FLAG				BIT(0)
 
 #define GLOBAL_INT_ENABLE			0xf0
-#define     GLOBAL_INT_EN_FLAG				0x01
+#define     GLOBAL_INT_EN_FLAG				BIT(0)
 
 #define WE_2_RE					0x100
-#define     WE_2_RE__VALUE				0x003f
+#define     WE_2_RE__VALUE				GENMASK(5, 0)
 
 #define ADDR_2_DATA				0x110
-#define     ADDR_2_DATA__VALUE				0x003f
+#define     ADDR_2_DATA__VALUE				GENMASK(5, 0)
 
 #define RE_2_WE					0x120
-#define     RE_2_WE__VALUE				0x003f
+#define     RE_2_WE__VALUE				GENMASK(5, 0)
 
 #define ACC_CLKS				0x130
-#define     ACC_CLKS__VALUE				0x000f
+#define     ACC_CLKS__VALUE				GENMASK(3, 0)
 
 #define NUMBER_OF_PLANES			0x140
-#define     NUMBER_OF_PLANES__VALUE			0x0007
+#define     NUMBER_OF_PLANES__VALUE			GENMASK(2, 0)
 
 #define PAGES_PER_BLOCK				0x150
-#define     PAGES_PER_BLOCK__VALUE			0xffff
+#define     PAGES_PER_BLOCK__VALUE			GENMASK(15, 0)
 
 #define DEVICE_WIDTH				0x160
-#define     DEVICE_WIDTH__VALUE				0x0003
+#define     DEVICE_WIDTH__VALUE				GENMASK(1, 0)
 
 #define DEVICE_MAIN_AREA_SIZE			0x170
-#define     DEVICE_MAIN_AREA_SIZE__VALUE		0xffff
+#define     DEVICE_MAIN_AREA_SIZE__VALUE		GENMASK(15, 0)
 
 #define DEVICE_SPARE_AREA_SIZE			0x180
-#define     DEVICE_SPARE_AREA_SIZE__VALUE		0xffff
+#define     DEVICE_SPARE_AREA_SIZE__VALUE		GENMASK(15, 0)
 
 #define TWO_ROW_ADDR_CYCLES			0x190
-#define     TWO_ROW_ADDR_CYCLES__FLAG			0x0001
+#define     TWO_ROW_ADDR_CYCLES__FLAG			BIT(0)
 
 #define MULTIPLANE_ADDR_RESTRICT		0x1a0
-#define     MULTIPLANE_ADDR_RESTRICT__FLAG		0x0001
+#define     MULTIPLANE_ADDR_RESTRICT__FLAG		BIT(0)
 
 #define ECC_CORRECTION				0x1b0
-#define     ECC_CORRECTION__VALUE			0x001f
+#define     ECC_CORRECTION__VALUE			GENMASK(4, 0)
 
 #define READ_MODE				0x1c0
-#define     READ_MODE__VALUE				0x000f
+#define     READ_MODE__VALUE				GENMASK(3, 0)
 
 #define WRITE_MODE				0x1d0
-#define     WRITE_MODE__VALUE				0x000f
+#define     WRITE_MODE__VALUE				GENMASK(3, 0)
 
 #define COPYBACK_MODE				0x1e0
-#define     COPYBACK_MODE__VALUE			0x000f
+#define     COPYBACK_MODE__VALUE			GENMASK(3, 0)
 
 #define RDWR_EN_LO_CNT				0x1f0
-#define     RDWR_EN_LO_CNT__VALUE			0x001f
+#define     RDWR_EN_LO_CNT__VALUE			GENMASK(4, 0)
 
 #define RDWR_EN_HI_CNT				0x200
-#define     RDWR_EN_HI_CNT__VALUE			0x001f
+#define     RDWR_EN_HI_CNT__VALUE			GENMASK(4, 0)
 
 #define MAX_RD_DELAY				0x210
-#define     MAX_RD_DELAY__VALUE				0x000f
+#define     MAX_RD_DELAY__VALUE				GENMASK(3, 0)
 
 #define CS_SETUP_CNT				0x220
-#define     CS_SETUP_CNT__VALUE				0x001f
+#define     CS_SETUP_CNT__VALUE				GENMASK(4, 0)
 
 #define SPARE_AREA_SKIP_BYTES			0x230
-#define     SPARE_AREA_SKIP_BYTES__VALUE		0x003f
+#define     SPARE_AREA_SKIP_BYTES__VALUE		GENMASK(5, 0)
 
 #define SPARE_AREA_MARKER			0x240
-#define     SPARE_AREA_MARKER__VALUE			0xffff
+#define     SPARE_AREA_MARKER__VALUE			GENMASK(15, 0)
 
 #define DEVICES_CONNECTED			0x250
-#define     DEVICES_CONNECTED__VALUE			0x0007
+#define     DEVICES_CONNECTED__VALUE			GENMASK(2, 0)
 
 #define DIE_MASK				0x260
-#define     DIE_MASK__VALUE				0x00ff
+#define     DIE_MASK__VALUE				GENMASK(7, 0)
 
 #define FIRST_BLOCK_OF_NEXT_PLANE		0x270
-#define     FIRST_BLOCK_OF_NEXT_PLANE__VALUE		0xffff
+#define     FIRST_BLOCK_OF_NEXT_PLANE__VALUE		GENMASK(15, 0)
 
 #define WRITE_PROTECT				0x280
-#define     WRITE_PROTECT__FLAG				0x0001
+#define     WRITE_PROTECT__FLAG				BIT(0)
 
 #define RE_2_RE					0x290
-#define     RE_2_RE__VALUE				0x003f
+#define     RE_2_RE__VALUE				GENMASK(5, 0)
 
 #define MANUFACTURER_ID				0x300
-#define     MANUFACTURER_ID__VALUE			0x00ff
+#define     MANUFACTURER_ID__VALUE			GENMASK(7, 0)
 
 #define DEVICE_ID				0x310
-#define     DEVICE_ID__VALUE				0x00ff
+#define     DEVICE_ID__VALUE				GENMASK(7, 0)
 
 #define DEVICE_PARAM_0				0x320
-#define     DEVICE_PARAM_0__VALUE			0x00ff
+#define     DEVICE_PARAM_0__VALUE			GENMASK(7, 0)
 
 #define DEVICE_PARAM_1				0x330
-#define     DEVICE_PARAM_1__VALUE			0x00ff
+#define     DEVICE_PARAM_1__VALUE			GENMASK(7, 0)
 
 #define DEVICE_PARAM_2				0x340
-#define     DEVICE_PARAM_2__VALUE			0x00ff
+#define     DEVICE_PARAM_2__VALUE			GENMASK(7, 0)
 
 #define LOGICAL_PAGE_DATA_SIZE			0x350
-#define     LOGICAL_PAGE_DATA_SIZE__VALUE		0xffff
+#define     LOGICAL_PAGE_DATA_SIZE__VALUE		GENMASK(15, 0)
 
 #define LOGICAL_PAGE_SPARE_SIZE			0x360
-#define     LOGICAL_PAGE_SPARE_SIZE__VALUE		0xffff
+#define     LOGICAL_PAGE_SPARE_SIZE__VALUE		GENMASK(15, 0)
 
 #define REVISION				0x370
-#define     REVISION__VALUE				0xffff
+#define     REVISION__VALUE				GENMASK(15, 0)
 
 #define ONFI_DEVICE_FEATURES			0x380
-#define     ONFI_DEVICE_FEATURES__VALUE			0x003f
+#define     ONFI_DEVICE_FEATURES__VALUE			GENMASK(5, 0)
 
 #define ONFI_OPTIONAL_COMMANDS			0x390
-#define     ONFI_OPTIONAL_COMMANDS__VALUE		0x003f
+#define     ONFI_OPTIONAL_COMMANDS__VALUE		GENMASK(5, 0)
 
 #define ONFI_TIMING_MODE			0x3a0
-#define     ONFI_TIMING_MODE__VALUE			0x003f
+#define     ONFI_TIMING_MODE__VALUE			GENMASK(5, 0)
 
 #define ONFI_PGM_CACHE_TIMING_MODE		0x3b0
-#define     ONFI_PGM_CACHE_TIMING_MODE__VALUE		0x003f
+#define     ONFI_PGM_CACHE_TIMING_MODE__VALUE		GENMASK(5, 0)
 
 #define ONFI_DEVICE_NO_OF_LUNS			0x3c0
-#define     ONFI_DEVICE_NO_OF_LUNS__NO_OF_LUNS		0x00ff
-#define     ONFI_DEVICE_NO_OF_LUNS__ONFI_DEVICE		0x0100
+#define     ONFI_DEVICE_NO_OF_LUNS__NO_OF_LUNS		GENMASK(7, 0)
+#define     ONFI_DEVICE_NO_OF_LUNS__ONFI_DEVICE		BIT(8)
 
 #define ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_L	0x3d0
-#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_L__VALUE	0xffff
+#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_L__VALUE	GENMASK(15, 0)
 
 #define ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_U	0x3e0
-#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_U__VALUE	0xffff
-
-#define FEATURES					0x3f0
-#define     FEATURES__N_BANKS				0x0003
-#define     FEATURES__ECC_MAX_ERR			0x003c
-#define     FEATURES__DMA				0x0040
-#define     FEATURES__CMD_DMA				0x0080
-#define     FEATURES__PARTITION				0x0100
-#define     FEATURES__XDMA_SIDEBAND			0x0200
-#define     FEATURES__GPREG				0x0400
-#define     FEATURES__INDEX_ADDR			0x0800
+#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_U__VALUE	GENMASK(15, 0)
+
+#define FEATURES				0x3f0
+#define     FEATURES__N_BANKS				GENMASK(1, 0)
+#define     FEATURES__ECC_MAX_ERR			GENMASK(5, 2)
+#define     FEATURES__DMA				BIT(6)
+#define     FEATURES__CMD_DMA				BIT(7)
+#define     FEATURES__PARTITION				BIT(8)
+#define     FEATURES__XDMA_SIDEBAND			BIT(9)
+#define     FEATURES__GPREG				BIT(10)
+#define     FEATURES__INDEX_ADDR			BIT(11)
 
 #define TRANSFER_MODE				0x400
-#define     TRANSFER_MODE__VALUE			0x0003
+#define     TRANSFER_MODE__VALUE			GENMASK(1, 0)
 
-#define INTR_STATUS(__bank)	(0x410 + ((__bank) * 0x50))
-#define INTR_EN(__bank)		(0x420 + ((__bank) * 0x50))
+#define INTR_STATUS(bank)			(0x410 + (bank) * 0x50)
+#define INTR_EN(bank)				(0x420 + (bank) * 0x50)
 /* bit[1:0] is used differently depending on IP version */
-#define     INTR__ECC_UNCOR_ERR				0x0001	/* new IP */
-#define     INTR__ECC_TRANSACTION_DONE			0x0001	/* old IP */
-#define     INTR__ECC_ERR				0x0002	/* old IP */
-#define     INTR__DMA_CMD_COMP				0x0004
-#define     INTR__TIME_OUT				0x0008
-#define     INTR__PROGRAM_FAIL				0x0010
-#define     INTR__ERASE_FAIL				0x0020
-#define     INTR__LOAD_COMP				0x0040
-#define     INTR__PROGRAM_COMP				0x0080
-#define     INTR__ERASE_COMP				0x0100
-#define     INTR__PIPE_CPYBCK_CMD_COMP			0x0200
-#define     INTR__LOCKED_BLK				0x0400
-#define     INTR__UNSUP_CMD				0x0800
-#define     INTR__INT_ACT				0x1000
-#define     INTR__RST_COMP				0x2000
-#define     INTR__PIPE_CMD_ERR				0x4000
-#define     INTR__PAGE_XFER_INC				0x8000
-
-#define PAGE_CNT(__bank)	(0x430 + ((__bank) * 0x50))
-#define ERR_PAGE_ADDR(__bank)	(0x440 + ((__bank) * 0x50))
-#define ERR_BLOCK_ADDR(__bank)	(0x450 + ((__bank) * 0x50))
+#define     INTR__ECC_UNCOR_ERR				BIT(0)	/* new IP */
+#define     INTR__ECC_TRANSACTION_DONE			BIT(0)	/* old IP */
+#define     INTR__ECC_ERR				BIT(1)	/* old IP */
+#define     INTR__DMA_CMD_COMP				BIT(2)
+#define     INTR__TIME_OUT				BIT(3)
+#define     INTR__PROGRAM_FAIL				BIT(4)
+#define     INTR__ERASE_FAIL				BIT(5)
+#define     INTR__LOAD_COMP				BIT(6)
+#define     INTR__PROGRAM_COMP				BIT(7)
+#define     INTR__ERASE_COMP				BIT(8)
+#define     INTR__PIPE_CPYBCK_CMD_COMP			BIT(9)
+#define     INTR__LOCKED_BLK				BIT(10)
+#define     INTR__UNSUP_CMD				BIT(11)
+#define     INTR__INT_ACT				BIT(12)
+#define     INTR__RST_COMP				BIT(13)
+#define     INTR__PIPE_CMD_ERR				BIT(14)
+#define     INTR__PAGE_XFER_INC				BIT(15)
+
+#define PAGE_CNT(bank)				(0x430 + (bank) * 0x50)
+#define ERR_PAGE_ADDR(bank)			(0x440 + (bank) * 0x50)
+#define ERR_BLOCK_ADDR(bank)			(0x450 + (bank) * 0x50)
 
 #define ECC_THRESHOLD				0x600
-#define     ECC_THRESHOLD__VALUE			0x03ff
+#define     ECC_THRESHOLD__VALUE			GENMASK(9, 0)
 
 #define ECC_ERROR_BLOCK_ADDRESS			0x610
-#define     ECC_ERROR_BLOCK_ADDRESS__VALUE		0xffff
+#define     ECC_ERROR_BLOCK_ADDRESS__VALUE		GENMASK(15, 0)
 
 #define ECC_ERROR_PAGE_ADDRESS			0x620
-#define     ECC_ERROR_PAGE_ADDRESS__VALUE		0x0fff
-#define     ECC_ERROR_PAGE_ADDRESS__BANK		0xf000
+#define     ECC_ERROR_PAGE_ADDRESS__VALUE		GENMASK(11, 0)
+#define     ECC_ERROR_PAGE_ADDRESS__BANK		GENMASK(15, 12)
 
 #define ECC_ERROR_ADDRESS			0x630
-#define     ECC_ERROR_ADDRESS__OFFSET			0x0fff
-#define     ECC_ERROR_ADDRESS__SECTOR_NR		0xf000
+#define     ECC_ERROR_ADDRESS__OFFSET			GENMASK(11, 0)
+#define     ECC_ERROR_ADDRESS__SECTOR_NR		GENMASK(15, 12)
 
 #define ERR_CORRECTION_INFO			0x640
-#define     ERR_CORRECTION_INFO__BYTEMASK		0x00ff
-#define     ERR_CORRECTION_INFO__DEVICE_NR		0x0f00
-#define     ERR_CORRECTION_INFO__ERROR_TYPE		0x4000
-#define     ERR_CORRECTION_INFO__LAST_ERR_INFO		0x8000
+#define     ERR_CORRECTION_INFO__BYTEMASK		GENMASK(7, 0)
+#define     ERR_CORRECTION_INFO__DEVICE_NR		GENMASK(11, 8)
+#define     ERR_CORRECTION_INFO__ERROR_TYPE		BIT(14)
+#define     ERR_CORRECTION_INFO__LAST_ERR_INFO		BIT(15)
 
 #define ECC_COR_INFO(bank)			(0x650 + (bank) / 2 * 0x10)
 #define     ECC_COR_INFO__SHIFT(bank)			((bank) % 2 * 8)
-#define     ECC_COR_INFO__MAX_ERRORS			0x007f
-#define     ECC_COR_INFO__UNCOR_ERR			0x0080
+#define     ECC_COR_INFO__MAX_ERRORS			GENMASK(6, 0)
+#define     ECC_COR_INFO__UNCOR_ERR			BIT(7)
 
 #define DMA_ENABLE				0x700
-#define     DMA_ENABLE__FLAG				0x0001
+#define     DMA_ENABLE__FLAG				BIT(0)
 
 #define IGNORE_ECC_DONE				0x710
-#define     IGNORE_ECC_DONE__FLAG			0x0001
+#define     IGNORE_ECC_DONE__FLAG			BIT(0)
 
 #define DMA_INTR				0x720
 #define DMA_INTR_EN				0x730
-#define     DMA_INTR__TARGET_ERROR			0x0001
-#define     DMA_INTR__DESC_COMP_CHANNEL0		0x0002
-#define     DMA_INTR__DESC_COMP_CHANNEL1		0x0004
-#define     DMA_INTR__DESC_COMP_CHANNEL2		0x0008
-#define     DMA_INTR__DESC_COMP_CHANNEL3		0x0010
-#define     DMA_INTR__MEMCOPY_DESC_COMP			0x0020
+#define     DMA_INTR__TARGET_ERROR			BIT(0)
+#define     DMA_INTR__DESC_COMP_CHANNEL0		BIT(1)
+#define     DMA_INTR__DESC_COMP_CHANNEL1		BIT(2)
+#define     DMA_INTR__DESC_COMP_CHANNEL2		BIT(3)
+#define     DMA_INTR__DESC_COMP_CHANNEL3		BIT(4)
+#define     DMA_INTR__MEMCOPY_DESC_COMP			BIT(5)
 
 #define TARGET_ERR_ADDR_LO			0x740
-#define     TARGET_ERR_ADDR_LO__VALUE			0xffff
+#define     TARGET_ERR_ADDR_LO__VALUE			GENMASK(15, 0)
 
 #define TARGET_ERR_ADDR_HI			0x750
-#define     TARGET_ERR_ADDR_HI__VALUE			0xffff
+#define     TARGET_ERR_ADDR_HI__VALUE			GENMASK(15, 0)
 
 #define CHNL_ACTIVE				0x760
-#define     CHNL_ACTIVE__CHANNEL0			0x0001
-#define     CHNL_ACTIVE__CHANNEL1			0x0002
-#define     CHNL_ACTIVE__CHANNEL2			0x0004
-#define     CHNL_ACTIVE__CHANNEL3			0x0008
+#define     CHNL_ACTIVE__CHANNEL0			BIT(0)
+#define     CHNL_ACTIVE__CHANNEL1			BIT(1)
+#define     CHNL_ACTIVE__CHANNEL2			BIT(2)
+#define     CHNL_ACTIVE__CHANNEL3			BIT(3)
 
 #define FAIL 1                  /*failed flag*/
 #define PASS 0                  /*success flag*/
-- 
GitLab


From 2c8f8afa7f92acb07641bf95b940d384ed1d0294 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 7 Jun 2017 20:52:10 +0900
Subject: [PATCH 0219/1958] mtd: nand: add generic helpers to check, match,
 maximize ECC settings

Driver are responsible for setting up ECC parameters correctly.
Those include:
  - Check if ECC parameters specified (usually by DT) are valid
  - Meet the chip's ECC requirement
  - Maximize ECC strength if NAND_ECC_MAXIMIZE flag is set

The logic can be generalized by factoring out common code.

This commit adds 3 helpers to the NAND framework:
nand_check_ecc_caps - Check if preset step_size and strength are valid
nand_match_ecc_req - Match the chip's requirement
nand_maximize_ecc - Maximize the ECC strength

To use the helpers above, a driver needs to provide:
  - Data array of supported ECC step size and strength
  - A hook that calculates ECC bytes from the combination of
    step_size and strength.

By using those helpers, code duplication among drivers will be
reduced.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 220 +++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h     |  33 ++++++
 2 files changed, 253 insertions(+)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 2404bb046b69a..85edac9b2bb54 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -4523,6 +4523,226 @@ static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 	}
 }
 
+/**
+ * nand_check_ecc_caps - check the sanity of preset ECC settings
+ * @chip: nand chip info structure
+ * @caps: ECC caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * When ECC step size and strength are already set, check if they are supported
+ * by the controller and the calculated ECC bytes fit within the chip's OOB.
+ * On success, the calculated ECC bytes is set.
+ */
+int nand_check_ecc_caps(struct nand_chip *chip,
+			const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int preset_step = chip->ecc.size;
+	int preset_strength = chip->ecc.strength;
+	int nsteps, ecc_bytes;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	if (!preset_step || !preset_strength)
+		return -ENODATA;
+
+	nsteps = mtd->writesize / preset_step;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+
+		if (stepinfo->stepsize != preset_step)
+			continue;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			if (stepinfo->strengths[j] != preset_strength)
+				continue;
+
+			ecc_bytes = caps->calc_ecc_bytes(preset_step,
+							 preset_strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				return ecc_bytes;
+
+			if (ecc_bytes * nsteps > oobavail) {
+				pr_err("ECC (step, strength) = (%d, %d) does not fit in OOB",
+				       preset_step, preset_strength);
+				return -ENOSPC;
+			}
+
+			chip->ecc.bytes = ecc_bytes;
+
+			return 0;
+		}
+	}
+
+	pr_err("ECC (step, strength) = (%d, %d) not supported on this controller",
+	       preset_step, preset_strength);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(nand_check_ecc_caps);
+
+/**
+ * nand_match_ecc_req - meet the chip's requirement with least ECC bytes
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * If a chip's ECC requirement is provided, try to meet it with the least
+ * number of ECC bytes (i.e. with the largest number of OOB-free bytes).
+ * On success, the chosen ECC settings are set.
+ */
+int nand_match_ecc_req(struct nand_chip *chip,
+		       const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int req_step = chip->ecc_step_ds;
+	int req_strength = chip->ecc_strength_ds;
+	int req_corr, step_size, strength, nsteps, ecc_bytes, ecc_bytes_total;
+	int best_step, best_strength, best_ecc_bytes;
+	int best_ecc_bytes_total = INT_MAX;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	/* No information provided by the NAND chip */
+	if (!req_step || !req_strength)
+		return -ENOTSUPP;
+
+	/* number of correctable bits the chip requires in a page */
+	req_corr = mtd->writesize / req_step * req_strength;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+		step_size = stepinfo->stepsize;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			strength = stepinfo->strengths[j];
+
+			/*
+			 * If both step size and strength are smaller than the
+			 * chip's requirement, it is not easy to compare the
+			 * resulted reliability.
+			 */
+			if (step_size < req_step && strength < req_strength)
+				continue;
+
+			if (mtd->writesize % step_size)
+				continue;
+
+			nsteps = mtd->writesize / step_size;
+
+			ecc_bytes = caps->calc_ecc_bytes(step_size, strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				continue;
+			ecc_bytes_total = ecc_bytes * nsteps;
+
+			if (ecc_bytes_total > oobavail ||
+			    strength * nsteps < req_corr)
+				continue;
+
+			/*
+			 * We assume the best is to meet the chip's requrement
+			 * with the least number of ECC bytes.
+			 */
+			if (ecc_bytes_total < best_ecc_bytes_total) {
+				best_ecc_bytes_total = ecc_bytes_total;
+				best_step = step_size;
+				best_strength = strength;
+				best_ecc_bytes = ecc_bytes;
+			}
+		}
+	}
+
+	if (best_ecc_bytes_total == INT_MAX)
+		return -ENOTSUPP;
+
+	chip->ecc.size = best_step;
+	chip->ecc.strength = best_strength;
+	chip->ecc.bytes = best_ecc_bytes;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nand_match_ecc_req);
+
+/**
+ * nand_maximize_ecc - choose the max ECC strength available
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * Choose the max ECC strength that is supported on the controller, and can fit
+ * within the chip's OOB.  On success, the chosen ECC settings are set.
+ */
+int nand_maximize_ecc(struct nand_chip *chip,
+		      const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int step_size, strength, nsteps, ecc_bytes, corr;
+	int best_corr = 0;
+	int best_step = 0;
+	int best_strength, best_ecc_bytes;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+		step_size = stepinfo->stepsize;
+
+		/* If chip->ecc.size is already set, respect it */
+		if (chip->ecc.size && step_size != chip->ecc.size)
+			continue;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			strength = stepinfo->strengths[j];
+
+			if (mtd->writesize % step_size)
+				continue;
+
+			nsteps = mtd->writesize / step_size;
+
+			ecc_bytes = caps->calc_ecc_bytes(step_size, strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				continue;
+
+			if (ecc_bytes * nsteps > oobavail)
+				continue;
+
+			corr = strength * nsteps;
+
+			/*
+			 * If the number of correctable bits is the same,
+			 * bigger step_size has more reliability.
+			 */
+			if (corr > best_corr ||
+			    (corr == best_corr && step_size > best_step)) {
+				best_corr = corr;
+				best_step = step_size;
+				best_strength = strength;
+				best_ecc_bytes = ecc_bytes;
+			}
+		}
+	}
+
+	if (!best_corr)
+		return -ENOTSUPP;
+
+	chip->ecc.size = best_step;
+	chip->ecc.strength = best_strength;
+	chip->ecc.bytes = best_ecc_bytes;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nand_maximize_ecc);
+
 /*
  * Check if the chip configuration meet the datasheet requirements.
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 8b3607bde1b5d..568f53e812cd1 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -481,6 +481,30 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc)
 	init_waitqueue_head(&nfc->wq);
 }
 
+/**
+ * struct nand_ecc_step_info - ECC step information of ECC engine
+ * @stepsize: data bytes per ECC step
+ * @strengths: array of supported strengths
+ * @nstrengths: number of supported strengths
+ */
+struct nand_ecc_step_info {
+	int stepsize;
+	const int *strengths;
+	int nstrengths;
+};
+
+/**
+ * struct nand_ecc_caps - capability of ECC engine
+ * @stepinfos: array of ECC step information
+ * @nstepinfos: number of ECC step information
+ * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step
+ */
+struct nand_ecc_caps {
+	const struct nand_ecc_step_info *stepinfos;
+	int nstepinfos;
+	int (*calc_ecc_bytes)(int step_size, int strength);
+};
+
 /**
  * struct nand_ecc_ctrl - Control structure for ECC
  * @mode:	ECC mode
@@ -1246,6 +1270,15 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
 				void *extraoob, int extraooblen,
 				int threshold);
 
+int nand_check_ecc_caps(struct nand_chip *chip,
+			const struct nand_ecc_caps *caps, int oobavail);
+
+int nand_match_ecc_req(struct nand_chip *chip,
+		       const struct nand_ecc_caps *caps,  int oobavail);
+
+int nand_maximize_ecc(struct nand_chip *chip,
+		      const struct nand_ecc_caps *caps, int oobavail);
+
 /* Default write_oob implementation */
 int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 
-- 
GitLab


From a03c60178c181767ecfb26fb311a88742d228118 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 7 Jun 2017 20:52:11 +0900
Subject: [PATCH 0220/1958] mtd: nand: add a shorthand to generate
 nand_ecc_caps structure

struct nand_ecc_caps was designed as flexible as possible to support
multiple stepsizes (like sunxi_nand.c).

So, we need to write multiple arrays even for the simplest case.
I guess many controllers support a single stepsize, so here is a
shorthand macro for the case.

It allows to describe like ...

NAND_ECC_CAPS_SINGLE(denali_pci_ecc_caps, denali_calc_ecc_bytes, 512, 8, 15);

... instead of

static const int denali_pci_ecc_strengths[] = {8, 15};
static const struct nand_ecc_step_info denali_pci_ecc_stepinfo = {
        .stepsize = 512,
        .strengths = denali_pci_ecc_strengths,
        .nstrengths = ARRAY_SIZE(denali_pci_ecc_strengths),
};
static const struct nand_ecc_caps denali_pci_ecc_caps = {
        .stepinfos = &denali_pci_ecc_stepinfo,
        .nstepinfos = 1,
        .calc_ecc_bytes = denali_calc_ecc_bytes,
};

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 include/linux/mtd/nand.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 568f53e812cd1..dc8fbc0334423 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -505,6 +505,20 @@ struct nand_ecc_caps {
 	int (*calc_ecc_bytes)(int step_size, int strength);
 };
 
+/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */
+#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...)	\
+static const int __name##_strengths[] = { __VA_ARGS__ };	\
+static const struct nand_ecc_step_info __name##_stepinfo = {	\
+	.stepsize = __step,					\
+	.strengths = __name##_strengths,			\
+	.nstrengths = ARRAY_SIZE(__name##_strengths),		\
+};								\
+static const struct nand_ecc_caps __name = {			\
+	.stepinfos = &__name##_stepinfo,			\
+	.nstepinfos = 1,					\
+	.calc_ecc_bytes = __calc,				\
+}
+
 /**
  * struct nand_ecc_ctrl - Control structure for ECC
  * @mode:	ECC mode
-- 
GitLab


From 7de117fd5bfe0d84e50714ef5dcf5f3cec7f0eef Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 7 Jun 2017 20:52:12 +0900
Subject: [PATCH 0221/1958] mtd: nand: denali: avoid hard-coding ECC step,
 strength, bytes

This driver was originally written for the Intel MRST platform with
several platform-specific parameters hard-coded.

Currently, the ECC settings are hard-coded as follows:

  #define ECC_SECTOR_SIZE 512
  #define ECC_8BITS       14
  #define ECC_15BITS      26

Therefore, the driver can only support two cases.
 - ecc.size = 512, ecc.strength = 8    --> ecc.bytes = 14
 - ecc.size = 512, ecc.strength = 15   --> ecc.bytes = 26

However, these are actually customizable parameters, for example,
UniPhier platform supports the following:

 - ecc.size = 1024, ecc.strength = 8   --> ecc.bytes = 14
 - ecc.size = 1024, ecc.strength = 16  --> ecc.bytes = 28
 - ecc.size = 1024, ecc.strength = 24  --> ecc.bytes = 42

So, we need to handle the ECC parameters in a more generic manner.
Fortunately, the Denali User's Guide explains how to calculate the
ecc.bytes.  The formula is:

  ecc.bytes = 2 * CEIL(13 * ecc.strength / 16)  (for ecc.size = 512)
  ecc.bytes = 2 * CEIL(14 * ecc.strength / 16)  (for ecc.size = 1024)

For DT platforms, it would be reasonable to allow DT to specify ECC
strength by either "nand-ecc-strength" or "nand-ecc-maximize".  If
none of them is specified, the driver will try to meet the chip's ECC
requirement.

For PCI platforms, the max ECC strength is used to keep the original
behavior.

Newer versions of this IP need ecc.size and ecc.steps explicitly
set up via the following registers:
  CFG_DATA_BLOCK_SIZE       (0x6b0)
  CFG_LAST_DATA_BLOCK_SIZE  (0x6c0)
  CFG_NUM_DATA_BLOCKS       (0x6d0)

For older IP versions, write accesses to these registers are just
ignored.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 .../devicetree/bindings/mtd/denali-nand.txt   |  7 ++
 drivers/mtd/nand/denali.c                     | 87 +++++++++++--------
 drivers/mtd/nand/denali.h                     | 12 ++-
 drivers/mtd/nand/denali_dt.c                  |  5 ++
 drivers/mtd/nand/denali_pci.c                 |  4 +
 5 files changed, 78 insertions(+), 37 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index e593bbeb2115d..b7742a7363ea7 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -7,6 +7,13 @@ Required properties:
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
 
+Optional properties:
+  - nand-ecc-step-size: see nand.txt for details.  If present, the value must be
+      512        for "altr,socfpga-denali-nand"
+  - nand-ecc-strength: see nand.txt for details.  Valid values are:
+      8, 15      for "altr,socfpga-denali-nand"
+  - nand-ecc-maximize: see nand.txt for details
+
 The device tree may optionally contain sub-nodes describing partitions of the
 address space. See partition.txt for more detail.
 
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index b3c99d98fdee9..2d3d9875dfaab 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -886,8 +886,6 @@ static int denali_hw_ecc_fixup(struct mtd_info *mtd,
 	return max_bitflips;
 }
 
-#define ECC_SECTOR_SIZE 512
-
 #define ECC_SECTOR(x)	(((x) & ECC_ERROR_ADDRESS__SECTOR_NR) >> 12)
 #define ECC_BYTE(x)	(((x) & ECC_ERROR_ADDRESS__OFFSET))
 #define ECC_CORRECTION_VALUE(x) ((x) & ERR_CORRECTION_INFO__BYTEMASK)
@@ -899,6 +897,7 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 			       struct denali_nand_info *denali,
 			       unsigned long *uncor_ecc_flags, uint8_t *buf)
 {
+	unsigned int ecc_size = denali->nand.ecc.size;
 	unsigned int bitflips = 0;
 	unsigned int max_bitflips = 0;
 	uint32_t err_addr, err_cor_info;
@@ -928,9 +927,9 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 			 * an erased sector.
 			 */
 			*uncor_ecc_flags |= BIT(err_sector);
-		} else if (err_byte < ECC_SECTOR_SIZE) {
+		} else if (err_byte < ecc_size) {
 			/*
-			 * If err_byte is larger than ECC_SECTOR_SIZE, means error
+			 * If err_byte is larger than ecc_size, means error
 			 * happened in OOB, so we ignore it. It's no need for
 			 * us to correct it err_device is represented the NAND
 			 * error bits are happened in if there are more than
@@ -939,7 +938,7 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 			int offset;
 			unsigned int flips_in_byte;
 
-			offset = (err_sector * ECC_SECTOR_SIZE + err_byte) *
+			offset = (err_sector * ecc_size + err_byte) *
 						denali->devnum + err_device;
 
 			/* correct the ECC error */
@@ -1345,13 +1344,39 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	denali_irq_init(denali);
 }
 
-/*
- * Althogh controller spec said SLC ECC is forceb to be 4bit,
- * but denali controller in MRST only support 15bit and 8bit ECC
- * correction
- */
-#define ECC_8BITS	14
-#define ECC_15BITS	26
+int denali_calc_ecc_bytes(int step_size, int strength)
+{
+	/* BCH code.  Denali requires ecc.bytes to be multiple of 2 */
+	return DIV_ROUND_UP(strength * fls(step_size * 8), 16) * 2;
+}
+EXPORT_SYMBOL(denali_calc_ecc_bytes);
+
+static int denali_ecc_setup(struct mtd_info *mtd, struct nand_chip *chip,
+			    struct denali_nand_info *denali)
+{
+	int oobavail = mtd->oobsize - denali->bbtskipbytes;
+	int ret;
+
+	/*
+	 * If .size and .strength are already set (usually by DT),
+	 * check if they are supported by this controller.
+	 */
+	if (chip->ecc.size && chip->ecc.strength)
+		return nand_check_ecc_caps(chip, denali->ecc_caps, oobavail);
+
+	/*
+	 * We want .size and .strength closest to the chip's requirement
+	 * unless NAND_ECC_MAXIMIZE is requested.
+	 */
+	if (!(chip->ecc.options & NAND_ECC_MAXIMIZE)) {
+		ret = nand_match_ecc_req(chip, denali->ecc_caps, oobavail);
+		if (!ret)
+			return 0;
+	}
+
+	/* Max ECC strength is the last thing we can do */
+	return nand_maximize_ecc(chip, denali->ecc_caps, oobavail);
+}
 
 static int denali_ooblayout_ecc(struct mtd_info *mtd, int section,
 				struct mtd_oob_region *oobregion)
@@ -1588,34 +1613,26 @@ int denali_init(struct denali_nand_info *denali)
 	/* no subpage writes on denali */
 	chip->options |= NAND_NO_SUBPAGE_WRITE;
 
-	/*
-	 * Denali Controller only support 15bit and 8bit ECC in MRST,
-	 * so just let controller do 15bit ECC for MLC and 8bit ECC for
-	 * SLC if possible.
-	 * */
-	if (!nand_is_slc(chip) &&
-			(mtd->oobsize > (denali->bbtskipbytes +
-			ECC_15BITS * (mtd->writesize /
-			ECC_SECTOR_SIZE)))) {
-		/* if MLC OOB size is large enough, use 15bit ECC*/
-		chip->ecc.strength = 15;
-		chip->ecc.bytes = ECC_15BITS;
-		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
-	} else if (mtd->oobsize < (denali->bbtskipbytes +
-			ECC_8BITS * (mtd->writesize /
-			ECC_SECTOR_SIZE))) {
-		pr_err("Your NAND chip OOB is not large enough to contain 8bit ECC correction codes");
+	ret = denali_ecc_setup(mtd, chip, denali);
+	if (ret) {
+		dev_err(denali->dev, "Failed to setup ECC settings.\n");
 		goto failed_req_irq;
-	} else {
-		chip->ecc.strength = 8;
-		chip->ecc.bytes = ECC_8BITS;
-		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
 	}
 
+	dev_dbg(denali->dev,
+		"chosen ECC settings: step=%d, strength=%d, bytes=%d\n",
+		chip->ecc.size, chip->ecc.strength, chip->ecc.bytes);
+
+	iowrite32(chip->ecc.strength, denali->flash_reg + ECC_CORRECTION);
+
+	iowrite32(chip->ecc.size, denali->flash_reg + CFG_DATA_BLOCK_SIZE);
+	iowrite32(chip->ecc.size, denali->flash_reg + CFG_LAST_DATA_BLOCK_SIZE);
+	/* chip->ecc.steps is set by nand_scan_tail(); not available here */
+	iowrite32(mtd->writesize / chip->ecc.size,
+		  denali->flash_reg + CFG_NUM_DATA_BLOCKS);
+
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
 
-	/* override the default read operations */
-	chip->ecc.size = ECC_SECTOR_SIZE;
 	chip->ecc.read_page = denali_read_page;
 	chip->ecc.read_page_raw = denali_read_page_raw;
 	chip->ecc.write_page = denali_write_page;
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 37833535a7a38..a06ed741b550d 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -259,6 +259,14 @@
 #define     ECC_COR_INFO__MAX_ERRORS			GENMASK(6, 0)
 #define     ECC_COR_INFO__UNCOR_ERR			BIT(7)
 
+#define CFG_DATA_BLOCK_SIZE			0x6b0
+
+#define CFG_LAST_DATA_BLOCK_SIZE		0x6c0
+
+#define CFG_NUM_DATA_BLOCKS			0x6d0
+
+#define CFG_META_DATA_SIZE			0x6e0
+
 #define DMA_ENABLE				0x700
 #define     DMA_ENABLE__FLAG				BIT(0)
 
@@ -301,8 +309,6 @@
 #define MODE_10    0x08000000
 #define MODE_11    0x0C000000
 
-#define ECC_SECTOR_SIZE     512
-
 struct nand_buf {
 	int head;
 	int tail;
@@ -337,11 +343,13 @@ struct denali_nand_info {
 	int max_banks;
 	unsigned int revision;
 	unsigned int caps;
+	const struct nand_ecc_caps *ecc_caps;
 };
 
 #define DENALI_CAP_HW_ECC_FIXUP			BIT(0)
 #define DENALI_CAP_DMA_64BIT			BIT(1)
 
+int denali_calc_ecc_bytes(int step_size, int strength);
 extern int denali_init(struct denali_nand_info *denali);
 extern void denali_remove(struct denali_nand_info *denali);
 
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index b48430fe3cd44..bd1aa4cf4457e 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -32,10 +32,14 @@ struct denali_dt {
 struct denali_dt_data {
 	unsigned int revision;
 	unsigned int caps;
+	const struct nand_ecc_caps *ecc_caps;
 };
 
+NAND_ECC_CAPS_SINGLE(denali_socfpga_ecc_caps, denali_calc_ecc_bytes,
+		     512, 8, 15);
 static const struct denali_dt_data denali_socfpga_data = {
 	.caps = DENALI_CAP_HW_ECC_FIXUP,
+	.ecc_caps = &denali_socfpga_ecc_caps,
 };
 
 static const struct of_device_id denali_nand_dt_ids[] = {
@@ -64,6 +68,7 @@ static int denali_dt_probe(struct platform_device *pdev)
 	if (data) {
 		denali->revision = data->revision;
 		denali->caps = data->caps;
+		denali->ecc_caps = data->ecc_caps;
 	}
 
 	denali->platform = DT;
diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c
index ac843238b77e7..37dc0934c24c1 100644
--- a/drivers/mtd/nand/denali_pci.c
+++ b/drivers/mtd/nand/denali_pci.c
@@ -27,6 +27,8 @@ static const struct pci_device_id denali_pci_ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, denali_pci_ids);
 
+NAND_ECC_CAPS_SINGLE(denali_pci_ecc_caps, denali_calc_ecc_bytes, 512, 8, 15);
+
 static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	int ret;
@@ -65,6 +67,8 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	pci_set_master(dev);
 	denali->dev = &dev->dev;
 	denali->irq = dev->irq;
+	denali->ecc_caps = &denali_pci_ecc_caps;
+	denali->nand.ecc.options |= NAND_ECC_MAXIMIZE;
 
 	ret = pci_request_regions(dev, DENALI_NAND_NAME);
 	if (ret) {
-- 
GitLab


From 0615e7ad5d52594d12d67aa987d1fd98164e2f64 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 7 Jun 2017 20:52:13 +0900
Subject: [PATCH 0222/1958] mtd: nand: denali: remove Toshiba and Hynix
 specific fixup code

The Denali IP can automatically detect device parameters such as
page size, oob size, device width, etc. and this driver currently
relies on it.  However, this hardware function is known to be
problematic.

[1] Due to a hardware bug, various misdetected cases were reported.
    That is why get_toshiba_nand_para() and get_hynix_nand_para()
    exist to fix-up the misdetected parameters.  It is not realistic
    to add a new NAND device to the *black list* every time we are
    hit by a misdetected case.  We would never be able to guarantee
    that all cases are covered.

[2] Because this feature is unreliable, it is disabled on some
    platforms.

The nand_scan_ident() detects device parameters in a more tested
way.  The hardware should not set the device parameter registers in
a different, unreliable way.  Instead, set the parameters from the
nand_scan_ident() back to the registers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 40 ++++++---------------------------------
 1 file changed, 6 insertions(+), 34 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 2d3d9875dfaab..2b4618bb8d72f 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -337,36 +337,6 @@ static void get_samsung_nand_para(struct denali_nand_info *denali,
 	}
 }
 
-static void get_toshiba_nand_para(struct denali_nand_info *denali)
-{
-	/*
-	 * Workaround to fix a controller bug which reports a wrong
-	 * spare area size for some kind of Toshiba NAND device
-	 */
-	if ((ioread32(denali->flash_reg + DEVICE_MAIN_AREA_SIZE) == 4096) &&
-		(ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64))
-		iowrite32(216, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-}
-
-static void get_hynix_nand_para(struct denali_nand_info *denali,
-							uint8_t device_id)
-{
-	switch (device_id) {
-	case 0xD5: /* Hynix H27UAG8T2A, H27UBG8U5A or H27UCG8VFA */
-	case 0xD7: /* Hynix H27UDG8VEM, H27UCG8UDM or H27UCG8V5A */
-		iowrite32(128, denali->flash_reg + PAGES_PER_BLOCK);
-		iowrite32(4096, denali->flash_reg + DEVICE_MAIN_AREA_SIZE);
-		iowrite32(224, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		iowrite32(0, denali->flash_reg + DEVICE_WIDTH);
-		break;
-	default:
-		dev_warn(denali->dev,
-			 "Unknown Hynix NAND (Device ID: 0x%x).\n"
-			 "Will use default parameter values instead.\n",
-			 device_id);
-	}
-}
-
 /*
  * determines how many NAND chips are connected to the controller. Note for
  * Intel CE4100 devices we don't support more than one device.
@@ -453,10 +423,6 @@ static uint16_t denali_nand_timing_set(struct denali_nand_info *denali)
 			return FAIL;
 	} else if (maf_id == 0xEC) { /* Samsung NAND */
 		get_samsung_nand_para(denali, device_id);
-	} else if (maf_id == 0x98) { /* Toshiba NAND */
-		get_toshiba_nand_para(denali);
-	} else if (maf_id == 0xAD) { /* Hynix NAND */
-		get_hynix_nand_para(denali, device_id);
 	}
 
 	dev_info(denali->dev,
@@ -1624,6 +1590,12 @@ int denali_init(struct denali_nand_info *denali)
 		chip->ecc.size, chip->ecc.strength, chip->ecc.bytes);
 
 	iowrite32(chip->ecc.strength, denali->flash_reg + ECC_CORRECTION);
+	iowrite32(mtd->erasesize / mtd->writesize,
+		  denali->flash_reg + PAGES_PER_BLOCK);
+	iowrite32(chip->options & NAND_BUSWIDTH_16 ? 1 : 0,
+		  denali->flash_reg + DEVICE_WIDTH);
+	iowrite32(mtd->writesize, denali->flash_reg + DEVICE_MAIN_AREA_SIZE);
+	iowrite32(mtd->oobsize, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
 
 	iowrite32(chip->ecc.size, denali->flash_reg + CFG_DATA_BLOCK_SIZE);
 	iowrite32(chip->ecc.size, denali->flash_reg + CFG_LAST_DATA_BLOCK_SIZE);
-- 
GitLab


From 91300dd67baec4f046aa76e3a2e8222d15cc76e9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 7 Jun 2017 20:52:14 +0900
Subject: [PATCH 0223/1958] mtd: nand: denali_dt: add compatible strings for
 UniPhier SoC variants

Add two compatible strings for UniPhier SoC family.

"socionext,uniphier-denali-nand-v5a" is used on UniPhier sLD3, LD4,
Pro4, sLD8.

"socionext,uniphier-denali-nand-v5b" is used on UniPhier Pro5, PXs2,
LD6b, LD11, LD20.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 .../devicetree/bindings/mtd/denali-nand.txt   |  6 +++++
 drivers/mtd/nand/denali_dt.c                  | 25 +++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index b7742a7363ea7..504291d2e5c2e 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -3,6 +3,8 @@
 Required properties:
   - compatible : should be one of the following:
       "altr,socfpga-denali-nand"            - for Altera SOCFPGA
+      "socionext,uniphier-denali-nand-v5a"  - for Socionext UniPhier (v5a)
+      "socionext,uniphier-denali-nand-v5b"  - for Socionext UniPhier (v5b)
   - reg : should contain registers location and length for data and reg.
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
@@ -10,8 +12,12 @@ Required properties:
 Optional properties:
   - nand-ecc-step-size: see nand.txt for details.  If present, the value must be
       512        for "altr,socfpga-denali-nand"
+      1024       for "socionext,uniphier-denali-nand-v5a"
+      1024       for "socionext,uniphier-denali-nand-v5b"
   - nand-ecc-strength: see nand.txt for details.  Valid values are:
       8, 15      for "altr,socfpga-denali-nand"
+      8, 16, 24  for "socionext,uniphier-denali-nand-v5a"
+      8, 16      for "socionext,uniphier-denali-nand-v5b"
   - nand-ecc-maximize: see nand.txt for details
 
 The device tree may optionally contain sub-nodes describing partitions of the
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index bd1aa4cf4457e..be598230c108c 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -42,11 +42,36 @@ static const struct denali_dt_data denali_socfpga_data = {
 	.ecc_caps = &denali_socfpga_ecc_caps,
 };
 
+NAND_ECC_CAPS_SINGLE(denali_uniphier_v5a_ecc_caps, denali_calc_ecc_bytes,
+		     1024, 8, 16, 24);
+static const struct denali_dt_data denali_uniphier_v5a_data = {
+	.caps = DENALI_CAP_HW_ECC_FIXUP |
+		DENALI_CAP_DMA_64BIT,
+	.ecc_caps = &denali_uniphier_v5a_ecc_caps,
+};
+
+NAND_ECC_CAPS_SINGLE(denali_uniphier_v5b_ecc_caps, denali_calc_ecc_bytes,
+		     1024, 8, 16);
+static const struct denali_dt_data denali_uniphier_v5b_data = {
+	.revision = 0x0501,
+	.caps = DENALI_CAP_HW_ECC_FIXUP |
+		DENALI_CAP_DMA_64BIT,
+	.ecc_caps = &denali_uniphier_v5b_ecc_caps,
+};
+
 static const struct of_device_id denali_nand_dt_ids[] = {
 	{
 		.compatible = "altr,socfpga-denali-nand",
 		.data = &denali_socfpga_data,
 	},
+	{
+		.compatible = "socionext,uniphier-denali-nand-v5a",
+		.data = &denali_uniphier_v5a_data,
+	},
+	{
+		.compatible = "socionext,uniphier-denali-nand-v5b",
+		.data = &denali_uniphier_v5b_data,
+	},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
-- 
GitLab


From 0527eb372301bfd9b84160adb6c132b443ae3013 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 2 May 2017 19:35:37 +0530
Subject: [PATCH 0224/1958] pwm: tegra: Set maximum pwm clock source per SoC
 tapeout

The PWM hardware IP is taped-out with different maximum frequency
on different SoCs.

From HW team:

	Before Tegra186, it is 48 MHz.
	In Tegra186, it is 102 MHz.

Add support to limit the clock source frequency to the maximum IP
supported frequency. Provide these values via SoC chipdata.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-tegra.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index 8c6ed556db28a..e9b33f09ff096 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -41,6 +41,9 @@
 
 struct tegra_pwm_soc {
 	unsigned int num_channels;
+
+	/* Maximum IP frequency for given SoCs */
+	unsigned long max_frequency;
 };
 
 struct tegra_pwm_chip {
@@ -201,7 +204,18 @@ static int tegra_pwm_probe(struct platform_device *pdev)
 	if (IS_ERR(pwm->clk))
 		return PTR_ERR(pwm->clk);
 
-	/* Read PWM clock rate from source */
+	/* Set maximum frequency of the IP */
+	ret = clk_set_rate(pwm->clk, pwm->soc->max_frequency);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to set max frequency: %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * The requested and configured frequency may differ due to
+	 * clock register resolutions. Get the configured frequency
+	 * so that PWM period can be calculated more accurately.
+	 */
 	pwm->clk_rate = clk_get_rate(pwm->clk);
 
 	pwm->rst = devm_reset_control_get(&pdev->dev, "pwm");
@@ -273,10 +287,12 @@ static int tegra_pwm_resume(struct device *dev)
 
 static const struct tegra_pwm_soc tegra20_pwm_soc = {
 	.num_channels = 4,
+	.max_frequency = 48000000UL,
 };
 
 static const struct tegra_pwm_soc tegra186_pwm_soc = {
 	.num_channels = 1,
+	.max_frequency = 102000000UL,
 };
 
 static const struct of_device_id tegra_pwm_of_match[] = {
-- 
GitLab


From a247f7236d06f504e57637b8ec124fc1af226d08 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Jun 2017 14:59:14 +0200
Subject: [PATCH 0225/1958] udf: Remove unused UDF_DEFAULT_BLOCKSIZE

The define is unused. Remove it.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/super.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 14b4bc1f68013..462ac2e9258c9 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -73,8 +73,6 @@
 #define VDS_POS_TERMINATING_DESC	6
 #define VDS_POS_LENGTH			7
 
-#define UDF_DEFAULT_BLOCKSIZE 2048
-
 #define VSD_FIRST_SECTOR_OFFSET		32768
 #define VSD_MAX_SECTOR_OFFSET		0x800000
 
-- 
GitLab


From d690694be93332cf7a91a6024b0598601f5f993a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 13 Jun 2017 14:58:42 +0200
Subject: [PATCH 0226/1958] mtd: nand: atmel: drop unused include

The Atmel NAND driver doesn't used anything from
linux/platform_data/atmel.h, stop including it.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/nand-controller.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index d24e67b951671..d922a88e407f1 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -65,7 +65,6 @@
 #include <linux/of_platform.h>
 #include <linux/iopoll.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/atmel.h>
 #include <linux/regmap.h>
 
 #include "pmecc.h"
-- 
GitLab


From 7d57e5e933674c116e46f30eee7629c20f9073da Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 13 Jun 2017 09:22:57 -0600
Subject: [PATCH 0227/1958] vfio/pci: Add Intel XXV710 to hidden INTx devices

XXV710 has the same broken INTx behavior as the rest of the X/XL710
series, the interrupt status register is not wired to report pending
INTx interrupts, thus we never associate the interrupt to the device.
Extend the device IDs to include these so that we hide that the
device supports INTx at all to the user.

Reported-by: Stefan Assmann <sassmann@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
 drivers/vfio/pci/vfio_pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 324c52e3a1a47..063c1ce6fa422 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -195,11 +195,11 @@ static bool vfio_pci_nointx(struct pci_dev *pdev)
 	switch (pdev->vendor) {
 	case PCI_VENDOR_ID_INTEL:
 		switch (pdev->device) {
-		/* All i40e (XL710/X710) 10/20/40GbE NICs */
+		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
 		case 0x1572:
 		case 0x1574:
 		case 0x1580 ... 0x1581:
-		case 0x1583 ... 0x1589:
+		case 0x1583 ... 0x158b:
 		case 0x37d0 ... 0x37d2:
 			return true;
 		default:
-- 
GitLab


From 7b3a10df1d4bd8a83934897442370221b4cd631b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 18 May 2017 10:34:31 +0300
Subject: [PATCH 0228/1958] vfio: Use ERR_CAST() instead of open coding it

It's a small cleanup to use ERR_CAST() here.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 561084ab387f3..6a49485eb49da 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -382,7 +382,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 	if (IS_ERR(dev)) {
 		vfio_free_group_minor(minor);
 		vfio_group_unlock_and_free(group);
-		return (struct vfio_group *)dev; /* ERR_PTR */
+		return ERR_CAST(dev);
 	}
 
 	group->minor = minor;
-- 
GitLab


From 1eeef2d7483a7e3f8d2dd2a5b9939b3b814dc549 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Fri, 9 Jun 2017 15:58:31 +1200
Subject: [PATCH 0229/1958] mtd: handle partitioning on devices with 0
 erasesize

erasesize is meaningful for flash devices but for SRAM there is no
concept of an erase block so erasesize is set to 0. When partitioning
these devices instead of ensuring partitions fall on erasesize
boundaries we ensure they fall on writesize boundaries.

Helped-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index ea5e5307f667f..2e152e53ace0f 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -393,8 +393,12 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 			const struct mtd_partition *part, int partno,
 			uint64_t cur_offset)
 {
+	int wr_alignment = (master->flags & MTD_NO_ERASE) ? master->writesize:
+							    master->erasesize;
 	struct mtd_part *slave;
+	u32 remainder;
 	char *name;
+	u64 tmp;
 
 	/* allocate the partition structure */
 	slave = kzalloc(sizeof(*slave), GFP_KERNEL);
@@ -499,10 +503,11 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	if (slave->offset == MTDPART_OFS_APPEND)
 		slave->offset = cur_offset;
 	if (slave->offset == MTDPART_OFS_NXTBLK) {
+		tmp = cur_offset;
 		slave->offset = cur_offset;
-		if (mtd_mod_by_eb(cur_offset, master) != 0) {
-			/* Round up to next erasesize */
-			slave->offset = (mtd_div_by_eb(cur_offset, master) + 1) * master->erasesize;
+		remainder = do_div(tmp, wr_alignment);
+		if (remainder) {
+			slave->offset += wr_alignment - remainder;
 			printk(KERN_NOTICE "Moving partition %d: "
 			       "0x%012llx -> 0x%012llx\n", partno,
 			       (unsigned long long)cur_offset, (unsigned long long)slave->offset);
@@ -567,19 +572,22 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 		slave->mtd.erasesize = master->erasesize;
 	}
 
-	if ((slave->mtd.flags & MTD_WRITEABLE) &&
-	    mtd_mod_by_eb(slave->offset, &slave->mtd)) {
+	tmp = slave->offset;
+	remainder = do_div(tmp, wr_alignment);
+	if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
 		/* Doesn't start on a boundary of major erase size */
 		/* FIXME: Let it be writable if it is on a boundary of
 		 * _minor_ erase size though */
 		slave->mtd.flags &= ~MTD_WRITEABLE;
-		printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase block boundary -- force read-only\n",
+		printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase/write block boundary -- force read-only\n",
 			part->name);
 	}
-	if ((slave->mtd.flags & MTD_WRITEABLE) &&
-	    mtd_mod_by_eb(slave->mtd.size, &slave->mtd)) {
+
+	tmp = slave->mtd.size;
+	remainder = do_div(tmp, wr_alignment);
+	if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
 		slave->mtd.flags &= ~MTD_WRITEABLE;
-		printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n",
+		printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase/write block -- force read-only\n",
 			part->name);
 	}
 
-- 
GitLab


From 9795e0e8ac0d6a3ee092f1b555b284b57feef99e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Jun 2017 15:54:58 +0200
Subject: [PATCH 0230/1958] udf: Fix races with i_size changes during readpage

__udf_adinicb_readpage() uses i_size several times. When truncate
changes i_size while the function is running, it can observe several
different values and thus e.g. expose uninitialized parts of page to
userspace. Also use i_size_read() in the function since it does not hold
inode_lock. Since i_size is guaranteed to be small, this cannot really
cause any issues even on 32-bit archs but let's be careful.

CC: stable@vger.kernel.org
Fixes: 9c2fc0de1a6e638fe58c354a463f544f42a90a09
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/file.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/udf/file.c b/fs/udf/file.c
index f5eb2d5b3bac8..e06d2c15749a5 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -43,10 +43,15 @@ static void __udf_adinicb_readpage(struct page *page)
 	struct inode *inode = page->mapping->host;
 	char *kaddr;
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t isize = i_size_read(inode);
 
+	/*
+	 * We have to be careful here as truncate can change i_size under us.
+	 * So just sample it once and use the same value everywhere.
+	 */
 	kaddr = kmap_atomic(page);
-	memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
-	memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size);
+	memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, isize);
+	memset(kaddr + isize, 0, PAGE_SIZE - isize);
 	flush_dcache_page(page);
 	SetPageUptodate(page);
 	kunmap_atomic(kaddr);
-- 
GitLab


From 146c4ad6ec034a1fbfbf90ba76cb69b6504c374a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Jun 2017 16:08:52 +0200
Subject: [PATCH 0231/1958] udf: Use i_size_read() in udf_adinicb_writepage()

We don't hold inode_lock in udf_adinicb_writepage() so use i_size_read()
to get i_size. This cannot cause real problems is i_size is guaranteed
to be small but let's be careful.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/udf/file.c b/fs/udf/file.c
index e06d2c15749a5..356c2bf148a5d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -76,7 +76,8 @@ static int udf_adinicb_writepage(struct page *page,
 	BUG_ON(!PageLocked(page));
 
 	kaddr = kmap_atomic(page);
-	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
+	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
+		i_size_read(inode));
 	SetPageUptodate(page);
 	kunmap_atomic(kaddr);
 	mark_inode_dirty(inode);
-- 
GitLab


From f2e95355891153f66d4156bf3a142c6489cd78c6 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Jun 2017 16:20:25 +0200
Subject: [PATCH 0232/1958] udf: Fix deadlock between writeback and
 udf_setsize()

udf_setsize() called truncate_setsize() with i_data_sem held. Thus
truncate_pagecache() called from truncate_setsize() could lock a page
under i_data_sem which can deadlock as page lock ranks below
i_data_sem - e. g. writeback can hold page lock and try to acquire
i_data_sem to map a block.

Fix the problem by moving truncate_setsize() calls from under
i_data_sem. It is safe for us to change i_size without holding
i_data_sem as all the places that depend on i_size being stable already
hold inode_lock.

CC: stable@vger.kernel.org
Fixes: 7e49b6f2480cb9a9e7322a91592e56a5c85361f5
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 98c510e172032..18fdb9d90812a 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1222,8 +1222,8 @@ int udf_setsize(struct inode *inode, loff_t newsize)
 			return err;
 		}
 set_size:
-		truncate_setsize(inode, newsize);
 		up_write(&iinfo->i_data_sem);
+		truncate_setsize(inode, newsize);
 	} else {
 		if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
 			down_write(&iinfo->i_data_sem);
@@ -1240,9 +1240,9 @@ int udf_setsize(struct inode *inode, loff_t newsize)
 					  udf_get_block);
 		if (err)
 			return err;
+		truncate_setsize(inode, newsize);
 		down_write(&iinfo->i_data_sem);
 		udf_clear_extent_cache(inode);
-		truncate_setsize(inode, newsize);
 		udf_truncate_extents(inode);
 		up_write(&iinfo->i_data_sem);
 	}
-- 
GitLab


From 3c399fa40fd13c5749386695e71f5f747a634f21 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Jun 2017 09:51:20 +0200
Subject: [PATCH 0233/1958] udf: Use time64_to_tm for timestamp conversion

UDF on-disk time stamp is stored in a form very similar to struct tm.
Use time64_to_tm() for conversion of seconds since epoch to year, month,
... format and then just copy this as necessary to UDF on-disk
structure to simplify the code.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/udftime.c | 45 +++++++++++----------------------------------
 1 file changed, 11 insertions(+), 34 deletions(-)

diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index 77c331f1a7704..b9dadc7e5c357 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -38,6 +38,7 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/time.h>
 
 #define EPOCH_YEAR 1970
 
@@ -81,9 +82,6 @@ static time_t year_seconds[MAX_YEAR_SECONDS] = {
 /*2038*/ SPY(68, 17, 0)
 };
 
-#define SECS_PER_HOUR	(60 * 60)
-#define SECS_PER_DAY	(SECS_PER_HOUR * 24)
-
 struct timespec *
 udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 {
@@ -119,9 +117,9 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 struct timestamp *
 udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts)
 {
-	long int days, rem, y;
-	const unsigned short int *ip;
+	long seconds;
 	int16_t offset;
+	struct tm tm;
 
 	offset = -sys_tz.tz_minuteswest;
 
@@ -130,35 +128,14 @@ udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts)
 
 	dest->typeAndTimezone = cpu_to_le16(0x1000 | (offset & 0x0FFF));
 
-	ts.tv_sec += offset * 60;
-	days = ts.tv_sec / SECS_PER_DAY;
-	rem = ts.tv_sec % SECS_PER_DAY;
-	dest->hour = rem / SECS_PER_HOUR;
-	rem %= SECS_PER_HOUR;
-	dest->minute = rem / 60;
-	dest->second = rem % 60;
-	y = 1970;
-
-#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
-#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
-
-	while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
-		long int yg = y + days / 365 - (days % 365 < 0);
-
-		/* Adjust DAYS and Y to match the guessed year.  */
-		days -= ((yg - y) * 365
-			 + LEAPS_THRU_END_OF(yg - 1)
-			 - LEAPS_THRU_END_OF(y - 1));
-		y = yg;
-	}
-	dest->year = cpu_to_le16(y);
-	ip = __mon_yday[__isleap(y)];
-	for (y = 11; days < (long int)ip[y]; --y)
-		continue;
-	days -= ip[y];
-	dest->month = y + 1;
-	dest->day = days + 1;
-
+	seconds = ts.tv_sec + offset * 60;
+	time64_to_tm(seconds, 0, &tm);
+	dest->year = cpu_to_le16(tm.tm_year + 1900);
+	dest->month = tm.tm_mon + 1;
+	dest->day = tm.tm_mday;
+	dest->hour = tm.tm_hour;
+	dest->minute = tm.tm_min;
+	dest->second = tm.tm_sec;
 	dest->centiseconds = ts.tv_nsec / 10000000;
 	dest->hundredsOfMicroseconds = (ts.tv_nsec / 1000 -
 					dest->centiseconds * 10000) / 100;
-- 
GitLab


From fd3cfad374d452ebe0a2f87f37c2ceeea7e0d134 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Jun 2017 10:42:48 +0200
Subject: [PATCH 0234/1958] udf: Convert udf_disk_stamp_to_time() to use
 mktime64()

Convert udf_disk_stamp_to_time() to use mktime64() to simplify the code.
As a bonus we get working timestamp conversion for dates before epoch
and after 2038 (both of which are allowed by UDF standard).

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/udftime.c | 53 ++----------------------------------------------
 1 file changed, 2 insertions(+), 51 deletions(-)

diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index b9dadc7e5c357..14626b34d13e4 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -40,52 +40,9 @@
 #include <linux/kernel.h>
 #include <linux/time.h>
 
-#define EPOCH_YEAR 1970
-
-#ifndef __isleap
-/* Nonzero if YEAR is a leap year (every 4 years,
-   except every 100th isn't, and every 400th is).  */
-#define	__isleap(year)	\
-  ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
-#endif
-
-/* How many days come before each month (0-12).  */
-static const unsigned short int __mon_yday[2][13] = {
-	/* Normal years.  */
-	{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
-	/* Leap years.  */
-	{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
-};
-
-#define MAX_YEAR_SECONDS	69
-#define SPD			0x15180	/*3600*24 */
-#define SPY(y, l, s)		(SPD * (365 * y + l) + s)
-
-static time_t year_seconds[MAX_YEAR_SECONDS] = {
-/*1970*/ SPY(0,   0, 0), SPY(1,   0, 0), SPY(2,   0, 0), SPY(3,   1, 0),
-/*1974*/ SPY(4,   1, 0), SPY(5,   1, 0), SPY(6,   1, 0), SPY(7,   2, 0),
-/*1978*/ SPY(8,   2, 0), SPY(9,   2, 0), SPY(10,  2, 0), SPY(11,  3, 0),
-/*1982*/ SPY(12,  3, 0), SPY(13,  3, 0), SPY(14,  3, 0), SPY(15,  4, 0),
-/*1986*/ SPY(16,  4, 0), SPY(17,  4, 0), SPY(18,  4, 0), SPY(19,  5, 0),
-/*1990*/ SPY(20,  5, 0), SPY(21,  5, 0), SPY(22,  5, 0), SPY(23,  6, 0),
-/*1994*/ SPY(24,  6, 0), SPY(25,  6, 0), SPY(26,  6, 0), SPY(27,  7, 0),
-/*1998*/ SPY(28,  7, 0), SPY(29,  7, 0), SPY(30,  7, 0), SPY(31,  8, 0),
-/*2002*/ SPY(32,  8, 0), SPY(33,  8, 0), SPY(34,  8, 0), SPY(35,  9, 0),
-/*2006*/ SPY(36,  9, 0), SPY(37,  9, 0), SPY(38,  9, 0), SPY(39, 10, 0),
-/*2010*/ SPY(40, 10, 0), SPY(41, 10, 0), SPY(42, 10, 0), SPY(43, 11, 0),
-/*2014*/ SPY(44, 11, 0), SPY(45, 11, 0), SPY(46, 11, 0), SPY(47, 12, 0),
-/*2018*/ SPY(48, 12, 0), SPY(49, 12, 0), SPY(50, 12, 0), SPY(51, 13, 0),
-/*2022*/ SPY(52, 13, 0), SPY(53, 13, 0), SPY(54, 13, 0), SPY(55, 14, 0),
-/*2026*/ SPY(56, 14, 0), SPY(57, 14, 0), SPY(58, 14, 0), SPY(59, 15, 0),
-/*2030*/ SPY(60, 15, 0), SPY(61, 15, 0), SPY(62, 15, 0), SPY(63, 16, 0),
-/*2034*/ SPY(64, 16, 0), SPY(65, 16, 0), SPY(66, 16, 0), SPY(67, 17, 0),
-/*2038*/ SPY(68, 17, 0)
-};
-
 struct timespec *
 udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 {
-	int yday;
 	u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone);
 	u16 year = le16_to_cpu(src.year);
 	uint8_t type = typeAndTimezone >> 12;
@@ -100,15 +57,9 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 	} else
 		offset = 0;
 
-	if ((year < EPOCH_YEAR) ||
-	    (year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) {
-		return NULL;
-	}
-	dest->tv_sec = year_seconds[year - EPOCH_YEAR];
+	dest->tv_sec = mktime64(year, src.month, src.day, src.hour, src.minute,
+			src.second);
 	dest->tv_sec -= offset * 60;
-
-	yday = ((__mon_yday[__isleap(year)][src.month - 1]) + src.day - 1);
-	dest->tv_sec += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second;
 	dest->tv_nsec = 1000 * (src.centiseconds * 10000 +
 			src.hundredsOfMicroseconds * 100 + src.microseconds);
 	return dest;
-- 
GitLab


From 08cfe9f08109ad7ebe2248d483c683adbb1c9448 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 14 Jun 2017 17:40:55 +0200
Subject: [PATCH 0235/1958] mdacon: align code in mda_detect properly

This is just a whitespace cleanup. The code was a mess having multiple
commands on one line like:
scr_writew(0xAA55, p); if (scr_readw(p) == 0xAA55) count++;

Indent that properly and make it nicer for reading.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/console/mdacon.c | 60 ++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c
index ec192a1bf2972..424af6bdb4089 100644
--- a/drivers/video/console/mdacon.c
+++ b/drivers/video/console/mdacon.c
@@ -208,10 +208,17 @@ static int mda_detect(void)
 	p = (u16 *) mda_vram_base;
 	q = (u16 *) (mda_vram_base + 0x01000);
 
-	p_save = scr_readw(p); q_save = scr_readw(q);
+	p_save = scr_readw(p);
+	q_save = scr_readw(q);
+
+	scr_writew(0xAA55, p);
+	if (scr_readw(p) == 0xAA55)
+		count++;
+
+	scr_writew(0x55AA, p);
+	if (scr_readw(p) == 0x55AA)
+		count++;
 
-	scr_writew(0xAA55, p); if (scr_readw(p) == 0xAA55) count++;
-	scr_writew(0x55AA, p); if (scr_readw(p) == 0x55AA) count++;
 	scr_writew(p_save, p);
 
 	if (count != 2) {
@@ -220,13 +227,18 @@ static int mda_detect(void)
 
 	/* check if we have 4K or 8K */
 
-	scr_writew(0xA55A, q); scr_writew(0x0000, p);
-	if (scr_readw(q) == 0xA55A) count++;
+	scr_writew(0xA55A, q);
+	scr_writew(0x0000, p);
+	if (scr_readw(q) == 0xA55A)
+		count++;
 	
-	scr_writew(0x5AA5, q); scr_writew(0x0000, p);
-	if (scr_readw(q) == 0x5AA5) count++;
+	scr_writew(0x5AA5, q);
+	scr_writew(0x0000, p);
+	if (scr_readw(q) == 0x5AA5)
+		count++;
 
-	scr_writew(p_save, p); scr_writew(q_save, q);
+	scr_writew(p_save, p);
+	scr_writew(q_save, q);
 	
 	if (count == 4) {
 		mda_vram_len = 0x02000;
@@ -240,14 +252,12 @@ static int mda_detect(void)
 	/* Edward: These two mess `tests' mess up my cursor on bootup */
 
 	/* cursor low register */
-	if (! test_mda_b(0x66, 0x0f)) {
+	if (!test_mda_b(0x66, 0x0f))
 		return 0;
-	}
 
 	/* cursor low register */
-	if (! test_mda_b(0x99, 0x0f)) {
+	if (!test_mda_b(0x99, 0x0f))
 		return 0;
-	}
 #endif
 
 	/* See if the card is a Hercules, by checking whether the vsync
@@ -257,25 +267,25 @@ static int mda_detect(void)
 	
 	p_save = q_save = inb_p(mda_status_port) & MDA_STATUS_VSYNC;
 
-	for (count=0; count < 50000 && p_save == q_save; count++) {
+	for (count = 0; count < 50000 && p_save == q_save; count++) {
 		q_save = inb(mda_status_port) & MDA_STATUS_VSYNC;
 		udelay(2);
 	}
 
 	if (p_save != q_save) {
 		switch (inb_p(mda_status_port) & 0x70) {
-			case 0x10:
-				mda_type = TYPE_HERCPLUS;
-				mda_type_name = "HerculesPlus";
-				break;
-			case 0x50:
-				mda_type = TYPE_HERCCOLOR;
-				mda_type_name = "HerculesColor";
-				break;
-			default:
-				mda_type = TYPE_HERC;
-				mda_type_name = "Hercules";
-				break;
+		case 0x10:
+			mda_type = TYPE_HERCPLUS;
+			mda_type_name = "HerculesPlus";
+			break;
+		case 0x50:
+			mda_type = TYPE_HERCCOLOR;
+			mda_type_name = "HerculesColor";
+			break;
+		default:
+			mda_type = TYPE_HERC;
+			mda_type_name = "Hercules";
+			break;
 		}
 	}
 
-- 
GitLab


From 734f13b11d6fcc4947e75f0e11baf29cb3e161eb Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 14 Jun 2017 17:40:56 +0200
Subject: [PATCH 0236/1958] mdacon: make mda_vram_base u16 *

Given every user of mda_vram_base expects a pointer, let
mda_vram_base be a pointer to u16.

The offset calculation in mda_detect had to be adjusted by / 2 (due to
different pointer arithmetic now).

We introduce a cast to a value returned from VGA_MAP_MEM. But I will
change VGA_MAP_MEM to return a pointer later too. But vgacon needs a
similar change first.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/console/mdacon.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c
index 424af6bdb4089..3ec2b178bb880 100644
--- a/drivers/video/console/mdacon.c
+++ b/drivers/video/console/mdacon.c
@@ -48,7 +48,7 @@ static DEFINE_SPINLOCK(mda_lock);
 
 /* description of the hardware layout */
 
-static unsigned long	mda_vram_base;		/* Base of video memory */
+static u16		*mda_vram_base;		/* Base of video memory */
 static unsigned long	mda_vram_len;		/* Size of video memory */
 static unsigned int	mda_num_columns;	/* Number of text columns */
 static unsigned int	mda_num_lines;		/* Number of text lines */
@@ -205,8 +205,8 @@ static int mda_detect(void)
 
 	/* do a memory check */
 
-	p = (u16 *) mda_vram_base;
-	q = (u16 *) (mda_vram_base + 0x01000);
+	p = mda_vram_base;
+	q = mda_vram_base + 0x01000 / 2;
 
 	p_save = scr_readw(p);
 	q_save = scr_readw(q);
@@ -323,7 +323,7 @@ static const char *mdacon_startup(void)
 	mda_num_lines   = 25;
 
 	mda_vram_len  = 0x01000;
-	mda_vram_base = VGA_MAP_MEM(0xb0000, mda_vram_len);
+	mda_vram_base = (u16 *)VGA_MAP_MEM(0xb0000, mda_vram_len);
 
 	mda_index_port  = 0x3b4;
 	mda_value_port  = 0x3b5;
@@ -420,7 +420,7 @@ static void mdacon_invert_region(struct vc_data *c, u16 *p, int count)
 	}
 }
 
-#define MDA_ADDR(x,y)  ((u16 *) mda_vram_base + (y)*mda_num_columns + (x))
+#define MDA_ADDR(x, y)  (mda_vram_base + (y)*mda_num_columns + (x))
 
 static void mdacon_putc(struct vc_data *c, int ch, int y, int x)
 {
@@ -463,7 +463,7 @@ static int mdacon_blank(struct vc_data *c, int blank, int mode_switch)
 {
 	if (mda_type == TYPE_MDA) {
 		if (blank) 
-			scr_memsetw((void *)mda_vram_base, 
+			scr_memsetw(mda_vram_base,
 				mda_convert_attr(c->vc_video_erase_char),
 				c->vc_screenbuf_size);
 		/* Tell console.c that it has to restore the screen itself */
-- 
GitLab


From 927f7a17eb2ecc9b5a3b31a297f08f221b1f531c Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 14 Jun 2017 17:40:56 +0200
Subject: [PATCH 0237/1958] mdacon: replace MDA_ADDR macro by inline function

MDA_ADDR is one of those macros which could be an inline function. So
convert MDA_ADDR to mda_addr.

Note that we take x and y as unsigned now. But they are absolute
coordinates, so this is no problem.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/console/mdacon.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c
index 3ec2b178bb880..d0d427a2f1a3e 100644
--- a/drivers/video/console/mdacon.c
+++ b/drivers/video/console/mdacon.c
@@ -420,17 +420,20 @@ static void mdacon_invert_region(struct vc_data *c, u16 *p, int count)
 	}
 }
 
-#define MDA_ADDR(x, y)  (mda_vram_base + (y)*mda_num_columns + (x))
+static inline u16 *mda_addr(unsigned int x, unsigned int y)
+{
+	return mda_vram_base + y * mda_num_columns + x;
+}
 
 static void mdacon_putc(struct vc_data *c, int ch, int y, int x)
 {
-	scr_writew(mda_convert_attr(ch), MDA_ADDR(x, y));
+	scr_writew(mda_convert_attr(ch), mda_addr(x, y));
 }
 
 static void mdacon_putcs(struct vc_data *c, const unsigned short *s,
 		         int count, int y, int x)
 {
-	u16 *dest = MDA_ADDR(x, y);
+	u16 *dest = mda_addr(x, y);
 
 	for (; count > 0; count--) {
 		scr_writew(mda_convert_attr(scr_readw(s++)), dest++);
@@ -440,7 +443,7 @@ static void mdacon_putcs(struct vc_data *c, const unsigned short *s,
 static void mdacon_clear(struct vc_data *c, int y, int x, 
 			  int height, int width)
 {
-	u16 *dest = MDA_ADDR(x, y);
+	u16 *dest = mda_addr(x, y);
 	u16 eattr = mda_convert_attr(c->vc_video_erase_char);
 
 	if (width <= 0 || height <= 0)
@@ -512,16 +515,16 @@ static bool mdacon_scroll(struct vc_data *c, unsigned int t, unsigned int b,
 	switch (dir) {
 
 	case SM_UP:
-		scr_memmovew(MDA_ADDR(0,t), MDA_ADDR(0,t+lines),
+		scr_memmovew(mda_addr(0, t), mda_addr(0, t + lines),
 				(b-t-lines)*mda_num_columns*2);
-		scr_memsetw(MDA_ADDR(0,b-lines), eattr,
+		scr_memsetw(mda_addr(0, b - lines), eattr,
 				lines*mda_num_columns*2);
 		break;
 
 	case SM_DOWN:
-		scr_memmovew(MDA_ADDR(0,t+lines), MDA_ADDR(0,t),
+		scr_memmovew(mda_addr(0, t + lines), mda_addr(0, t),
 				(b-t-lines)*mda_num_columns*2);
-		scr_memsetw(MDA_ADDR(0,t), eattr, lines*mda_num_columns*2);
+		scr_memsetw(mda_addr(0, t), eattr, lines*mda_num_columns*2);
 		break;
 	}
 
-- 
GitLab


From ac5b52d2a2980fe34474e23037b02485c61538a3 Mon Sep 17 00:00:00 2001
From: Karim Eshapa <karim.eshapa@gmail.com>
Date: Wed, 14 Jun 2017 17:40:56 +0200
Subject: [PATCH 0238/1958] video: fbdev: omap2: omapfb: displays:
 panel-dsi-cm: Use time comparison kernel macro.

Use time_before_eq() kernel macro for time comparison.

Signed-off-by: Karim Eshapa <karim.eshapa@gmail.com>
Cc: tomi.valkeinen@ti.com
Cc: peter.ujfalusi@ti.com
Cc: tj@kernel.org
Cc: mingo@kernel.org
[b.zolnierkie: patch description fixup]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
index fd2b372d0264d..7f20bc267234c 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
@@ -100,7 +100,7 @@ static void hw_guard_wait(struct panel_drv_data *ddata)
 {
 	unsigned long wait = ddata->hw_guard_end - jiffies;
 
-	if ((long)wait > 0 && wait <= ddata->hw_guard_wait) {
+	if ((long)wait > 0 && time_before_eq(wait, ddata->hw_guard_wait)) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(wait);
 	}
-- 
GitLab


From 86388d835e7f98a227e83530fa9b4953193df43a Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 14 Jun 2017 17:40:56 +0200
Subject: [PATCH 0239/1958] video: fbdev: pxafb: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/pxafb.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index b21a89b03fb47..c3d49e13643cd 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -1436,7 +1436,10 @@ static void pxafb_enable_controller(struct pxafb_info *fbi)
 	pr_debug("reg_lccr3 0x%08x\n", (unsigned int) fbi->reg_lccr3);
 
 	/* enable LCD controller clock */
-	clk_prepare_enable(fbi->clk);
+	if (clk_prepare_enable(fbi->clk)) {
+		pr_err("%s: Failed to prepare clock\n", __func__);
+		return;
+	}
 
 	if (fbi->lccr0 & LCCR0_LCDT)
 		return;
-- 
GitLab


From b543849350d90849719e63ded1b71bc0ee6d9d73 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 14 Jun 2017 17:40:57 +0200
Subject: [PATCH 0240/1958] omapfb: Use sysfs_match_string() helper

Use sysfs_match_string() helper instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 .../video/fbdev/omap2/omapfb/dss/manager-sysfs.c   | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
index 9e2a67fdf4d20..44b96af4ef4ef 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
@@ -182,22 +182,16 @@ static ssize_t manager_trans_key_type_show(struct omap_overlay_manager *mgr,
 static ssize_t manager_trans_key_type_store(struct omap_overlay_manager *mgr,
 					    const char *buf, size_t size)
 {
-	enum omap_dss_trans_key_type key_type;
 	struct omap_overlay_manager_info info;
 	int r;
 
-	for (key_type = OMAP_DSS_COLOR_KEY_GFX_DST;
-			key_type < ARRAY_SIZE(trans_key_type_str); key_type++) {
-		if (sysfs_streq(buf, trans_key_type_str[key_type]))
-			break;
-	}
-
-	if (key_type == ARRAY_SIZE(trans_key_type_str))
-		return -EINVAL;
+	r = sysfs_match_string(trans_key_type_str, buf);
+	if (r < 0)
+		return r;
 
 	mgr->get_manager_info(mgr, &info);
 
-	info.trans_key_type = key_type;
+	info.trans_key_type = r;
 
 	r = mgr->set_manager_info(mgr, &info);
 	if (r)
-- 
GitLab


From 0e25884bbe246d68f1a928a3d5a682ab01a58c83 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 14 Jun 2017 17:40:57 +0200
Subject: [PATCH 0241/1958] video: fbdev: sh_mobile_lcdcfb: constify
 sh_mobile_lcdc_bl_ops.

File size before:
text	data	 bss    dec      hex filename
 17525	 952	   0  18477	482d drivers/video/fbdev/sh_mobile_lcdcfb.o

File size After adding 'const':
text	data	 bss	 dec	 hex filename
 17557	 920	   0   18477	482d drivers/video/fbdev/sh_mobile_lcdcfb.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Cc: geert+renesas@glider.be
Cc: keescook@chromium.org
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/sh_mobile_lcdcfb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c
index 885ee3a563aa3..c3a46506e47e2 100644
--- a/drivers/video/fbdev/sh_mobile_lcdcfb.c
+++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c
@@ -2301,7 +2301,7 @@ static int sh_mobile_lcdc_check_fb(struct backlight_device *bdev,
 	return (info->bl_dev == bdev);
 }
 
-static struct backlight_ops sh_mobile_lcdc_bl_ops = {
+static const struct backlight_ops sh_mobile_lcdc_bl_ops = {
 	.options	= BL_CORE_SUSPENDRESUME,
 	.update_status	= sh_mobile_lcdc_update_bl,
 	.get_brightness	= sh_mobile_lcdc_get_brightness,
-- 
GitLab


From a8feae09110675e77022b6f04ac6d308e842d75d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 14 Jun 2017 17:40:57 +0200
Subject: [PATCH 0242/1958] uvesafb: Fix continuation printks without
 KERN_LEVEL to pr_cont, neatening

Linus recently broke the printk without KERN_CONT behavior.
Fix it for uvesafb.

While there, convert printk(KERN_<LEVEL> to pr_<level>.
Add pr_fmt and remove the embedded prefixes.

Miscellanea:

o Coalesce formats and realign arguments
o Add a missing space to a format when coalescing.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Michal Januszewski <spock@gentoo.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/uvesafb.c | 148 ++++++++++++++--------------------
 1 file changed, 62 insertions(+), 86 deletions(-)

diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index 98af9e02959b4..dc0e8d90d9cc3 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c
@@ -5,6 +5,9 @@
  *     Loosely based upon the vesafb driver.
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -149,8 +152,8 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
 	 * allowed by connector.
 	 */
 	if (sizeof(*m) + len > CONNECTOR_MAX_MSG_SIZE) {
-		printk(KERN_WARNING "uvesafb: message too long (%d), "
-			"can't execute task\n", (int)(sizeof(*m) + len));
+		pr_warn("message too long (%d), can't execute task\n",
+			(int)(sizeof(*m) + len));
 		return -E2BIG;
 	}
 
@@ -198,10 +201,8 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
 		 */
 		err = uvesafb_helper_start();
 		if (err) {
-			printk(KERN_ERR "uvesafb: failed to execute %s\n",
-					v86d_path);
-			printk(KERN_ERR "uvesafb: make sure that the v86d "
-					"helper is installed and executable\n");
+			pr_err("failed to execute %s\n", v86d_path);
+			pr_err("make sure that the v86d helper is installed and executable\n");
 		} else {
 			v86d_started = 1;
 			err = cn_netlink_send(m, 0, 0, gfp_any());
@@ -375,9 +376,8 @@ static u8 *uvesafb_vbe_state_save(struct uvesafb_par *par)
 	err = uvesafb_exec(task);
 
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-		printk(KERN_WARNING "uvesafb: VBE get state call "
-				"failed (eax=0x%x, err=%d)\n",
-				task->t.regs.eax, err);
+		pr_warn("VBE get state call failed (eax=0x%x, err=%d)\n",
+			task->t.regs.eax, err);
 		kfree(state);
 		state = NULL;
 	}
@@ -407,9 +407,8 @@ static void uvesafb_vbe_state_restore(struct uvesafb_par *par, u8 *state_buf)
 
 	err = uvesafb_exec(task);
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f)
-		printk(KERN_WARNING "uvesafb: VBE state restore call "
-				"failed (eax=0x%x, err=%d)\n",
-				task->t.regs.eax, err);
+		pr_warn("VBE state restore call failed (eax=0x%x, err=%d)\n",
+			task->t.regs.eax, err);
 
 	uvesafb_free(task);
 }
@@ -427,24 +426,22 @@ static int uvesafb_vbe_getinfo(struct uvesafb_ktask *task,
 
 	err = uvesafb_exec(task);
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-		printk(KERN_ERR "uvesafb: Getting VBE info block failed "
-				"(eax=0x%x, err=%d)\n", (u32)task->t.regs.eax,
-				err);
+		pr_err("Getting VBE info block failed (eax=0x%x, err=%d)\n",
+		       (u32)task->t.regs.eax, err);
 		return -EINVAL;
 	}
 
 	if (par->vbe_ib.vbe_version < 0x0200) {
-		printk(KERN_ERR "uvesafb: Sorry, pre-VBE 2.0 cards are "
-				"not supported.\n");
+		pr_err("Sorry, pre-VBE 2.0 cards are not supported\n");
 		return -EINVAL;
 	}
 
 	if (!par->vbe_ib.mode_list_ptr) {
-		printk(KERN_ERR "uvesafb: Missing mode list!\n");
+		pr_err("Missing mode list!\n");
 		return -EINVAL;
 	}
 
-	printk(KERN_INFO "uvesafb: ");
+	pr_info("");
 
 	/*
 	 * Convert string pointers and the mode list pointer into
@@ -452,23 +449,24 @@ static int uvesafb_vbe_getinfo(struct uvesafb_ktask *task,
 	 * video adapter and its vendor.
 	 */
 	if (par->vbe_ib.oem_vendor_name_ptr)
-		printk("%s, ",
+		pr_cont("%s, ",
 			((char *)task->buf) + par->vbe_ib.oem_vendor_name_ptr);
 
 	if (par->vbe_ib.oem_product_name_ptr)
-		printk("%s, ",
+		pr_cont("%s, ",
 			((char *)task->buf) + par->vbe_ib.oem_product_name_ptr);
 
 	if (par->vbe_ib.oem_product_rev_ptr)
-		printk("%s, ",
+		pr_cont("%s, ",
 			((char *)task->buf) + par->vbe_ib.oem_product_rev_ptr);
 
 	if (par->vbe_ib.oem_string_ptr)
-		printk("OEM: %s, ",
+		pr_cont("OEM: %s, ",
 			((char *)task->buf) + par->vbe_ib.oem_string_ptr);
 
-	printk("VBE v%d.%d\n", ((par->vbe_ib.vbe_version & 0xff00) >> 8),
-			par->vbe_ib.vbe_version & 0xff);
+	pr_cont("VBE v%d.%d\n",
+		(par->vbe_ib.vbe_version & 0xff00) >> 8,
+		par->vbe_ib.vbe_version & 0xff);
 
 	return 0;
 }
@@ -507,8 +505,7 @@ static int uvesafb_vbe_getmodes(struct uvesafb_ktask *task,
 
 		err = uvesafb_exec(task);
 		if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-			printk(KERN_WARNING "uvesafb: Getting mode info block "
-				"for mode 0x%x failed (eax=0x%x, err=%d)\n",
+			pr_warn("Getting mode info block for mode 0x%x failed (eax=0x%x, err=%d)\n",
 				*mode, (u32)task->t.regs.eax, err);
 			mode++;
 			par->vbe_modes_cnt--;
@@ -569,23 +566,20 @@ static int uvesafb_vbe_getpmi(struct uvesafb_ktask *task,
 						+ task->t.regs.edi);
 		par->pmi_start = (u8 *)par->pmi_base + par->pmi_base[1];
 		par->pmi_pal = (u8 *)par->pmi_base + par->pmi_base[2];
-		printk(KERN_INFO "uvesafb: protected mode interface info at "
-				 "%04x:%04x\n",
-				 (u16)task->t.regs.es, (u16)task->t.regs.edi);
-		printk(KERN_INFO "uvesafb: pmi: set display start = %p, "
-				 "set palette = %p\n", par->pmi_start,
-				 par->pmi_pal);
+		pr_info("protected mode interface info at %04x:%04x\n",
+			(u16)task->t.regs.es, (u16)task->t.regs.edi);
+		pr_info("pmi: set display start = %p, set palette = %p\n",
+			par->pmi_start, par->pmi_pal);
 
 		if (par->pmi_base[3]) {
-			printk(KERN_INFO "uvesafb: pmi: ports = ");
+			pr_info("pmi: ports =");
 			for (i = par->pmi_base[3]/2;
 					par->pmi_base[i] != 0xffff; i++)
-				printk("%x ", par->pmi_base[i]);
-			printk("\n");
+				pr_cont(" %x", par->pmi_base[i]);
+			pr_cont("\n");
 
 			if (par->pmi_base[i] != 0xffff) {
-				printk(KERN_INFO "uvesafb: can't handle memory"
-						 " requests, pmi disabled\n");
+				pr_info("can't handle memory requests, pmi disabled\n");
 				par->ypan = par->pmi_setpal = 0;
 			}
 		}
@@ -634,17 +628,13 @@ static int uvesafb_vbe_getedid(struct uvesafb_ktask *task, struct fb_info *info)
 		return -EINVAL;
 
 	if ((task->t.regs.ebx & 0x3) == 3) {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware supports both "
-				 "DDC1 and DDC2 transfers\n");
+		pr_info("VBIOS/hardware supports both DDC1 and DDC2 transfers\n");
 	} else if ((task->t.regs.ebx & 0x3) == 2) {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware supports DDC2 "
-				 "transfers\n");
+		pr_info("VBIOS/hardware supports DDC2 transfers\n");
 	} else if ((task->t.regs.ebx & 0x3) == 1) {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware supports DDC1 "
-				 "transfers\n");
+		pr_info("VBIOS/hardware supports DDC1 transfers\n");
 	} else {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware doesn't support "
-				 "DDC transfers\n");
+		pr_info("VBIOS/hardware doesn't support DDC transfers\n");
 		return -EINVAL;
 	}
 
@@ -718,14 +708,12 @@ static void uvesafb_vbe_getmonspecs(struct uvesafb_ktask *task,
 	}
 
 	if (info->monspecs.gtf)
-		printk(KERN_INFO
-			"uvesafb: monitor limits: vf = %d Hz, hf = %d kHz, "
-			"clk = %d MHz\n", info->monspecs.vfmax,
+		pr_info("monitor limits: vf = %d Hz, hf = %d kHz, clk = %d MHz\n",
+			info->monspecs.vfmax,
 			(int)(info->monspecs.hfmax / 1000),
 			(int)(info->monspecs.dclkmax / 1000000));
 	else
-		printk(KERN_INFO "uvesafb: no monitor limits have been set, "
-				 "default refresh rate will be used\n");
+		pr_info("no monitor limits have been set, default refresh rate will be used\n");
 
 	/* Add VBE modes to the modelist. */
 	for (i = 0; i < par->vbe_modes_cnt; i++) {
@@ -779,8 +767,7 @@ static void uvesafb_vbe_getstatesize(struct uvesafb_ktask *task,
 	err = uvesafb_exec(task);
 
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-		printk(KERN_WARNING "uvesafb: VBE state buffer size "
-			"cannot be determined (eax=0x%x, err=%d)\n",
+		pr_warn("VBE state buffer size cannot be determined (eax=0x%x, err=%d)\n",
 			task->t.regs.eax, err);
 		par->vbe_state_size = 0;
 		return;
@@ -815,8 +802,7 @@ static int uvesafb_vbe_init(struct fb_info *info)
 	if (par->pmi_setpal || par->ypan) {
 		if (__supported_pte_mask & _PAGE_NX) {
 			par->pmi_setpal = par->ypan = 0;
-			printk(KERN_WARNING "uvesafb: NX protection is active, "
-					    "better not use the PMI.\n");
+			pr_warn("NX protection is active, better not use the PMI\n");
 		} else {
 			uvesafb_vbe_getpmi(task, par);
 		}
@@ -859,8 +845,7 @@ static int uvesafb_vbe_init_mode(struct fb_info *info)
 				goto gotmode;
 			}
 		}
-		printk(KERN_INFO "uvesafb: requested VBE mode 0x%x is "
-				 "unavailable\n", vbemode);
+		pr_info("requested VBE mode 0x%x is unavailable\n", vbemode);
 		vbemode = 0;
 	}
 
@@ -1181,8 +1166,8 @@ static int uvesafb_open(struct fb_info *info, int user)
 	if (!cnt && par->vbe_state_size) {
 		buf =  uvesafb_vbe_state_save(par);
 		if (IS_ERR(buf)) {
-			printk(KERN_WARNING "uvesafb: save hardware state"
-				"failed, error code is %ld!\n", PTR_ERR(buf));
+			pr_warn("save hardware state failed, error code is %ld!\n",
+				PTR_ERR(buf));
 		} else {
 			par->vbe_state_orig = buf;
 		}
@@ -1293,17 +1278,16 @@ static int uvesafb_set_par(struct fb_info *info)
 		 * use our own timings.  Try again with the default timings.
 		 */
 		if (crtc != NULL) {
-			printk(KERN_WARNING "uvesafb: mode switch failed "
-				"(eax=0x%x, err=%d). Trying again with "
-				"default timings.\n", task->t.regs.eax, err);
+			pr_warn("mode switch failed (eax=0x%x, err=%d) - trying again with default timings\n",
+				task->t.regs.eax, err);
 			uvesafb_reset(task);
 			kfree(crtc);
 			crtc = NULL;
 			info->var.pixclock = 0;
 			goto setmode;
 		} else {
-			printk(KERN_ERR "uvesafb: mode switch failed (eax="
-				"0x%x, err=%d)\n", task->t.regs.eax, err);
+			pr_err("mode switch failed (eax=0x%x, err=%d)\n",
+			       task->t.regs.eax, err);
 			err = -EINVAL;
 			goto out;
 		}
@@ -1510,13 +1494,11 @@ static void uvesafb_init_info(struct fb_info *info, struct vbe_mode_ib *mode)
 				 mode->bytes_per_scan_line;
 
 	if (par->ypan && info->var.yres_virtual > info->var.yres) {
-		printk(KERN_INFO "uvesafb: scrolling: %s "
-			"using protected mode interface, "
-			"yres_virtual=%d\n",
+		pr_info("scrolling: %s using protected mode interface, yres_virtual=%d\n",
 			(par->ypan > 1) ? "ywrap" : "ypan",
 			info->var.yres_virtual);
 	} else {
-		printk(KERN_INFO "uvesafb: scrolling: redraw\n");
+		pr_info("scrolling: redraw\n");
 		info->var.yres_virtual = info->var.yres;
 		par->ypan = 0;
 	}
@@ -1704,7 +1686,7 @@ static int uvesafb_probe(struct platform_device *dev)
 
 	err = uvesafb_vbe_init(info);
 	if (err) {
-		printk(KERN_ERR "uvesafb: vbe_init() failed with %d\n", err);
+		pr_err("vbe_init() failed with %d\n", err);
 		goto out;
 	}
 
@@ -1726,15 +1708,15 @@ static int uvesafb_probe(struct platform_device *dev)
 	uvesafb_init_info(info, mode);
 
 	if (!request_region(0x3c0, 32, "uvesafb")) {
-		printk(KERN_ERR "uvesafb: request region 0x3c0-0x3e0 failed\n");
+		pr_err("request region 0x3c0-0x3e0 failed\n");
 		err = -EIO;
 		goto out_mode;
 	}
 
 	if (!request_mem_region(info->fix.smem_start, info->fix.smem_len,
 				"uvesafb")) {
-		printk(KERN_ERR "uvesafb: cannot reserve video memory at "
-				"0x%lx\n", info->fix.smem_start);
+		pr_err("cannot reserve video memory at 0x%lx\n",
+		       info->fix.smem_start);
 		err = -EIO;
 		goto out_reg;
 	}
@@ -1743,10 +1725,8 @@ static int uvesafb_probe(struct platform_device *dev)
 	uvesafb_ioremap(info);
 
 	if (!info->screen_base) {
-		printk(KERN_ERR
-			"uvesafb: abort, cannot ioremap 0x%x bytes of video "
-			"memory at 0x%lx\n",
-			info->fix.smem_len, info->fix.smem_start);
+		pr_err("abort, cannot ioremap 0x%x bytes of video memory at 0x%lx\n",
+		       info->fix.smem_len, info->fix.smem_start);
 		err = -EIO;
 		goto out_mem;
 	}
@@ -1754,16 +1734,14 @@ static int uvesafb_probe(struct platform_device *dev)
 	platform_set_drvdata(dev, info);
 
 	if (register_framebuffer(info) < 0) {
-		printk(KERN_ERR
-			"uvesafb: failed to register framebuffer device\n");
+		pr_err("failed to register framebuffer device\n");
 		err = -EINVAL;
 		goto out_unmap;
 	}
 
-	printk(KERN_INFO "uvesafb: framebuffer at 0x%lx, mapped to 0x%p, "
-			"using %dk, total %dk\n", info->fix.smem_start,
-			info->screen_base, info->fix.smem_len/1024,
-			par->vbe_ib.total_memory * 64);
+	pr_info("framebuffer at 0x%lx, mapped to 0x%p, using %dk, total %dk\n",
+		info->fix.smem_start, info->screen_base,
+		info->fix.smem_len / 1024, par->vbe_ib.total_memory * 64);
 	fb_info(info, "%s frame buffer device\n", info->fix.id);
 
 	err = sysfs_create_group(&dev->dev.kobj, &uvesafb_dev_attgrp);
@@ -1871,8 +1849,7 @@ static int uvesafb_setup(char *options)
 		else if (this_opt[0] >= '0' && this_opt[0] <= '9') {
 			mode_option = this_opt;
 		} else {
-			printk(KERN_WARNING
-				"uvesafb: unrecognized option %s\n", this_opt);
+			pr_warn("unrecognized option %s\n", this_opt);
 		}
 	}
 
@@ -1931,8 +1908,7 @@ static int uvesafb_init(void)
 		err = driver_create_file(&uvesafb_driver.driver,
 				&driver_attr_v86d);
 		if (err) {
-			printk(KERN_WARNING "uvesafb: failed to register "
-					"attributes\n");
+			pr_warn("failed to register attributes\n");
 			err = 0;
 		}
 	}
-- 
GitLab


From d44005672d83f89d7d797efc490a751a696e7d91 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 15 Jun 2017 23:20:54 +0200
Subject: [PATCH 0243/1958] i2c: stub: fix build warning regression

Commit 6c42778780c40c ("i2c: stub: use pr_fmt") changed the DEBUG
handling and caused build warnings. Revert back to the original.

Fixes: 6c42778780c40c ("i2c: stub: use pr_fmt")
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-stub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-stub.c b/drivers/i2c/i2c-stub.c
index 8b0900147f96f..4a9ad91c5ba3e 100644
--- a/drivers/i2c/i2c-stub.c
+++ b/drivers/i2c/i2c-stub.c
@@ -15,7 +15,7 @@
     GNU General Public License for more details.
 */
 
-#define DEBUG
+#define DEBUG 1
 #define pr_fmt(fmt) "i2c-stub: " fmt
 
 #include <linux/errno.h>
-- 
GitLab


From 54bd63570484167cb13edf81e31fff107b879981 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 15 Jun 2017 10:36:22 +0200
Subject: [PATCH 0244/1958] iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel

When booting into a kdump kernel, suppress IO_PAGE_FAULTs by
default for all devices. But allow the faults again when a
domain is assigned to a device.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c       | 3 ++-
 drivers/iommu/amd_iommu_init.c  | 9 +++++++++
 drivers/iommu/amd_iommu_types.h | 1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 6498f5baa7ea5..95ee360a51991 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2078,7 +2078,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 		flags    |= tmp;
 	}
 
-	flags &= ~(0xffffUL);
+
+	flags &= ~(DTE_FLAG_SA | 0xffffULL);
 	flags |= domain->id;
 
 	amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 3fa7e3b35507d..cf7896550e758 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -29,6 +29,7 @@
 #include <linux/export.h>
 #include <linux/iommu.h>
 #include <linux/kmemleak.h>
+#include <linux/crash_dump.h>
 #include <asm/pci-direct.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -1900,6 +1901,14 @@ static void init_device_table_dma(void)
 	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
 		set_dev_entry_bit(devid, DEV_ENTRY_VALID);
 		set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+		/*
+		 * In kdump kernels in-flight DMA from the old kernel might
+		 * cause IO_PAGE_FAULTs. There are no reports that a kdump
+		 * actually failed because of that, so just disable fault
+		 * reporting in the hardware to get rid of the messages
+		 */
+		if (is_kdump_kernel())
+			set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
 	}
 }
 
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6960d7db2fabe..294a409e283b7 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -322,6 +322,7 @@
 #define IOMMU_PTE_IW (1ULL << 62)
 
 #define DTE_FLAG_IOTLB	(1ULL << 32)
+#define DTE_FLAG_SA	(1ULL << 34)
 #define DTE_FLAG_GV	(1ULL << 55)
 #define DTE_FLAG_MASK	(0x3ffULL << 32)
 #define DTE_GLX_SHIFT	(56)
-- 
GitLab


From 68c35ea25bdd4ad10445c4c02f7d48b3dccab8cc Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0245/1958] mfd: cros_ec: Add helper for event notifier.

Add cros_ec_get_event() entry point to retrieve event within functions
called by the notifier.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_proto.c | 20 ++++++++++++++++++++
 include/linux/mfd/cros_ec.h             | 10 ++++++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index ed5dee744c74d..7428c2b965bba 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -494,3 +494,23 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev)
 		return get_keyboard_state_event(ec_dev);
 }
 EXPORT_SYMBOL(cros_ec_get_next_event);
+
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev)
+{
+	u32 host_event;
+
+	BUG_ON(!ec_dev->mkbp_event_supported);
+
+	if (ec_dev->event_data.event_type != EC_MKBP_EVENT_HOST_EVENT)
+		return 0;
+
+	if (ec_dev->event_size != sizeof(host_event)) {
+		dev_warn(ec_dev->dev, "Invalid host event size\n");
+		return 0;
+	}
+
+	host_event = get_unaligned_le32(&ec_dev->event_data.data.host_event);
+
+	return host_event;
+}
+EXPORT_SYMBOL(cros_ec_get_host_event);
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 28baee63eaf62..b61b2e013698e 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -300,6 +300,16 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev);
  */
 int cros_ec_get_next_event(struct cros_ec_device *ec_dev);
 
+/**
+ * cros_ec_get_host_event - Return a mask of event set by the EC.
+ *
+ * When MKBP is supported, when the EC raises an interrupt,
+ * We collect the events raised and call the functions in the ec notifier.
+ *
+ * This function is a helper to know which events are raised.
+ */
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
+
 /* sysfs stuff */
 extern struct attribute_group cros_ec_attr_group;
 extern struct attribute_group cros_ec_lightbar_attr_group;
-- 
GitLab


From 0aa877c558477e5c4b0faaa618cfd41f8c0b3319 Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0246/1958] mfd: cros_ec: Add EC console read structures
 definitions

ec_params_console_read_v1 is used to capture EC logs from kernel,
and ec_params_get_cmd_versions_v1 is used to probe whether EC
supports that command.

Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 include/linux/mfd/cros_ec_commands.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index c93e7e0300efa..1b19e424e1cf6 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -625,6 +625,10 @@ struct ec_params_get_cmd_versions {
 	uint8_t cmd;      /* Command to check */
 } __packed;
 
+struct ec_params_get_cmd_versions_v1 {
+	uint16_t cmd;     /* Command to check */
+} __packed;
+
 struct ec_response_get_cmd_versions {
 	/*
 	 * Mask of supported versions; use EC_VER_MASK() to compare with a
@@ -2285,13 +2289,28 @@ struct ec_params_charge_control {
 #define EC_CMD_CONSOLE_SNAPSHOT 0x97
 
 /*
- * Read next chunk of data from saved snapshot.
+ * Read data from the saved snapshot. If the subcmd parameter is
+ * CONSOLE_READ_NEXT, this will return data starting from the beginning of
+ * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the
+ * end of the previous snapshot.
+ *
+ * The params are only looked at in version >= 1 of this command. Prior
+ * versions will just default to CONSOLE_READ_NEXT behavior.
  *
  * Response is null-terminated string.  Empty string, if there is no more
  * remaining output.
  */
 #define EC_CMD_CONSOLE_READ 0x98
 
+enum ec_console_read_subcmd {
+	CONSOLE_READ_NEXT = 0,
+	CONSOLE_READ_RECENT
+};
+
+struct ec_params_console_read_v1 {
+	uint8_t subcmd; /* enum ec_console_read_subcmd */
+} __packed;
+
 /*****************************************************************************/
 
 /*
-- 
GitLab


From e86264595225d2764a903965356ef59aeb7d1c47 Mon Sep 17 00:00:00 2001
From: Eric Caruso <ejcaruso@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0247/1958] mfd: cros_ec: add debugfs, console log file

If the EC supports the new CONSOLE_READ command type, then we
place a console_log file in debugfs for that EC device which allows
us to grab EC logs. The kernel will poll every 10 seconds for the
log and keep its own buffer, but userspace should grab this and
write it out to some logs which actually get rotated.

Signed-off-by: Eric Caruso <ejcaruso@chromium.org>
Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
[bleung: restored original version of this commit, with pointer size
 issue to be fixed in next commit]
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/Makefile          |   3 +-
 drivers/platform/chrome/cros_ec_debugfs.c | 347 ++++++++++++++++++++++
 drivers/platform/chrome/cros_ec_debugfs.h |  27 ++
 drivers/platform/chrome/cros_ec_dev.c     |   7 +
 include/linux/mfd/cros_ec.h               |   4 +
 5 files changed, 387 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/chrome/cros_ec_debugfs.c
 create mode 100644 drivers/platform/chrome/cros_ec_debugfs.h

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 4f3462783a3cd..3870afeefcf42 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -2,7 +2,8 @@
 obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
 cros_ec_devs-objs			:= cros_ec_dev.o cros_ec_sysfs.o \
-					   cros_ec_lightbar.o cros_ec_vbc.o
+					   cros_ec_lightbar.o cros_ec_vbc.o \
+					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_devs.o
 obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpc.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
new file mode 100644
index 0000000000000..225f93631051d
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -0,0 +1,347 @@
+/*
+ * cros_ec_debugfs - debug logs for Chrome OS EC
+ *
+ * Copyright 2015 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include "cros_ec_dev.h"
+#include "cros_ec_debugfs.h"
+
+#define LOG_SHIFT		14
+#define LOG_SIZE		(1 << LOG_SHIFT)
+#define LOG_POLL_SEC		10
+
+#define CIRC_ADD(idx, size, value)	(((idx) + (value)) & ((size) - 1))
+
+/* struct cros_ec_debugfs - ChromeOS EC debugging information
+ *
+ * @ec: EC device this debugfs information belongs to
+ * @dir: dentry for debugfs files
+ * @log_buffer: circular buffer for console log information
+ * @read_msg: preallocated EC command and buffer to read console log
+ * @log_mutex: mutex to protect circular buffer
+ * @log_wq: waitqueue for log readers
+ * @log_poll_work: recurring task to poll EC for new console log data
+ */
+struct cros_ec_debugfs {
+	struct cros_ec_dev *ec;
+	struct dentry *dir;
+	struct circ_buf log_buffer;
+	struct cros_ec_command *read_msg;
+	struct mutex log_mutex;
+	wait_queue_head_t log_wq;
+	struct delayed_work log_poll_work;
+};
+
+/*
+ * We need to make sure that the EC log buffer on the UART is large enough,
+ * so that it is unlikely enough to overlow within LOG_POLL_SEC.
+ */
+static void cros_ec_console_log_work(struct work_struct *__work)
+{
+	struct cros_ec_debugfs *debug_info =
+		container_of(to_delayed_work(__work),
+			     struct cros_ec_debugfs,
+			     log_poll_work);
+	struct cros_ec_dev *ec = debug_info->ec;
+	struct circ_buf *cb = &debug_info->log_buffer;
+	struct cros_ec_command snapshot_msg = {
+		.command = EC_CMD_CONSOLE_SNAPSHOT + ec->cmd_offset,
+	};
+
+	struct ec_params_console_read_v1 *read_params =
+		(struct ec_params_console_read_v1 *)debug_info->read_msg->data;
+	uint8_t *ec_buffer = (uint8_t *)debug_info->read_msg->data;
+	int idx;
+	int buf_space;
+	int ret;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, &snapshot_msg);
+	if (ret < 0) {
+		dev_err(ec->dev, "EC communication failed\n");
+		goto resched;
+	}
+	if (snapshot_msg.result != EC_RES_SUCCESS) {
+		dev_err(ec->dev, "EC failed to snapshot the console log\n");
+		goto resched;
+	}
+
+	/* Loop until we have read everything, or there's an error. */
+	mutex_lock(&debug_info->log_mutex);
+	buf_space = CIRC_SPACE(cb->head, cb->tail, LOG_SIZE);
+
+	while (1) {
+		if (!buf_space) {
+			dev_info_once(ec->dev,
+				      "Some logs may have been dropped...\n");
+			break;
+		}
+
+		memset(read_params, '\0', sizeof(*read_params));
+		read_params->subcmd = CONSOLE_READ_RECENT;
+		ret = cros_ec_cmd_xfer(ec->ec_dev, debug_info->read_msg);
+		if (ret < 0) {
+			dev_err(ec->dev, "EC communication failed\n");
+			break;
+		}
+		if (debug_info->read_msg->result != EC_RES_SUCCESS) {
+			dev_err(ec->dev,
+				"EC failed to read the console log\n");
+			break;
+		}
+
+		/* If the buffer is empty, we're done here. */
+		if (ret == 0 || ec_buffer[0] == '\0')
+			break;
+
+		idx = 0;
+		while (idx < ret && ec_buffer[idx] != '\0' && buf_space > 0) {
+			cb->buf[cb->head] = ec_buffer[idx];
+			cb->head = CIRC_ADD(cb->head, LOG_SIZE, 1);
+			idx++;
+			buf_space--;
+		}
+
+		wake_up(&debug_info->log_wq);
+	}
+
+	mutex_unlock(&debug_info->log_mutex);
+
+resched:
+	schedule_delayed_work(&debug_info->log_poll_work,
+			      msecs_to_jiffies(LOG_POLL_SEC * 1000));
+}
+
+static int cros_ec_console_log_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct cros_ec_debugfs *debug_info = file->private_data;
+	struct circ_buf *cb = &debug_info->log_buffer;
+	ssize_t ret;
+
+	mutex_lock(&debug_info->log_mutex);
+
+	while (!CIRC_CNT(cb->head, cb->tail, LOG_SIZE)) {
+		if (file->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			goto error;
+		}
+
+		mutex_unlock(&debug_info->log_mutex);
+
+		ret = wait_event_interruptible(debug_info->log_wq,
+					CIRC_CNT(cb->head, cb->tail, LOG_SIZE));
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&debug_info->log_mutex);
+	}
+
+	/* Only copy until the end of the circular buffer, and let userspace
+	 * retry to get the rest of the data.
+	 */
+	ret = min_t(size_t, CIRC_CNT_TO_END(cb->head, cb->tail, LOG_SIZE),
+		    count);
+
+	if (copy_to_user(buf, cb->buf + cb->tail, ret)) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	cb->tail = CIRC_ADD(cb->tail, LOG_SIZE, ret);
+
+error:
+	mutex_unlock(&debug_info->log_mutex);
+	return ret;
+}
+
+static unsigned int cros_ec_console_log_poll(struct file *file,
+					     poll_table *wait)
+{
+	struct cros_ec_debugfs *debug_info = file->private_data;
+	unsigned int mask = 0;
+
+	poll_wait(file, &debug_info->log_wq, wait);
+
+	mutex_lock(&debug_info->log_mutex);
+	if (CIRC_CNT(debug_info->log_buffer.head,
+		     debug_info->log_buffer.tail,
+		     LOG_SIZE))
+		mask |= POLLIN | POLLRDNORM;
+	mutex_unlock(&debug_info->log_mutex);
+
+	return mask;
+}
+
+static int cros_ec_console_log_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+const struct file_operations cros_ec_console_log_fops = {
+	.owner = THIS_MODULE,
+	.open = cros_ec_console_log_open,
+	.read = cros_ec_console_log_read,
+	.llseek = no_llseek,
+	.poll = cros_ec_console_log_poll,
+	.release = cros_ec_console_log_release,
+};
+
+static int ec_read_version_supported(struct cros_ec_dev *ec)
+{
+	struct ec_params_get_cmd_versions_v1 *params;
+	struct ec_response_get_cmd_versions *response;
+	int ret;
+
+	struct cros_ec_command *msg;
+
+	msg = kzalloc(sizeof(*msg) + max(sizeof(params), sizeof(response)),
+		GFP_KERNEL);
+	if (!msg)
+		return 0;
+
+	msg->command = EC_CMD_GET_CMD_VERSIONS + ec->cmd_offset;
+	msg->outsize = sizeof(params);
+	msg->insize = sizeof(response);
+
+	params = (struct ec_params_get_cmd_versions_v1 *)msg->data;
+	params->cmd = EC_CMD_CONSOLE_READ;
+	response = (struct ec_response_get_cmd_versions *)msg->data;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg) >= 0 &&
+		msg->result == EC_RES_SUCCESS &&
+		(response->version_mask & EC_VER_MASK(1));
+
+	kfree(msg);
+
+	return ret;
+}
+
+static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
+{
+	struct cros_ec_dev *ec = debug_info->ec;
+	char *buf;
+	int read_params_size;
+	int read_response_size;
+
+	if (!ec_read_version_supported(ec)) {
+		dev_warn(ec->dev,
+			"device does not support reading the console log\n");
+		return 0;
+	}
+
+	buf = devm_kzalloc(ec->dev, LOG_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	read_params_size = sizeof(struct ec_params_console_read_v1);
+	read_response_size = ec->ec_dev->max_response;
+	debug_info->read_msg = devm_kzalloc(ec->dev,
+		sizeof(*debug_info->read_msg) +
+			max(read_params_size, read_response_size), GFP_KERNEL);
+	if (!debug_info->read_msg)
+		return -ENOMEM;
+
+	debug_info->read_msg->version = 1;
+	debug_info->read_msg->command = EC_CMD_CONSOLE_READ + ec->cmd_offset;
+	debug_info->read_msg->outsize = read_params_size;
+	debug_info->read_msg->insize = read_response_size;
+
+	debug_info->log_buffer.buf = buf;
+	debug_info->log_buffer.head = 0;
+	debug_info->log_buffer.tail = 0;
+
+	mutex_init(&debug_info->log_mutex);
+	init_waitqueue_head(&debug_info->log_wq);
+
+	if (!debugfs_create_file("console_log",
+				 S_IFREG | S_IRUGO,
+				 debug_info->dir,
+				 debug_info,
+				 &cros_ec_console_log_fops))
+		return -ENOMEM;
+
+	INIT_DELAYED_WORK(&debug_info->log_poll_work,
+			  cros_ec_console_log_work);
+	schedule_delayed_work(&debug_info->log_poll_work, 0);
+
+	return 0;
+}
+
+static void cros_ec_cleanup_console_log(struct cros_ec_debugfs *debug_info)
+{
+	if (debug_info->log_buffer.buf) {
+		cancel_delayed_work_sync(&debug_info->log_poll_work);
+		mutex_destroy(&debug_info->log_mutex);
+	}
+}
+
+int cros_ec_debugfs_init(struct cros_ec_dev *ec)
+{
+	struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev);
+	const char *name = ec_platform->ec_name;
+	struct cros_ec_debugfs *debug_info;
+	int ret;
+
+	debug_info = devm_kzalloc(ec->dev, sizeof(*debug_info), GFP_KERNEL);
+	if (!debug_info)
+		return -ENOMEM;
+
+	debug_info->ec = ec;
+	debug_info->dir = debugfs_create_dir(name, NULL);
+	if (!debug_info->dir)
+		return -ENOMEM;
+
+	ret = cros_ec_create_console_log(debug_info);
+	if (ret)
+		goto remove_debugfs;
+
+	ec->debug_info = debug_info;
+
+	return 0;
+
+remove_debugfs:
+	debugfs_remove_recursive(debug_info->dir);
+	return ret;
+}
+
+void cros_ec_debugfs_remove(struct cros_ec_dev *ec)
+{
+	if (!ec->debug_info)
+		return;
+
+	debugfs_remove_recursive(ec->debug_info->dir);
+	cros_ec_cleanup_console_log(ec->debug_info);
+}
diff --git a/drivers/platform/chrome/cros_ec_debugfs.h b/drivers/platform/chrome/cros_ec_debugfs.h
new file mode 100644
index 0000000000000..1ff3a50aa1b8a
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_debugfs.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2015 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DRV_CROS_EC_DEBUGFS_H_
+#define _DRV_CROS_EC_DEBUGFS_H_
+
+#include "cros_ec_dev.h"
+
+/* debugfs stuff */
+int cros_ec_debugfs_init(struct cros_ec_dev *ec);
+void cros_ec_debugfs_remove(struct cros_ec_dev *ec);
+
+#endif  /* _DRV_CROS_EC_DEBUGFS_H_ */
diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 6aa120cd0574d..20ce1c23fb5c0 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+#include "cros_ec_debugfs.h"
 #include "cros_ec_dev.h"
 
 /* Device variables */
@@ -427,6 +428,9 @@ static int ec_device_probe(struct platform_device *pdev)
 		goto failed;
 	}
 
+	if (cros_ec_debugfs_init(ec))
+		dev_warn(dev, "failed to create debugfs directory\n");
+
 	/* check whether this EC is a sensor hub. */
 	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
 		cros_ec_sensors_register(ec);
@@ -441,6 +445,9 @@ static int ec_device_probe(struct platform_device *pdev)
 static int ec_device_remove(struct platform_device *pdev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
+
+	cros_ec_debugfs_remove(ec);
+
 	cdev_del(&ec->cdev);
 	device_unregister(&ec->class_dev);
 	return 0;
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index b61b2e013698e..3b16c9009749c 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -172,6 +172,8 @@ struct cros_ec_platform {
 	u16 cmd_offset;
 };
 
+struct cros_ec_debugfs;
+
 /*
  * struct cros_ec_dev - ChromeOS EC device entry point
  *
@@ -179,6 +181,7 @@ struct cros_ec_platform {
  * @cdev: Character device structure in /dev
  * @ec_dev: cros_ec_device structure to talk to the physical device
  * @dev: pointer to the platform device
+ * @debug_info: cros_ec_debugfs structure for debugging information
  * @cmd_offset: offset to apply for each command.
  */
 struct cros_ec_dev {
@@ -186,6 +189,7 @@ struct cros_ec_dev {
 	struct cdev cdev;
 	struct cros_ec_device *ec_dev;
 	struct device *dev;
+	struct cros_ec_debugfs *debug_info;
 	u16 cmd_offset;
 	u32 features[2];
 };
-- 
GitLab


From 73b44f40c63bebcce9d66c4072878ecaceb43dda Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Thu, 16 Feb 2017 09:49:29 -0800
Subject: [PATCH 0248/1958] cros_ec_debugfs: Pass proper struct sizes to
 cros_ec_cmd_xfer()

We should output or receive every byte in the param / reply struct,
unrelated to the pointer size.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
[bleung: Picked from crosreview.com/444085 for cros_ec_debugfs.c only.
 cros_ec.c upstream had a different cros_ec_sleep_event which didn't
 have the sizeof issue]
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_debugfs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 225f93631051d..c4150871b8ed8 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -227,14 +227,14 @@ static int ec_read_version_supported(struct cros_ec_dev *ec)
 
 	struct cros_ec_command *msg;
 
-	msg = kzalloc(sizeof(*msg) + max(sizeof(params), sizeof(response)),
+	msg = kzalloc(sizeof(*msg) + max(sizeof(*params), sizeof(*response)),
 		GFP_KERNEL);
 	if (!msg)
 		return 0;
 
 	msg->command = EC_CMD_GET_CMD_VERSIONS + ec->cmd_offset;
-	msg->outsize = sizeof(params);
-	msg->insize = sizeof(response);
+	msg->outsize = sizeof(*params);
+	msg->insize = sizeof(*response);
 
 	params = (struct ec_params_get_cmd_versions_v1 *)msg->data;
 	params->cmd = EC_CMD_CONSOLE_READ;
-- 
GitLab


From 6e4941067cef482c9ed254cf06cab70c32db05b2 Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0249/1958] mfd: cros_ec: Add support for dumping panic
 information

This dumps the EC panic information from the previous reboot.

Similar to the information presented by ectool panicinfo, except
that we do not bother doing any parsing (we should write a small
offline tool for that).

Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_debugfs.c | 54 +++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index c4150871b8ed8..4cc66f4057609 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -47,15 +47,19 @@
  * @log_mutex: mutex to protect circular buffer
  * @log_wq: waitqueue for log readers
  * @log_poll_work: recurring task to poll EC for new console log data
+ * @panicinfo_blob: panicinfo debugfs blob
  */
 struct cros_ec_debugfs {
 	struct cros_ec_dev *ec;
 	struct dentry *dir;
+	/* EC log */
 	struct circ_buf log_buffer;
 	struct cros_ec_command *read_msg;
 	struct mutex log_mutex;
 	wait_queue_head_t log_wq;
 	struct delayed_work log_poll_work;
+	/* EC panicinfo */
+	struct debugfs_blob_wrapper panicinfo_blob;
 };
 
 /*
@@ -308,6 +312,52 @@ static void cros_ec_cleanup_console_log(struct cros_ec_debugfs *debug_info)
 	}
 }
 
+static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
+{
+	struct cros_ec_device *ec_dev = debug_info->ec->ec_dev;
+	int ret;
+	struct cros_ec_command *msg;
+	int insize;
+
+	insize = ec_dev->max_response;
+
+	msg = devm_kzalloc(debug_info->ec->dev,
+			sizeof(*msg) + insize, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	msg->command = EC_CMD_GET_PANIC_INFO;
+	msg->insize = insize;
+
+	ret = cros_ec_cmd_xfer(ec_dev, msg);
+	if (ret < 0) {
+		dev_warn(debug_info->ec->dev, "Cannot read panicinfo.\n");
+		ret = 0;
+		goto free;
+	}
+
+	/* No panic data */
+	if (ret == 0)
+		goto free;
+
+	debug_info->panicinfo_blob.data = msg->data;
+	debug_info->panicinfo_blob.size = ret;
+
+	if (!debugfs_create_blob("panicinfo",
+				 S_IFREG | S_IRUGO,
+				 debug_info->dir,
+				 &debug_info->panicinfo_blob)) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	return 0;
+
+free:
+	devm_kfree(debug_info->ec->dev, msg);
+	return ret;
+}
+
 int cros_ec_debugfs_init(struct cros_ec_dev *ec)
 {
 	struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev);
@@ -324,6 +374,10 @@ int cros_ec_debugfs_init(struct cros_ec_dev *ec)
 	if (!debug_info->dir)
 		return -ENOMEM;
 
+	ret = cros_ec_create_panicinfo(debug_info);
+	if (ret)
+		goto remove_debugfs;
+
 	ret = cros_ec_create_console_log(debug_info);
 	if (ret)
 		goto remove_debugfs;
-- 
GitLab


From be8647d2bea48a15a685c855dda33d22e9550493 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Fri, 16 Jun 2017 16:38:20 -0500
Subject: [PATCH 0250/1958] ipmi:ssif: Use i2c_adapter_id instead of
 adapter->nr

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 1d4fd846e4574..05e18041c357e 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1096,7 +1096,7 @@ static int inc_usecount(void *send_info)
 {
 	struct ssif_info *ssif_info = send_info;
 
-	if (!i2c_get_adapter(ssif_info->client->adapter->nr))
+	if (!i2c_get_adapter(i2c_adapter_id(ssif_info->client->adapter)))
 		return -ENODEV;
 
 	i2c_use_client(ssif_info->client);
@@ -1665,7 +1665,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	{
 		unsigned int thread_num;
 
-		thread_num = ((ssif_info->client->adapter->nr << 8) |
+		thread_num = ((i2c_adapter_id(ssif_info->client->adapter)
+			       << 8) |
 			      ssif_info->client->addr);
 		init_completion(&ssif_info->wake_thread);
 		ssif_info->thread = kthread_run(ipmi_ssif_thread, ssif_info,
-- 
GitLab


From 063345aede9905beb9b73129da7a0e9d5933b078 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Jun 2017 12:29:12 +0200
Subject: [PATCH 0251/1958] i2c: xgene-slimpro: include linux/io.h for memremap

The newly added support for the pcc mailbox fails to build
in some configurations:

drivers/i2c/busses/i2c-xgene-slimpro.c: In function 'xgene_slimpro_i2c_probe':
drivers/i2c/busses/i2c-xgene-slimpro.c:516:25: error: implicit declaration of function 'memremap'; did you mean 'memcmp'? [-Werror=implicit-function-declaration]
drivers/i2c/busses/i2c-xgene-slimpro.c:518:13: error: 'MEMREMAP_WB' undeclared (first use in this function)
drivers/i2c/busses/i2c-xgene-slimpro.c:518:13: note: each undeclared identifier is reported only once for each function it appears in

This includes the missing header file.

Fixes: df5da47fe722 ("i2c: xgene-slimpro: Add ACPI support by using PCC mailbox")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Hoan Tran <hotran@apm.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-xgene-slimpro.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
index 8e5c8f28bc8bf..7e89ba6fcf6f1 100644
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
@@ -27,6 +27,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
 #include <linux/of.h>
-- 
GitLab


From 09a1de04d59870b34b2a8106671c7bdea4ca9a90 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 18 May 2017 11:23:06 +0300
Subject: [PATCH 0252/1958] i2c: i801: Add support for Intel Cannon Lake

Added SMBUS PCI Ids for SMBUS for Cannon Lake PCH.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[jarkko.nikula@linux.intel.com: Add entries to Documentation and Kconfig.
Cover Cannon Lake-H too]
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-i801 | 2 ++
 drivers/i2c/busses/Kconfig        | 2 ++
 drivers/i2c/busses/i2c-i801.c     | 8 ++++++++
 3 files changed, 12 insertions(+)

diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index 820d9040de164..0500193434cb2 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -34,6 +34,8 @@ Supported adapters:
   * Intel Broxton (SOC)
   * Intel Lewisburg (PCH)
   * Intel Gemini Lake (SOC)
+  * Intel Cannon Lake-H (PCH)
+  * Intel Cannon Lake-LP (PCH)
    Datasheets: Publicly available at the Intel website
 
 On Intel Patsburg and later chipsets, both the normal host SMBus controller
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 144cbadc7c728..ed7106e48ba4f 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -129,6 +129,8 @@ config I2C_I801
 	    Broxton (SOC)
 	    Lewisburg (PCH)
 	    Gemini Lake (SOC)
+	    Cannon Lake-H (PCH)
+	    Cannon Lake-LP (PCH)
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-i801.
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 6484fa6dbb84a..c9536e17d6ff0 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -66,6 +66,8 @@
  * Lewisburg Supersku (PCH)	0xa223	32	hard	yes	yes	yes
  * Kaby Lake PCH-H (PCH)	0xa2a3	32	hard	yes	yes	yes
  * Gemini Lake (SOC)		0x31d4	32	hard	yes	yes	yes
+ * Cannon Lake-H (PCH)		0xa323	32	hard	yes	yes	yes
+ * Cannon Lake-LP (PCH)		0x9da3	32	hard	yes	yes	yes
  *
  * Features supported by this driver:
  * Software PEC				no
@@ -226,10 +228,12 @@
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_SMBUS		0x9c22
 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_SMBUS	0x9ca2
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS	0x9d23
+#define PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS		0x9da3
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS	0xa123
 #define PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS		0xa1a3
 #define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS	0xa223
 #define PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS	0xa2a3
+#define PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS		0xa323
 
 struct i801_mux_config {
 	char *gpio_chip;
@@ -1026,6 +1030,8 @@ static const struct pci_device_id i801_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS) },
 	{ 0, }
 };
 
@@ -1499,6 +1505,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	switch (dev->device) {
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
+	case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
+	case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
 	case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
 	case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
 	case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
-- 
GitLab


From 227855b95411572c3936228e8eb87f2688da2c03 Mon Sep 17 00:00:00 2001
From: George Cherian <george.cherian@cavium.com>
Date: Thu, 25 May 2017 11:42:15 +0000
Subject: [PATCH 0253/1958] i2c: xlp9xx: Enable HWMON class probing for xlp9xx

Set I2C_CLASS_HWMON for xlp9xx to enable automatic probing of BMC
devices by the ipmi-ssif driver.

Signed-off-by: George Cherian <george.cherian@cavium.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-xlp9xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
index ae80228104e9f..6b106e94bc095 100644
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c
@@ -393,6 +393,7 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
 	init_completion(&priv->msg_complete);
 	priv->adapter.dev.parent = &pdev->dev;
 	priv->adapter.algo = &xlp9xx_i2c_algo;
+	priv->adapter.class = I2C_CLASS_HWMON;
 	ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&pdev->dev));
 	priv->adapter.dev.of_node = pdev->dev.of_node;
 	priv->dev = &pdev->dev;
-- 
GitLab


From 5c8e3ab146de92800d516386cbb7040c90210b27 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Sun, 28 May 2017 11:30:45 +0200
Subject: [PATCH 0254/1958] i2c: use proper name for the R-Car SoC

It is 'R-Car', not 'RCar'. No code or binding changes, only descriptive text.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-rcar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 214bf2835d1f9..214a71cb64429 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -1,5 +1,5 @@
 /*
- * Driver for the Renesas RCar I2C unit
+ * Driver for the Renesas R-Car I2C unit
  *
  * Copyright (C) 2014-15 Wolfram Sang <wsa@sang-engineering.com>
  * Copyright (C) 2011-2015 Renesas Electronics Corporation
-- 
GitLab


From 56a6cb88657557edd84eabddd9f509d13b5b81c5 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 31 May 2017 12:27:44 +0530
Subject: [PATCH 0255/1958] i2c: at91: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.
Also, add a missing clk_disable_unprepare().

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-at91.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index fabbb9e491612..2525cd9bcbbdd 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -1083,12 +1083,16 @@ static int at91_twi_probe(struct platform_device *pdev)
 		dev_err(dev->dev, "no clock defined\n");
 		return -ENODEV;
 	}
-	clk_prepare_enable(dev->clk);
+	rc = clk_prepare_enable(dev->clk);
+	if (rc)
+		return rc;
 
 	if (dev->dev->of_node) {
 		rc = at91_twi_configure_dma(dev, phy_addr);
-		if (rc == -EPROBE_DEFER)
+		if (rc == -EPROBE_DEFER) {
+			clk_disable_unprepare(dev->clk);
 			return rc;
+		}
 	}
 
 	if (!of_property_read_u32(pdev->dev.of_node, "atmel,fifo-size",
-- 
GitLab


From f27e7805ba6274dc544a8ad85490160c5b535955 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 31 May 2017 12:45:38 +0530
Subject: [PATCH 0256/1958] i2c: at91: Fix compilation warning.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace '%d' by '%zu' to fix the following type of compilation warnings:

drivers/i2c/busses/i2c-at91.c:277:2: warning: format ‘%d’ expects argument of type ‘int’, but argument 5 has type ‘size_t’ [-Wformat=]
  dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len);
  ^

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Tested-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-at91.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index 2525cd9bcbbdd..38dd61d621df4 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -274,7 +274,7 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev)
 		if (!dev->use_alt_cmd)
 			at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
-	dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len);
+	dev_dbg(dev->dev, "wrote 0x%x, to go %zu\n", *dev->buf, dev->buf_len);
 
 	++dev->buf;
 }
@@ -402,7 +402,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
 			dev->msg->flags &= ~I2C_M_RECV_LEN;
 			dev->buf_len += *dev->buf;
 			dev->msg->len = dev->buf_len + 1;
-			dev_dbg(dev->dev, "received block length %d\n",
+			dev_dbg(dev->dev, "received block length %zu\n",
 					 dev->buf_len);
 		} else {
 			/* abort and send the stop by reading one more byte */
@@ -415,7 +415,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
 	if (!dev->use_alt_cmd && dev->buf_len == 1)
 		at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
-	dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len);
+	dev_dbg(dev->dev, "read 0x%x, to go %zu\n", *dev->buf, dev->buf_len);
 
 	++dev->buf;
 }
@@ -622,7 +622,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
 	 * writing the corresponding bit into the Control Register.
 	 */
 
-	dev_dbg(dev->dev, "transfer: %s %d bytes.\n",
+	dev_dbg(dev->dev, "transfer: %s %zu bytes.\n",
 		(dev->msg->flags & I2C_M_RD) ? "read" : "write", dev->buf_len);
 
 	reinit_completion(&dev->cmd_complete);
-- 
GitLab


From 7912e7fef2aebe577f0b46d3cba261f2783c5695 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:21:45 -0700
Subject: [PATCH 0257/1958] xfs: push buffer of flush locked dquot to avoid
 quotacheck deadlock

Reclaim during quotacheck can lead to deadlocks on the dquot flush
lock:

 - Quotacheck populates a local delwri queue with the physical dquot
   buffers.
 - Quotacheck performs the xfs_qm_dqusage_adjust() bulkstat and
   dirties all of the dquots.
 - Reclaim kicks in and attempts to flush a dquot whose buffer is
   already queud on the quotacheck queue. The flush succeeds but
   queueing to the reclaim delwri queue fails as the backing buffer is
   already queued. The flush unlock is now deferred to I/O completion
   of the buffer from the quotacheck queue.
 - The dqadjust bulkstat continues and dirties the recently flushed
   dquot once again.
 - Quotacheck proceeds to the xfs_qm_flush_one() walk which requires
   the flush lock to update the backing buffers with the in-core
   recalculated values. It deadlocks on the redirtied dquot as the
   flush lock was already acquired by reclaim, but the buffer resides
   on the local delwri queue which isn't submitted until the end of
   quotacheck.

This is reproduced by running quotacheck on a filesystem with a
couple million inodes in low memory (512MB-1GB) situations. This is
a regression as of commit 43ff2122e6 ("xfs: on-stack delayed write
buffer lists"), which removed a trylock and buffer I/O submission
from the quotacheck dquot flush sequence.

Quotacheck first resets and collects the physical dquot buffers in a
delwri queue. Then, it traverses the filesystem inodes via bulkstat,
updates the in-core dquots, flushes the corrected dquots to the
backing buffers and finally submits the delwri queue for I/O. Since
the backing buffers are queued across the entire quotacheck
operation, dquot reclaim cannot possibly complete a dquot flush
before quotacheck completes.

Therefore, quotacheck must submit the buffer for I/O in order to
cycle the flush lock and flush the dirty in-core dquot to the
buffer. Add a delwri queue buffer push mechanism to submit an
individual buffer for I/O without losing the delwri queue status and
use it from quotacheck to avoid the deadlock. This restores
quotacheck behavior to as before the regression was introduced.

Reported-by: Martin Svec <martin.svec@zoner.cz>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf.c   | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_buf.h   |  1 +
 fs/xfs/xfs_qm.c    | 28 +++++++++++++++++++++-
 fs/xfs/xfs_trace.h |  1 +
 4 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 16d6a578fc160..a9640b136b32c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2047,6 +2047,66 @@ xfs_buf_delwri_submit(
 	return error;
 }
 
+/*
+ * Push a single buffer on a delwri queue.
+ *
+ * The purpose of this function is to submit a single buffer of a delwri queue
+ * and return with the buffer still on the original queue. The waiting delwri
+ * buffer submission infrastructure guarantees transfer of the delwri queue
+ * buffer reference to a temporary wait list. We reuse this infrastructure to
+ * transfer the buffer back to the original queue.
+ *
+ * Note the buffer transitions from the queued state, to the submitted and wait
+ * listed state and back to the queued state during this call. The buffer
+ * locking and queue management logic between _delwri_pushbuf() and
+ * _delwri_queue() guarantee that the buffer cannot be queued to another list
+ * before returning.
+ */
+int
+xfs_buf_delwri_pushbuf(
+	struct xfs_buf		*bp,
+	struct list_head	*buffer_list)
+{
+	LIST_HEAD		(submit_list);
+	int			error;
+
+	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+	trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
+
+	/*
+	 * Isolate the buffer to a new local list so we can submit it for I/O
+	 * independently from the rest of the original list.
+	 */
+	xfs_buf_lock(bp);
+	list_move(&bp->b_list, &submit_list);
+	xfs_buf_unlock(bp);
+
+	/*
+	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
+	 * the buffer on the wait list with an associated reference. Rather than
+	 * bounce the buffer from a local wait list back to the original list
+	 * after I/O completion, reuse the original list as the wait list.
+	 */
+	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
+
+	/*
+	 * The buffer is now under I/O and wait listed as during typical delwri
+	 * submission. Lock the buffer to wait for I/O completion. Rather than
+	 * remove the buffer from the wait list and release the reference, we
+	 * want to return with the buffer queued to the original list. The
+	 * buffer already sits on the original list with a wait list reference,
+	 * however. If we let the queue inherit that wait list reference, all we
+	 * need to do is reset the DELWRI_Q flag.
+	 */
+	xfs_buf_lock(bp);
+	error = bp->b_error;
+	bp->b_flags |= _XBF_DELWRI_Q;
+	xfs_buf_unlock(bp);
+
+	return error;
+}
+
 int __init
 xfs_buf_init(void)
 {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 1508121f29f29..20721261dae5e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -332,6 +332,7 @@ extern void xfs_buf_delwri_cancel(struct list_head *);
 extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
 extern int xfs_buf_delwri_submit(struct list_head *);
 extern int xfs_buf_delwri_submit_nowait(struct list_head *);
+extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
 
 /* Buffer Daemon Setup Routines */
 extern int xfs_buf_init(void);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5fe6e70b88efe..6ce948c436d5f 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1247,6 +1247,7 @@ xfs_qm_flush_one(
 	struct xfs_dquot	*dqp,
 	void			*data)
 {
+	struct xfs_mount	*mp = dqp->q_mount;
 	struct list_head	*buffer_list = data;
 	struct xfs_buf		*bp = NULL;
 	int			error = 0;
@@ -1257,7 +1258,32 @@ xfs_qm_flush_one(
 	if (!XFS_DQ_IS_DIRTY(dqp))
 		goto out_unlock;
 
-	xfs_dqflock(dqp);
+	/*
+	 * The only way the dquot is already flush locked by the time quotacheck
+	 * gets here is if reclaim flushed it before the dqadjust walk dirtied
+	 * it for the final time. Quotacheck collects all dquot bufs in the
+	 * local delwri queue before dquots are dirtied, so reclaim can't have
+	 * possibly queued it for I/O. The only way out is to push the buffer to
+	 * cycle the flush lock.
+	 */
+	if (!xfs_dqflock_nowait(dqp)) {
+		/* buf is pinned in-core by delwri list */
+		DEFINE_SINGLE_BUF_MAP(map, dqp->q_blkno,
+				      mp->m_quotainfo->qi_dqchunklen);
+		bp = _xfs_buf_find(mp->m_ddev_targp, &map, 1, 0, NULL);
+		if (!bp) {
+			error = -EINVAL;
+			goto out_unlock;
+		}
+		xfs_buf_unlock(bp);
+
+		xfs_buf_delwri_pushbuf(bp, buffer_list);
+		xfs_buf_rele(bp);
+
+		error = -EAGAIN;
+		goto out_unlock;
+	}
+
 	error = xfs_qm_dqflush(dqp, &bp);
 	if (error)
 		goto out_unlock;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7c5a16528d8bb..306d89c552677 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -367,6 +367,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
 DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
 DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
 DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
 DEFINE_BUF_EVENT(xfs_buf_get_uncached);
 DEFINE_BUF_EVENT(xfs_buf_item_relse);
 DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
-- 
GitLab


From d205a7d0ec47d11977882b5e910ad35f7be912b4 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 14 Jun 2017 21:23:05 -0700
Subject: [PATCH 0258/1958] xfs: refactor dir2 leaf readahead shadow buffer
 cleverness

Currently, the dir2 leaf block getdents function uses a complex state
tracking mechanism to create a shadow copy of the block mappings and
then uses the shadow copy to schedule readahead.  Since the read and
readahead functions are perfectly capable of reading the mappings
themselves, we can tear all that out in favor of a simpler function that
simply keeps pushing the readahead window further out.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_dir2_readdir.c | 318 ++++++++++----------------------------
 1 file changed, 84 insertions(+), 234 deletions(-)

diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 20b7a5c6eb2f6..1bc7401ea1b25 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -243,214 +243,100 @@ xfs_dir2_block_getdents(
 	return 0;
 }
 
-struct xfs_dir2_leaf_map_info {
-	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
-	xfs_dablk_t	map_off;	/* last mapped file offset */
-	int		map_size;	/* total entries in *map */
-	int		map_valid;	/* valid entries in *map */
-	int		nmap;		/* mappings to ask xfs_bmapi */
-	xfs_dir2_db_t	curdb;		/* db for current block */
-	int		ra_current;	/* number of read-ahead blks */
-	int		ra_index;	/* *map index for read-ahead */
-	int		ra_offset;	/* map entry offset for ra */
-	int		ra_want;	/* readahead count wanted */
-	struct xfs_bmbt_irec map[];	/* map vector for blocks */
-};
-
+/*
+ * Read a directory block and initiate readahead for blocks beyond that.
+ * We maintain a sliding readahead window of the remaining space in the
+ * buffer rounded up to the nearest block.
+ */
 STATIC int
 xfs_dir2_leaf_readbuf(
 	struct xfs_da_args	*args,
 	size_t			bufsize,
-	struct xfs_dir2_leaf_map_info *mip,
-	xfs_dir2_off_t		*curoff,
-	struct xfs_buf		**bpp,
-	bool			trim_map)
+	xfs_dir2_off_t		*cur_off,
+	xfs_dablk_t		*ra_blk,
+	struct xfs_buf		**bpp)
 {
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_buf		*bp = NULL;
-	struct xfs_bmbt_irec	*map = mip->map;
+	struct xfs_da_geometry	*geo = args->geo;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
+	struct xfs_bmbt_irec	map;
 	struct blk_plug		plug;
+	xfs_dir2_off_t		new_off;
+	xfs_dablk_t		next_ra;
+	xfs_dablk_t		map_off;
+	xfs_dablk_t		last_da;
+	xfs_extnum_t		idx;
+	int			ra_want;
 	int			error = 0;
-	int			length;
-	int			i;
-	int			j;
-	struct xfs_da_geometry	*geo = args->geo;
-
-	/*
-	 * If the caller just finished processing a buffer, it will tell us
-	 * we need to trim that block out of the mapping now it is done.
-	 */
-	if (trim_map) {
-		mip->map_blocks -= geo->fsbcount;
-		/*
-		 * Loop to get rid of the extents for the
-		 * directory block.
-		 */
-		for (i = geo->fsbcount; i > 0; ) {
-			j = min_t(int, map->br_blockcount, i);
-			map->br_blockcount -= j;
-			map->br_startblock += j;
-			map->br_startoff += j;
-			/*
-			 * If mapping is done, pitch it from
-			 * the table.
-			 */
-			if (!map->br_blockcount && --mip->map_valid)
-				memmove(&map[0], &map[1],
-					sizeof(map[0]) * mip->map_valid);
-			i -= j;
-		}
-	}
 
-	/*
-	 * Recalculate the readahead blocks wanted.
-	 */
-	mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1;
-	ASSERT(mip->ra_want >= 0);
-
-	/*
-	 * If we don't have as many as we want, and we haven't
-	 * run out of data blocks, get some more mappings.
-	 */
-	if (1 + mip->ra_want > mip->map_blocks &&
-	    mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) {
-		/*
-		 * Get more bmaps, fill in after the ones
-		 * we already have in the table.
-		 */
-		mip->nmap = mip->map_size - mip->map_valid;
-		error = xfs_bmapi_read(dp, mip->map_off,
-				xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) -
-								mip->map_off,
-				&map[mip->map_valid], &mip->nmap, 0);
-
-		/*
-		 * Don't know if we should ignore this or try to return an
-		 * error.  The trouble with returning errors is that readdir
-		 * will just stop without actually passing the error through.
-		 */
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(args->trans, dp, XFS_DATA_FORK);
 		if (error)
-			goto out;	/* XXX */
-
-		/*
-		 * If we got all the mappings we asked for, set the final map
-		 * offset based on the last bmap value received.  Otherwise,
-		 * we've reached the end.
-		 */
-		if (mip->nmap == mip->map_size - mip->map_valid) {
-			i = mip->map_valid + mip->nmap - 1;
-			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
-		} else
-			mip->map_off = xfs_dir2_byte_to_da(geo,
-							XFS_DIR2_LEAF_OFFSET);
-
-		/*
-		 * Look for holes in the mapping, and eliminate them.  Count up
-		 * the valid blocks.
-		 */
-		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
-			if (map[i].br_startblock == HOLESTARTBLOCK) {
-				mip->nmap--;
-				length = mip->map_valid + mip->nmap - i;
-				if (length)
-					memmove(&map[i], &map[i + 1],
-						sizeof(map[i]) * length);
-			} else {
-				mip->map_blocks += map[i].br_blockcount;
-				i++;
-			}
-		}
-		mip->map_valid += mip->nmap;
+			goto out;
 	}
 
 	/*
-	 * No valid mappings, so no more data blocks.
+	 * Look for mapped directory blocks at or above the current offset.
+	 * Truncate down to the nearest directory block to start the scanning
+	 * operation.
 	 */
-	if (!mip->map_valid) {
-		*curoff = xfs_dir2_da_to_byte(geo, mip->map_off);
+	last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
+	map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off));
+	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map))
 		goto out;
-	}
+	if (map.br_startoff >= last_da)
+		goto out;
+	xfs_trim_extent(&map, map_off, last_da - map_off);
 
-	/*
-	 * Read the directory block starting at the first mapping.
-	 */
-	mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);
-	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
-			map->br_blockcount >= geo->fsbcount ?
-			    XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) :
-			    -1, &bp);
-	/*
-	 * Should just skip over the data block instead of giving up.
-	 */
+	/* Read the directory block of that first mapping. */
+	new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
+	if (new_off > *cur_off)
+		*cur_off = new_off;
+	error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp);
 	if (error)
-		goto out;	/* XXX */
-
-	/*
-	 * Adjust the current amount of read-ahead: we just read a block that
-	 * was previously ra.
-	 */
-	if (mip->ra_current)
-		mip->ra_current -= geo->fsbcount;
+		goto out;
 
 	/*
-	 * Do we need more readahead?
-	 * Each loop tries to process 1 full dir blk; last may be partial.
+	 * Start readahead for the next bufsize's worth of dir data blocks.
+	 * We may have already issued readahead for some of that range;
+	 * ra_blk tracks the last block we tried to read(ahead).
 	 */
+	ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
+	if (*ra_blk >= last_da)
+		goto out;
+	else if (*ra_blk == 0)
+		*ra_blk = map.br_startoff;
+	next_ra = map.br_startoff + geo->fsbcount;
+	if (next_ra >= last_da)
+		goto out_no_ra;
+	if (map.br_blockcount < geo->fsbcount &&
+	    !xfs_iext_get_extent(ifp, ++idx, &map))
+		goto out_no_ra;
+	if (map.br_startoff >= last_da)
+		goto out_no_ra;
+	xfs_trim_extent(&map, next_ra, last_da - next_ra);
+
+	/* Start ra for each dir (not fs) block that has a mapping. */
 	blk_start_plug(&plug);
-	for (mip->ra_index = mip->ra_offset = i = 0;
-	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
-	     i += geo->fsbcount) {
-		ASSERT(mip->ra_index < mip->map_valid);
-		/*
-		 * Read-ahead a contiguous directory block.
-		 */
-		if (i > mip->ra_current &&
-		    (map[mip->ra_index].br_blockcount - mip->ra_offset) >=
-		    geo->fsbcount) {
-			xfs_dir3_data_readahead(dp,
-				map[mip->ra_index].br_startoff + mip->ra_offset,
-				XFS_FSB_TO_DADDR(dp->i_mount,
-					map[mip->ra_index].br_startblock +
-							mip->ra_offset));
-			mip->ra_current = i;
-		}
-
-		/*
-		 * Read-ahead a non-contiguous directory block.  This doesn't
-		 * use our mapping, but this is a very rare case.
-		 */
-		else if (i > mip->ra_current) {
-			xfs_dir3_data_readahead(dp,
-					map[mip->ra_index].br_startoff +
-							mip->ra_offset, -1);
-			mip->ra_current = i;
-		}
-
-		/*
-		 * Advance offset through the mapping table, processing a full
-		 * dir block even if it is fragmented into several extents.
-		 * But stop if we have consumed all valid mappings, even if
-		 * it's not yet a full directory block.
-		 */
-		for (j = 0;
-		     j < geo->fsbcount && mip->ra_index < mip->map_valid;
-		     j += length ) {
-			/*
-			 * The rest of this extent but not more than a dir
-			 * block.
-			 */
-			length = min_t(int, geo->fsbcount - j,
-					map[mip->ra_index].br_blockcount -
-							mip->ra_offset);
-			mip->ra_offset += length;
-
-			/*
-			 * Advance to the next mapping if this one is used up.
-			 */
-			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
-				mip->ra_offset = 0;
-				mip->ra_index++;
+	while (ra_want > 0) {
+		next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount);
+		while (ra_want > 0 &&
+		       next_ra < map.br_startoff + map.br_blockcount) {
+			if (next_ra >= last_da) {
+				*ra_blk = last_da;
+				break;
+			}
+			if (next_ra > *ra_blk) {
+				xfs_dir3_data_readahead(dp, next_ra, -2);
+				*ra_blk = next_ra;
 			}
+			ra_want -= geo->fsbcount;
+			next_ra += geo->fsbcount;
+		}
+		if (!xfs_iext_get_extent(ifp, ++idx, &map)) {
+			*ra_blk = last_da;
+			break;
 		}
 	}
 	blk_finish_plug(&plug);
@@ -458,6 +344,9 @@ xfs_dir2_leaf_readbuf(
 out:
 	*bpp = bp;
 	return error;
+out_no_ra:
+	*ra_blk = last_da;
+	goto out;
 }
 
 /*
@@ -475,14 +364,14 @@ xfs_dir2_leaf_getdents(
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
-	int			error = 0;	/* error return value */
-	int			length;		/* temporary length value */
-	int			byteoff;	/* offset in current block */
-	xfs_dir2_off_t		curoff;		/* current overall offset */
-	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
 	char			*ptr = NULL;	/* pointer to current data */
-	struct xfs_dir2_leaf_map_info *map_info;
 	struct xfs_da_geometry	*geo = args->geo;
+	xfs_dablk_t		rablk = 0;	/* current readahead block */
+	xfs_dir2_off_t		curoff;		/* current overall offset */
+	int			length;		/* temporary length value */
+	int			byteoff;	/* offset in current block */
+	int			lock_mode;
+	int			error = 0;	/* error return value */
 
 	/*
 	 * If the offset is at or past the largest allowed value,
@@ -491,30 +380,12 @@ xfs_dir2_leaf_getdents(
 	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
 		return 0;
 
-	/*
-	 * Set up to bmap a number of blocks based on the caller's
-	 * buffer size, the directory block size, and the filesystem
-	 * block size.
-	 */
-	length = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
-	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
-				(length * sizeof(struct xfs_bmbt_irec)),
-			       KM_SLEEP | KM_NOFS);
-	map_info->map_size = length;
-
 	/*
 	 * Inside the loop we keep the main offset value as a byte offset
 	 * in the directory file.
 	 */
 	curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
 
-	/*
-	 * Force this conversion through db so we truncate the offset
-	 * down to get the start of the data block.
-	 */
-	map_info->map_off = xfs_dir2_db_to_da(geo,
-					      xfs_dir2_byte_to_db(geo, curoff));
-
 	/*
 	 * Loop over directory entries until we reach the end offset.
 	 * Get more blocks and readahead as necessary.
@@ -527,38 +398,18 @@ xfs_dir2_leaf_getdents(
 		 * current buffer, need to get another one.
 		 */
 		if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
-			int	lock_mode;
-			bool	trim_map = false;
-
 			if (bp) {
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(args->trans, bp);
 				bp = NULL;
-				trim_map = true;
 			}
 
 			lock_mode = xfs_ilock_data_map_shared(dp);
-			error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
-						      &curoff, &bp, trim_map);
+			error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
+					&rablk, &bp);
 			xfs_iunlock(dp, lock_mode);
-			if (error || !map_info->map_valid)
+			if (error || !bp)
 				break;
 
-			/*
-			 * Having done a read, we need to set a new offset.
-			 */
-			newoff = xfs_dir2_db_off_to_byte(geo,
-							 map_info->curdb, 0);
-			/*
-			 * Start of the current block.
-			 */
-			if (curoff < newoff)
-				curoff = newoff;
-			/*
-			 * Make sure we're in the right block.
-			 */
-			else if (curoff > newoff)
-				ASSERT(xfs_dir2_byte_to_db(geo, curoff) ==
-				       map_info->curdb);
 			hdr = bp->b_addr;
 			xfs_dir3_data_check(dp, bp);
 			/*
@@ -643,7 +494,6 @@ xfs_dir2_leaf_getdents(
 		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
 	else
 		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
-	kmem_free(map_info);
 	if (bp)
 		xfs_trans_brelse(NULL, bp);
 	return error;
-- 
GitLab


From e1a4e37cc7b665b6804fba812aca2f4d7402c249 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 14 Jun 2017 21:25:57 -0700
Subject: [PATCH 0259/1958] xfs: try to avoid blowing out the transaction
 reservation when bunmaping a shared extent

In a pathological scenario where we are trying to bunmapi a single
extent in which every other block is shared, it's possible that trying
to unmap the entire large extent in a single transaction can generate so
many EFIs that we overflow the transaction reservation.

Therefore, use a heuristic to guess at the number of blocks we can
safely unmap from a reflink file's data fork in an single transaction.
This should prevent problems such as the log head slamming into the tail
and ASSERTs that trigger because we've exceeded the transaction
reservation.

Note that since bunmapi can fail to unmap the entire range, we must also
teach the deferred unmap code to roll into a new transaction whenever we
get low on reservation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
[hch: random edits, all bugs are my fault]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c     | 38 ++++++++++++++++++++++++++++--------
 fs/xfs/libxfs/xfs_bmap.h     |  2 +-
 fs/xfs/libxfs/xfs_refcount.c | 10 +---------
 fs/xfs/libxfs/xfs_refcount.h | 16 +++++++++++++++
 fs/xfs/xfs_bmap_item.c       | 17 ++++++++++++++--
 fs/xfs/xfs_trans.h           |  2 +-
 fs/xfs/xfs_trans_bmap.c      | 11 +++++++++--
 7 files changed, 73 insertions(+), 23 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a7048eafa8e6d..19480ed231a48 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5434,6 +5434,7 @@ __xfs_bunmapi(
 	int			whichfork;	/* data or attribute fork */
 	xfs_fsblock_t		sum;
 	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
+	xfs_fileoff_t		max_len;
 
 	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
 
@@ -5455,6 +5456,16 @@ __xfs_bunmapi(
 	ASSERT(len > 0);
 	ASSERT(nexts >= 0);
 
+	/*
+	 * Guesstimate how many blocks we can unmap without running the risk of
+	 * blowing out the transaction with a mix of EFIs and reflink
+	 * adjustments.
+	 */
+	if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
+	else
+		max_len = len;
+
 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
 	    (error = xfs_iread_extents(tp, ip, whichfork)))
 		return error;
@@ -5499,7 +5510,7 @@ __xfs_bunmapi(
 
 	extno = 0;
 	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
-	       (nexts == 0 || extno < nexts)) {
+	       (nexts == 0 || extno < nexts) && max_len > 0) {
 		/*
 		 * Is the found extent after a hole in which bno lives?
 		 * Just back up to the previous extent, if so.
@@ -5531,6 +5542,15 @@ __xfs_bunmapi(
 		}
 		if (del.br_startoff + del.br_blockcount > bno + 1)
 			del.br_blockcount = bno + 1 - del.br_startoff;
+
+		/* How much can we safely unmap? */
+		if (max_len < del.br_blockcount) {
+			del.br_startoff += del.br_blockcount - max_len;
+			if (!wasdel)
+				del.br_startblock += del.br_blockcount - max_len;
+			del.br_blockcount = max_len;
+		}
+
 		sum = del.br_startblock + del.br_blockcount;
 		if (isrt &&
 		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
@@ -5707,6 +5727,7 @@ __xfs_bunmapi(
 		if (!isrt && wasdel)
 			xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
 
+		max_len -= del.br_blockcount;
 		bno = del.br_startoff - 1;
 nodelete:
 		/*
@@ -6472,15 +6493,16 @@ xfs_bmap_finish_one(
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,
-	xfs_filblks_t			blockcount,
+	xfs_filblks_t			*blockcount,
 	xfs_exntst_t			state)
 {
-	int				error = 0, done;
+	xfs_fsblock_t			firstfsb;
+	int				error = 0;
 
 	trace_xfs_bmap_deferred(tp->t_mountp,
 			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
 			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
-			ip->i_ino, whichfork, startoff, blockcount, state);
+			ip->i_ino, whichfork, startoff, *blockcount, state);
 
 	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
 		return -EFSCORRUPTED;
@@ -6492,13 +6514,13 @@ xfs_bmap_finish_one(
 
 	switch (type) {
 	case XFS_BMAP_MAP:
-		error = xfs_bmapi_remap(tp, ip, startoff, blockcount,
+		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
 				startblock, dfops);
+		*blockcount = 0;
 		break;
 	case XFS_BMAP_UNMAP:
-		error = xfs_bunmapi(tp, ip, startoff, blockcount,
-				XFS_BMAPI_REMAP, 1, &startblock, dfops, &done);
-		ASSERT(done);
+		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
+				XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
 		break;
 	default:
 		ASSERT(0);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index c35a14fa15272..851982a5dfbc5 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -271,7 +271,7 @@ struct xfs_bmap_intent {
 int	xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops,
 		struct xfs_inode *ip, enum xfs_bmap_intent_type type,
 		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
-		xfs_filblks_t blockcount, xfs_exntst_t state);
+		xfs_filblks_t *blockcount, xfs_exntst_t state);
 int	xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
 int	xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 82a38d86ebad8..e17016163542d 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -784,14 +784,6 @@ xfs_refcount_merge_extents(
 }
 
 /*
- * While we're adjusting the refcounts records of an extent, we have
- * to keep an eye on the number of extents we're dirtying -- run too
- * many in a single transaction and we'll exceed the transaction's
- * reservation and crash the fs.  Each record adds 12 bytes to the
- * log (plus any key updates) so we'll conservatively assume 24 bytes
- * per record.  We must also leave space for btree splits on both ends
- * of the range and space for the CUD and a new CUI.
- *
  * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
  * true incorrectly is a shutdown FS; the penalty for guessing false
  * incorrectly is more transaction rolls than might be necessary.
@@ -822,7 +814,7 @@ xfs_refcount_still_have_space(
 	else if (overhead > cur->bc_tp->t_log_res)
 		return false;
 	return  cur->bc_tp->t_log_res - overhead >
-		cur->bc_private.a.priv.refc.nr_ops * 32;
+		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 098dc668ab2c0..eafb9d1f3b374 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp,
 extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
 		xfs_agnumber_t agno);
 
+/*
+ * While we're adjusting the refcounts records of an extent, we have
+ * to keep an eye on the number of extents we're dirtying -- run too
+ * many in a single transaction and we'll exceed the transaction's
+ * reservation and crash the fs.  Each record adds 12 bytes to the
+ * log (plus any key updates) so we'll conservatively assume 32 bytes
+ * per record.  We must also leave space for btree splits on both ends
+ * of the range and space for the CUD and a new CUI.
+ */
+#define XFS_REFCOUNT_ITEM_OVERHEAD	32
+
+static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
+{
+	return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
+}
+
 #endif	/* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index d419d23fa2149..88073910fa5d4 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -396,6 +396,7 @@ xfs_bui_recover(
 	struct xfs_map_extent		*bmap;
 	xfs_fsblock_t			startblock_fsb;
 	xfs_fsblock_t			inode_fsb;
+	xfs_filblks_t			count;
 	bool				op_ok;
 	struct xfs_bud_log_item		*budp;
 	enum xfs_bmap_intent_type	type;
@@ -404,6 +405,7 @@ xfs_bui_recover(
 	struct xfs_trans		*tp;
 	struct xfs_inode		*ip = NULL;
 	struct xfs_defer_ops		dfops;
+	struct xfs_bmbt_irec		irec;
 	xfs_fsblock_t			firstfsb;
 
 	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
@@ -481,13 +483,24 @@ xfs_bui_recover(
 	}
 	xfs_trans_ijoin(tp, ip, 0);
 
+	count = bmap->me_len;
 	error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
 			ip, whichfork, bmap->me_startoff,
-			bmap->me_startblock, bmap->me_len,
-			state);
+			bmap->me_startblock, &count, state);
 	if (error)
 		goto err_dfops;
 
+	if (count > 0) {
+		ASSERT(type == XFS_BMAP_UNMAP);
+		irec.br_startblock = bmap->me_startblock;
+		irec.br_blockcount = count;
+		irec.br_startoff = bmap->me_startoff;
+		irec.br_state = state;
+		error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
+		if (error)
+			goto err_dfops;
+	}
+
 	/* Finish transaction, free inodes. */
 	error = xfs_defer_finish(&tp, &dfops, NULL);
 	if (error)
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a07acbf0bd8a4..08923e59ea9dc 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -275,6 +275,6 @@ int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
 		struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops,
 		enum xfs_bmap_intent_type type, struct xfs_inode *ip,
 		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
-		xfs_filblks_t blockcount, xfs_exntst_t state);
+		xfs_filblks_t *blockcount, xfs_exntst_t state);
 
 #endif	/* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
index 6408e7d7c08c8..14543d93cd4b6 100644
--- a/fs/xfs/xfs_trans_bmap.c
+++ b/fs/xfs/xfs_trans_bmap.c
@@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update(
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,
-	xfs_filblks_t			blockcount,
+	xfs_filblks_t			*blockcount,
 	xfs_exntst_t			state)
 {
 	int				error;
@@ -196,16 +196,23 @@ xfs_bmap_update_finish_item(
 	void				**state)
 {
 	struct xfs_bmap_intent		*bmap;
+	xfs_filblks_t			count;
 	int				error;
 
 	bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+	count = bmap->bi_bmap.br_blockcount;
 	error = xfs_trans_log_finish_bmap_update(tp, done_item, dop,
 			bmap->bi_type,
 			bmap->bi_owner, bmap->bi_whichfork,
 			bmap->bi_bmap.br_startoff,
 			bmap->bi_bmap.br_startblock,
-			bmap->bi_bmap.br_blockcount,
+			&count,
 			bmap->bi_bmap.br_state);
+	if (!error && count > 0) {
+		ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+		bmap->bi_bmap.br_blockcount = count;
+		return -EAGAIN;
+	}
 	kmem_free(bmap);
 	return error;
 }
-- 
GitLab


From ccdab3d6e881649f05110d9098fd248d2753aaf3 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:29:12 -0700
Subject: [PATCH 0260/1958] xfs: define bug_on_assert debug mode sysfs tunable

In DEBUG mode, assert failures unconditionally trigger a kernel BUG.
This is useful in diagnostic situations to panic a system and
collect detailed state information at the time of a failure.

This can also cause problems in cases where DEBUG mode code is
desired but it is preferable not trigger kernel BUGs on assert
failure. For example, during development of new code or during
certain xfstests tests that intentionally cause corruption and test
the kernel for survival (but otherwise may expect to trigger assert
failures).

To provide additional flexibility, create the
<sysfs>/fs/xfs/debug/bug_on_assert tunable to configure assert
failure behavior at runtime. This tunable is only available in DEBUG
mode and is enabled by default to preserve existing default
behavior. When disabled, assert failures in DEBUG mode result in
kernel warnings.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_globals.c |  2 ++
 fs/xfs/xfs_message.c |  5 ++++-
 fs/xfs/xfs_sysctl.h  |  1 +
 fs/xfs/xfs_sysfs.c   | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 687a4b01fc53e..673adf0a40ccf 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -47,4 +47,6 @@ xfs_param_t xfs_params = {
 
 struct xfs_globals xfs_globals = {
 	.log_recovery_delay	=	0,	/* no delay by default */
+	.bug_on_assert		=	true,	/* historical default in DEBUG
+						 * mode */
 };
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 11792d888e4e7..e68bd1050eab5 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -110,7 +110,10 @@ assfail(char *expr, char *file, int line)
 {
 	xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
 		expr, file, line);
-	BUG();
+	if (xfs_globals.bug_on_assert)
+		BUG();
+	else
+		WARN_ON(1);
 }
 
 void
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 984a3499cfe38..82afee005140a 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -95,6 +95,7 @@ extern xfs_param_t	xfs_params;
 
 struct xfs_globals {
 	int	log_recovery_delay;	/* log recovery delay (secs) */
+	bool	bug_on_assert;		/* BUG() the kernel on assert failure */
 };
 extern struct xfs_globals	xfs_globals;
 
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 80ac15fb96381..ec6e0e2f95d66 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -145,6 +145,38 @@ struct kobj_type xfs_mp_ktype = {
 #ifdef DEBUG
 /* debug */
 
+STATIC ssize_t
+bug_on_assert_store(
+	struct kobject		*kobject,
+	const char		*buf,
+	size_t			count)
+{
+	int			ret;
+	int			val;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val == 1)
+		xfs_globals.bug_on_assert = true;
+	else if (val == 0)
+		xfs_globals.bug_on_assert = false;
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+STATIC ssize_t
+bug_on_assert_show(
+	struct kobject		*kobject,
+	char			*buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0);
+}
+XFS_SYSFS_ATTR_RW(bug_on_assert);
+
 STATIC ssize_t
 log_recovery_delay_store(
 	struct kobject	*kobject,
@@ -176,6 +208,7 @@ log_recovery_delay_show(
 XFS_SYSFS_ATTR_RW(log_recovery_delay);
 
 static struct attribute *xfs_dbg_attrs[] = {
+	ATTR_LIST(bug_on_assert),
 	ATTR_LIST(log_recovery_delay),
 	NULL,
 };
-- 
GitLab


From 1040960efaabb8e6c87633c7becbb51fc99d4b9b Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:29:13 -0700
Subject: [PATCH 0261/1958] xfs: define fatal assert build time tunable

While configurable at runtime, the DEBUG mode assert failure
behavior is usually either desired or not for a particular
situation. For example, developers using kernel modules may prefer
for fatal asserts to remain disabled across module reloads while QE
engineers doing broad regression testing may prefer to have fatal
asserts enabled on boot to facilitate data collection for bug
reports.

To provide a compromise/convenience for developers, create a Kconfig
option that sets the default value of the DEBUG mode 'bug_on_assert'
sysfs tunable. The default behavior remains to trigger kernel BUGs
on assert failures to preserve existing behavior across kernel
configuration updates with DEBUG mode enabled.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Kconfig       | 13 +++++++++++++
 fs/xfs/xfs.h         |  4 ++++
 fs/xfs/xfs_globals.c |  7 +++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 35faf128f36d8..1b98cfa342ab3 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -96,3 +96,16 @@ config XFS_DEBUG
 	  not useful unless you are debugging a particular problem.
 
 	  Say N unless you are an XFS developer, or you play one on TV.
+
+config XFS_ASSERT_FATAL
+	bool "XFS fatal asserts"
+	default y
+	depends on XFS_FS && XFS_DEBUG
+	help
+	  Set the default DEBUG mode ASSERT failure behavior.
+
+	  Say Y here to cause DEBUG mode ASSERT failures to result in fatal
+	  errors that BUG() the kernel by default. If you say N, ASSERT failures
+	  result in warnings.
+
+	  This behavior can be modified at runtime via sysfs.
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index a742c47f7d5a0..80cd0fd867832 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -24,6 +24,10 @@
 #define XFS_BUF_LOCK_TRACKING 1
 #endif
 
+#ifdef CONFIG_XFS_ASSERT_FATAL
+#define XFS_ASSERT_FATAL 1
+#endif
+
 #ifdef CONFIG_XFS_WARN
 #define XFS_WARN 1
 #endif
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 673adf0a40ccf..3e1cc3001bcbf 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -47,6 +47,9 @@ xfs_param_t xfs_params = {
 
 struct xfs_globals xfs_globals = {
 	.log_recovery_delay	=	0,	/* no delay by default */
-	.bug_on_assert		=	true,	/* historical default in DEBUG
-						 * mode */
+#ifdef XFS_ASSERT_FATAL
+	.bug_on_assert		=	true,	/* assert failures BUG() */
+#else
+	.bug_on_assert		=	false,	/* assert failures WARN() */
+#endif
 };
-- 
GitLab


From 7d2d5653460443e0586bd7d2e07811b4f4095e36 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:29:48 -0700
Subject: [PATCH 0262/1958] xfs: separate shutdown from ticket reservation
 print helper

xlog_print_tic_res() pre-dates delayed logging and the committed
items list (CIL) and thus retains some factoring warts, such as hard
coded function names in the output and the fact that it induces a
shutdown.

In preparation for more detailed logging of regular transaction
overrun situations, refactor xlog_print_tic_res() to be slightly
more generic. Reword some of the warning messages and pull the
shutdown into the callers.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log.c     | 12 ++++++------
 fs/xfs/xfs_log_cil.c |  4 +++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3731f13f63e98..8b283f7cefeaf 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2024,7 +2024,7 @@ xlog_print_tic_res(
 	};
 #undef REG_TYPE_STR
 
-	xfs_warn(mp, "xlog_write: reservation summary:");
+	xfs_warn(mp, "ticket reservation summary:");
 	xfs_warn(mp, "  unit res    = %d bytes",
 		 ticket->t_unit_res);
 	xfs_warn(mp, "  current res = %d bytes",
@@ -2045,10 +2045,6 @@ xlog_print_tic_res(
 			    "bad-rtype" : res_type_str[r_type]),
 			    ticket->t_res_arr[i].r_len);
 	}
-
-	xfs_alert_tag(mp, XFS_PTAG_LOGRES,
-		"xlog_write: reservation ran out. Need to up reservation");
-	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 }
 
 /*
@@ -2321,8 +2317,12 @@ xlog_write(
 	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
 		ticket->t_curr_res -= sizeof(xlog_op_header_t);
 
-	if (ticket->t_curr_res < 0)
+	if (ticket->t_curr_res < 0) {
+		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+		     "ctx ticket reservation ran out. Need to up reservation");
 		xlog_print_tic_res(log->l_mp, ticket);
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+	}
 
 	index = 0;
 	lv = log_vector;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 82f1cbcc4de15..52fe04229aa66 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -987,8 +987,10 @@ xfs_log_commit_cil(
 	xlog_cil_insert_items(log, tp);
 
 	/* check we didn't blow the reservation */
-	if (tp->t_ticket->t_curr_res < 0)
+	if (tp->t_ticket->t_curr_res < 0) {
 		xlog_print_tic_res(mp, tp->t_ticket);
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+	}
 
 	tp->t_commit_lsn = cil->xc_ctx->sequence;
 	if (commit_lsn)
-- 
GitLab


From e2f2342639a414b60de3876a8b437eac2b795dbe Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:29:49 -0700
Subject: [PATCH 0263/1958] xfs: refactor xlog_cil_insert_items() to facilitate
 transaction dump

Transaction reservation overrun detection currently occurs too late
to print useful information about the offending transaction.
Ideally, the transaction data is printed before the associated log
items are moved from the transaction to the CIL, which occurs in
xlog_cil_insert_items(), such that details of the items logged by
the transaction are available for analysis.

Refactor xlog_cil_insert_items() to facilitate moving tx overrun
detection to this function. Update the function to track each bit of
extra log reservation stolen from the transaction (i.e., such as for
the CIL context ticket) and perform the log item migration as the
last operation before the CIL lock is released. This creates a
context where the transaction reservation consumption has been fully
calculated when the log items are moved to the CIL. This patch makes
no functional changes.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_cil.c | 62 +++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 30 deletions(-)

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 52fe04229aa66..8de3baa7d3f0f 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -410,6 +410,7 @@ xlog_cil_insert_items(
 	int			len = 0;
 	int			diff_iovecs = 0;
 	int			iclog_space;
+	int			iovhdr_res = 0, split_res = 0, ctx_res = 0;
 
 	ASSERT(tp);
 
@@ -419,30 +420,11 @@ xlog_cil_insert_items(
 	 */
 	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
 
-	/*
-	 * Now (re-)position everything modified at the tail of the CIL.
-	 * We do this here so we only need to take the CIL lock once during
-	 * the transaction commit.
-	 */
 	spin_lock(&cil->xc_cil_lock);
-	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-		struct xfs_log_item	*lip = lidp->lid_item;
-
-		/* Skip items which aren't dirty in this transaction. */
-		if (!(lidp->lid_flags & XFS_LID_DIRTY))
-			continue;
-
-		/*
-		 * Only move the item if it isn't already at the tail. This is
-		 * to prevent a transient list_empty() state when reinserting
-		 * an item that is already the only item in the CIL.
-		 */
-		if (!list_is_last(&lip->li_cil, &cil->xc_cil))
-			list_move_tail(&lip->li_cil, &cil->xc_cil);
-	}
 
 	/* account for space used by new iovec headers  */
-	len += diff_iovecs * sizeof(xlog_op_header_t);
+	iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
+	len += iovhdr_res;
 	ctx->nvecs += diff_iovecs;
 
 	/* attach the transaction to the CIL if it has any busy extents */
@@ -457,27 +439,47 @@ xlog_cil_insert_items(
 	 * during the transaction commit.
 	 */
 	if (ctx->ticket->t_curr_res == 0) {
-		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
-		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
+		ctx_res = ctx->ticket->t_unit_res;
+		ctx->ticket->t_curr_res = ctx_res;
+		tp->t_ticket->t_curr_res -= ctx_res;
 	}
 
 	/* do we need space for more log record headers? */
 	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
 	if (len > 0 && (ctx->space_used / iclog_space !=
 				(ctx->space_used + len) / iclog_space)) {
-		int hdrs;
-
-		hdrs = (len + iclog_space - 1) / iclog_space;
+		split_res = (len + iclog_space - 1) / iclog_space;
 		/* need to take into account split region headers, too */
-		hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
-		ctx->ticket->t_unit_res += hdrs;
-		ctx->ticket->t_curr_res += hdrs;
-		tp->t_ticket->t_curr_res -= hdrs;
+		split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+		ctx->ticket->t_unit_res += split_res;
+		ctx->ticket->t_curr_res += split_res;
+		tp->t_ticket->t_curr_res -= split_res;
 		ASSERT(tp->t_ticket->t_curr_res >= len);
 	}
 	tp->t_ticket->t_curr_res -= len;
 	ctx->space_used += len;
 
+	/*
+	 * Now (re-)position everything modified at the tail of the CIL.
+	 * We do this here so we only need to take the CIL lock once during
+	 * the transaction commit.
+	 */
+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+		struct xfs_log_item	*lip = lidp->lid_item;
+
+		/* Skip items which aren't dirty in this transaction. */
+		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+			continue;
+
+		/*
+		 * Only move the item if it isn't already at the tail. This is
+		 * to prevent a transient list_empty() state when reinserting
+		 * an item that is already the only item in the CIL.
+		 */
+		if (!list_is_last(&lip->li_cil, &cil->xc_cil))
+			list_move_tail(&lip->li_cil, &cil->xc_cil);
+	}
+
 	spin_unlock(&cil->xc_cil_lock);
 }
 
-- 
GitLab


From d4ca1d550d052accec85ae26ac5b9d3d8b8f81f1 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:29:50 -0700
Subject: [PATCH 0264/1958] xfs: dump transaction usage details on log
 reservation overrun

If a transaction log reservation overrun occurs, the ticket data
associated with the reservation is dumped in xfs_log_commit_cil().
This occurs long after the transaction items and details have been
removed from the transaction and effectively lost. This limited set
of ticket data provides very little information to support debugging
transaction overruns based on the typical report.

To improve transaction log reservation overrun reporting, create a
helper to dump transaction details such as log items, log vector
data, etc., as well as the underlying ticket data for the
transaction. Move the overrun detection from xfs_log_commit_cil() to
xlog_cil_insert_items() so it occurs prior to migration of the
logged items to the CIL. Call the new helper such that it is able to
dump this transaction data before it is lost.

Also, warn on overrun to provide callstack context for the offending
transaction and include a few additional messages from
xlog_cil_insert_items() to display the reservation consumed locally
for overhead such as log vector headers, split region headers and
the context ticket. This provides a complete general breakdown of
the reservation consumption of a transaction when/if it happens to
overrun the reservation.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log.c      | 49 +++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_cil.c  | 24 +++++++++++++++------
 fs/xfs/xfs_log_priv.h |  1 +
 3 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8b283f7cefeaf..c8d0481033479 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2047,6 +2047,55 @@ xlog_print_tic_res(
 	}
 }
 
+/*
+ * Print a summary of the transaction.
+ */
+void
+xlog_print_trans(
+	struct xfs_trans		*tp)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_log_item_desc	*lidp;
+
+	/* dump core transaction and ticket info */
+	xfs_warn(mp, "transaction summary:");
+	xfs_warn(mp, "  flags	= 0x%x", tp->t_flags);
+
+	xlog_print_tic_res(mp, tp->t_ticket);
+
+	/* dump each log item */
+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+		struct xfs_log_item	*lip = lidp->lid_item;
+		struct xfs_log_vec	*lv = lip->li_lv;
+		struct xfs_log_iovec	*vec;
+		int			i;
+
+		xfs_warn(mp, "log item: ");
+		xfs_warn(mp, "  type	= 0x%x", lip->li_type);
+		xfs_warn(mp, "  flags	= 0x%x", lip->li_flags);
+		if (!lv)
+			continue;
+		xfs_warn(mp, "  niovecs	= %d", lv->lv_niovecs);
+		xfs_warn(mp, "  size	= %d", lv->lv_size);
+		xfs_warn(mp, "  bytes	= %d", lv->lv_bytes);
+		xfs_warn(mp, "  buf len	= %d", lv->lv_buf_len);
+
+		/* dump each iovec for the log item */
+		vec = lv->lv_iovecp;
+		for (i = 0; i < lv->lv_niovecs; i++) {
+			int dumplen = min(vec->i_len, 32);
+
+			xfs_warn(mp, "  iovec[%d]", i);
+			xfs_warn(mp, "    type	= 0x%x", vec->i_type);
+			xfs_warn(mp, "    len	= %d", vec->i_len);
+			xfs_warn(mp, "    first %d bytes of iovec[%d]:", dumplen, i);
+			xfs_hex_dump(vec->i_addr, dumplen);;
+
+			vec++;
+		}
+	}
+}
+
 /*
  * Calculate the potential space needed by the log vector.  Each region gets
  * its own xlog_op_header_t and may need to be double word aligned.
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 8de3baa7d3f0f..04b389210cc72 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -459,6 +459,21 @@ xlog_cil_insert_items(
 	tp->t_ticket->t_curr_res -= len;
 	ctx->space_used += len;
 
+	/*
+	 * If we've overrun the reservation, dump the tx details before we move
+	 * the log items. Shutdown is imminent...
+	 */
+	if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
+		xfs_warn(log->l_mp, "Transaction log reservation overrun:");
+		xfs_warn(log->l_mp,
+			 "  log items: %d bytes (iov hdrs: %d bytes)",
+			 len, iovhdr_res);
+		xfs_warn(log->l_mp, "  split region headers: %d bytes",
+			 split_res);
+		xfs_warn(log->l_mp, "  ctx ticket: %d bytes", ctx_res);
+		xlog_print_trans(tp);
+	}
+
 	/*
 	 * Now (re-)position everything modified at the tail of the CIL.
 	 * We do this here so we only need to take the CIL lock once during
@@ -481,6 +496,9 @@ xlog_cil_insert_items(
 	}
 
 	spin_unlock(&cil->xc_cil_lock);
+
+	if (tp->t_ticket->t_curr_res < 0)
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
 }
 
 static void
@@ -988,12 +1006,6 @@ xfs_log_commit_cil(
 
 	xlog_cil_insert_items(log, tp);
 
-	/* check we didn't blow the reservation */
-	if (tp->t_ticket->t_curr_res < 0) {
-		xlog_print_tic_res(mp, tp->t_ticket);
-		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
-	}
-
 	tp->t_commit_lsn = cil->xc_ctx->sequence;
 	if (commit_lsn)
 		*commit_lsn = tp->t_commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c2604a5366f27..62113a5d25044 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -456,6 +456,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 }
 
 void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+void	xlog_print_trans(struct xfs_trans *);
 int
 xlog_write(
 	struct xlog		*log,
-- 
GitLab


From 3398a4005f0c8ced67a9071475562d435d88b7a6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 14 Jun 2017 21:30:44 -0700
Subject: [PATCH 0265/1958] xfs: remove XFS_HSIZE

XFS_HSIZE is an extremly confusing way to calculate the size of handle_t.
Given that handle_t always only had two sizes, and one of them isn't
even covered by XFS_HSIZE to start with just remove the macro and use
a constant sizeof expression.

Note that XFS_HSIZE isn't used in xfsprogs, xfsdump or xfstests either.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_fs.h | 4 ----
 fs/xfs/xfs_ioctl.c     | 3 +--
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 095bdf049a3fe..a9aa13e66046f 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -446,10 +446,6 @@ typedef struct xfs_handle {
 } xfs_handle_t;
 #define ha_fsid ha_u._ha_fsid
 
-#define XFS_HSIZE(handle)	(((char *) &(handle).ha_fid.fid_pad	 \
-				 - (char *) &(handle))			  \
-				 + (handle).ha_fid.fid_len)
-
 /*
  * Structure passed to XFS_IOC_SWAPEXT
  */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6190697603c94..f6af76975bc87 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -120,8 +120,7 @@ xfs_find_handle(
 		handle.ha_fid.fid_pad = 0;
 		handle.ha_fid.fid_gen = inode->i_generation;
 		handle.ha_fid.fid_ino = ip->i_ino;
-
-		hsize = XFS_HSIZE(handle);
+		hsize = sizeof(xfs_handle_t);
 	}
 
 	error = -EFAULT;
-- 
GitLab


From f990fc5ad13b8fecdf154f36c252d49d7e9bdfab Mon Sep 17 00:00:00 2001
From: Shan Hai <shan.hai@oracle.com>
Date: Wed, 14 Jun 2017 21:35:07 -0700
Subject: [PATCH 0266/1958] xfs: remove lsn relevant fields from xfs_trans
 structure and its users

The t_lsn is not used anymore and the t_commit_lsn is used as a tmp
storage for the checkpoint sequence number only in the current code.

And the start/commit lsn are tracked as a transaction group tag in
the xfs_cil_ctx instead of a single transaction, so remove them from
the xfs_trans structure and their users to match with the design.

Signed-off-by: Shan Hai <shan.hai@oracle.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_cil.c |  7 ++++---
 fs/xfs/xfs_trace.h   | 19 -------------------
 fs/xfs/xfs_trans.h   |  4 ----
 3 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 04b389210cc72..fbe72b134bef2 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -993,6 +993,7 @@ xfs_log_commit_cil(
 {
 	struct xlog		*log = mp->m_log;
 	struct xfs_cil		*cil = log->l_cilp;
+	xfs_lsn_t		xc_commit_lsn;
 
 	/*
 	 * Do all necessary memory allocation before we lock the CIL.
@@ -1006,9 +1007,9 @@ xfs_log_commit_cil(
 
 	xlog_cil_insert_items(log, tp);
 
-	tp->t_commit_lsn = cil->xc_ctx->sequence;
+	xc_commit_lsn = cil->xc_ctx->sequence;
 	if (commit_lsn)
-		*commit_lsn = tp->t_commit_lsn;
+		*commit_lsn = xc_commit_lsn;
 
 	xfs_log_done(mp, tp->t_ticket, NULL, regrant);
 	xfs_trans_unreserve_and_mod_sb(tp);
@@ -1024,7 +1025,7 @@ xfs_log_commit_cil(
 	 * the log items. This affects (at least) processing of stale buffers,
 	 * inodes and EFIs.
 	 */
-	xfs_trans_free_items(tp, tp->t_commit_lsn, false);
+	xfs_trans_free_items(tp, xc_commit_lsn, false);
 
 	xlog_cil_push_background(log);
 
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 306d89c552677..a560bfd392c10 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1491,25 +1491,6 @@ TRACE_EVENT(xfs_extent_busy_trim,
 		  __entry->tlen)
 );
 
-TRACE_EVENT(xfs_trans_commit_lsn,
-	TP_PROTO(struct xfs_trans *trans),
-	TP_ARGS(trans),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(struct xfs_trans *, tp)
-		__field(xfs_lsn_t, lsn)
-	),
-	TP_fast_assign(
-		__entry->dev = trans->t_mountp->m_super->s_dev;
-		__entry->tp = trans;
-		__entry->lsn = trans->t_commit_lsn;
-	),
-	TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->tp,
-		  __entry->lsn)
-);
-
 TRACE_EVENT(xfs_agf,
 	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
 		 unsigned long caller_ip),
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 08923e59ea9dc..82bb61e330d91 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -105,10 +105,6 @@ typedef struct xfs_trans {
 	unsigned int		t_rtx_res;	/* # of rt extents resvd */
 	unsigned int		t_rtx_res_used;	/* # of resvd rt extents used */
 	struct xlog_ticket	*t_ticket;	/* log mgr ticket */
-	xfs_lsn_t		t_lsn;		/* log seq num of start of
-						 * transaction. */
-	xfs_lsn_t		t_commit_lsn;	/* log seq num of end of
-						 * transaction. */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
 	unsigned int		t_flags;	/* misc flags */
-- 
GitLab


From 0cbe48cc5814ae26e4058c5d666914c86a4a6b5d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 14 Jun 2017 21:35:34 -0700
Subject: [PATCH 0267/1958] xfs: avoid harmless gcc-7 warnings

gcc-7 flags the use of integer math inside of a condition
as a potential bug:

fs/xfs/xfs_bmap_util.c: In function 'xfs_swap_extents_check_format':
fs/xfs/xfs_bmap_util.c:1619:8: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context]
fs/xfs/xfs_bmap_util.c:1629:8: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context]

There is already a helper function for testing the di_forkoff
field for zero, so let's use that instead to shut up the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 9e3cc2146d5b0..7493a64f5b998 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1617,7 +1617,7 @@ xfs_swap_extents_check_format(
 	 * extent format...
 	 */
 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
-		if (XFS_IFORK_BOFF(ip) &&
+		if (XFS_IFORK_Q(ip) &&
 		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
 			return -EINVAL;
 		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
@@ -1627,7 +1627,7 @@ xfs_swap_extents_check_format(
 
 	/* Reciprocal target->temp btree format checks */
 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
-		if (XFS_IFORK_BOFF(tip) &&
+		if (XFS_IFORK_Q(tip) &&
 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
 			return -EINVAL;
 		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
-- 
GitLab


From 79e641ce29cfae5b8fc55fb77ac62d11d2d849c0 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:35:35 -0700
Subject: [PATCH 0268/1958] xfs: release bli from transaction properly on fs
 shutdown

If a filesystem shutdown occurs with a buffer log item in the CIL
and a log force occurs, the ->iop_unpin() handler is generally
expected to tear down the bli properly. This entails freeing the bli
memory and releasing the associated hold on the buffer so it can be
released and the filesystem unmounted.

If this sequence occurs while ->bli_refcount is elevated (i.e.,
another transaction is open and attempting to modify the buffer),
however, ->iop_unpin() may not be responsible for releasing the bli.
Instead, the transaction may release the final ->bli_refcount
reference and thus xfs_trans_brelse() is responsible for tearing
down the bli.

While xfs_trans_brelse() does drop the reference count, it only
attempts to release the bli if it is clean (i.e., not in the
CIL/AIL). If the filesystem is shutdown and the bli is sitting dirty
in the CIL as noted above, this ends up skipping the last
opportunity to release the bli. In turn, this leaves the hold on the
buffer and causes an unmount hang. This can be reproduced by running
generic/388 in repetition.

Update xfs_trans_brelse() to handle this shutdown corner case
correctly. If the final bli reference is dropped and the filesystem
is shutdown, remove the bli from the AIL (if necessary) and release
the bli to drop the buffer hold and ensure an unmount does not hang.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_trans_buf.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 8ee29ca132dc1..86987d823d764 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -356,6 +356,7 @@ xfs_trans_brelse(xfs_trans_t	*tp,
 		 xfs_buf_t	*bp)
 {
 	xfs_buf_log_item_t	*bip;
+	int			freed;
 
 	/*
 	 * Default to a normal brelse() call if the tp is NULL.
@@ -419,16 +420,22 @@ xfs_trans_brelse(xfs_trans_t	*tp,
 	/*
 	 * Drop our reference to the buf log item.
 	 */
-	atomic_dec(&bip->bli_refcount);
+	freed = atomic_dec_and_test(&bip->bli_refcount);
 
 	/*
-	 * If the buf item is not tracking data in the log, then
-	 * we must free it before releasing the buffer back to the
-	 * free pool.  Before releasing the buffer to the free pool,
-	 * clear the transaction pointer in b_fsprivate2 to dissolve
-	 * its relation to this transaction.
+	 * If the buf item is not tracking data in the log, then we must free it
+	 * before releasing the buffer back to the free pool.
+	 *
+	 * If the fs has shutdown and we dropped the last reference, it may fall
+	 * on us to release a (possibly dirty) bli if it never made it to the
+	 * AIL (e.g., the aborted unpin already happened and didn't release it
+	 * due to our reference). Since we're already shutdown and need xa_lock,
+	 * just force remove from the AIL and release the bli here.
 	 */
-	if (!xfs_buf_item_dirty(bip)) {
+	if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
+		xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_buf_item_relse(bp);
+	} else if (!xfs_buf_item_dirty(bip)) {
 /***
 		ASSERT(bp->b_pincount == 0);
 ***/
-- 
GitLab


From 3d4b4a3e30ae7a949c31e1e10268a3da4723d290 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Jun 2017 21:35:35 -0700
Subject: [PATCH 0269/1958] xfs: remove bli from AIL before release on
 transaction abort

When a buffer is modified, logged and committed, it ultimately ends
up sitting on the AIL with a dirty bli waiting for metadata
writeback. If another transaction locks and invalidates the buffer
(freeing an inode chunk, for example) in the meantime, the bli is
flagged as stale, the dirty state is cleared and the bli remains in
the AIL.

If a shutdown occurs before the transaction that has invalidated the
buffer is committed, the transaction is ultimately aborted. The log
items are flagged as such and ->iop_unlock() handles the aborted
items. Because the bli is clean (due to the invalidation),
->iop_unlock() unconditionally releases it. The log item may still
reside in the AIL, however, which means the I/O completion handler
may still run and attempt to access it. This results in assert
failure due to the release of the bli while still present in the AIL
and a subsequent NULL dereference and panic in the buffer I/O
completion handling. This can be reproduced by running generic/388
in repetition.

To avoid this problem, update xfs_buf_item_unlock() to first check
whether the bli is aborted and if so, remove it from the AIL before
it is released. This ensures that the bli is no longer accessed
during the shutdown sequence after it has been freed.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf_item.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 0306168af332c..f6a8422e9562c 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -636,20 +636,23 @@ xfs_buf_item_unlock(
 
 	/*
 	 * Clean buffers, by definition, cannot be in the AIL. However, aborted
-	 * buffers may be dirty and hence in the AIL. Therefore if we are
-	 * aborting a buffer and we've just taken the last refernce away, we
-	 * have to check if it is in the AIL before freeing it. We need to free
-	 * it in this case, because an aborted transaction has already shut the
-	 * filesystem down and this is the last chance we will have to do so.
+	 * buffers may be in the AIL regardless of dirty state. An aborted
+	 * transaction that invalidates a buffer already in the AIL may have
+	 * marked it stale and cleared the dirty state, for example.
+	 *
+	 * Therefore if we are aborting a buffer and we've just taken the last
+	 * reference away, we have to check if it is in the AIL before freeing
+	 * it. We need to free it in this case, because an aborted transaction
+	 * has already shut the filesystem down and this is the last chance we
+	 * will have to do so.
 	 */
 	if (atomic_dec_and_test(&bip->bli_refcount)) {
-		if (clean)
-			xfs_buf_item_relse(bp);
-		else if (aborted) {
+		if (aborted) {
 			ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
 			xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
 			xfs_buf_item_relse(bp);
-		}
+		} else if (clean)
+			xfs_buf_item_relse(bp);
 	}
 
 	if (!(flags & XFS_BLI_HOLD))
-- 
GitLab


From e393f674c5fedced24432138a8fc40c3d7a628b8 Mon Sep 17 00:00:00 2001
From: Luis Oliveira <Luis.Oliveira@synopsys.com>
Date: Wed, 14 Jun 2017 11:43:21 +0100
Subject: [PATCH 0270/1958] i2c: designware: Cleaning and comment style fixes.

The purpose of this commit is to fix some comments and styling in the
existing code due to the need of reuse this code. What is being made
here is:

- Sorted the headers files
- Corrected some comments style (capital letters, lowcase i2c)
- Reverse tree in the variables declaration
- Add/remove empty lines and tabs where needed
- Fix of misspelled word "endianness" and "transferred"
- Replaced the return variable "r" with the more standard "ret"

The value of this, besides the rules of coding style, is because I
will use this code after and it will make my future patch a lot bigger and
complicated to review. The work here won't bring any additional work to
backported fixes because is just style and reordering.

Signed-off-by: Luis Oliveira <lolivei@synopsys.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-core.c    | 78 ++++++++++-----------
 drivers/i2c/busses/i2c-designware-core.h    |  2 +-
 drivers/i2c/busses/i2c-designware-platdrv.c | 43 ++++++------
 3 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index 3c41995634c2f..d9cab6dc28123 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -21,17 +21,17 @@
  * ----------------------------------------------------------------------------
  *
  */
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/pm_runtime.h>
-#include <linux/delay.h>
 #include <linux/module.h>
-#include "i2c-designware-core.h"
+#include <linux/pm_runtime.h>
 
+#include "i2c-designware-core.h"
 /*
  * Registers offset
  */
@@ -98,7 +98,7 @@
 
 #define DW_IC_ERR_TX_ABRT	0x1
 
-#define DW_IC_TAR_10BITADDR_MASTER BIT(12)
+#define DW_IC_TAR_10BITADDR_MASTER		BIT(12)
 
 #define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH	(BIT(2) | BIT(3))
 #define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
@@ -113,10 +113,10 @@
 #define TIMEOUT			20 /* ms */
 
 /*
- * hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
+ * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
  *
- * only expected abort codes are listed here
- * refer to the datasheet for the full list
+ * Only expected abort codes are listed here,
+ * refer to the datasheet for the full list.
  */
 #define ABRT_7B_ADDR_NOACK	0
 #define ABRT_10ADDR1_NOACK	1
@@ -318,7 +318,7 @@ static void i2c_dw_release_lock(struct dw_i2c_dev *dev)
 }
 
 /**
- * i2c_dw_init() - initialize the designware i2c master hardware
+ * i2c_dw_init() - Initialize the designware I2C master hardware
  * @dev: device private data
  *
  * This functions configures and enables the I2C master.
@@ -344,8 +344,8 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
 		/* Configure register access mode 16bit */
 		dev->flags |= ACCESS_16BIT;
 	} else if (reg != DW_IC_COMP_TYPE_VALUE) {
-		dev_err(dev->dev, "Unknown Synopsys component type: "
-			"0x%08x\n", reg);
+		dev_err(dev->dev,
+			"Unknown Synopsys component type: 0x%08x\n", reg);
 		i2c_dw_release_lock(dev);
 		return -ENODEV;
 	}
@@ -355,7 +355,7 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
 	/* Disable the adapter */
 	__i2c_dw_enable_and_wait(dev, false);
 
-	/* set standard and fast speed deviders for high/low periods */
+	/* Set standard and fast speed deviders for high/low periods */
 
 	sda_falling_time = dev->sda_falling_time ?: 300; /* ns */
 	scl_falling_time = dev->scl_falling_time ?: 300; /* ns */
@@ -444,7 +444,7 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
 	dw_writel(dev, dev->tx_fifo_depth / 2, DW_IC_TX_TL);
 	dw_writel(dev, 0, DW_IC_RX_TL);
 
-	/* configure the i2c master */
+	/* Configure the I2C master */
 	dw_writel(dev, dev->master_cfg , DW_IC_CON);
 
 	i2c_dw_release_lock(dev);
@@ -480,7 +480,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 	/* Disable the adapter */
 	__i2c_dw_enable_and_wait(dev, false);
 
-	/* if the slave address is ten bit address, enable 10BITADDR */
+	/* If the slave address is ten bit address, enable 10BITADDR */
 	ic_con = dw_readl(dev, DW_IC_CON);
 	if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) {
 		ic_con |= DW_IC_CON_10BITADDR_MASTER;
@@ -503,7 +503,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 	 */
 	dw_writel(dev, msgs[dev->msg_write_idx].addr | ic_tar, DW_IC_TAR);
 
-	/* enforce disabled interrupts (due to HW issues) */
+	/* Enforce disabled interrupts (due to HW issues) */
 	i2c_dw_disable_int(dev);
 
 	/* Enable the adapter */
@@ -537,9 +537,9 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
 		u32 flags = msgs[dev->msg_write_idx].flags;
 
 		/*
-		 * if target address has changed, we need to
-		 * reprogram the target address in the i2c
-		 * adapter when we are done with this transfer
+		 * If target address has changed, we need to
+		 * reprogram the target address in the I2C
+		 * adapter when we are done with this transfer.
 		 */
 		if (msgs[dev->msg_write_idx].addr != addr) {
 			dev_err(dev->dev,
@@ -599,7 +599,7 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
 
 			if (msgs[dev->msg_write_idx].flags & I2C_M_RD) {
 
-				/* avoid rx buffer overrun */
+				/* Avoid rx buffer overrun */
 				if (dev->rx_outstanding >= dev->rx_fifo_depth)
 					break;
 
@@ -728,7 +728,7 @@ static int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev)
 }
 
 /*
- * Prepare controller for a transaction and call i2c_dw_xfer_msg
+ * Prepare controller for a transaction and call i2c_dw_xfer_msg.
  */
 static int
 i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
@@ -759,10 +759,10 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	if (ret < 0)
 		goto done;
 
-	/* start the transfers */
+	/* Start the transfers */
 	i2c_dw_xfer_init(dev);
 
-	/* wait for tx to complete */
+	/* Wait for tx to complete */
 	if (!wait_for_completion_timeout(&dev->cmd_complete, adap->timeout)) {
 		dev_err(dev->dev, "controller timed out\n");
 		/* i2c_dw_init implicitly disables the adapter */
@@ -786,7 +786,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 		goto done;
 	}
 
-	/* no error */
+	/* No error */
 	if (likely(!dev->cmd_err && !dev->status)) {
 		ret = num;
 		goto done;
@@ -821,8 +821,8 @@ static u32 i2c_dw_func(struct i2c_adapter *adap)
 }
 
 static const struct i2c_algorithm i2c_dw_algo = {
-	.master_xfer	= i2c_dw_xfer,
-	.functionality	= i2c_dw_func,
+	.master_xfer = i2c_dw_xfer,
+	.functionality = i2c_dw_func,
 };
 
 static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
@@ -903,7 +903,7 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
 
 		/*
 		 * Anytime TX_ABRT is set, the contents of the tx/rx
-		 * buffers are flushed.  Make sure to skip them.
+		 * buffers are flushed. Make sure to skip them.
 		 */
 		dw_writel(dev, 0, DW_IC_INTR_MASK);
 		goto tx_aborted;
@@ -925,7 +925,7 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
 	if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err)
 		complete(&dev->cmd_complete);
 	else if (unlikely(dev->flags & ACCESS_INTR_MASK)) {
-		/* workaround to trigger pending interrupt */
+		/* Workaround to trigger pending interrupt */
 		stat = dw_readl(dev, DW_IC_INTR_MASK);
 		i2c_dw_disable_int(dev);
 		dw_writel(dev, stat, DW_IC_INTR_MASK);
@@ -961,13 +961,13 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 {
 	struct i2c_adapter *adap = &dev->adapter;
 	unsigned long irq_flags;
-	int r;
+	int ret;
 
 	init_completion(&dev->cmd_complete);
 
-	r = i2c_dw_init(dev);
-	if (r)
-		return r;
+	ret = i2c_dw_init(dev);
+	if (ret)
+		return ret;
 
 	snprintf(adap->name, sizeof(adap->name),
 		 "Synopsys DesignWare I2C adapter");
@@ -984,12 +984,12 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 	}
 
 	i2c_dw_disable_int(dev);
-	r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags,
-			     dev_name(dev->dev), dev);
-	if (r) {
+	ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags,
+			       dev_name(dev->dev), dev);
+	if (ret) {
 		dev_err(dev->dev, "failure requesting irq %i: %d\n",
-			dev->irq, r);
-		return r;
+			dev->irq, ret);
+		return ret;
 	}
 
 	/*
@@ -999,12 +999,12 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 	 * registered I2C slaves that do I2C transfers in their probe.
 	 */
 	pm_runtime_get_noresume(dev->dev);
-	r = i2c_add_numbered_adapter(adap);
-	if (r)
-		dev_err(dev->dev, "failure adding adapter: %d\n", r);
+	ret = i2c_add_numbered_adapter(adap);
+	if (ret)
+		dev_err(dev->dev, "failure adding adapter: %d\n", ret);
 	pm_runtime_put_noidle(dev->dev);
 
-	return r;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i2c_dw_probe);
 
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index a7cf429daf60d..fe9b66898a725 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -49,7 +49,7 @@
  * @cmd_complete: tx completion indicator
  * @clk: input reference clock
  * @cmd_err: run time hadware error code
- * @msgs: points to an array of messages currently being transfered
+ * @msgs: points to an array of messages currently being transferred
  * @msgs_num: the number of elements in msgs
  * @msg_write_idx: the element index of the current tx message in the msgs
  *	array
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index d1263b82d646a..613dfb88307fb 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -21,27 +21,28 @@
  * ----------------------------------------------------------------------------
  *
  */
-#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmi.h>
-#include <linux/i2c.h>
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
+#include <linux/platform_data/i2c-designware.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
-#include <linux/io.h>
 #include <linux/reset.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/acpi.h>
-#include <linux/platform_data/i2c-designware.h>
+
 #include "i2c-designware-core.h"
 
 static u32 i2c_dw_get_clk_rate_khz(struct dw_i2c_dev *dev)
@@ -209,11 +210,11 @@ static void dw_i2c_set_fifo_size(struct dw_i2c_dev *dev, int id)
 static int dw_i2c_plat_probe(struct platform_device *pdev)
 {
 	struct dw_i2c_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	struct dw_i2c_dev *dev;
 	struct i2c_adapter *adap;
-	struct resource *mem;
-	int irq, r;
+	struct dw_i2c_dev *dev;
 	u32 acpi_speed, ht = 0;
+	struct resource *mem;
+	int irq, ret;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -276,12 +277,12 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	    && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) {
 		dev_err(&pdev->dev,
 			"Only 100kHz, 400kHz, 1MHz and 3.4MHz supported");
-		r = -EINVAL;
+		ret = -EINVAL;
 		goto exit_reset;
 	}
 
-	r = i2c_dw_probe_lock_support(dev);
-	if (r)
+	ret = i2c_dw_probe_lock_support(dev);
+	if (ret)
 		goto exit_reset;
 
 	dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
@@ -327,11 +328,11 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 		pm_runtime_enable(&pdev->dev);
 	}
 
-	r = i2c_dw_probe(dev);
-	if (r)
+	ret = i2c_dw_probe(dev);
+	if (ret)
 		goto exit_probe;
 
-	return r;
+	return ret;
 
 exit_probe:
 	if (!dev->pm_disabled)
@@ -339,7 +340,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 exit_reset:
 	if (!IS_ERR_OR_NULL(dev->rst))
 		reset_control_assert(dev->rst);
-	return r;
+	return ret;
 }
 
 static int dw_i2c_plat_remove(struct platform_device *pdev)
@@ -423,7 +424,7 @@ static const struct dev_pm_ops dw_i2c_dev_pm_ops = {
 #define DW_I2C_DEV_PMOPS NULL
 #endif
 
-/* work with hotplug and coldplug */
+/* Work with hotplug and coldplug */
 MODULE_ALIAS("platform:i2c_designware");
 
 static struct platform_driver dw_i2c_driver = {
-- 
GitLab


From 89a1e1bd7ba9a6711908bb79af0af6cbdf4a7e79 Mon Sep 17 00:00:00 2001
From: Luis Oliveira <Luis.Oliveira@synopsys.com>
Date: Wed, 14 Jun 2017 11:43:22 +0100
Subject: [PATCH 0271/1958] i2c: designware: refactoring of the i2c-designware

- Factor out all _master() part of code from i2c-designware-core
  and i2c-designware-platdrv to separate functions.
- Standardize all code related with MASTER mode.
- I have to take off DW_IC_INTR_TX_EMPTY from DW_IC_INTR_DEFAULT_MASK
  because it is master specific.

The purpose of this is to prepare the controller to have is I2C MASTER
flow in a separate driver. To do this first all the
functions/definitions related to the MASTER flow were identified.

Signed-off-by: Luis Oliveira <lolivei@synopsys.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-core.c    | 56 +++++++++++++--------
 drivers/i2c/busses/i2c-designware-platdrv.c | 31 +++++++-----
 2 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index d9cab6dc28123..e8a7621c187e4 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -87,10 +87,10 @@
 #define DW_IC_INTR_GEN_CALL	0x800
 
 #define DW_IC_INTR_DEFAULT_MASK		(DW_IC_INTR_RX_FULL | \
-					 DW_IC_INTR_TX_EMPTY | \
 					 DW_IC_INTR_TX_ABRT | \
 					 DW_IC_INTR_STOP_DET)
-
+#define DW_IC_INTR_MASTER_MASK		(DW_IC_INTR_DEFAULT_MASK | \
+					 DW_IC_INTR_TX_EMPTY)
 #define DW_IC_STATUS_ACTIVITY	0x1
 
 #define DW_IC_SDA_HOLD_RX_SHIFT		16
@@ -202,6 +202,16 @@ static void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset)
 	}
 }
 
+static void i2c_dw_configure_fifo_master(struct dw_i2c_dev *dev)
+{
+	/* Configure Tx/Rx FIFO threshold levels */
+	dw_writel(dev, dev->tx_fifo_depth / 2, DW_IC_TX_TL);
+	dw_writel(dev, 0, DW_IC_RX_TL);
+
+	/* Configure the I2C master */
+	dw_writel(dev, dev->master_cfg, DW_IC_CON);
+}
+
 static u32
 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
 {
@@ -440,13 +450,7 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
 			"Hardware too old to adjust SDA hold time.\n");
 	}
 
-	/* Configure Tx/Rx FIFO threshold levels */
-	dw_writel(dev, dev->tx_fifo_depth / 2, DW_IC_TX_TL);
-	dw_writel(dev, 0, DW_IC_RX_TL);
-
-	/* Configure the I2C master */
-	dw_writel(dev, dev->master_cfg , DW_IC_CON);
-
+	i2c_dw_configure_fifo_master(dev);
 	i2c_dw_release_lock(dev);
 
 	return 0;
@@ -511,7 +515,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 
 	/* Clear and enable interrupts */
 	dw_readl(dev, DW_IC_CLR_INTR);
-	dw_writel(dev, DW_IC_INTR_DEFAULT_MASK, DW_IC_INTR_MASK);
+	dw_writel(dev, DW_IC_INTR_MASTER_MASK, DW_IC_INTR_MASK);
 }
 
 /*
@@ -531,7 +535,7 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
 	u8 *buf = dev->tx_buf;
 	bool need_restart = false;
 
-	intr_mask = DW_IC_INTR_DEFAULT_MASK;
+	intr_mask = DW_IC_INTR_MASTER_MASK;
 
 	for (; dev->msg_write_idx < dev->msgs_num; dev->msg_write_idx++) {
 		u32 flags = msgs[dev->msg_write_idx].flags;
@@ -881,22 +885,14 @@ static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
 }
 
 /*
- * Interrupt service routine. This gets called whenever an I2C interrupt
+ * Interrupt service routine. This gets called whenever an I2C master interrupt
  * occurs.
  */
-static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
+static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
 {
-	struct dw_i2c_dev *dev = dev_id;
-	u32 stat, enabled;
-
-	enabled = dw_readl(dev, DW_IC_ENABLE);
-	stat = dw_readl(dev, DW_IC_RAW_INTR_STAT);
-	dev_dbg(dev->dev, "%s: enabled=%#x stat=%#x\n", __func__, enabled, stat);
-	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
-		return IRQ_NONE;
+	u32 stat;
 
 	stat = i2c_dw_read_clear_intrbits(dev);
-
 	if (stat & DW_IC_INTR_TX_ABRT) {
 		dev->cmd_err |= DW_IC_ERR_TX_ABRT;
 		dev->status = STATUS_IDLE;
@@ -931,6 +927,22 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
 		dw_writel(dev, stat, DW_IC_INTR_MASK);
 	}
 
+	return 0;
+}
+
+static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
+{
+	struct dw_i2c_dev *dev = dev_id;
+	u32 stat, enabled;
+
+	enabled = dw_readl(dev, DW_IC_ENABLE);
+	stat = dw_readl(dev, DW_IC_RAW_INTR_STAT);
+	dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
+	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
+		return IRQ_NONE;
+
+	i2c_dw_irq_handler_master(dev);
+
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 613dfb88307fb..fd14b410369e6 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -172,6 +172,23 @@ static inline int dw_i2c_acpi_configure(struct platform_device *pdev)
 }
 #endif
 
+static void i2c_dw_configure_master(struct dw_i2c_dev *dev)
+{
+	dev->master_cfg = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
+			  DW_IC_CON_RESTART_EN;
+
+	switch (dev->clk_freq) {
+	case 100000:
+		dev->master_cfg |= DW_IC_CON_SPEED_STD;
+		break;
+	case 3400000:
+		dev->master_cfg |= DW_IC_CON_SPEED_HIGH;
+		break;
+	default:
+		dev->master_cfg |= DW_IC_CON_SPEED_FAST;
+	}
+}
+
 static int i2c_dw_plat_prepare_clk(struct dw_i2c_dev *i_dev, bool prepare)
 {
 	if (IS_ERR(i_dev->clk))
@@ -287,19 +304,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 
 	dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
 
-	dev->master_cfg = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
-			  DW_IC_CON_RESTART_EN;
-
-	switch (dev->clk_freq) {
-	case 100000:
-		dev->master_cfg |= DW_IC_CON_SPEED_STD;
-		break;
-	case 3400000:
-		dev->master_cfg |= DW_IC_CON_SPEED_HIGH;
-		break;
-	default:
-		dev->master_cfg |= DW_IC_CON_SPEED_FAST;
-	}
+	i2c_dw_configure_master(dev);
 
 	dev->clk = devm_clk_get(&pdev->dev, NULL);
 	if (!i2c_dw_plat_prepare_clk(dev, true)) {
-- 
GitLab


From 90312351fd1e47183662a6abf159cbf751ea62f1 Mon Sep 17 00:00:00 2001
From: Luis Oliveira <Luis.Oliveira@synopsys.com>
Date: Wed, 14 Jun 2017 11:43:23 +0100
Subject: [PATCH 0272/1958] i2c: designware: MASTER mode as separated driver

- The functions related to I2C master mode of operation were transformed
  in a single driver.
- Common definitions were moved to i2c-designware-core.h
- The i2c-designware-core is now only a library file, the functions
  associated are in a source file called i2c-designware-common and
  are used by both i2c-designware-master and i2c-designware-slave.
- To decrease noise in namespace common i2c_dw_*() functions are
  now using ops to keep them private.
- Designware PCI driver had to be changed to match the previous ops
  functions implementation.

Almost all of the "core" source is now part of the "master" source. The
difference is the functions used by both modes and they are in the
"common" source file.

Signed-off-by: Luis Oliveira <lolivei@synopsys.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Makefile                   |   1 +
 drivers/i2c/busses/i2c-designware-common.c    | 275 +++++++++++++
 drivers/i2c/busses/i2c-designware-core.h      | 143 ++++++-
 ...ignware-core.c => i2c-designware-master.c} | 367 +-----------------
 drivers/i2c/busses/i2c-designware-pcidrv.c    |   9 +-
 drivers/i2c/busses/i2c-designware-platdrv.c   |   6 +-
 6 files changed, 433 insertions(+), 368 deletions(-)
 create mode 100644 drivers/i2c/busses/i2c-designware-common.c
 rename drivers/i2c/busses/{i2c-designware-core.c => i2c-designware-master.c} (65%)

diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 30b60855fbcd1..c89a4314c5633 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_I2C_CBUS_GPIO)	+= i2c-cbus-gpio.o
 obj-$(CONFIG_I2C_CPM)		+= i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)	+= i2c-davinci.o
 obj-$(CONFIG_I2C_DESIGNWARE_CORE)	+= i2c-designware-core.o
+i2c-designware-core-objs := i2c-designware-common.o i2c-designware-master.o
 obj-$(CONFIG_I2C_DESIGNWARE_PLATFORM)	+= i2c-designware-platform.o
 i2c-designware-platform-objs := i2c-designware-platdrv.o
 i2c-designware-platform-$(CONFIG_I2C_DESIGNWARE_BAYTRAIL) += i2c-designware-baytrail.o
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
new file mode 100644
index 0000000000000..0d0ccb73f0e66
--- /dev/null
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -0,0 +1,275 @@
+/*
+ * Synopsys DesignWare I2C adapter driver.
+ *
+ * Based on the TI DAVINCI I2C adapter driver.
+ *
+ * Copyright (C) 2006 Texas Instruments.
+ * Copyright (C) 2007 MontaVista Software Inc.
+ * Copyright (C) 2009 Provigent Ltd.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * ----------------------------------------------------------------------------
+ *
+ */
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+
+#include "i2c-designware-core.h"
+
+static char *abort_sources[] = {
+	[ABRT_7B_ADDR_NOACK] =
+		"slave address not acknowledged (7bit mode)",
+	[ABRT_10ADDR1_NOACK] =
+		"first address byte not acknowledged (10bit mode)",
+	[ABRT_10ADDR2_NOACK] =
+		"second address byte not acknowledged (10bit mode)",
+	[ABRT_TXDATA_NOACK] =
+		"data not acknowledged",
+	[ABRT_GCALL_NOACK] =
+		"no acknowledgement for a general call",
+	[ABRT_GCALL_READ] =
+		"read after general call",
+	[ABRT_SBYTE_ACKDET] =
+		"start byte acknowledged",
+	[ABRT_SBYTE_NORSTRT] =
+		"trying to send start byte when restart is disabled",
+	[ABRT_10B_RD_NORSTRT] =
+		"trying to read when restart is disabled (10bit mode)",
+	[ABRT_MASTER_DIS] =
+		"trying to use disabled adapter",
+	[ARB_LOST] =
+		"lost arbitration",
+};
+
+u32 dw_readl(struct dw_i2c_dev *dev, int offset)
+{
+	u32 value;
+
+	if (dev->flags & ACCESS_16BIT)
+		value = readw_relaxed(dev->base + offset) |
+			(readw_relaxed(dev->base + offset + 2) << 16);
+	else
+		value = readl_relaxed(dev->base + offset);
+
+	if (dev->flags & ACCESS_SWAP)
+		return swab32(value);
+	else
+		return value;
+}
+
+void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset)
+{
+	if (dev->flags & ACCESS_SWAP)
+		b = swab32(b);
+
+	if (dev->flags & ACCESS_16BIT) {
+		writew_relaxed((u16)b, dev->base + offset);
+		writew_relaxed((u16)(b >> 16), dev->base + offset + 2);
+	} else {
+		writel_relaxed(b, dev->base + offset);
+	}
+}
+
+u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
+{
+	/*
+	 * DesignWare I2C core doesn't seem to have solid strategy to meet
+	 * the tHD;STA timing spec.  Configuring _HCNT based on tHIGH spec
+	 * will result in violation of the tHD;STA spec.
+	 */
+	if (cond)
+		/*
+		 * Conditional expression:
+		 *
+		 *   IC_[FS]S_SCL_HCNT + (1+4+3) >= IC_CLK * tHIGH
+		 *
+		 * This is based on the DW manuals, and represents an ideal
+		 * configuration.  The resulting I2C bus speed will be
+		 * faster than any of the others.
+		 *
+		 * If your hardware is free from tHD;STA issue, try this one.
+		 */
+		return (ic_clk * tSYMBOL + 500000) / 1000000 - 8 + offset;
+	else
+		/*
+		 * Conditional expression:
+		 *
+		 *   IC_[FS]S_SCL_HCNT + 3 >= IC_CLK * (tHD;STA + tf)
+		 *
+		 * This is just experimental rule; the tHD;STA period turned
+		 * out to be proportinal to (_HCNT + 3).  With this setting,
+		 * we could meet both tHIGH and tHD;STA timing specs.
+		 *
+		 * If unsure, you'd better to take this alternative.
+		 *
+		 * The reason why we need to take into account "tf" here,
+		 * is the same as described in i2c_dw_scl_lcnt().
+		 */
+		return (ic_clk * (tSYMBOL + tf) + 500000) / 1000000
+			- 3 + offset;
+}
+
+u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
+{
+	/*
+	 * Conditional expression:
+	 *
+	 *   IC_[FS]S_SCL_LCNT + 1 >= IC_CLK * (tLOW + tf)
+	 *
+	 * DW I2C core starts counting the SCL CNTs for the LOW period
+	 * of the SCL clock (tLOW) as soon as it pulls the SCL line.
+	 * In order to meet the tLOW timing spec, we need to take into
+	 * account the fall time of SCL signal (tf).  Default tf value
+	 * should be 0.3 us, for safety.
+	 */
+	return ((ic_clk * (tLOW + tf) + 500000) / 1000000) - 1 + offset;
+}
+
+void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable)
+{
+	dw_writel(dev, enable, DW_IC_ENABLE);
+}
+
+void __i2c_dw_enable_and_wait(struct dw_i2c_dev *dev, bool enable)
+{
+	int timeout = 100;
+
+	do {
+		__i2c_dw_enable(dev, enable);
+		if ((dw_readl(dev, DW_IC_ENABLE_STATUS) & 1) == enable)
+			return;
+
+		/*
+		 * Wait 10 times the signaling period of the highest I2C
+		 * transfer supported by the driver (for 400KHz this is
+		 * 25us) as described in the DesignWare I2C databook.
+		 */
+		usleep_range(25, 250);
+	} while (timeout--);
+
+	dev_warn(dev->dev, "timeout in %sabling adapter\n",
+		 enable ? "en" : "dis");
+}
+
+unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev)
+{
+	/*
+	 * Clock is not necessary if we got LCNT/HCNT values directly from
+	 * the platform code.
+	 */
+	if (WARN_ON_ONCE(!dev->get_clk_rate_khz))
+		return 0;
+	return dev->get_clk_rate_khz(dev);
+}
+
+int i2c_dw_acquire_lock(struct dw_i2c_dev *dev)
+{
+	int ret;
+
+	if (!dev->acquire_lock)
+		return 0;
+
+	ret = dev->acquire_lock(dev);
+	if (!ret)
+		return 0;
+
+	dev_err(dev->dev, "couldn't acquire bus ownership\n");
+
+	return ret;
+}
+
+void i2c_dw_release_lock(struct dw_i2c_dev *dev)
+{
+	if (dev->release_lock)
+		dev->release_lock(dev);
+}
+
+/*
+ * Waiting for bus not busy
+ */
+int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev)
+{
+	int timeout = TIMEOUT;
+
+	while (dw_readl(dev, DW_IC_STATUS) & DW_IC_STATUS_ACTIVITY) {
+		if (timeout <= 0) {
+			dev_warn(dev->dev, "timeout waiting for bus ready\n");
+			return -ETIMEDOUT;
+		}
+		timeout--;
+		usleep_range(1000, 1100);
+	}
+
+	return 0;
+}
+
+int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev)
+{
+	unsigned long abort_source = dev->abort_source;
+	int i;
+
+	if (abort_source & DW_IC_TX_ABRT_NOACK) {
+		for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
+			dev_dbg(dev->dev,
+				"%s: %s\n", __func__, abort_sources[i]);
+		return -EREMOTEIO;
+	}
+
+	for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
+		dev_err(dev->dev, "%s: %s\n", __func__, abort_sources[i]);
+
+	if (abort_source & DW_IC_TX_ARB_LOST)
+		return -EAGAIN;
+	else if (abort_source & DW_IC_TX_ABRT_GCALL_READ)
+		return -EINVAL; /* wrong msgs[] data */
+	else
+		return -EIO;
+}
+
+u32 i2c_dw_func(struct i2c_adapter *adap)
+{
+	struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
+
+	return dev->functionality;
+}
+
+void i2c_dw_disable(struct dw_i2c_dev *dev)
+{
+	/* Disable controller */
+	__i2c_dw_enable_and_wait(dev, false);
+
+	/* Disable all interupts */
+	dw_writel(dev, 0, DW_IC_INTR_MASK);
+	dw_readl(dev, DW_IC_CLR_INTR);
+}
+
+void i2c_dw_disable_int(struct dw_i2c_dev *dev)
+{
+	dw_writel(dev, 0, DW_IC_INTR_MASK);
+}
+
+u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev)
+{
+	return dw_readl(dev, DW_IC_COMP_PARAM_1);
+}
+EXPORT_SYMBOL_GPL(i2c_dw_read_comp_param);
+
+MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index fe9b66898a725..661cfa869ff7a 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -41,6 +41,124 @@
 #define DW_IC_CON_RESTART_EN		0x20
 #define DW_IC_CON_SLAVE_DISABLE		0x40
 
+/*
+ * Registers offset
+ */
+#define DW_IC_CON		0x0
+#define DW_IC_TAR		0x4
+#define DW_IC_DATA_CMD		0x10
+#define DW_IC_SS_SCL_HCNT	0x14
+#define DW_IC_SS_SCL_LCNT	0x18
+#define DW_IC_FS_SCL_HCNT	0x1c
+#define DW_IC_FS_SCL_LCNT	0x20
+#define DW_IC_HS_SCL_HCNT	0x24
+#define DW_IC_HS_SCL_LCNT	0x28
+#define DW_IC_INTR_STAT		0x2c
+#define DW_IC_INTR_MASK		0x30
+#define DW_IC_RAW_INTR_STAT	0x34
+#define DW_IC_RX_TL		0x38
+#define DW_IC_TX_TL		0x3c
+#define DW_IC_CLR_INTR		0x40
+#define DW_IC_CLR_RX_UNDER	0x44
+#define DW_IC_CLR_RX_OVER	0x48
+#define DW_IC_CLR_TX_OVER	0x4c
+#define DW_IC_CLR_RD_REQ	0x50
+#define DW_IC_CLR_TX_ABRT	0x54
+#define DW_IC_CLR_RX_DONE	0x58
+#define DW_IC_CLR_ACTIVITY	0x5c
+#define DW_IC_CLR_STOP_DET	0x60
+#define DW_IC_CLR_START_DET	0x64
+#define DW_IC_CLR_GEN_CALL	0x68
+#define DW_IC_ENABLE		0x6c
+#define DW_IC_STATUS		0x70
+#define DW_IC_TXFLR		0x74
+#define DW_IC_RXFLR		0x78
+#define DW_IC_SDA_HOLD		0x7c
+#define DW_IC_TX_ABRT_SOURCE	0x80
+#define DW_IC_ENABLE_STATUS	0x9c
+#define DW_IC_COMP_PARAM_1	0xf4
+#define DW_IC_COMP_VERSION	0xf8
+#define DW_IC_SDA_HOLD_MIN_VERS	0x3131312A
+#define DW_IC_COMP_TYPE		0xfc
+#define DW_IC_COMP_TYPE_VALUE	0x44570140
+
+#define DW_IC_INTR_RX_UNDER	0x001
+#define DW_IC_INTR_RX_OVER	0x002
+#define DW_IC_INTR_RX_FULL	0x004
+#define DW_IC_INTR_TX_OVER	0x008
+#define DW_IC_INTR_TX_EMPTY	0x010
+#define DW_IC_INTR_RD_REQ	0x020
+#define DW_IC_INTR_TX_ABRT	0x040
+#define DW_IC_INTR_RX_DONE	0x080
+#define DW_IC_INTR_ACTIVITY	0x100
+#define DW_IC_INTR_STOP_DET	0x200
+#define DW_IC_INTR_START_DET	0x400
+#define DW_IC_INTR_GEN_CALL	0x800
+
+#define DW_IC_INTR_DEFAULT_MASK		(DW_IC_INTR_RX_FULL | \
+					 DW_IC_INTR_TX_ABRT | \
+					 DW_IC_INTR_STOP_DET)
+#define DW_IC_INTR_MASTER_MASK		(DW_IC_INTR_DEFAULT_MASK | \
+					 DW_IC_INTR_TX_EMPTY)
+#define DW_IC_STATUS_ACTIVITY		0x1
+#define DW_IC_STATUS_TFE		BIT(2)
+#define DW_IC_STATUS_MASTER_ACTIVITY	BIT(5)
+
+#define DW_IC_SDA_HOLD_RX_SHIFT		16
+#define DW_IC_SDA_HOLD_RX_MASK		GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT)
+
+#define DW_IC_ERR_TX_ABRT	0x1
+
+#define DW_IC_TAR_10BITADDR_MASTER BIT(12)
+
+#define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH	(BIT(2) | BIT(3))
+#define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
+
+/*
+ * Status codes
+ */
+#define STATUS_IDLE			0x0
+#define STATUS_WRITE_IN_PROGRESS	0x1
+#define STATUS_READ_IN_PROGRESS		0x2
+
+#define TIMEOUT			20 /* ms */
+
+/*
+ * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
+ *
+ * Only expected abort codes are listed here
+ * refer to the datasheet for the full list
+ */
+#define ABRT_7B_ADDR_NOACK	0
+#define ABRT_10ADDR1_NOACK	1
+#define ABRT_10ADDR2_NOACK	2
+#define ABRT_TXDATA_NOACK	3
+#define ABRT_GCALL_NOACK	4
+#define ABRT_GCALL_READ		5
+#define ABRT_SBYTE_ACKDET	7
+#define ABRT_SBYTE_NORSTRT	9
+#define ABRT_10B_RD_NORSTRT	10
+#define ABRT_MASTER_DIS		11
+#define ARB_LOST		12
+
+#define DW_IC_TX_ABRT_7B_ADDR_NOACK	(1UL << ABRT_7B_ADDR_NOACK)
+#define DW_IC_TX_ABRT_10ADDR1_NOACK	(1UL << ABRT_10ADDR1_NOACK)
+#define DW_IC_TX_ABRT_10ADDR2_NOACK	(1UL << ABRT_10ADDR2_NOACK)
+#define DW_IC_TX_ABRT_TXDATA_NOACK	(1UL << ABRT_TXDATA_NOACK)
+#define DW_IC_TX_ABRT_GCALL_NOACK	(1UL << ABRT_GCALL_NOACK)
+#define DW_IC_TX_ABRT_GCALL_READ	(1UL << ABRT_GCALL_READ)
+#define DW_IC_TX_ABRT_SBYTE_ACKDET	(1UL << ABRT_SBYTE_ACKDET)
+#define DW_IC_TX_ABRT_SBYTE_NORSTRT	(1UL << ABRT_SBYTE_NORSTRT)
+#define DW_IC_TX_ABRT_10B_RD_NORSTRT	(1UL << ABRT_10B_RD_NORSTRT)
+#define DW_IC_TX_ABRT_MASTER_DIS	(1UL << ABRT_MASTER_DIS)
+#define DW_IC_TX_ARB_LOST		(1UL << ARB_LOST)
+
+#define DW_IC_TX_ABRT_NOACK		(DW_IC_TX_ABRT_7B_ADDR_NOACK | \
+					 DW_IC_TX_ABRT_10ADDR1_NOACK | \
+					 DW_IC_TX_ABRT_10ADDR2_NOACK | \
+					 DW_IC_TX_ABRT_TXDATA_NOACK | \
+					 DW_IC_TX_ABRT_GCALL_NOACK)
+
 
 /**
  * struct dw_i2c_dev - private i2c-designware data
@@ -80,6 +198,9 @@
  * @acquire_lock: function to acquire a hardware lock on the bus
  * @release_lock: function to release a hardware lock on the bus
  * @pm_disabled: true if power-management should be disabled for this i2c-bus
+ * @disable: function to disable the controller
+ * @disable_int: function to disable all interrupts
+ * @init: function to initialize the I2C hardware
  *
  * HCNT and LCNT parameters can be used if the platform knows more accurate
  * values than the one computed based only on the input clock frequency.
@@ -129,6 +250,9 @@ struct dw_i2c_dev {
 	int			(*acquire_lock)(struct dw_i2c_dev *dev);
 	void			(*release_lock)(struct dw_i2c_dev *dev);
 	bool			pm_disabled;
+	void			(*disable)(struct dw_i2c_dev *dev);
+	void			(*disable_int)(struct dw_i2c_dev *dev);
+	int			(*init)(struct dw_i2c_dev *dev);
 };
 
 #define ACCESS_SWAP		0x00000001
@@ -137,9 +261,22 @@ struct dw_i2c_dev {
 
 #define MODEL_CHERRYTRAIL	0x00000100
 
-extern int i2c_dw_init(struct dw_i2c_dev *dev);
-extern void i2c_dw_disable(struct dw_i2c_dev *dev);
-extern void i2c_dw_disable_int(struct dw_i2c_dev *dev);
+u32 dw_readl(struct dw_i2c_dev *dev, int offset);
+void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset);
+u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset);
+u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset);
+void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable);
+void __i2c_dw_enable_and_wait(struct dw_i2c_dev *dev, bool enable);
+unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev);
+int i2c_dw_acquire_lock(struct dw_i2c_dev *dev);
+void i2c_dw_release_lock(struct dw_i2c_dev *dev);
+int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev);
+int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev);
+u32 i2c_dw_func(struct i2c_adapter *adap);
+void i2c_dw_disable(struct dw_i2c_dev *dev);
+void i2c_dw_disable_int(struct dw_i2c_dev *dev);
+int i2c_dw_init(struct dw_i2c_dev *dev);
+
 extern u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev);
 extern int i2c_dw_probe(struct dw_i2c_dev *dev);
 
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-master.c
similarity index 65%
rename from drivers/i2c/busses/i2c-designware-core.c
rename to drivers/i2c/busses/i2c-designware-master.c
index e8a7621c187e4..eefc4db1ee3e2 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -22,9 +22,9 @@
  *
  */
 #include <linux/delay.h>
-#include <linux/export.h>
-#include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -32,175 +32,6 @@
 #include <linux/pm_runtime.h>
 
 #include "i2c-designware-core.h"
-/*
- * Registers offset
- */
-#define DW_IC_CON		0x0
-#define DW_IC_TAR		0x4
-#define DW_IC_DATA_CMD		0x10
-#define DW_IC_SS_SCL_HCNT	0x14
-#define DW_IC_SS_SCL_LCNT	0x18
-#define DW_IC_FS_SCL_HCNT	0x1c
-#define DW_IC_FS_SCL_LCNT	0x20
-#define DW_IC_HS_SCL_HCNT	0x24
-#define DW_IC_HS_SCL_LCNT	0x28
-#define DW_IC_INTR_STAT		0x2c
-#define DW_IC_INTR_MASK		0x30
-#define DW_IC_RAW_INTR_STAT	0x34
-#define DW_IC_RX_TL		0x38
-#define DW_IC_TX_TL		0x3c
-#define DW_IC_CLR_INTR		0x40
-#define DW_IC_CLR_RX_UNDER	0x44
-#define DW_IC_CLR_RX_OVER	0x48
-#define DW_IC_CLR_TX_OVER	0x4c
-#define DW_IC_CLR_RD_REQ	0x50
-#define DW_IC_CLR_TX_ABRT	0x54
-#define DW_IC_CLR_RX_DONE	0x58
-#define DW_IC_CLR_ACTIVITY	0x5c
-#define DW_IC_CLR_STOP_DET	0x60
-#define DW_IC_CLR_START_DET	0x64
-#define DW_IC_CLR_GEN_CALL	0x68
-#define DW_IC_ENABLE		0x6c
-#define DW_IC_STATUS		0x70
-#define DW_IC_TXFLR		0x74
-#define DW_IC_RXFLR		0x78
-#define DW_IC_SDA_HOLD		0x7c
-#define DW_IC_TX_ABRT_SOURCE	0x80
-#define DW_IC_ENABLE_STATUS	0x9c
-#define DW_IC_COMP_PARAM_1	0xf4
-#define DW_IC_COMP_VERSION	0xf8
-#define DW_IC_SDA_HOLD_MIN_VERS	0x3131312A
-#define DW_IC_COMP_TYPE		0xfc
-#define DW_IC_COMP_TYPE_VALUE	0x44570140
-
-#define DW_IC_INTR_RX_UNDER	0x001
-#define DW_IC_INTR_RX_OVER	0x002
-#define DW_IC_INTR_RX_FULL	0x004
-#define DW_IC_INTR_TX_OVER	0x008
-#define DW_IC_INTR_TX_EMPTY	0x010
-#define DW_IC_INTR_RD_REQ	0x020
-#define DW_IC_INTR_TX_ABRT	0x040
-#define DW_IC_INTR_RX_DONE	0x080
-#define DW_IC_INTR_ACTIVITY	0x100
-#define DW_IC_INTR_STOP_DET	0x200
-#define DW_IC_INTR_START_DET	0x400
-#define DW_IC_INTR_GEN_CALL	0x800
-
-#define DW_IC_INTR_DEFAULT_MASK		(DW_IC_INTR_RX_FULL | \
-					 DW_IC_INTR_TX_ABRT | \
-					 DW_IC_INTR_STOP_DET)
-#define DW_IC_INTR_MASTER_MASK		(DW_IC_INTR_DEFAULT_MASK | \
-					 DW_IC_INTR_TX_EMPTY)
-#define DW_IC_STATUS_ACTIVITY	0x1
-
-#define DW_IC_SDA_HOLD_RX_SHIFT		16
-#define DW_IC_SDA_HOLD_RX_MASK		GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT)
-
-#define DW_IC_ERR_TX_ABRT	0x1
-
-#define DW_IC_TAR_10BITADDR_MASTER		BIT(12)
-
-#define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH	(BIT(2) | BIT(3))
-#define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
-
-/*
- * status codes
- */
-#define STATUS_IDLE			0x0
-#define STATUS_WRITE_IN_PROGRESS	0x1
-#define STATUS_READ_IN_PROGRESS		0x2
-
-#define TIMEOUT			20 /* ms */
-
-/*
- * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
- *
- * Only expected abort codes are listed here,
- * refer to the datasheet for the full list.
- */
-#define ABRT_7B_ADDR_NOACK	0
-#define ABRT_10ADDR1_NOACK	1
-#define ABRT_10ADDR2_NOACK	2
-#define ABRT_TXDATA_NOACK	3
-#define ABRT_GCALL_NOACK	4
-#define ABRT_GCALL_READ		5
-#define ABRT_SBYTE_ACKDET	7
-#define ABRT_SBYTE_NORSTRT	9
-#define ABRT_10B_RD_NORSTRT	10
-#define ABRT_MASTER_DIS		11
-#define ARB_LOST		12
-
-#define DW_IC_TX_ABRT_7B_ADDR_NOACK	(1UL << ABRT_7B_ADDR_NOACK)
-#define DW_IC_TX_ABRT_10ADDR1_NOACK	(1UL << ABRT_10ADDR1_NOACK)
-#define DW_IC_TX_ABRT_10ADDR2_NOACK	(1UL << ABRT_10ADDR2_NOACK)
-#define DW_IC_TX_ABRT_TXDATA_NOACK	(1UL << ABRT_TXDATA_NOACK)
-#define DW_IC_TX_ABRT_GCALL_NOACK	(1UL << ABRT_GCALL_NOACK)
-#define DW_IC_TX_ABRT_GCALL_READ	(1UL << ABRT_GCALL_READ)
-#define DW_IC_TX_ABRT_SBYTE_ACKDET	(1UL << ABRT_SBYTE_ACKDET)
-#define DW_IC_TX_ABRT_SBYTE_NORSTRT	(1UL << ABRT_SBYTE_NORSTRT)
-#define DW_IC_TX_ABRT_10B_RD_NORSTRT	(1UL << ABRT_10B_RD_NORSTRT)
-#define DW_IC_TX_ABRT_MASTER_DIS	(1UL << ABRT_MASTER_DIS)
-#define DW_IC_TX_ARB_LOST		(1UL << ARB_LOST)
-
-#define DW_IC_TX_ABRT_NOACK		(DW_IC_TX_ABRT_7B_ADDR_NOACK | \
-					 DW_IC_TX_ABRT_10ADDR1_NOACK | \
-					 DW_IC_TX_ABRT_10ADDR2_NOACK | \
-					 DW_IC_TX_ABRT_TXDATA_NOACK | \
-					 DW_IC_TX_ABRT_GCALL_NOACK)
-
-static char *abort_sources[] = {
-	[ABRT_7B_ADDR_NOACK] =
-		"slave address not acknowledged (7bit mode)",
-	[ABRT_10ADDR1_NOACK] =
-		"first address byte not acknowledged (10bit mode)",
-	[ABRT_10ADDR2_NOACK] =
-		"second address byte not acknowledged (10bit mode)",
-	[ABRT_TXDATA_NOACK] =
-		"data not acknowledged",
-	[ABRT_GCALL_NOACK] =
-		"no acknowledgement for a general call",
-	[ABRT_GCALL_READ] =
-		"read after general call",
-	[ABRT_SBYTE_ACKDET] =
-		"start byte acknowledged",
-	[ABRT_SBYTE_NORSTRT] =
-		"trying to send start byte when restart is disabled",
-	[ABRT_10B_RD_NORSTRT] =
-		"trying to read when restart is disabled (10bit mode)",
-	[ABRT_MASTER_DIS] =
-		"trying to use disabled adapter",
-	[ARB_LOST] =
-		"lost arbitration",
-};
-
-static u32 dw_readl(struct dw_i2c_dev *dev, int offset)
-{
-	u32 value;
-
-	if (dev->flags & ACCESS_16BIT)
-		value = readw_relaxed(dev->base + offset) |
-			(readw_relaxed(dev->base + offset + 2) << 16);
-	else
-		value = readl_relaxed(dev->base + offset);
-
-	if (dev->flags & ACCESS_SWAP)
-		return swab32(value);
-	else
-		return value;
-}
-
-static void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset)
-{
-	if (dev->flags & ACCESS_SWAP)
-		b = swab32(b);
-
-	if (dev->flags & ACCESS_16BIT) {
-		writew_relaxed((u16)b, dev->base + offset);
-		writew_relaxed((u16)(b >> 16), dev->base + offset + 2);
-	} else {
-		writel_relaxed(b, dev->base + offset);
-	}
-}
 
 static void i2c_dw_configure_fifo_master(struct dw_i2c_dev *dev)
 {
@@ -212,121 +43,6 @@ static void i2c_dw_configure_fifo_master(struct dw_i2c_dev *dev)
 	dw_writel(dev, dev->master_cfg, DW_IC_CON);
 }
 
-static u32
-i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
-{
-	/*
-	 * DesignWare I2C core doesn't seem to have solid strategy to meet
-	 * the tHD;STA timing spec.  Configuring _HCNT based on tHIGH spec
-	 * will result in violation of the tHD;STA spec.
-	 */
-	if (cond)
-		/*
-		 * Conditional expression:
-		 *
-		 *   IC_[FS]S_SCL_HCNT + (1+4+3) >= IC_CLK * tHIGH
-		 *
-		 * This is based on the DW manuals, and represents an ideal
-		 * configuration.  The resulting I2C bus speed will be
-		 * faster than any of the others.
-		 *
-		 * If your hardware is free from tHD;STA issue, try this one.
-		 */
-		return (ic_clk * tSYMBOL + 500000) / 1000000 - 8 + offset;
-	else
-		/*
-		 * Conditional expression:
-		 *
-		 *   IC_[FS]S_SCL_HCNT + 3 >= IC_CLK * (tHD;STA + tf)
-		 *
-		 * This is just experimental rule; the tHD;STA period turned
-		 * out to be proportinal to (_HCNT + 3).  With this setting,
-		 * we could meet both tHIGH and tHD;STA timing specs.
-		 *
-		 * If unsure, you'd better to take this alternative.
-		 *
-		 * The reason why we need to take into account "tf" here,
-		 * is the same as described in i2c_dw_scl_lcnt().
-		 */
-		return (ic_clk * (tSYMBOL + tf) + 500000) / 1000000
-			- 3 + offset;
-}
-
-static u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
-{
-	/*
-	 * Conditional expression:
-	 *
-	 *   IC_[FS]S_SCL_LCNT + 1 >= IC_CLK * (tLOW + tf)
-	 *
-	 * DW I2C core starts counting the SCL CNTs for the LOW period
-	 * of the SCL clock (tLOW) as soon as it pulls the SCL line.
-	 * In order to meet the tLOW timing spec, we need to take into
-	 * account the fall time of SCL signal (tf).  Default tf value
-	 * should be 0.3 us, for safety.
-	 */
-	return ((ic_clk * (tLOW + tf) + 500000) / 1000000) - 1 + offset;
-}
-
-static void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable)
-{
-	dw_writel(dev, enable, DW_IC_ENABLE);
-}
-
-static void __i2c_dw_enable_and_wait(struct dw_i2c_dev *dev, bool enable)
-{
-	int timeout = 100;
-
-	do {
-		__i2c_dw_enable(dev, enable);
-		if ((dw_readl(dev, DW_IC_ENABLE_STATUS) & 1) == enable)
-			return;
-
-		/*
-		 * Wait 10 times the signaling period of the highest I2C
-		 * transfer supported by the driver (for 400KHz this is
-		 * 25us) as described in the DesignWare I2C databook.
-		 */
-		usleep_range(25, 250);
-	} while (timeout--);
-
-	dev_warn(dev->dev, "timeout in %sabling adapter\n",
-		 enable ? "en" : "dis");
-}
-
-static unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev)
-{
-	/*
-	 * Clock is not necessary if we got LCNT/HCNT values directly from
-	 * the platform code.
-	 */
-	if (WARN_ON_ONCE(!dev->get_clk_rate_khz))
-		return 0;
-	return dev->get_clk_rate_khz(dev);
-}
-
-static int i2c_dw_acquire_lock(struct dw_i2c_dev *dev)
-{
-	int ret;
-
-	if (!dev->acquire_lock)
-		return 0;
-
-	ret = dev->acquire_lock(dev);
-	if (!ret)
-		return 0;
-
-	dev_err(dev->dev, "couldn't acquire bus ownership\n");
-
-	return ret;
-}
-
-static void i2c_dw_release_lock(struct dw_i2c_dev *dev)
-{
-	if (dev->release_lock)
-		dev->release_lock(dev);
-}
-
 /**
  * i2c_dw_init() - Initialize the designware I2C master hardware
  * @dev: device private data
@@ -457,25 +173,6 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
 }
 EXPORT_SYMBOL_GPL(i2c_dw_init);
 
-/*
- * Waiting for bus not busy
- */
-static int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev)
-{
-	int timeout = TIMEOUT;
-
-	while (dw_readl(dev, DW_IC_STATUS) & DW_IC_STATUS_ACTIVITY) {
-		if (timeout <= 0) {
-			dev_warn(dev->dev, "timeout waiting for bus ready\n");
-			return -ETIMEDOUT;
-		}
-		timeout--;
-		usleep_range(1000, 1100);
-	}
-
-	return 0;
-}
-
 static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 {
 	struct i2c_msg *msgs = dev->msgs;
@@ -708,29 +405,6 @@ i2c_dw_read(struct dw_i2c_dev *dev)
 	}
 }
 
-static int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev)
-{
-	unsigned long abort_source = dev->abort_source;
-	int i;
-
-	if (abort_source & DW_IC_TX_ABRT_NOACK) {
-		for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
-			dev_dbg(dev->dev,
-				"%s: %s\n", __func__, abort_sources[i]);
-		return -EREMOTEIO;
-	}
-
-	for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
-		dev_err(dev->dev, "%s: %s\n", __func__, abort_sources[i]);
-
-	if (abort_source & DW_IC_TX_ARB_LOST)
-		return -EAGAIN;
-	else if (abort_source & DW_IC_TX_ABRT_GCALL_READ)
-		return -EINVAL; /* wrong msgs[] data */
-	else
-		return -EIO;
-}
-
 /*
  * Prepare controller for a transaction and call i2c_dw_xfer_msg.
  */
@@ -818,12 +492,6 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	return ret;
 }
 
-static u32 i2c_dw_func(struct i2c_adapter *adap)
-{
-	struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
-	return dev->functionality;
-}
-
 static const struct i2c_algorithm i2c_dw_algo = {
 	.master_xfer = i2c_dw_xfer,
 	.functionality = i2c_dw_func,
@@ -946,29 +614,6 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-void i2c_dw_disable(struct dw_i2c_dev *dev)
-{
-	/* Disable controller */
-	__i2c_dw_enable_and_wait(dev, false);
-
-	/* Disable all interupts */
-	dw_writel(dev, 0, DW_IC_INTR_MASK);
-	dw_readl(dev, DW_IC_CLR_INTR);
-}
-EXPORT_SYMBOL_GPL(i2c_dw_disable);
-
-void i2c_dw_disable_int(struct dw_i2c_dev *dev)
-{
-	dw_writel(dev, 0, DW_IC_INTR_MASK);
-}
-EXPORT_SYMBOL_GPL(i2c_dw_disable_int);
-
-u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev)
-{
-	return dw_readl(dev, DW_IC_COMP_PARAM_1);
-}
-EXPORT_SYMBOL_GPL(i2c_dw_read_comp_param);
-
 int i2c_dw_probe(struct dw_i2c_dev *dev)
 {
 	struct i2c_adapter *adap = &dev->adapter;
@@ -977,7 +622,11 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 
 	init_completion(&dev->cmd_complete);
 
-	ret = i2c_dw_init(dev);
+	dev->init = i2c_dw_init;
+	dev->disable = i2c_dw_disable;
+	dev->disable_int = i2c_dw_disable_int;
+
+	ret = dev->init(dev);
 	if (ret)
 		return ret;
 
@@ -1020,5 +669,5 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 }
 EXPORT_SYMBOL_GPL(i2c_dw_probe);
 
-MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter core");
+MODULE_DESCRIPTION("Synopsys DesignWare I2C bus master adapter");
 MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index ed485b69b449c..86e1bd0b82e91 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -187,16 +187,19 @@ static struct dw_pci_controller dw_pci_controllers[] = {
 static int i2c_dw_pci_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
+
+	i_dev->disable(i_dev);
 
-	i2c_dw_disable(pci_get_drvdata(pdev));
 	return 0;
 }
 
 static int i2c_dw_pci_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
 
-	return i2c_dw_init(pci_get_drvdata(pdev));
+	return i_dev->init(i_dev);
 }
 #endif
 
@@ -296,7 +299,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
 {
 	struct dw_i2c_dev *dev = pci_get_drvdata(pdev);
 
-	i2c_dw_disable(dev);
+	dev->disable(dev);
 	pm_runtime_forbid(&pdev->dev);
 	pm_runtime_get_noresume(&pdev->dev);
 
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index fd14b410369e6..1f38c807be5f5 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -356,7 +356,7 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&dev->adapter);
 
-	i2c_dw_disable(dev);
+	dev->disable(dev);
 
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
@@ -400,7 +400,7 @@ static int dw_i2c_plat_suspend(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
 
-	i2c_dw_disable(i_dev);
+	i_dev->disable(i_dev);
 	i2c_dw_plat_prepare_clk(i_dev, false);
 
 	return 0;
@@ -412,7 +412,7 @@ static int dw_i2c_plat_resume(struct device *dev)
 	struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
 
 	i2c_dw_plat_prepare_clk(i_dev, true);
-	i2c_dw_init(i_dev);
+	i_dev->init(i_dev);
 
 	return 0;
 }
-- 
GitLab


From 04606ccc84e3c799ede8ad19fe0409c5a4892cc4 Mon Sep 17 00:00:00 2001
From: Luis Oliveira <Luis.Oliveira@synopsys.com>
Date: Wed, 14 Jun 2017 11:43:24 +0100
Subject: [PATCH 0273/1958] i2c: designware: introducing I2C_SLAVE definitions

- Definitions were added to core library
- A example was added to designware-core.txt Documentation that shows
  how the slave can be setup using DTS

SLAVE related definitions were added to the core of the controller.

Signed-off-by: Luis Oliveira <lolivei@synopsys.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 .../bindings/i2c/i2c-designware.txt           | 16 ++++++++-
 drivers/i2c/busses/i2c-designware-core.h      | 33 +++++++++++++++++--
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/i2c/i2c-designware.txt b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
index fee26dc3e858e..fbb0a6d8b9643 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-designware.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
@@ -20,7 +20,7 @@ Optional properties :
  - i2c-sda-falling-time-ns : should contain the SDA falling time in nanoseconds.
    This value which is by default 300ns is used to compute the tHIGH period.
 
-Example :
+Examples :
 
 	i2c@f0000 {
 		#address-cells = <1>;
@@ -43,3 +43,17 @@ Example :
 		i2c-sda-falling-time-ns = <300>;
 		i2c-scl-falling-time-ns = <300>;
 	};
+
+	i2c@1120000 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x2000 0x100>;
+		clock-frequency = <400000>;
+		clocks = <&i2cclk>;
+		interrupts = <0>;
+
+		eeprom@64 {
+			compatible = "linux,slave-24c02";
+			reg = <0x40000064>;
+		};
+	};
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 661cfa869ff7a..76807eafda537 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -1,5 +1,5 @@
 /*
- * Synopsys DesignWare I2C adapter driver (master only).
+ * Synopsys DesignWare I2C adapter driver.
  *
  * Based on the TI DAVINCI I2C adapter driver.
  *
@@ -37,15 +37,20 @@
 #define DW_IC_CON_SPEED_FAST		0x4
 #define DW_IC_CON_SPEED_HIGH		0x6
 #define DW_IC_CON_SPEED_MASK		0x6
+#define DW_IC_CON_10BITADDR_SLAVE		0x8
 #define DW_IC_CON_10BITADDR_MASTER	0x10
 #define DW_IC_CON_RESTART_EN		0x20
 #define DW_IC_CON_SLAVE_DISABLE		0x40
+#define DW_IC_CON_STOP_DET_IFADDRESSED		0x80
+#define DW_IC_CON_TX_EMPTY_CTRL		0x100
+#define DW_IC_CON_RX_FIFO_FULL_HLD_CTRL		0x200
 
 /*
  * Registers offset
  */
 #define DW_IC_CON		0x0
 #define DW_IC_TAR		0x4
+#define DW_IC_SAR		0x8
 #define DW_IC_DATA_CMD		0x10
 #define DW_IC_SS_SCL_HCNT	0x14
 #define DW_IC_SS_SCL_LCNT	0x18
@@ -76,6 +81,7 @@
 #define DW_IC_SDA_HOLD		0x7c
 #define DW_IC_TX_ABRT_SOURCE	0x80
 #define DW_IC_ENABLE_STATUS	0x9c
+#define DW_IC_CLR_RESTART_DET	0xa8
 #define DW_IC_COMP_PARAM_1	0xf4
 #define DW_IC_COMP_VERSION	0xf8
 #define DW_IC_SDA_HOLD_MIN_VERS	0x3131312A
@@ -94,15 +100,22 @@
 #define DW_IC_INTR_STOP_DET	0x200
 #define DW_IC_INTR_START_DET	0x400
 #define DW_IC_INTR_GEN_CALL	0x800
+#define DW_IC_INTR_RESTART_DET	0x1000
 
 #define DW_IC_INTR_DEFAULT_MASK		(DW_IC_INTR_RX_FULL | \
 					 DW_IC_INTR_TX_ABRT | \
 					 DW_IC_INTR_STOP_DET)
 #define DW_IC_INTR_MASTER_MASK		(DW_IC_INTR_DEFAULT_MASK | \
 					 DW_IC_INTR_TX_EMPTY)
+#define DW_IC_INTR_SLAVE_MASK		(DW_IC_INTR_DEFAULT_MASK | \
+					 DW_IC_INTR_RX_DONE | \
+					 DW_IC_INTR_RX_UNDER | \
+					 DW_IC_INTR_RD_REQ)
+
 #define DW_IC_STATUS_ACTIVITY		0x1
 #define DW_IC_STATUS_TFE		BIT(2)
 #define DW_IC_STATUS_MASTER_ACTIVITY	BIT(5)
+#define DW_IC_STATUS_SLAVE_ACTIVITY	BIT(6)
 
 #define DW_IC_SDA_HOLD_RX_SHIFT		16
 #define DW_IC_SDA_HOLD_RX_MASK		GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT)
@@ -115,7 +128,7 @@
 #define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
 
 /*
- * Status codes
+ * status codes
  */
 #define STATUS_IDLE			0x0
 #define STATUS_WRITE_IN_PROGRESS	0x1
@@ -123,6 +136,12 @@
 
 #define TIMEOUT			20 /* ms */
 
+/*
+ * operation modes
+ */
+#define DW_IC_MASTER		0
+#define DW_IC_SLAVE		1
+
 /*
  * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
  *
@@ -140,6 +159,9 @@
 #define ABRT_10B_RD_NORSTRT	10
 #define ABRT_MASTER_DIS		11
 #define ARB_LOST		12
+#define ABRT_SLAVE_FLUSH_TXFIFO	13
+#define ABRT_SLAVE_ARBLOST	14
+#define ABRT_SLAVE_RD_INTX	15
 
 #define DW_IC_TX_ABRT_7B_ADDR_NOACK	(1UL << ABRT_7B_ADDR_NOACK)
 #define DW_IC_TX_ABRT_10ADDR1_NOACK	(1UL << ABRT_10ADDR1_NOACK)
@@ -152,6 +174,9 @@
 #define DW_IC_TX_ABRT_10B_RD_NORSTRT	(1UL << ABRT_10B_RD_NORSTRT)
 #define DW_IC_TX_ABRT_MASTER_DIS	(1UL << ABRT_MASTER_DIS)
 #define DW_IC_TX_ARB_LOST		(1UL << ARB_LOST)
+#define DW_IC_RX_ABRT_SLAVE_RD_INTX	(1UL << ABRT_SLAVE_RD_INTX)
+#define DW_IC_RX_ABRT_SLAVE_ARBLOST	(1UL << ABRT_SLAVE_ARBLOST)
+#define DW_IC_RX_ABRT_SLAVE_FLUSH_TXFIFO	(1UL << ABRT_SLAVE_FLUSH_TXFIFO)
 
 #define DW_IC_TX_ABRT_NOACK		(DW_IC_TX_ABRT_7B_ADDR_NOACK | \
 					 DW_IC_TX_ABRT_10ADDR1_NOACK | \
@@ -166,6 +191,7 @@
  * @base: IO registers pointer
  * @cmd_complete: tx completion indicator
  * @clk: input reference clock
+ * @slave: represent an I2C slave device
  * @cmd_err: run time hadware error code
  * @msgs: points to an array of messages currently being transferred
  * @msgs_num: the number of elements in msgs
@@ -182,6 +208,7 @@
  * @abort_source: copy of the TX_ABRT_SOURCE register
  * @irq: interrupt number for the i2c master
  * @adapter: i2c subsystem adapter node
+ * @slave_cfg: configuration for the slave device
  * @tx_fifo_depth: depth of the hardware tx fifo
  * @rx_fifo_depth: depth of the hardware rx fifo
  * @rx_outstanding: current master-rx elements in tx fifo
@@ -212,6 +239,7 @@ struct dw_i2c_dev {
 	struct completion	cmd_complete;
 	struct clk		*clk;
 	struct reset_control	*rst;
+	struct i2c_client		*slave;
 	u32			(*get_clk_rate_khz) (struct dw_i2c_dev *dev);
 	struct dw_pci_controller *controller;
 	int			cmd_err;
@@ -231,6 +259,7 @@ struct dw_i2c_dev {
 	struct i2c_adapter	adapter;
 	u32			functionality;
 	u32			master_cfg;
+	u32			slave_cfg;
 	unsigned int		tx_fifo_depth;
 	unsigned int		rx_fifo_depth;
 	int			rx_outstanding;
-- 
GitLab


From cdea46566bb21ce309725a024208322a409055cc Mon Sep 17 00:00:00 2001
From: Tony Camuso <tcamuso@redhat.com>
Date: Mon, 19 Jun 2017 13:17:33 -0400
Subject: [PATCH 0274/1958] ipmi: use rcu lock around call to
 intf->handlers->sender()

A vendor with a system having more than 128 CPUs occasionally encounters
the following crash during shutdown. This is not an easily reproduceable
event, but the vendor was able to provide the following analysis of the
crash, which exhibits the same footprint each time.

crash> bt
PID: 0      TASK: ffff88017c70ce70  CPU: 5   COMMAND: "swapper/5"
 #0 [ffff88085c143ac8] machine_kexec at ffffffff81059c8b
 #1 [ffff88085c143b28] __crash_kexec at ffffffff811052e2
 #2 [ffff88085c143bf8] crash_kexec at ffffffff811053d0
 #3 [ffff88085c143c10] oops_end at ffffffff8168ef88
 #4 [ffff88085c143c38] no_context at ffffffff8167ebb3
 #5 [ffff88085c143c88] __bad_area_nosemaphore at ffffffff8167ec49
 #6 [ffff88085c143cd0] bad_area_nosemaphore at ffffffff8167edb3
 #7 [ffff88085c143ce0] __do_page_fault at ffffffff81691d1e
 #8 [ffff88085c143d40] do_page_fault at ffffffff81691ec5
 #9 [ffff88085c143d70] page_fault at ffffffff8168e188
    [exception RIP: unknown or invalid address]
    RIP: ffffffffa053c800  RSP: ffff88085c143e28  RFLAGS: 00010206
    RAX: ffff88017c72bfd8  RBX: ffff88017a8dc000  RCX: ffff8810588b5ac8
    RDX: ffff8810588b5a00  RSI: ffffffffa053c800  RDI: ffff8810588b5a00
    RBP: ffff88085c143e58   R8: ffff88017c70d408   R9: ffff88017a8dc000
    R10: 0000000000000002  R11: ffff88085c143da0  R12: ffff8810588b5ac8
    R13: 0000000000000100  R14: ffffffffa053c800  R15: ffff8810588b5a00
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
    <IRQ stack>
    [exception RIP: cpuidle_enter_state+82]
    RIP: ffffffff81514192  RSP: ffff88017c72be50  RFLAGS: 00000202
    RAX: 0000001e4c3c6f16  RBX: 000000000000f8a0  RCX: 0000000000000018
    RDX: 0000000225c17d03  RSI: ffff88017c72bfd8  RDI: 0000001e4c3c6f16
    RBP: ffff88017c72be78   R8: 000000000000237e   R9: 0000000000000018
    R10: 0000000000002494  R11: 0000000000000001  R12: ffff88017c72be20
    R13: ffff88085c14f8e0  R14: 0000000000000082  R15: 0000001e4c3bb400
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018

This is the corresponding stack trace

It has crashed because the area pointed with RIP extracted from timer
element is already removed during a shutdown process.

The function is smi_timeout().

And we think ffff8810588b5a00 in RDX is a parameter struct smi_info

crash> rd ffff8810588b5a00 20
ffff8810588b5a00:  ffff8810588b6000 0000000000000000   .`.X............
ffff8810588b5a10:  ffff880853264400 ffffffffa05417e0   .D&S......T.....
ffff8810588b5a20:  24a024a000000000 0000000000000000   .....$.$........
ffff8810588b5a30:  0000000000000000 0000000000000000   ................
ffff8810588b5a30:  0000000000000000 0000000000000000   ................
ffff8810588b5a40:  ffffffffa053a040 ffffffffa053a060   @.S.....`.S.....
ffff8810588b5a50:  0000000000000000 0000000100000001   ................
ffff8810588b5a60:  0000000000000000 0000000000000e00   ................
ffff8810588b5a70:  ffffffffa053a580 ffffffffa053a6e0   ..S.......S.....
ffff8810588b5a80:  ffffffffa053a4a0 ffffffffa053a250   ..S.....P.S.....
ffff8810588b5a90:  0000000500000002 0000000000000000   ................

Unfortunately the top of this area is already detroyed by someone.
But because of two reasonns we think this is struct smi_info
 1) The address included in between  ffff8810588b5a70 and ffff8810588b5a80:
  are inside of ipmi_si_intf.c  see crash> module ffff88085779d2c0

 2) We've found the area which point this.
  It is offset 0x68 of  ffff880859df4000

crash> rd  ffff880859df4000 100
ffff880859df4000:  0000000000000000 0000000000000001   ................
ffff880859df4010:  ffffffffa0535290 dead000000000200   .RS.............
ffff880859df4020:  ffff880859df4020 ffff880859df4020    @.Y.... @.Y....
ffff880859df4030:  0000000000000002 0000000000100010   ................
ffff880859df4040:  ffff880859df4040 ffff880859df4040   @@.Y....@@.Y....
ffff880859df4050:  0000000000000000 0000000000000000   ................
ffff880859df4060:  0000000000000000 ffff8810588b5a00   .........Z.X....
ffff880859df4070:  0000000000000001 ffff880859df4078   ........x@.Y....

 If we regards it as struct ipmi_smi in shutdown process
 it looks consistent.

The remedy for this apparent race is affixed below.

Signed-off-by: Tony Camuso <tcamuso@redhat.com>
Cc: stable@vger.kernel.org # 3.19

This was first introduced in 7ea0ed2b5be817 ipmi: Make the
message handler easier to use for SMI interfaces
where some code was moved outside of the rcu_read_lock()
and the lock was not added.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 9f699951b75aa..49a7e9685e77f 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3878,6 +3878,9 @@ static void smi_recv_tasklet(unsigned long val)
 	 * because the lower layer is allowed to hold locks while calling
 	 * message delivery.
 	 */
+
+	rcu_read_lock();
+
 	if (!run_to_completion)
 		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
 	if (intf->curr_msg == NULL && !intf->in_shutdown) {
@@ -3900,6 +3903,8 @@ static void smi_recv_tasklet(unsigned long val)
 	if (newmsg)
 		intf->handlers->sender(intf->send_info, newmsg);
 
+	rcu_read_unlock();
+
 	handle_new_recv_msgs(intf);
 }
 
-- 
GitLab


From 9f88145f1871456de67ae6a242ac2661187bd4ff Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Fri, 9 Jun 2017 21:19:52 -0500
Subject: [PATCH 0275/1958] ipmi: Create a platform device for a DMI-specified
 IPMI interface

Create a platform device for each IPMI device in the DMI table,
a separate kind of device for SSIF types and for KCS, BT, and
SMIC types.  This is so auto-loading IPMI devices will work
from just SMBIOS tables.

This also adds the ability to extract the slave address from
the SMBIOS tables, so that when the driver uses ACPI-specified
interfaces, it can still extract the slave address from SMBIOS.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Cc: Andy Lutomirski <luto@kernel.org>
---
 drivers/char/ipmi/Kconfig    |   4 +
 drivers/char/ipmi/Makefile   |   1 +
 drivers/char/ipmi/ipmi_dmi.c | 273 +++++++++++++++++++++++++++++++++++
 drivers/char/ipmi/ipmi_dmi.h |  12 ++
 4 files changed, 290 insertions(+)
 create mode 100644 drivers/char/ipmi/ipmi_dmi.c
 create mode 100644 drivers/char/ipmi/ipmi_dmi.h

diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 90f3edffb0677..f6fa056a52fcf 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -5,6 +5,7 @@
 menuconfig IPMI_HANDLER
        tristate 'IPMI top-level message handler'
        depends on HAS_IOMEM
+       select IPMI_DMI_DECODE if DMI
        help
          This enables the central IPMI message handler, required for IPMI
 	 to work.
@@ -16,6 +17,9 @@ menuconfig IPMI_HANDLER
 
 	 If unsure, say N.
 
+config IPMI_DMI_DECODE
+       bool
+
 if IPMI_HANDLER
 
 config IPMI_PANIC_EVENT
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index 0d98cd91def1d..eefb0b301e836 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -7,6 +7,7 @@ ipmi_si-y := ipmi_si_intf.o ipmi_kcs_sm.o ipmi_smic_sm.o ipmi_bt_sm.o
 obj-$(CONFIG_IPMI_HANDLER) += ipmi_msghandler.o
 obj-$(CONFIG_IPMI_DEVICE_INTERFACE) += ipmi_devintf.o
 obj-$(CONFIG_IPMI_SI) += ipmi_si.o
+obj-$(CONFIG_IPMI_DMI_DECODE) += ipmi_dmi.o
 obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c
new file mode 100644
index 0000000000000..2a84401dea05b
--- /dev/null
+++ b/drivers/char/ipmi/ipmi_dmi.c
@@ -0,0 +1,273 @@
+/*
+ * A hack to create a platform device from a DMI entry.  This will
+ * allow autoloading of the IPMI drive based on SMBIOS entries.
+ */
+
+#include <linux/ipmi.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include "ipmi_dmi.h"
+
+struct ipmi_dmi_info {
+	int type;
+	u32 flags;
+	unsigned long addr;
+	u8 slave_addr;
+	struct ipmi_dmi_info *next;
+};
+
+static struct ipmi_dmi_info *ipmi_dmi_infos;
+
+static int ipmi_dmi_nr __initdata;
+
+static void __init dmi_add_platform_ipmi(unsigned long base_addr,
+					 u32 flags,
+					 u8 slave_addr,
+					 int irq,
+					 int offset,
+					 int type)
+{
+	struct platform_device *pdev;
+	struct resource r[4];
+	unsigned int num_r = 1, size;
+	struct property_entry p[4] = {
+		PROPERTY_ENTRY_U8("slave-addr", slave_addr),
+		PROPERTY_ENTRY_U8("ipmi-type", type),
+		PROPERTY_ENTRY_U16("i2c-addr", base_addr),
+		{ }
+	};
+	char *name, *override;
+	int rv;
+	struct ipmi_dmi_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		pr_warn("ipmi:dmi: Could not allocate dmi info\n");
+	} else {
+		info->type = type;
+		info->flags = flags;
+		info->addr = base_addr;
+		info->slave_addr = slave_addr;
+		info->next = ipmi_dmi_infos;
+		ipmi_dmi_infos = info;
+	}
+
+	name = "dmi-ipmi-si";
+	override = "ipmi_si";
+	switch (type) {
+	case IPMI_DMI_TYPE_SSIF:
+		name = "dmi-ipmi-ssif";
+		override = "ipmi_ssif";
+		offset = 1;
+		size = 1;
+		break;
+	case IPMI_DMI_TYPE_BT:
+		size = 3;
+		break;
+	case IPMI_DMI_TYPE_KCS:
+	case IPMI_DMI_TYPE_SMIC:
+		size = 2;
+		break;
+	default:
+		pr_err("ipmi:dmi: Invalid IPMI type: %d", type);
+		return;
+	}
+
+	pdev = platform_device_alloc(name, ipmi_dmi_nr);
+	if (!pdev) {
+		pr_err("ipmi:dmi: Error allocation IPMI platform device");
+		return;
+	}
+	pdev->driver_override = override;
+
+	if (type == IPMI_DMI_TYPE_SSIF)
+		goto add_properties;
+
+	memset(r, 0, sizeof(r));
+
+	r[0].start = base_addr;
+	r[0].end = r[0].start + offset - 1;
+	r[0].name = "IPMI Address 1";
+	r[0].flags = flags;
+
+	if (size > 1) {
+		r[1].start = r[0].start + offset;
+		r[1].end = r[1].start + offset - 1;
+		r[1].name = "IPMI Address 2";
+		r[1].flags = flags;
+		num_r++;
+	}
+
+	if (size > 2) {
+		r[2].start = r[1].start + offset;
+		r[2].end = r[2].start + offset - 1;
+		r[2].name = "IPMI Address 3";
+		r[2].flags = flags;
+		num_r++;
+	}
+
+	if (irq) {
+		r[num_r].start = irq;
+		r[num_r].end = irq;
+		r[num_r].name = "IPMI IRQ";
+		r[num_r].flags = IORESOURCE_IRQ;
+		num_r++;
+	}
+
+	rv = platform_device_add_resources(pdev, r, num_r);
+	if (rv) {
+		dev_err(&pdev->dev,
+			"ipmi:dmi: Unable to add resources: %d\n", rv);
+		goto err;
+	}
+
+add_properties:
+	rv = platform_device_add_properties(pdev, p);
+	if (rv) {
+		dev_err(&pdev->dev,
+			"ipmi:dmi: Unable to add properties: %d\n", rv);
+		goto err;
+	}
+
+	rv = platform_device_add(pdev);
+	if (rv) {
+		dev_err(&pdev->dev, "ipmi:dmi: Unable to add device: %d\n", rv);
+		goto err;
+	}
+
+	ipmi_dmi_nr++;
+	return;
+
+err:
+	platform_device_put(pdev);
+}
+
+/*
+ * Look up the slave address for a given interface.  This is here
+ * because ACPI doesn't have a slave address while SMBIOS does, but we
+ * prefer using ACPI so the ACPI code can use the IPMI namespace.
+ * This function allows an ACPI-specified IPMI device to look up the
+ * slave address from the DMI table.
+ */
+int ipmi_dmi_get_slave_addr(int type, u32 flags, unsigned long base_addr)
+{
+	struct ipmi_dmi_info *info = ipmi_dmi_infos;
+
+	while (info) {
+		if (info->type == type &&
+		    info->flags == flags &&
+		    info->addr == base_addr)
+			return info->slave_addr;
+		info = info->next;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ipmi_dmi_get_slave_addr);
+
+#define DMI_IPMI_MIN_LENGTH	0x10
+#define DMI_IPMI_VER2_LENGTH	0x12
+#define DMI_IPMI_TYPE		4
+#define DMI_IPMI_SLAVEADDR	6
+#define DMI_IPMI_ADDR		8
+#define DMI_IPMI_ACCESS		0x10
+#define DMI_IPMI_IRQ		0x11
+#define DMI_IPMI_IO_MASK	0xfffe
+
+static void __init dmi_decode_ipmi(const struct dmi_header *dm)
+{
+	const u8	*data = (const u8 *) dm;
+	u32             flags = IORESOURCE_IO;
+	unsigned long	base_addr;
+	u8              len = dm->length;
+	u8              slave_addr;
+	int             irq = 0, offset;
+	int             type;
+
+	if (len < DMI_IPMI_MIN_LENGTH)
+		return;
+
+	type = data[DMI_IPMI_TYPE];
+	slave_addr = data[DMI_IPMI_SLAVEADDR];
+
+	memcpy(&base_addr, data + DMI_IPMI_ADDR, sizeof(unsigned long));
+	if (len >= DMI_IPMI_VER2_LENGTH) {
+		if (type == IPMI_DMI_TYPE_SSIF) {
+			offset = 0;
+			flags = 0;
+			base_addr = data[DMI_IPMI_ADDR] >> 1;
+			if (base_addr == 0) {
+				/*
+				 * Some broken systems put the I2C address in
+				 * the slave address field.  We try to
+				 * accommodate them here.
+				 */
+				base_addr = data[DMI_IPMI_SLAVEADDR] >> 1;
+				slave_addr = 0;
+			}
+		} else {
+			if (base_addr & 1) {
+				/* I/O */
+				base_addr &= DMI_IPMI_IO_MASK;
+			} else {
+				/* Memory */
+				flags = IORESOURCE_MEM;
+			}
+
+			/*
+			 * If bit 4 of byte 0x10 is set, then the lsb
+			 * for the address is odd.
+			 */
+			base_addr |= (data[DMI_IPMI_ACCESS] >> 4) & 1;
+
+			irq = data[DMI_IPMI_IRQ];
+
+			/*
+			 * The top two bits of byte 0x10 hold the
+			 * register spacing.
+			 */
+			switch ((data[DMI_IPMI_ACCESS] >> 6) & 3) {
+			case 0: /* Byte boundaries */
+				offset = 1;
+				break;
+			case 1: /* 32-bit boundaries */
+				offset = 4;
+				break;
+			case 2: /* 16-byte boundaries */
+				offset = 16;
+				break;
+			default:
+				pr_err("ipmi:dmi: Invalid offset: 0");
+				return;
+			}
+		}
+	} else {
+		/* Old DMI spec. */
+		/*
+		 * Note that technically, the lower bit of the base
+		 * address should be 1 if the address is I/O and 0 if
+		 * the address is in memory.  So many systems get that
+		 * wrong (and all that I have seen are I/O) so we just
+		 * ignore that bit and assume I/O.  Systems that use
+		 * memory should use the newer spec, anyway.
+		 */
+		base_addr = base_addr & DMI_IPMI_IO_MASK;
+		offset = 1;
+	}
+
+	dmi_add_platform_ipmi(base_addr, flags, slave_addr, irq,
+			      offset, type);
+}
+
+static int __init scan_for_dmi_ipmi(void)
+{
+	const struct dmi_device *dev = NULL;
+
+	while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev)))
+		dmi_decode_ipmi((const struct dmi_header *) dev->device_data);
+
+	return 0;
+}
+subsys_initcall(scan_for_dmi_ipmi);
diff --git a/drivers/char/ipmi/ipmi_dmi.h b/drivers/char/ipmi/ipmi_dmi.h
new file mode 100644
index 0000000000000..0a1afe5ceb1ed
--- /dev/null
+++ b/drivers/char/ipmi/ipmi_dmi.h
@@ -0,0 +1,12 @@
+/*
+ * DMI defines for use by IPMI
+ */
+
+#define IPMI_DMI_TYPE_KCS	0x01
+#define IPMI_DMI_TYPE_SMIC	0x02
+#define IPMI_DMI_TYPE_BT	0x03
+#define IPMI_DMI_TYPE_SSIF	0x04
+
+#ifdef CONFIG_IPMI_DMI_DECODE
+int ipmi_dmi_get_slave_addr(int type, u32 flags, unsigned long base_addr);
+#endif
-- 
GitLab


From 0944d889a237b6107f9ceeee053fe7221cdd1089 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 3 Feb 2016 10:45:28 -0600
Subject: [PATCH 0276/1958] ipmi: Convert DMI handling over to a platform
 device

Now that the IPMI DMI code creates a platform device for IPMI devices
in the firmware, use that instead of handling all the DMI work
in the IPMI drivers themselves.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Cc: Andy Lutomirski <luto@kernel.org>
---
 drivers/char/ipmi/ipmi_si_intf.c | 261 ++++++++++++++-----------------
 drivers/char/ipmi/ipmi_ssif.c    | 162 ++++++++++++-------
 2 files changed, 219 insertions(+), 204 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index b89078bff1c16..edaf6284c6638 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -61,6 +61,7 @@
 #include <linux/ipmi_smi.h>
 #include <asm/io.h>
 #include "ipmi_si_sm.h"
+#include "ipmi_dmi.h"
 #include <linux/dmi.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -2273,136 +2274,105 @@ static void spmi_find_bmc(void)
 }
 #endif
 
-#ifdef CONFIG_DMI
-struct dmi_ipmi_data {
-	u8   		type;
-	u8   		addr_space;
-	unsigned long	base_addr;
-	u8   		irq;
-	u8              offset;
-	u8              slave_addr;
-};
-
-static int decode_dmi(const struct dmi_header *dm,
-				struct dmi_ipmi_data *dmi)
+#if defined(CONFIG_DMI) || defined(CONFIG_ACPI)
+struct resource *ipmi_get_info_from_resources(struct platform_device *pdev,
+					      struct smi_info *info)
 {
-	const u8	*data = (const u8 *)dm;
-	unsigned long  	base_addr;
-	u8		reg_spacing;
-	u8              len = dm->length;
-
-	dmi->type = data[4];
+	struct resource *res, *res_second;
 
-	memcpy(&base_addr, data+8, sizeof(unsigned long));
-	if (len >= 0x11) {
-		if (base_addr & 1) {
-			/* I/O */
-			base_addr &= 0xFFFE;
-			dmi->addr_space = IPMI_IO_ADDR_SPACE;
-		} else
-			/* Memory */
-			dmi->addr_space = IPMI_MEM_ADDR_SPACE;
-
-		/* If bit 4 of byte 0x10 is set, then the lsb for the address
-		   is odd. */
-		dmi->base_addr = base_addr | ((data[0x10] & 0x10) >> 4);
-
-		dmi->irq = data[0x11];
-
-		/* The top two bits of byte 0x10 hold the register spacing. */
-		reg_spacing = (data[0x10] & 0xC0) >> 6;
-		switch (reg_spacing) {
-		case 0x00: /* Byte boundaries */
-		    dmi->offset = 1;
-		    break;
-		case 0x01: /* 32-bit boundaries */
-		    dmi->offset = 4;
-		    break;
-		case 0x02: /* 16-byte boundaries */
-		    dmi->offset = 16;
-		    break;
-		default:
-		    /* Some other interface, just ignore it. */
-		    return -EIO;
-		}
+	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (res) {
+		info->io_setup = port_setup;
+		info->io.addr_type = IPMI_IO_ADDR_SPACE;
 	} else {
-		/* Old DMI spec. */
-		/*
-		 * Note that technically, the lower bit of the base
-		 * address should be 1 if the address is I/O and 0 if
-		 * the address is in memory.  So many systems get that
-		 * wrong (and all that I have seen are I/O) so we just
-		 * ignore that bit and assume I/O.  Systems that use
-		 * memory should use the newer spec, anyway.
-		 */
-		dmi->base_addr = base_addr & 0xfffe;
-		dmi->addr_space = IPMI_IO_ADDR_SPACE;
-		dmi->offset = 1;
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (res) {
+			info->io_setup = mem_setup;
+			info->io.addr_type = IPMI_MEM_ADDR_SPACE;
+		}
 	}
+	if (!res) {
+		dev_err(&pdev->dev, "no I/O or memory address\n");
+		return NULL;
+	}
+	info->io.addr_data = res->start;
 
-	dmi->slave_addr = data[6];
+	info->io.regspacing = DEFAULT_REGSPACING;
+	res_second = platform_get_resource(pdev,
+			       (info->io.addr_type == IPMI_IO_ADDR_SPACE) ?
+					IORESOURCE_IO : IORESOURCE_MEM,
+			       1);
+	if (res_second) {
+		if (res_second->start > info->io.addr_data)
+			info->io.regspacing =
+				res_second->start - info->io.addr_data;
+	}
+	info->io.regsize = DEFAULT_REGSIZE;
+	info->io.regshift = 0;
 
-	return 0;
+	return res;
 }
 
-static void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
+#endif
+
+#ifdef CONFIG_DMI
+static int dmi_ipmi_probe(struct platform_device *pdev)
 {
 	struct smi_info *info;
+	u8 type, slave_addr;
+	int rv;
+
+	if (!si_trydmi)
+		return -ENODEV;
+
+	rv = device_property_read_u8(&pdev->dev, "ipmi-type", &type);
+	if (rv)
+		return -ENODEV;
 
 	info = smi_info_alloc();
 	if (!info) {
 		pr_err(PFX "Could not allocate SI data\n");
-		return;
+		return -ENOMEM;
 	}
 
 	info->addr_source = SI_SMBIOS;
 	pr_info(PFX "probing via SMBIOS\n");
 
-	switch (ipmi_data->type) {
-	case 0x01: /* KCS */
+	switch (type) {
+	case IPMI_DMI_TYPE_KCS:
 		info->si_type = SI_KCS;
 		break;
-	case 0x02: /* SMIC */
+	case IPMI_DMI_TYPE_SMIC:
 		info->si_type = SI_SMIC;
 		break;
-	case 0x03: /* BT */
+	case IPMI_DMI_TYPE_BT:
 		info->si_type = SI_BT;
 		break;
 	default:
 		kfree(info);
-		return;
+		return -EINVAL;
 	}
 
-	switch (ipmi_data->addr_space) {
-	case IPMI_MEM_ADDR_SPACE:
-		info->io_setup = mem_setup;
-		info->io.addr_type = IPMI_MEM_ADDR_SPACE;
-		break;
-
-	case IPMI_IO_ADDR_SPACE:
-		info->io_setup = port_setup;
-		info->io.addr_type = IPMI_IO_ADDR_SPACE;
-		break;
-
-	default:
-		kfree(info);
-		pr_warn(PFX "Unknown SMBIOS I/O Address type: %d\n",
-			ipmi_data->addr_space);
-		return;
+	if (!ipmi_get_info_from_resources(pdev, info)) {
+		rv = -EINVAL;
+		goto err_free;
 	}
-	info->io.addr_data = ipmi_data->base_addr;
 
-	info->io.regspacing = ipmi_data->offset;
-	if (!info->io.regspacing)
-		info->io.regspacing = DEFAULT_REGSPACING;
-	info->io.regsize = DEFAULT_REGSIZE;
-	info->io.regshift = 0;
-
-	info->slave_addr = ipmi_data->slave_addr;
+	rv = device_property_read_u8(&pdev->dev, "slave-addr", &slave_addr);
+	if (rv) {
+		dev_warn(&pdev->dev, "device has no slave-addr property");
+		info->slave_addr = 0x20;
+	} else {
+		info->slave_addr = slave_addr;
+	}
 
-	info->irq = ipmi_data->irq;
-	if (info->irq)
+	info->irq = platform_get_irq(pdev, 0);
+	if (info->irq > 0)
 		info->irq_setup = std_irq_setup;
+	else
+		info->irq = 0;
+
+	info->dev = &pdev->dev;
 
 	pr_info("ipmi_si: SMBIOS: %s %#lx regsize %d spacing %d irq %d\n",
 		(info->io.addr_type == IPMI_IO_ADDR_SPACE) ? "io" : "mem",
@@ -2411,21 +2381,17 @@ static void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
 
 	if (add_smi(info))
 		kfree(info);
-}
 
-static void dmi_find_bmc(void)
-{
-	const struct dmi_device *dev = NULL;
-	struct dmi_ipmi_data data;
-	int                  rv;
+	return 0;
 
-	while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev))) {
-		memset(&data, 0, sizeof(data));
-		rv = decode_dmi((const struct dmi_header *) dev->device_data,
-				&data);
-		if (!rv)
-			try_init_dmi(&data);
-	}
+err_free:
+	kfree(info);
+	return rv;
+}
+#else
+static int dmi_ipmi_probe(struct platform_device *pdev)
+{
+	return -ENODEV;
 }
 #endif /* CONFIG_DMI */
 
@@ -2684,17 +2650,47 @@ static int of_ipmi_probe(struct platform_device *dev)
 #endif
 
 #ifdef CONFIG_ACPI
+static int find_slave_address(struct smi_info *info, int slave_addr)
+{
+#ifdef CONFIG_IPMI_DMI_DECODE
+	if (!slave_addr) {
+		int type = -1;
+		u32 flags = IORESOURCE_IO;
+
+		switch (info->si_type) {
+		case SI_KCS:
+			type = IPMI_DMI_TYPE_KCS;
+			break;
+		case SI_BT:
+			type = IPMI_DMI_TYPE_BT;
+			break;
+		case SI_SMIC:
+			type = IPMI_DMI_TYPE_SMIC;
+			break;
+		}
+
+		if (info->io.addr_type == IPMI_MEM_ADDR_SPACE)
+			flags = IORESOURCE_MEM;
+
+		slave_addr = ipmi_dmi_get_slave_addr(type, flags,
+						     info->io.addr_data);
+	}
+#endif
+
+	return slave_addr;
+}
+
 static int acpi_ipmi_probe(struct platform_device *dev)
 {
 	struct smi_info *info;
-	struct resource *res, *res_second;
 	acpi_handle handle;
 	acpi_status status;
 	unsigned long long tmp;
+	struct resource *res;
 	int rv = -EINVAL;
 
 	if (!si_tryacpi)
-	       return 0;
+		return -ENODEV;
 
 	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle)
@@ -2734,35 +2730,11 @@ static int acpi_ipmi_probe(struct platform_device *dev)
 		goto err_free;
 	}
 
-	res = platform_get_resource(dev, IORESOURCE_IO, 0);
-	if (res) {
-		info->io_setup = port_setup;
-		info->io.addr_type = IPMI_IO_ADDR_SPACE;
-	} else {
-		res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-		if (res) {
-			info->io_setup = mem_setup;
-			info->io.addr_type = IPMI_MEM_ADDR_SPACE;
-		}
-	}
+	res = ipmi_get_info_from_resources(dev, info);
 	if (!res) {
-		dev_err(&dev->dev, "no I/O or memory address\n");
+		rv = -EINVAL;
 		goto err_free;
 	}
-	info->io.addr_data = res->start;
-
-	info->io.regspacing = DEFAULT_REGSPACING;
-	res_second = platform_get_resource(dev,
-			       (info->io.addr_type == IPMI_IO_ADDR_SPACE) ?
-					IORESOURCE_IO : IORESOURCE_MEM,
-			       1);
-	if (res_second) {
-		if (res_second->start > info->io.addr_data)
-			info->io.regspacing =
-				res_second->start - info->io.addr_data;
-	}
-	info->io.regsize = DEFAULT_REGSIZE;
-	info->io.regshift = 0;
 
 	/* If _GPE exists, use it; otherwise use standard interrupts */
 	status = acpi_evaluate_integer(handle, "_GPE", NULL, &tmp);
@@ -2778,6 +2750,8 @@ static int acpi_ipmi_probe(struct platform_device *dev)
 		}
 	}
 
+	info->slave_addr = find_slave_address(info, info->slave_addr);
+
 	info->dev = &dev->dev;
 	platform_set_drvdata(dev, info);
 
@@ -2813,7 +2787,10 @@ static int ipmi_probe(struct platform_device *dev)
 	if (of_ipmi_probe(dev) == 0)
 		return 0;
 
-	return acpi_ipmi_probe(dev);
+	if (acpi_ipmi_probe(dev) == 0)
+		return 0;
+
+	return dmi_ipmi_probe(dev);
 }
 
 static int ipmi_remove(struct platform_device *dev)
@@ -3786,11 +3763,6 @@ static int init_ipmi_si(void)
 	}
 #endif
 
-#ifdef CONFIG_DMI
-	if (si_trydmi)
-		dmi_find_bmc();
-#endif
-
 #ifdef CONFIG_ACPI
 	if (si_tryacpi)
 		spmi_find_bmc();
@@ -3938,6 +3910,7 @@ static void cleanup_ipmi_si(void)
 }
 module_exit(cleanup_ipmi_si);
 
+MODULE_ALIAS("platform:dmi-ipmi-si");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Corey Minyard <minyard@mvista.com>");
 MODULE_DESCRIPTION("Interface to the IPMI driver for the KCS, SMIC, and BT"
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 05e18041c357e..61434830e641f 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -53,6 +53,7 @@
 #include <linux/acpi.h>
 #include <linux/ctype.h>
 #include <linux/time64.h>
+#include "ipmi_dmi.h"
 
 #define PFX "ipmi_ssif: "
 #define DEVICE_NAME "ipmi_ssif"
@@ -180,6 +181,8 @@ struct ssif_addr_info {
 	int slave_addr;
 	enum ipmi_addr_src addr_src;
 	union ipmi_smi_info_union addr_info;
+	struct device *dev;
+	struct i2c_client *client;
 
 	struct mutex clients_mutex;
 	struct list_head clients;
@@ -1171,6 +1174,7 @@ static LIST_HEAD(ssif_infos);
 static int ssif_remove(struct i2c_client *client)
 {
 	struct ssif_info *ssif_info = i2c_get_clientdata(client);
+	struct ssif_addr_info *addr_info;
 	int rv;
 
 	if (!ssif_info)
@@ -1198,6 +1202,13 @@ static int ssif_remove(struct i2c_client *client)
 		kthread_stop(ssif_info->thread);
 	}
 
+	list_for_each_entry(addr_info, &ssif_infos, link) {
+		if (addr_info->client == client) {
+			addr_info->client = NULL;
+			break;
+		}
+	}
+
 	/*
 	 * No message can be outstanding now, we have removed the
 	 * upper layer and it permitted us to do so.
@@ -1406,27 +1417,13 @@ static bool check_acpi(struct ssif_info *ssif_info, struct device *dev)
 
 static int find_slave_address(struct i2c_client *client, int slave_addr)
 {
-	struct ssif_addr_info *info;
-
-	if (slave_addr)
-		return slave_addr;
-
-	/*
-	 * Came in without a slave address, search around to see if
-	 * the other sources have a slave address.  This lets us pick
-	 * up an SMBIOS slave address when using ACPI.
-	 */
-	list_for_each_entry(info, &ssif_infos, link) {
-		if (info->binfo.addr != client->addr)
-			continue;
-		if (info->adapter_name && strcmp_nospace(info->adapter_name,
-				   client->adapter->name))
-			continue;
-		if (info->slave_addr) {
-			slave_addr = info->slave_addr;
-			break;
-		}
-	}
+#ifdef CONFIG_IPMI_DMI_DECODE
+	if (!slave_addr)
+		slave_addr = ipmi_dmi_get_slave_addr(
+			IPMI_DMI_TYPE_SSIF,
+			i2c_adapter_id(client->adapter),
+			client->addr);
+#endif
 
 	return slave_addr;
 }
@@ -1448,7 +1445,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	u8		  slave_addr = 0;
 	struct ssif_addr_info *addr_info = NULL;
 
-
 	resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL);
 	if (!resp)
 		return -ENOMEM;
@@ -1469,6 +1465,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 			ssif_info->addr_source = addr_info->addr_src;
 			ssif_info->ssif_debug = addr_info->debug;
 			ssif_info->addr_info = addr_info->addr_info;
+			addr_info->client = client;
 			slave_addr = addr_info->slave_addr;
 		}
 	}
@@ -1707,8 +1704,19 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	}
 
  out:
-	if (rv)
+	if (rv) {
+		/*
+		 * Note that if addr_info->client is assigned, we
+		 * leave it.  The i2c client hangs around even if we
+		 * return a failure here, and the failure here is not
+		 * propagated back to the i2c code.  This seems to be
+		 * design intent, strange as it may be.  But if we
+		 * don't leave it, ssif_platform_remove will not remove
+		 * the client like it should.
+		 */
+		dev_err(&client->dev, "Unable to start IPMI SSIF: %d\n", rv);
 		kfree(ssif_info);
+	}
 	kfree(resp);
 	return rv;
 
@@ -1733,7 +1741,8 @@ static int ssif_adapter_handler(struct device *adev, void *opaque)
 
 static int new_ssif_client(int addr, char *adapter_name,
 			   int debug, int slave_addr,
-			   enum ipmi_addr_src addr_src)
+			   enum ipmi_addr_src addr_src,
+			   struct device *dev)
 {
 	struct ssif_addr_info *addr_info;
 	int rv = 0;
@@ -1766,6 +1775,9 @@ static int new_ssif_client(int addr, char *adapter_name,
 	addr_info->debug = debug;
 	addr_info->slave_addr = slave_addr;
 	addr_info->addr_src = addr_src;
+	addr_info->dev = dev;
+
+	dev_set_drvdata(dev, addr_info);
 
 	list_add_tail(&addr_info->link, &ssif_infos);
 
@@ -1904,7 +1916,7 @@ static int try_init_spmi(struct SPMITable *spmi)
 
 	myaddr = spmi->addr.address & 0x7f;
 
-	return new_ssif_client(myaddr, NULL, 0, 0, SI_SPMI);
+	return new_ssif_client(myaddr, NULL, 0, 0, SI_SPMI, NULL);
 }
 
 static void spmi_find_bmc(void)
@@ -1933,48 +1945,40 @@ static void spmi_find_bmc(void) { }
 #endif
 
 #ifdef CONFIG_DMI
-static int decode_dmi(const struct dmi_device *dmi_dev)
+static int dmi_ipmi_probe(struct platform_device *pdev)
 {
-	struct dmi_header *dm = dmi_dev->device_data;
-	u8             *data = (u8 *) dm;
-	u8             len = dm->length;
-	unsigned short myaddr;
-	int            slave_addr;
+	u8 type, slave_addr = 0;
+	u16 i2c_addr;
+	int rv;
 
-	if (num_addrs >= MAX_SSIF_BMCS)
-		return -1;
+	if (!ssif_trydmi)
+		return -ENODEV;
 
-	if (len < 9)
-		return -1;
+	rv = device_property_read_u8(&pdev->dev, "ipmi-type", &type);
+	if (rv)
+		return -ENODEV;
 
-	if (data[0x04] != 4) /* Not SSIF */
-		return -1;
+	if (type != IPMI_DMI_TYPE_SSIF)
+		return -ENODEV;
 
-	if ((data[8] >> 1) == 0) {
-		/*
-		 * Some broken systems put the I2C address in
-		 * the slave address field.  We try to
-		 * accommodate them here.
-		 */
-		myaddr = data[6] >> 1;
-		slave_addr = 0;
-	} else {
-		myaddr = data[8] >> 1;
-		slave_addr = data[6];
+	rv = device_property_read_u16(&pdev->dev, "i2c-addr", &i2c_addr);
+	if (rv) {
+		dev_warn(&pdev->dev, PFX "No i2c-addr property\n");
+		return -ENODEV;
 	}
 
-	return new_ssif_client(myaddr, NULL, 0, slave_addr, SI_SMBIOS);
-}
-
-static void dmi_iterator(void)
-{
-	const struct dmi_device *dev = NULL;
+	rv = device_property_read_u8(&pdev->dev, "slave-addr", &slave_addr);
+	if (rv)
+		dev_warn(&pdev->dev, "device has no slave-addr property");
 
-	while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev)))
-		decode_dmi(dev);
+	return new_ssif_client(i2c_addr, NULL, 0,
+			       slave_addr, SI_SMBIOS, &pdev->dev);
 }
 #else
-static void dmi_iterator(void) { }
+static int dmi_ipmi_probe(struct platform_device *pdev)
+{
+	return -ENODEV;
+}
 #endif
 
 static const struct i2c_device_id ssif_id[] = {
@@ -1995,6 +1999,36 @@ static struct i2c_driver ssif_i2c_driver = {
 	.detect		= ssif_detect
 };
 
+static int ssif_platform_probe(struct platform_device *dev)
+{
+	return dmi_ipmi_probe(dev);
+}
+
+static int ssif_platform_remove(struct platform_device *dev)
+{
+	struct ssif_addr_info *addr_info = dev_get_drvdata(&dev->dev);
+
+	if (!addr_info)
+		return 0;
+
+	mutex_lock(&ssif_infos_mutex);
+	if (addr_info->client)
+		i2c_unregister_device(addr_info->client);
+
+	list_del(&addr_info->link);
+	kfree(addr_info);
+	mutex_unlock(&ssif_infos_mutex);
+	return 0;
+}
+
+static struct platform_driver ipmi_driver = {
+	.driver = {
+		.name = DEVICE_NAME,
+	},
+	.probe		= ssif_platform_probe,
+	.remove		= ssif_platform_remove,
+};
+
 static int init_ipmi_ssif(void)
 {
 	int i;
@@ -2009,7 +2043,7 @@ static int init_ipmi_ssif(void)
 	for (i = 0; i < num_addrs; i++) {
 		rv = new_ssif_client(addr[i], adapter_name[i],
 				     dbg[i], slave_addrs[i],
-				     SI_HARDCODED);
+				     SI_HARDCODED, NULL);
 		if (rv)
 			pr_err(PFX
 			       "Couldn't add hardcoded device at addr 0x%x\n",
@@ -2019,11 +2053,16 @@ static int init_ipmi_ssif(void)
 	if (ssif_tryacpi)
 		ssif_i2c_driver.driver.acpi_match_table	=
 			ACPI_PTR(ssif_acpi_match);
-	if (ssif_trydmi)
-		dmi_iterator();
+
 	if (ssif_tryacpi)
 		spmi_find_bmc();
 
+	if (ssif_trydmi) {
+		rv = platform_driver_register(&ipmi_driver);
+		if (rv)
+			pr_err(PFX "Unable to register driver: %d\n", rv);
+	}
+
 	ssif_i2c_driver.address_list = ssif_address_list();
 
 	rv = i2c_add_driver(&ssif_i2c_driver);
@@ -2043,10 +2082,13 @@ static void cleanup_ipmi_ssif(void)
 
 	i2c_del_driver(&ssif_i2c_driver);
 
+	platform_driver_unregister(&ipmi_driver);
+
 	free_ssif_clients();
 }
 module_exit(cleanup_ipmi_ssif);
 
+MODULE_ALIAS("platform:dmi-ipmi-ssif");
 MODULE_AUTHOR("Todd C Davis <todd.c.davis@intel.com>, Corey Minyard <minyard@acm.org>");
 MODULE_DESCRIPTION("IPMI driver for management controllers on a SMBus");
 MODULE_LICENSE("GPL");
-- 
GitLab


From bdca8cc096203b17ad0ac4e19f50578207e054d2 Mon Sep 17 00:00:00 2001
From: Nagarathnam Muthusamy <nagarathnam.muthusamy@oracle.com>
Date: Mon, 19 Jun 2017 13:08:48 -0400
Subject: [PATCH 0277/1958] Adding asm-prototypes.h for genksyms to generate
 crc

This patch adds the prototypes of assembly defined functions to asm-prototypes.h.
Some prototypes are directly added as they are not present in any existing header
files.

Signed-off-by: Nagarathnam Muthusamy <nagarathnam.muthusamy@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/asm-prototypes.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 arch/sparc/include/asm/asm-prototypes.h

diff --git a/arch/sparc/include/asm/asm-prototypes.h b/arch/sparc/include/asm/asm-prototypes.h
new file mode 100644
index 0000000000000..d381e11c5dbbf
--- /dev/null
+++ b/arch/sparc/include/asm/asm-prototypes.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <asm/xor.h>
+#include <asm/checksum.h>
+#include <asm/trap_block.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+#include <asm/ftrace.h>
+#include <asm/cacheflush.h>
+#include <asm/oplib.h>
+#include <linux/atomic.h>
+
+void *__memscan_zero(void *, size_t);
+void *__memscan_generic(void *, int, size_t);
+void *__bzero(void *, size_t);
+void VISenter(void); /* Dummy prototype to supress warning */
+#undef memcpy
+#undef memset
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+typedef int TItype __attribute__((mode(TI)));
+TItype __multi3(TItype a, TItype b);
-- 
GitLab


From d16c0649feb4fe4e814f44803df5a617769c3233 Mon Sep 17 00:00:00 2001
From: Nagarathnam Muthusamy <nagarathnam.muthusamy@oracle.com>
Date: Mon, 19 Jun 2017 13:08:49 -0400
Subject: [PATCH 0278/1958] sed regex in Makefile.build requires line break
 between exported symbols

The following regex in Makefile.build matches only one ___EXPORT_SYMBOL per line.

sed
's/.*___EXPORT_SYMBOL[[:space:]]*\([a-zA-Z0-9_]*\)[[:space:]]*,.*/EXPORT_SYMBOL(\1);/'

ATOMIC_OPS macro in atomic_64.S expands multiple symbols in same line hence
version generation is done only for the last matched symbol. This patch adds
new line between the symbol expansions.

Signed-off-by: Nagarathnam Muthusamy <nagarathnam.muthusamy@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/lib/atomic_64.S | 44 ++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 1c6a1bde51388..ce17c3094ba6d 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -62,19 +62,23 @@ ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
 ENDPROC(atomic_fetch_##op);						\
 EXPORT_SYMBOL(atomic_fetch_##op);
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
+ATOMIC_OP(add)
+ATOMIC_OP_RETURN(add)
+ATOMIC_FETCH_OP(add)
 
-ATOMIC_OPS(add)
-ATOMIC_OPS(sub)
+ATOMIC_OP(sub)
+ATOMIC_OP_RETURN(sub)
+ATOMIC_FETCH_OP(sub)
 
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+ATOMIC_OP(and)
+ATOMIC_FETCH_OP(and)
 
-ATOMIC_OPS(and)
-ATOMIC_OPS(or)
-ATOMIC_OPS(xor)
+ATOMIC_OP(or)
+ATOMIC_FETCH_OP(or)
+
+ATOMIC_OP(xor)
+ATOMIC_FETCH_OP(xor)
 
-#undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -124,19 +128,23 @@ ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
 ENDPROC(atomic64_fetch_##op);						\
 EXPORT_SYMBOL(atomic64_fetch_##op);
 
-#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
+ATOMIC64_OP(add)
+ATOMIC64_OP_RETURN(add)
+ATOMIC64_FETCH_OP(add)
+
+ATOMIC64_OP(sub)
+ATOMIC64_OP_RETURN(sub)
+ATOMIC64_FETCH_OP(sub)
 
-ATOMIC64_OPS(add)
-ATOMIC64_OPS(sub)
+ATOMIC64_OP(and)
+ATOMIC64_FETCH_OP(and)
 
-#undef ATOMIC64_OPS
-#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op)
+ATOMIC64_OP(or)
+ATOMIC64_FETCH_OP(or)
 
-ATOMIC64_OPS(and)
-ATOMIC64_OPS(or)
-ATOMIC64_OPS(xor)
+ATOMIC64_OP(xor)
+ATOMIC64_FETCH_OP(xor)
 
-#undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
-- 
GitLab


From f5a651f1d5e524cab345250a783702fb6a3f14d6 Mon Sep 17 00:00:00 2001
From: Nagarathnam Muthusamy <nagarathnam.muthusamy@oracle.com>
Date: Mon, 19 Jun 2017 13:08:50 -0400
Subject: [PATCH 0279/1958] Adding the type of exported symbols

Missing symbol type for few functions prevents genksyms from generating
symbol versions for those functions. This patch fixes them.

Signed-off-by: Nagarathnam Muthusamy <nagarathnam.muthusamy@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/lib/checksum_64.S | 1 +
 arch/sparc/lib/csum_copy.S   | 1 +
 arch/sparc/lib/memscan_64.S  | 2 ++
 arch/sparc/lib/memset.S      | 1 +
 4 files changed, 5 insertions(+)

diff --git a/arch/sparc/lib/checksum_64.S b/arch/sparc/lib/checksum_64.S
index f6732174fe6bd..6cfa521f444d1 100644
--- a/arch/sparc/lib/checksum_64.S
+++ b/arch/sparc/lib/checksum_64.S
@@ -38,6 +38,7 @@ csum_partial_fix_alignment:
 
 	.align		32
 	.globl		csum_partial
+	.type		csum_partial,#function
 	EXPORT_SYMBOL(csum_partial)
 csum_partial:		/* %o0=buff, %o1=len, %o2=sum */
 	prefetch	[%o0 + 0x000], #n_reads
diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S
index 0ecbafc30fd00..b1051e77c49ac 100644
--- a/arch/sparc/lib/csum_copy.S
+++ b/arch/sparc/lib/csum_copy.S
@@ -65,6 +65,7 @@
 	 add		%o5, %o4, %o4
 
 	.globl		FUNC_NAME
+	.type		FUNC_NAME,#function
 	EXPORT_SYMBOL(FUNC_NAME)
 FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len, %o3=sum */
 	LOAD(prefetch, %o0 + 0x000, #n_reads)
diff --git a/arch/sparc/lib/memscan_64.S b/arch/sparc/lib/memscan_64.S
index daa96f4b03e60..5efee1f4be366 100644
--- a/arch/sparc/lib/memscan_64.S
+++ b/arch/sparc/lib/memscan_64.S
@@ -14,6 +14,8 @@
 	.text
 	.align	32
 	.globl		__memscan_zero, __memscan_generic
+	.type		__memscan_zero,#function
+	.type		__memscan_generic,#function
 	.globl		memscan
 	EXPORT_SYMBOL(__memscan_zero)
 	EXPORT_SYMBOL(__memscan_generic)
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
index bb539b42b088a..e23338dbfc43a 100644
--- a/arch/sparc/lib/memset.S
+++ b/arch/sparc/lib/memset.S
@@ -63,6 +63,7 @@
 __bzero_begin:
 
 	.globl	__bzero
+	.type	__bzero,#function
 	.globl	memset
 	EXPORT_SYMBOL(__bzero)
 	EXPORT_SYMBOL(memset)
-- 
GitLab


From f64622167f4aa5124fed264d481509829d34e126 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 24 May 2017 19:31:06 +0530
Subject: [PATCH 0280/1958] i2c: emev2: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-emev2.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c
index 3129127088542..d2e84480fbe96 100644
--- a/drivers/i2c/busses/i2c-emev2.c
+++ b/drivers/i2c/busses/i2c-emev2.c
@@ -375,7 +375,9 @@ static int em_i2c_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->sclk))
 		return PTR_ERR(priv->sclk);
 
-	clk_prepare_enable(priv->sclk);
+	ret = clk_prepare_enable(priv->sclk);
+	if (ret)
+		return ret;
 
 	priv->adap.timeout = msecs_to_jiffies(100);
 	priv->adap.retries = 5;
-- 
GitLab


From 5a4c73342ad493c61f19a1406f47dcd35e18030f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:04 -0700
Subject: [PATCH 0281/1958] xfs: optimize _btree_query_all

Don't bother wandering our way through the leaf nodes when the caller
issues a query_all; just zoom down the left side of the tree and walk
rightwards along level zero.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_btree.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 3a673ba201aae..d50517942273e 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4849,12 +4849,14 @@ xfs_btree_query_all(
 	xfs_btree_query_range_fn	fn,
 	void				*priv)
 {
-	union xfs_btree_irec		low_rec;
-	union xfs_btree_irec		high_rec;
+	union xfs_btree_key		low_key;
+	union xfs_btree_key		high_key;
+
+	memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
+	memset(&low_key, 0, sizeof(low_key));
+	memset(&high_key, 0xFF, sizeof(high_key));
 
-	memset(&low_rec, 0, sizeof(low_rec));
-	memset(&high_rec, 0xFF, sizeof(high_rec));
-	return xfs_btree_query_range(cur, &low_rec, &high_rec, fn, priv);
+	return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
 }
 
 /*
-- 
GitLab


From c8ce540db5f67d254aafb14b5d76422c62a906df Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:05 -0700
Subject: [PATCH 0282/1958] xfs: remove double-underscore integer types

This is a purely mechanical patch that removes the private
__{u,}int{8,16,32,64}_t typedefs in favor of using the system
{u,}int{8,16,32,64}_t typedefs.  This is the sed script used to perform
the transformation and fix the resulting whitespace and indentation
errors:

s/typedef\t__uint8_t/typedef __uint8_t\t/g
s/typedef\t__uint/typedef __uint/g
s/typedef\t__int\([0-9]*\)_t/typedef int\1_t\t/g
s/__uint8_t\t/__uint8_t\t\t/g
s/__uint/uint/g
s/__int\([0-9]*\)_t\t/__int\1_t\t\t/g
s/__int/int/g
/^typedef.*int[0-9]*_t;$/d

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_alloc_btree.c    |  20 +--
 fs/xfs/libxfs/xfs_attr_remote.c    |   8 +-
 fs/xfs/libxfs/xfs_attr_sf.h        |  10 +-
 fs/xfs/libxfs/xfs_bit.h            |  24 +--
 fs/xfs/libxfs/xfs_bmap_btree.c     |   8 +-
 fs/xfs/libxfs/xfs_btree.c          |  22 +--
 fs/xfs/libxfs/xfs_btree.h          |  18 +-
 fs/xfs/libxfs/xfs_cksum.h          |  16 +-
 fs/xfs/libxfs/xfs_da_btree.c       |   2 +-
 fs/xfs/libxfs/xfs_da_btree.h       |   8 +-
 fs/xfs/libxfs/xfs_da_format.c      |  28 ++--
 fs/xfs/libxfs/xfs_da_format.h      |  64 ++++----
 fs/xfs/libxfs/xfs_dir2.h           |   8 +-
 fs/xfs/libxfs/xfs_dir2_leaf.c      |  12 +-
 fs/xfs/libxfs/xfs_dir2_priv.h      |   2 +-
 fs/xfs/libxfs/xfs_dir2_sf.c        |   2 +-
 fs/xfs/libxfs/xfs_format.h         | 112 ++++++-------
 fs/xfs/libxfs/xfs_fs.h             |  12 +-
 fs/xfs/libxfs/xfs_ialloc.c         |   6 +-
 fs/xfs/libxfs/xfs_ialloc_btree.c   |   4 +-
 fs/xfs/libxfs/xfs_inode_buf.c      |   2 +-
 fs/xfs/libxfs/xfs_inode_buf.h      |  28 ++--
 fs/xfs/libxfs/xfs_log_format.h     | 256 ++++++++++++++---------------
 fs/xfs/libxfs/xfs_log_recover.h    |   2 +-
 fs/xfs/libxfs/xfs_quota_defs.h     |   4 +-
 fs/xfs/libxfs/xfs_refcount_btree.c |   8 +-
 fs/xfs/libxfs/xfs_rmap.c           |   8 +-
 fs/xfs/libxfs/xfs_rmap.h           |   8 +-
 fs/xfs/libxfs/xfs_rmap_btree.c     |  30 ++--
 fs/xfs/libxfs/xfs_rtbitmap.c       |   2 +-
 fs/xfs/libxfs/xfs_sb.c             |   4 +-
 fs/xfs/libxfs/xfs_types.h          |  46 +++---
 fs/xfs/xfs_aops.c                  |   4 +-
 fs/xfs/xfs_attr_list.c             |   2 +-
 fs/xfs/xfs_bmap_util.c             |  24 +--
 fs/xfs/xfs_buf.c                   |   2 +-
 fs/xfs/xfs_dir2_readdir.c          |   8 +-
 fs/xfs/xfs_discard.c               |   4 +-
 fs/xfs/xfs_dquot.c                 |   2 +-
 fs/xfs/xfs_fsops.c                 |  16 +-
 fs/xfs/xfs_fsops.h                 |   4 +-
 fs/xfs/xfs_inode.c                 |   6 +-
 fs/xfs/xfs_inode.h                 |   4 +-
 fs/xfs/xfs_ioctl.c                 |  20 +--
 fs/xfs/xfs_ioctl.h                 |  10 +-
 fs/xfs/xfs_ioctl32.h               |   6 +-
 fs/xfs/xfs_linux.h                 |  20 +--
 fs/xfs/xfs_log.c                   |  20 +--
 fs/xfs/xfs_log.h                   |   2 +-
 fs/xfs/xfs_log_priv.h              |   2 +-
 fs/xfs/xfs_log_recover.c           |  28 ++--
 fs/xfs/xfs_mount.c                 |  16 +-
 fs/xfs/xfs_mount.h                 |  34 ++--
 fs/xfs/xfs_qm_bhv.c                |   2 +-
 fs/xfs/xfs_rtalloc.c               |   8 +-
 fs/xfs/xfs_stats.c                 |   8 +-
 fs/xfs/xfs_stats.h                 | 190 ++++++++++-----------
 fs/xfs/xfs_super.c                 |  26 +--
 fs/xfs/xfs_trace.h                 |  20 +--
 fs/xfs/xfs_trans.h                 |   2 +-
 fs/xfs/xfs_trans_rmap.c            |   2 +-
 61 files changed, 634 insertions(+), 642 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index e1fcfe7f0a9a4..5020cbc02db85 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -253,7 +253,7 @@ xfs_allocbt_init_ptr_from_cur(
 	ptr->s = agf->agf_roots[cur->bc_btnum];
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_bnobt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
@@ -261,42 +261,42 @@ xfs_bnobt_key_diff(
 	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
 	xfs_alloc_key_t		*kp = &key->alloc;
 
-	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_cntbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
 {
 	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
 	xfs_alloc_key_t		*kp = &key->alloc;
-	__int64_t		diff;
+	int64_t			diff;
 
-	diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+	diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
 	if (diff)
 		return diff;
 
-	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_bnobt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
+	return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
 			  be32_to_cpu(k2->alloc.ar_startblock);
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_cntbt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	__int64_t		diff;
+	int64_t			diff;
 
 	diff =  be32_to_cpu(k1->alloc.ar_blockcount) -
 		be32_to_cpu(k2->alloc.ar_blockcount);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d52f525f5b2df..da72b16bef8ef 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -253,7 +253,7 @@ xfs_attr_rmtval_copyout(
 	xfs_ino_t	ino,
 	int		*offset,
 	int		*valuelen,
-	__uint8_t	**dst)
+	uint8_t		**dst)
 {
 	char		*src = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
@@ -301,7 +301,7 @@ xfs_attr_rmtval_copyin(
 	xfs_ino_t	ino,
 	int		*offset,
 	int		*valuelen,
-	__uint8_t	**src)
+	uint8_t		**src)
 {
 	char		*dst = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
@@ -355,7 +355,7 @@ xfs_attr_rmtval_get(
 	struct xfs_mount	*mp = args->dp->i_mount;
 	struct xfs_buf		*bp;
 	xfs_dablk_t		lblkno = args->rmtblkno;
-	__uint8_t		*dst = args->value;
+	uint8_t			*dst = args->value;
 	int			valuelen;
 	int			nmap;
 	int			error;
@@ -421,7 +421,7 @@ xfs_attr_rmtval_set(
 	struct xfs_bmbt_irec	map;
 	xfs_dablk_t		lblkno;
 	xfs_fileoff_t		lfileoff = 0;
-	__uint8_t		*src = args->value;
+	uint8_t			*src = args->value;
 	int			blkcnt;
 	int			valuelen;
 	int			nmap;
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 90928bbe693c0..afd684ae31365 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -31,10 +31,10 @@ typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
  * We generate this then sort it, attr_list() must return things in hash-order.
  */
 typedef struct xfs_attr_sf_sort {
-	__uint8_t	entno;		/* entry number in original list */
-	__uint8_t	namelen;	/* length of name value (no null) */
-	__uint8_t	valuelen;	/* length of value */
-	__uint8_t	flags;		/* flags bits (see xfs_attr_leaf.h) */
+	uint8_t		entno;		/* entry number in original list */
+	uint8_t		namelen;	/* length of name value (no null) */
+	uint8_t		valuelen;	/* length of value */
+	uint8_t		flags;		/* flags bits (see xfs_attr_leaf.h) */
 	xfs_dahash_t	hash;		/* this entry's hash value */
 	unsigned char	*name;		/* name value, pointer into buffer */
 } xfs_attr_sf_sort_t;
@@ -42,7 +42,7 @@ typedef struct xfs_attr_sf_sort {
 #define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	/* space name/value uses */ \
 	(((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
 #define XFS_ATTR_SF_ENTSIZE_MAX			/* max space for name&value */ \
-	((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
+	((1 << (NBBY*(int)sizeof(uint8_t))) - 1)
 #define XFS_ATTR_SF_ENTSIZE(sfep)		/* space an entry uses */ \
 	((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
 #define XFS_ATTR_SF_NEXTENTRY(sfep)		/* next entry in struct */ \
diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index e1649c0d3e02f..61c6b2025d0c8 100644
--- a/fs/xfs/libxfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h
@@ -25,47 +25,47 @@
 /*
  * masks with n high/low bits set, 64-bit values
  */
-static inline __uint64_t xfs_mask64hi(int n)
+static inline uint64_t xfs_mask64hi(int n)
 {
-	return (__uint64_t)-1 << (64 - (n));
+	return (uint64_t)-1 << (64 - (n));
 }
-static inline __uint32_t xfs_mask32lo(int n)
+static inline uint32_t xfs_mask32lo(int n)
 {
-	return ((__uint32_t)1 << (n)) - 1;
+	return ((uint32_t)1 << (n)) - 1;
 }
-static inline __uint64_t xfs_mask64lo(int n)
+static inline uint64_t xfs_mask64lo(int n)
 {
-	return ((__uint64_t)1 << (n)) - 1;
+	return ((uint64_t)1 << (n)) - 1;
 }
 
 /* Get high bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_highbit32(__uint32_t v)
+static inline int xfs_highbit32(uint32_t v)
 {
 	return fls(v) - 1;
 }
 
 /* Get high bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_highbit64(__uint64_t v)
+static inline int xfs_highbit64(uint64_t v)
 {
 	return fls64(v) - 1;
 }
 
 /* Get low bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_lowbit32(__uint32_t v)
+static inline int xfs_lowbit32(uint32_t v)
 {
 	return ffs(v) - 1;
 }
 
 /* Get low bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_lowbit64(__uint64_t v)
+static inline int xfs_lowbit64(uint64_t v)
 {
-	__uint32_t	w = (__uint32_t)v;
+	uint32_t	w = (uint32_t)v;
 	int		n = 0;
 
 	if (w) {	/* lower bits */
 		n = ffs(w);
 	} else {	/* upper bits */
-		w = (__uint32_t)(v >> 32);
+		w = (uint32_t)(v >> 32);
 		if (w) {
 			n = ffs(w);
 			if (n)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6cba69aff0774..5e2b3dc138ade 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -94,8 +94,8 @@ xfs_bmdr_to_bmbt(
  */
 STATIC void
 __xfs_bmbt_get_all(
-		__uint64_t l0,
-		__uint64_t l1,
+		uint64_t l0,
+		uint64_t l1,
 		xfs_bmbt_irec_t *s)
 {
 	int	ext_flag;
@@ -588,12 +588,12 @@ xfs_bmbt_init_ptr_from_cur(
 	ptr->l = 0;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_bmbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
 {
-	return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
+	return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
 				      cur->bc_rec.b.br_startoff;
 }
 
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index d50517942273e..2aac3f499d97c 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -43,7 +43,7 @@ kmem_zone_t	*xfs_btree_cur_zone;
 /*
  * Btree magic numbers.
  */
-static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
 	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
 	  XFS_FIBT_MAGIC, 0 },
 	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
@@ -51,12 +51,12 @@ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
 	  XFS_REFC_CRC_MAGIC }
 };
 
-__uint32_t
+uint32_t
 xfs_btree_magic(
 	int			crc,
 	xfs_btnum_t		btnum)
 {
-	__uint32_t		magic = xfs_magics[crc][btnum];
+	uint32_t		magic = xfs_magics[crc][btnum];
 
 	/* Ensure we asked for crc for crc-only magics. */
 	ASSERT(magic != 0);
@@ -778,14 +778,14 @@ xfs_btree_lastrec(
  */
 void
 xfs_btree_offsets(
-	__int64_t	fields,		/* bitmask of fields */
+	int64_t		fields,		/* bitmask of fields */
 	const short	*offsets,	/* table of field offsets */
 	int		nbits,		/* number of bits to inspect */
 	int		*first,		/* output: first byte offset */
 	int		*last)		/* output: last byte offset */
 {
 	int		i;		/* current bit number */
-	__int64_t	imask;		/* mask for current bit number */
+	int64_t		imask;		/* mask for current bit number */
 
 	ASSERT(fields != 0);
 	/*
@@ -1846,7 +1846,7 @@ xfs_btree_lookup(
 	int			*stat)	/* success/failure */
 {
 	struct xfs_btree_block	*block;	/* current btree block */
-	__int64_t		diff;	/* difference for the current key */
+	int64_t			diff;	/* difference for the current key */
 	int			error;	/* error return value */
 	int			keyno;	/* current key number */
 	int			level;	/* level in the btree */
@@ -4435,7 +4435,7 @@ xfs_btree_visit_blocks(
  * recovery completion writes the changes to disk.
  */
 struct xfs_btree_block_change_owner_info {
-	__uint64_t		new_owner;
+	uint64_t		new_owner;
 	struct list_head	*buffer_list;
 };
 
@@ -4481,7 +4481,7 @@ xfs_btree_block_change_owner(
 int
 xfs_btree_change_owner(
 	struct xfs_btree_cur	*cur,
-	__uint64_t		new_owner,
+	uint64_t		new_owner,
 	struct list_head	*buffer_list)
 {
 	struct xfs_btree_block_change_owner_info	bbcoi;
@@ -4585,7 +4585,7 @@ xfs_btree_simple_query_range(
 {
 	union xfs_btree_rec		*recp;
 	union xfs_btree_key		rec_key;
-	__int64_t			diff;
+	int64_t				diff;
 	int				stat;
 	bool				firstrec = true;
 	int				error;
@@ -4682,8 +4682,8 @@ xfs_btree_overlapped_query_range(
 	union xfs_btree_key		*hkp;
 	union xfs_btree_rec		*recp;
 	struct xfs_btree_block		*block;
-	__int64_t			ldiff;
-	__int64_t			hdiff;
+	int64_t				ldiff;
+	int64_t				hdiff;
 	int				level;
 	struct xfs_buf			*bp;
 	int				i;
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 27bed08261c53..0a931f6441032 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -76,7 +76,7 @@ union xfs_btree_rec {
 #define	XFS_BTNUM_RMAP	((xfs_btnum_t)XFS_BTNUM_RMAPi)
 #define	XFS_BTNUM_REFC	((xfs_btnum_t)XFS_BTNUM_REFCi)
 
-__uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
+uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
 
 /*
  * For logging record fields.
@@ -150,14 +150,14 @@ struct xfs_btree_ops {
 					  union xfs_btree_rec *rec);
 
 	/* difference between key value and cursor value */
-	__int64_t (*key_diff)(struct xfs_btree_cur *cur,
+	int64_t (*key_diff)(struct xfs_btree_cur *cur,
 			      union xfs_btree_key *key);
 
 	/*
 	 * Difference between key2 and key1 -- positive if key1 > key2,
 	 * negative if key1 < key2, and zero if equal.
 	 */
-	__int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
+	int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
 				   union xfs_btree_key *key1,
 				   union xfs_btree_key *key2);
 
@@ -213,11 +213,11 @@ typedef struct xfs_btree_cur
 	union xfs_btree_irec	bc_rec;	/* current insert/search record value */
 	struct xfs_buf	*bc_bufs[XFS_BTREE_MAXLEVELS];	/* buf ptr per level */
 	int		bc_ptrs[XFS_BTREE_MAXLEVELS];	/* key/record # */
-	__uint8_t	bc_ra[XFS_BTREE_MAXLEVELS];	/* readahead bits */
+	uint8_t		bc_ra[XFS_BTREE_MAXLEVELS];	/* readahead bits */
 #define	XFS_BTCUR_LEFTRA	1	/* left sibling has been read-ahead */
 #define	XFS_BTCUR_RIGHTRA	2	/* right sibling has been read-ahead */
-	__uint8_t	bc_nlevels;	/* number of levels in the tree */
-	__uint8_t	bc_blocklog;	/* log2(blocksize) of btree blocks */
+	uint8_t		bc_nlevels;	/* number of levels in the tree */
+	uint8_t		bc_blocklog;	/* log2(blocksize) of btree blocks */
 	xfs_btnum_t	bc_btnum;	/* identifies which btree type */
 	int		bc_statoff;	/* offset of btre stats array */
 	union {
@@ -330,7 +330,7 @@ xfs_btree_islastblock(
  */
 void
 xfs_btree_offsets(
-	__int64_t		fields,	/* bitmask of fields */
+	int64_t			fields,	/* bitmask of fields */
 	const short		*offsets,/* table of field offsets */
 	int			nbits,	/* number of bits to inspect */
 	int			*first,	/* output: first byte offset */
@@ -408,7 +408,7 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
 int xfs_btree_insert(struct xfs_btree_cur *, int *);
 int xfs_btree_delete(struct xfs_btree_cur *, int *);
 int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
-int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
+int xfs_btree_change_owner(struct xfs_btree_cur *cur, uint64_t new_owner,
 			   struct list_head *buffer_list);
 
 /*
@@ -434,7 +434,7 @@ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
 }
 
 static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
-		__uint16_t numrecs)
+		uint16_t numrecs)
 {
 	block->bb_numrecs = cpu_to_be16(numrecs);
 }
diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index a416c7cb23ea5..8211f48b98e6d 100644
--- a/fs/xfs/libxfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h
@@ -1,7 +1,7 @@
 #ifndef _XFS_CKSUM_H
 #define _XFS_CKSUM_H 1
 
-#define XFS_CRC_SEED	(~(__uint32_t)0)
+#define XFS_CRC_SEED	(~(uint32_t)0)
 
 /*
  * Calculate the intermediate checksum for a buffer that has the CRC field
@@ -9,11 +9,11 @@
  * cksum_offset parameter. We do not modify the buffer during verification,
  * hence we have to split the CRC calculation across the cksum_offset.
  */
-static inline __uint32_t
+static inline uint32_t
 xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
 {
-	__uint32_t zero = 0;
-	__uint32_t crc;
+	uint32_t zero = 0;
+	uint32_t crc;
 
 	/* Calculate CRC up to the checksum. */
 	crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset);
@@ -30,7 +30,7 @@ xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
  * Fast CRC method where the buffer is modified. Callers must have exclusive
  * access to the buffer while the calculation takes place.
  */
-static inline __uint32_t
+static inline uint32_t
 xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
 {
 	/* zero the CRC field */
@@ -48,7 +48,7 @@ xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
  * so that it is consistent on disk.
  */
 static inline __le32
-xfs_end_cksum(__uint32_t crc)
+xfs_end_cksum(uint32_t crc)
 {
 	return ~cpu_to_le32(crc);
 }
@@ -62,7 +62,7 @@ xfs_end_cksum(__uint32_t crc)
 static inline void
 xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
 {
-	__uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
+	uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
 
 	*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
 }
@@ -73,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
 static inline int
 xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
 {
-	__uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
+	uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
 
 	return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
 }
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 1bdf2888295b9..48f1136b80bf4 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1952,7 +1952,7 @@ xfs_da3_path_shift(
  * This is implemented with some source-level loop unrolling.
  */
 xfs_dahash_t
-xfs_da_hashname(const __uint8_t *name, int namelen)
+xfs_da_hashname(const uint8_t *name, int namelen)
 {
 	xfs_dahash_t hash;
 
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 4e29cb6a36279..ae6de17467f26 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -60,10 +60,10 @@ enum xfs_dacmp {
  */
 typedef struct xfs_da_args {
 	struct xfs_da_geometry *geo;	/* da block geometry */
-	const __uint8_t	*name;		/* string (maybe not NULL terminated) */
+	const uint8_t		*name;		/* string (maybe not NULL terminated) */
 	int		namelen;	/* length of string (maybe no NULL) */
-	__uint8_t	filetype;	/* filetype of inode for directories */
-	__uint8_t	*value;		/* set of bytes (maybe contain NULLs) */
+	uint8_t		filetype;	/* filetype of inode for directories */
+	uint8_t		*value;		/* set of bytes (maybe contain NULLs) */
 	int		valuelen;	/* length of value */
 	int		flags;		/* argument flags (eg: ATTR_NOCREATE) */
 	xfs_dahash_t	hashval;	/* hash value of name */
@@ -207,7 +207,7 @@ int	xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
 int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 					  struct xfs_buf *dead_buf);
 
-uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
+uint xfs_da_hashname(const uint8_t *name_string, int name_length);
 enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
 				const unsigned char *name, int len);
 
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index f1e8d4dbb6001..6d77d1a8498ad 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -49,7 +49,7 @@ xfs_dir3_sf_entsize(
 	struct xfs_dir2_sf_hdr	*hdr,
 	int			len)
 {
-	return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
+	return xfs_dir2_sf_entsize(hdr, len) + sizeof(uint8_t);
 }
 
 static struct xfs_dir2_sf_entry *
@@ -77,7 +77,7 @@ xfs_dir3_sf_nextentry(
  * not necessary. For non-filetype enable directories, the type is always
  * unknown and we never store the value.
  */
-static __uint8_t
+static uint8_t
 xfs_dir2_sfe_get_ftype(
 	struct xfs_dir2_sf_entry *sfep)
 {
@@ -87,16 +87,16 @@ xfs_dir2_sfe_get_ftype(
 static void
 xfs_dir2_sfe_put_ftype(
 	struct xfs_dir2_sf_entry *sfep,
-	__uint8_t		ftype)
+	uint8_t			ftype)
 {
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 }
 
-static __uint8_t
+static uint8_t
 xfs_dir3_sfe_get_ftype(
 	struct xfs_dir2_sf_entry *sfep)
 {
-	__uint8_t	ftype;
+	uint8_t		ftype;
 
 	ftype = sfep->name[sfep->namelen];
 	if (ftype >= XFS_DIR3_FT_MAX)
@@ -107,7 +107,7 @@ xfs_dir3_sfe_get_ftype(
 static void
 xfs_dir3_sfe_put_ftype(
 	struct xfs_dir2_sf_entry *sfep,
-	__uint8_t		ftype)
+	uint8_t			ftype)
 {
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 
@@ -124,7 +124,7 @@ xfs_dir3_sfe_put_ftype(
 static xfs_ino_t
 xfs_dir2_sf_get_ino(
 	struct xfs_dir2_sf_hdr	*hdr,
-	__uint8_t		*from)
+	uint8_t			*from)
 {
 	if (hdr->i8count)
 		return get_unaligned_be64(from) & 0x00ffffffffffffffULL;
@@ -135,7 +135,7 @@ xfs_dir2_sf_get_ino(
 static void
 xfs_dir2_sf_put_ino(
 	struct xfs_dir2_sf_hdr	*hdr,
-	__uint8_t		*to,
+	uint8_t			*to,
 	xfs_ino_t		ino)
 {
 	ASSERT((ino & 0xff00000000000000ULL) == 0);
@@ -225,7 +225,7 @@ xfs_dir3_sfe_put_ino(
 
 #define XFS_DIR3_DATA_ENTSIZE(n)					\
 	round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) +	\
-		 sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)),	\
+		 sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)),	\
 		XFS_DIR2_DATA_ALIGN)
 
 static int
@@ -242,7 +242,7 @@ xfs_dir3_data_entsize(
 	return XFS_DIR3_DATA_ENTSIZE(n);
 }
 
-static __uint8_t
+static uint8_t
 xfs_dir2_data_get_ftype(
 	struct xfs_dir2_data_entry *dep)
 {
@@ -252,16 +252,16 @@ xfs_dir2_data_get_ftype(
 static void
 xfs_dir2_data_put_ftype(
 	struct xfs_dir2_data_entry *dep,
-	__uint8_t		ftype)
+	uint8_t			ftype)
 {
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 }
 
-static __uint8_t
+static uint8_t
 xfs_dir3_data_get_ftype(
 	struct xfs_dir2_data_entry *dep)
 {
-	__uint8_t	ftype = dep->name[dep->namelen];
+	uint8_t		ftype = dep->name[dep->namelen];
 
 	if (ftype >= XFS_DIR3_FT_MAX)
 		return XFS_DIR3_FT_UNKNOWN;
@@ -271,7 +271,7 @@ xfs_dir3_data_get_ftype(
 static void
 xfs_dir3_data_put_ftype(
 	struct xfs_dir2_data_entry *dep,
-	__uint8_t		type)
+	uint8_t			type)
 {
 	ASSERT(type < XFS_DIR3_FT_MAX);
 	ASSERT(dep->namelen != 0);
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 9a492a9e19bd0..3771edcb301d5 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -111,11 +111,11 @@ struct xfs_da3_intnode {
  * appropriate.
  */
 struct xfs_da3_icnode_hdr {
-	__uint32_t	forw;
-	__uint32_t	back;
-	__uint16_t	magic;
-	__uint16_t	count;
-	__uint16_t	level;
+	uint32_t	forw;
+	uint32_t	back;
+	uint16_t	magic;
+	uint16_t	count;
+	uint16_t	level;
 };
 
 /*
@@ -187,14 +187,14 @@ struct xfs_da3_icnode_hdr {
 /*
  * Byte offset in data block and shortform entry.
  */
-typedef	__uint16_t	xfs_dir2_data_off_t;
+typedef uint16_t	xfs_dir2_data_off_t;
 #define	NULLDATAOFF	0xffffU
 typedef uint		xfs_dir2_data_aoff_t;	/* argument form */
 
 /*
  * Offset in data space of a data entry.
  */
-typedef	__uint32_t	xfs_dir2_dataptr_t;
+typedef uint32_t	xfs_dir2_dataptr_t;
 #define	XFS_DIR2_MAX_DATAPTR	((xfs_dir2_dataptr_t)0xffffffff)
 #define	XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)
 
@@ -206,7 +206,7 @@ typedef	xfs_off_t	xfs_dir2_off_t;
 /*
  * Directory block number (logical dirblk in file)
  */
-typedef	__uint32_t	xfs_dir2_db_t;
+typedef uint32_t	xfs_dir2_db_t;
 
 #define XFS_INO32_SIZE	4
 #define XFS_INO64_SIZE	8
@@ -226,9 +226,9 @@ typedef	__uint32_t	xfs_dir2_db_t;
  * over them.
  */
 typedef struct xfs_dir2_sf_hdr {
-	__uint8_t		count;		/* count of entries */
-	__uint8_t		i8count;	/* count of 8-byte inode #s */
-	__uint8_t		parent[8];	/* parent dir inode number */
+	uint8_t			count;		/* count of entries */
+	uint8_t			i8count;	/* count of 8-byte inode #s */
+	uint8_t			parent[8];	/* parent dir inode number */
 } __packed xfs_dir2_sf_hdr_t;
 
 typedef struct xfs_dir2_sf_entry {
@@ -447,11 +447,11 @@ struct xfs_dir3_leaf_hdr {
 };
 
 struct xfs_dir3_icleaf_hdr {
-	__uint32_t		forw;
-	__uint32_t		back;
-	__uint16_t		magic;
-	__uint16_t		count;
-	__uint16_t		stale;
+	uint32_t		forw;
+	uint32_t		back;
+	uint16_t		magic;
+	uint16_t		count;
+	uint16_t		stale;
 };
 
 /*
@@ -538,10 +538,10 @@ struct xfs_dir3_free {
  * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
  */
 struct xfs_dir3_icfree_hdr {
-	__uint32_t	magic;
-	__uint32_t	firstdb;
-	__uint32_t	nvalid;
-	__uint32_t	nused;
+	uint32_t	magic;
+	uint32_t	firstdb;
+	uint32_t	nvalid;
+	uint32_t	nused;
 
 };
 
@@ -632,10 +632,10 @@ typedef struct xfs_attr_shortform {
 		__u8	padding;
 	} hdr;
 	struct xfs_attr_sf_entry {
-		__uint8_t namelen;	/* actual length of name (no NULL) */
-		__uint8_t valuelen;	/* actual length of value (no NULL) */
-		__uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
-		__uint8_t nameval[1];	/* name & value bytes concatenated */
+		uint8_t namelen;	/* actual length of name (no NULL) */
+		uint8_t valuelen;	/* actual length of value (no NULL) */
+		uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
+		uint8_t nameval[1];	/* name & value bytes concatenated */
 	} list[1];			/* variable sized array */
 } xfs_attr_shortform_t;
 
@@ -725,22 +725,22 @@ struct xfs_attr3_leafblock {
  * incore, neutral version of the attribute leaf header
  */
 struct xfs_attr3_icleaf_hdr {
-	__uint32_t	forw;
-	__uint32_t	back;
-	__uint16_t	magic;
-	__uint16_t	count;
-	__uint16_t	usedbytes;
+	uint32_t	forw;
+	uint32_t	back;
+	uint16_t	magic;
+	uint16_t	count;
+	uint16_t	usedbytes;
 	/*
 	 * firstused is 32-bit here instead of 16-bit like the on-disk variant
 	 * to support maximum fsb size of 64k without overflow issues throughout
 	 * the attr code. Instead, the overflow condition is handled on
 	 * conversion to/from disk.
 	 */
-	__uint32_t	firstused;
+	uint32_t	firstused;
 	__u8		holes;
 	struct {
-		__uint16_t	base;
-		__uint16_t	size;
+		uint16_t	base;
+		uint16_t	size;
 	} freemap[XFS_ATTR_LEAF_MAPSIZE];
 };
 
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index d6e6d9d16f6c3..21c8f8bf94d52 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -47,9 +47,9 @@ struct xfs_dir_ops {
 	struct xfs_dir2_sf_entry *
 		(*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr,
 				struct xfs_dir2_sf_entry *sfep);
-	__uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
+	uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
 	void	(*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep,
-				__uint8_t ftype);
+				uint8_t ftype);
 	xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr,
 				struct xfs_dir2_sf_entry *sfep);
 	void	(*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr,
@@ -60,9 +60,9 @@ struct xfs_dir_ops {
 				     xfs_ino_t ino);
 
 	int	(*data_entsize)(int len);
-	__uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
+	uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
 	void	(*data_put_ftype)(struct xfs_dir2_data_entry *dep,
-				__uint8_t ftype);
+				uint8_t ftype);
 	__be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep);
 	struct xfs_dir2_data_free *
 		(*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr);
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index b887fb2a2bcf5..68bf3e860a90e 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -145,7 +145,7 @@ xfs_dir3_leaf_check_int(
 static bool
 xfs_dir3_leaf_verify(
 	struct xfs_buf		*bp,
-	__uint16_t		magic)
+	uint16_t		magic)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
@@ -154,7 +154,7 @@ xfs_dir3_leaf_verify(
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-		__uint16_t		magic3;
+		uint16_t		magic3;
 
 		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
 							 : XFS_DIR3_LEAFN_MAGIC;
@@ -178,7 +178,7 @@ xfs_dir3_leaf_verify(
 static void
 __read_verify(
 	struct xfs_buf  *bp,
-	__uint16_t	magic)
+	uint16_t	magic)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 
@@ -195,7 +195,7 @@ __read_verify(
 static void
 __write_verify(
 	struct xfs_buf  *bp,
-	__uint16_t	magic)
+	uint16_t	magic)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
@@ -299,7 +299,7 @@ xfs_dir3_leaf_init(
 	struct xfs_trans	*tp,
 	struct xfs_buf		*bp,
 	xfs_ino_t		owner,
-	__uint16_t		type)
+	uint16_t		type)
 {
 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
 
@@ -343,7 +343,7 @@ xfs_dir3_leaf_get_buf(
 	xfs_da_args_t		*args,
 	xfs_dir2_db_t		bno,
 	struct xfs_buf		**bpp,
-	__uint16_t		magic)
+	uint16_t		magic)
 {
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_trans	*tp = args->trans;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 39f8604f764e1..011df4da6cc23 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -69,7 +69,7 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
 		struct xfs_dir2_leaf_entry *ents, int *indexp,
 		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
 extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
-		struct xfs_buf **bpp, __uint16_t magic);
+		struct xfs_buf **bpp, uint16_t magic);
 extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
 		struct xfs_buf *bp, int first, int last);
 extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index e84af093b2ab9..be8b9755f66a0 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -647,7 +647,7 @@ xfs_dir2_sf_verify(
 	int				offset;
 	int				size;
 	int				error;
-	__uint8_t			filetype;
+	uint8_t				filetype;
 
 	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
 	/*
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index a1dccd8d96bce..e204a942e5bf3 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -103,8 +103,8 @@ struct xfs_ifork;
  * Must be padded to 64 bit alignment.
  */
 typedef struct xfs_sb {
-	__uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
-	__uint32_t	sb_blocksize;	/* logical block size, bytes */
+	uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
+	uint32_t	sb_blocksize;	/* logical block size, bytes */
 	xfs_rfsblock_t	sb_dblocks;	/* number of data blocks */
 	xfs_rfsblock_t	sb_rblocks;	/* number of realtime blocks */
 	xfs_rtblock_t	sb_rextents;	/* number of realtime extents */
@@ -118,45 +118,45 @@ typedef struct xfs_sb {
 	xfs_agnumber_t	sb_agcount;	/* number of allocation groups */
 	xfs_extlen_t	sb_rbmblocks;	/* number of rt bitmap blocks */
 	xfs_extlen_t	sb_logblocks;	/* number of log blocks */
-	__uint16_t	sb_versionnum;	/* header version == XFS_SB_VERSION */
-	__uint16_t	sb_sectsize;	/* volume sector size, bytes */
-	__uint16_t	sb_inodesize;	/* inode size, bytes */
-	__uint16_t	sb_inopblock;	/* inodes per block */
+	uint16_t	sb_versionnum;	/* header version == XFS_SB_VERSION */
+	uint16_t	sb_sectsize;	/* volume sector size, bytes */
+	uint16_t	sb_inodesize;	/* inode size, bytes */
+	uint16_t	sb_inopblock;	/* inodes per block */
 	char		sb_fname[12];	/* file system name */
-	__uint8_t	sb_blocklog;	/* log2 of sb_blocksize */
-	__uint8_t	sb_sectlog;	/* log2 of sb_sectsize */
-	__uint8_t	sb_inodelog;	/* log2 of sb_inodesize */
-	__uint8_t	sb_inopblog;	/* log2 of sb_inopblock */
-	__uint8_t	sb_agblklog;	/* log2 of sb_agblocks (rounded up) */
-	__uint8_t	sb_rextslog;	/* log2 of sb_rextents */
-	__uint8_t	sb_inprogress;	/* mkfs is in progress, don't mount */
-	__uint8_t	sb_imax_pct;	/* max % of fs for inode space */
+	uint8_t		sb_blocklog;	/* log2 of sb_blocksize */
+	uint8_t		sb_sectlog;	/* log2 of sb_sectsize */
+	uint8_t		sb_inodelog;	/* log2 of sb_inodesize */
+	uint8_t		sb_inopblog;	/* log2 of sb_inopblock */
+	uint8_t		sb_agblklog;	/* log2 of sb_agblocks (rounded up) */
+	uint8_t		sb_rextslog;	/* log2 of sb_rextents */
+	uint8_t		sb_inprogress;	/* mkfs is in progress, don't mount */
+	uint8_t		sb_imax_pct;	/* max % of fs for inode space */
 					/* statistics */
 	/*
 	 * These fields must remain contiguous.  If you really
 	 * want to change their layout, make sure you fix the
 	 * code in xfs_trans_apply_sb_deltas().
 	 */
-	__uint64_t	sb_icount;	/* allocated inodes */
-	__uint64_t	sb_ifree;	/* free inodes */
-	__uint64_t	sb_fdblocks;	/* free data blocks */
-	__uint64_t	sb_frextents;	/* free realtime extents */
+	uint64_t	sb_icount;	/* allocated inodes */
+	uint64_t	sb_ifree;	/* free inodes */
+	uint64_t	sb_fdblocks;	/* free data blocks */
+	uint64_t	sb_frextents;	/* free realtime extents */
 	/*
 	 * End contiguous fields.
 	 */
 	xfs_ino_t	sb_uquotino;	/* user quota inode */
 	xfs_ino_t	sb_gquotino;	/* group quota inode */
-	__uint16_t	sb_qflags;	/* quota flags */
-	__uint8_t	sb_flags;	/* misc. flags */
-	__uint8_t	sb_shared_vn;	/* shared version number */
+	uint16_t	sb_qflags;	/* quota flags */
+	uint8_t		sb_flags;	/* misc. flags */
+	uint8_t		sb_shared_vn;	/* shared version number */
 	xfs_extlen_t	sb_inoalignmt;	/* inode chunk alignment, fsblocks */
-	__uint32_t	sb_unit;	/* stripe or raid unit */
-	__uint32_t	sb_width;	/* stripe or raid width */
-	__uint8_t	sb_dirblklog;	/* log2 of dir block size (fsbs) */
-	__uint8_t	sb_logsectlog;	/* log2 of the log sector size */
-	__uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
-	__uint32_t	sb_logsunit;	/* stripe unit size for the log */
-	__uint32_t	sb_features2;	/* additional feature bits */
+	uint32_t	sb_unit;	/* stripe or raid unit */
+	uint32_t	sb_width;	/* stripe or raid width */
+	uint8_t		sb_dirblklog;	/* log2 of dir block size (fsbs) */
+	uint8_t		sb_logsectlog;	/* log2 of the log sector size */
+	uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
+	uint32_t	sb_logsunit;	/* stripe unit size for the log */
+	uint32_t	sb_features2;	/* additional feature bits */
 
 	/*
 	 * bad features2 field as a result of failing to pad the sb structure to
@@ -167,17 +167,17 @@ typedef struct xfs_sb {
 	 * the value in sb_features2 when formatting the incore superblock to
 	 * the disk buffer.
 	 */
-	__uint32_t	sb_bad_features2;
+	uint32_t	sb_bad_features2;
 
 	/* version 5 superblock fields start here */
 
 	/* feature masks */
-	__uint32_t	sb_features_compat;
-	__uint32_t	sb_features_ro_compat;
-	__uint32_t	sb_features_incompat;
-	__uint32_t	sb_features_log_incompat;
+	uint32_t	sb_features_compat;
+	uint32_t	sb_features_ro_compat;
+	uint32_t	sb_features_incompat;
+	uint32_t	sb_features_log_incompat;
 
-	__uint32_t	sb_crc;		/* superblock crc */
+	uint32_t	sb_crc;		/* superblock crc */
 	xfs_extlen_t	sb_spino_align;	/* sparse inode chunk alignment */
 
 	xfs_ino_t	sb_pquotino;	/* project quota inode */
@@ -449,7 +449,7 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
 static inline bool
 xfs_sb_has_compat_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_compat & feature) != 0;
 }
@@ -465,7 +465,7 @@ xfs_sb_has_compat_feature(
 static inline bool
 xfs_sb_has_ro_compat_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_ro_compat & feature) != 0;
 }
@@ -482,7 +482,7 @@ xfs_sb_has_ro_compat_feature(
 static inline bool
 xfs_sb_has_incompat_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_incompat & feature) != 0;
 }
@@ -492,7 +492,7 @@ xfs_sb_has_incompat_feature(
 static inline bool
 xfs_sb_has_incompat_log_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_log_incompat & feature) != 0;
 }
@@ -594,8 +594,8 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
  */
 #define XFS_FSB_TO_B(mp,fsbno)	((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog)
 #define XFS_B_TO_FSB(mp,b)	\
-	((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
-#define XFS_B_TO_FSBT(mp,b)	(((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
+	((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
+#define XFS_B_TO_FSBT(mp,b)	(((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
 #define XFS_B_FSB_OFFSET(mp,b)	((b) & (mp)->m_blockmask)
 
 /*
@@ -1072,7 +1072,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * next agno_log bits - ag number
  * high agno_log-agblklog-inopblog bits - 0
  */
-#define	XFS_INO_MASK(k)			(__uint32_t)((1ULL << (k)) - 1)
+#define	XFS_INO_MASK(k)			(uint32_t)((1ULL << (k)) - 1)
 #define	XFS_INO_OFFSET_BITS(mp)		(mp)->m_sb.sb_inopblog
 #define	XFS_INO_AGBNO_BITS(mp)		(mp)->m_sb.sb_agblklog
 #define	XFS_INO_AGINO_BITS(mp)		(mp)->m_agino_log
@@ -1269,16 +1269,16 @@ typedef __be32 xfs_alloc_ptr_t;
 #define	XFS_FIBT_MAGIC		0x46494254	/* 'FIBT' */
 #define	XFS_FIBT_CRC_MAGIC	0x46494233	/* 'FIB3' */
 
-typedef	__uint64_t	xfs_inofree_t;
+typedef uint64_t	xfs_inofree_t;
 #define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t))
 #define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3)
 #define	XFS_INOBT_ALL_FREE		((xfs_inofree_t)-1)
 #define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))
 
 #define XFS_INOBT_HOLEMASK_FULL		0	/* holemask for full chunk */
-#define XFS_INOBT_HOLEMASK_BITS		(NBBY * sizeof(__uint16_t))
+#define XFS_INOBT_HOLEMASK_BITS		(NBBY * sizeof(uint16_t))
 #define XFS_INODES_PER_HOLEMASK_BIT	\
-	(XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
+	(XFS_INODES_PER_CHUNK / (NBBY * sizeof(uint16_t)))
 
 static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
 {
@@ -1312,9 +1312,9 @@ typedef struct xfs_inobt_rec {
 
 typedef struct xfs_inobt_rec_incore {
 	xfs_agino_t	ir_startino;	/* starting inode number */
-	__uint16_t	ir_holemask;	/* hole mask for sparse chunks */
-	__uint8_t	ir_count;	/* total inode count */
-	__uint8_t	ir_freecount;	/* count of free inodes (set bits) */
+	uint16_t	ir_holemask;	/* hole mask for sparse chunks */
+	uint8_t		ir_count;	/* total inode count */
+	uint8_t		ir_freecount;	/* count of free inodes (set bits) */
 	xfs_inofree_t	ir_free;	/* free inode mask */
 } xfs_inobt_rec_incore_t;
 
@@ -1397,15 +1397,15 @@ struct xfs_rmap_rec {
  *  rm_offset:54-60 aren't used and should be zero
  *  rm_offset:0-53 is the block offset within the inode
  */
-#define XFS_RMAP_OFF_ATTR_FORK	((__uint64_t)1ULL << 63)
-#define XFS_RMAP_OFF_BMBT_BLOCK	((__uint64_t)1ULL << 62)
-#define XFS_RMAP_OFF_UNWRITTEN	((__uint64_t)1ULL << 61)
+#define XFS_RMAP_OFF_ATTR_FORK	((uint64_t)1ULL << 63)
+#define XFS_RMAP_OFF_BMBT_BLOCK	((uint64_t)1ULL << 62)
+#define XFS_RMAP_OFF_UNWRITTEN	((uint64_t)1ULL << 61)
 
-#define XFS_RMAP_LEN_MAX	((__uint32_t)~0U)
+#define XFS_RMAP_LEN_MAX	((uint32_t)~0U)
 #define XFS_RMAP_OFF_FLAGS	(XFS_RMAP_OFF_ATTR_FORK | \
 				 XFS_RMAP_OFF_BMBT_BLOCK | \
 				 XFS_RMAP_OFF_UNWRITTEN)
-#define XFS_RMAP_OFF_MASK	((__uint64_t)0x3FFFFFFFFFFFFFULL)
+#define XFS_RMAP_OFF_MASK	((uint64_t)0x3FFFFFFFFFFFFFULL)
 
 #define XFS_RMAP_OFF(off)		((off) & XFS_RMAP_OFF_MASK)
 
@@ -1431,8 +1431,8 @@ struct xfs_rmap_rec {
 struct xfs_rmap_irec {
 	xfs_agblock_t	rm_startblock;	/* extent start block */
 	xfs_extlen_t	rm_blockcount;	/* extent length */
-	__uint64_t	rm_owner;	/* extent owner */
-	__uint64_t	rm_offset;	/* offset within the owner */
+	uint64_t	rm_owner;	/* extent owner */
+	uint64_t	rm_offset;	/* offset within the owner */
 	unsigned int	rm_flags;	/* state flags */
 };
 
@@ -1544,11 +1544,11 @@ typedef struct xfs_bmbt_rec {
 	__be64			l0, l1;
 } xfs_bmbt_rec_t;
 
-typedef __uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */
+typedef uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */
 typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
 
 typedef struct xfs_bmbt_rec_host {
-	__uint64_t		l0, l1;
+	uint64_t		l0, l1;
 } xfs_bmbt_rec_host_t;
 
 /*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index a9aa13e66046f..8c61f21535d4f 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -302,10 +302,10 @@ typedef struct xfs_bstat {
  * and using two 16bit values to hold new 32bit projid was choosen
  * to retain compatibility with "old" filesystems).
  */
-static inline __uint32_t
+static inline uint32_t
 bstat_get_projid(struct xfs_bstat *bs)
 {
-	return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
+	return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
 }
 
 /*
@@ -451,10 +451,10 @@ typedef struct xfs_handle {
  */
 typedef struct xfs_swapext
 {
-	__int64_t	sx_version;	/* version */
+	int64_t		sx_version;	/* version */
 #define XFS_SX_VERSION		0
-	__int64_t	sx_fdtarget;	/* fd of target file */
-	__int64_t	sx_fdtmp;	/* fd of tmp file */
+	int64_t		sx_fdtarget;	/* fd of target file */
+	int64_t		sx_fdtmp;	/* fd of tmp file */
 	xfs_off_t	sx_offset;	/* offset into file */
 	xfs_off_t	sx_length;	/* leng from offset */
 	char		sx_pad[16];	/* pad space, unused */
@@ -542,7 +542,7 @@ typedef struct xfs_swapext
 #define XFS_IOC_ATTRLIST_BY_HANDLE   _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
 #define XFS_IOC_ATTRMULTI_BY_HANDLE  _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
 #define XFS_IOC_FSGEOMETRY	     _IOR ('X', 124, struct xfs_fsop_geom)
-#define XFS_IOC_GOINGDOWN	     _IOR ('X', 125, __uint32_t)
+#define XFS_IOC_GOINGDOWN	     _IOR ('X', 125, uint32_t)
 /*	XFS_IOC_GETFSUUID ---------- deprecated 140	 */
 
 
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index d41ade5d293eb..1e5ed940b84d2 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -140,9 +140,9 @@ xfs_inobt_get_rec(
 STATIC int
 xfs_inobt_insert_rec(
 	struct xfs_btree_cur	*cur,
-	__uint16_t		holemask,
-	__uint8_t		count,
-	__int32_t		freecount,
+	uint16_t		holemask,
+	uint8_t			count,
+	int32_t			freecount,
 	xfs_inofree_t		free,
 	int			*stat)
 {
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 7c471881c9a67..ed52d99732b85 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -219,12 +219,12 @@ xfs_finobt_init_ptr_from_cur(
 	ptr->s = agi->agi_free_root;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_inobt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
 {
-	return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
+	return (int64_t)be32_to_cpu(key->inobt.ir_startino) -
 			  cur->bc_rec.i.ir_startino;
 }
 
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 09c3d1aecef26..d887af940f097 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -444,7 +444,7 @@ xfs_dinode_calc_crc(
 	struct xfs_mount	*mp,
 	struct xfs_dinode	*dip)
 {
-	__uint32_t		crc;
+	uint32_t		crc;
 
 	if (dip->di_version < 3)
 		return;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6848a0afbce7a..0827d7def1ce2 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -28,26 +28,26 @@ struct xfs_dinode;
  * format specific structures at the appropriate time.
  */
 struct xfs_icdinode {
-	__int8_t	di_version;	/* inode version */
-	__int8_t	di_format;	/* format of di_c data */
-	__uint16_t	di_flushiter;	/* incremented on flush */
-	__uint32_t	di_uid;		/* owner's user id */
-	__uint32_t	di_gid;		/* owner's group id */
-	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
-	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
+	int8_t		di_version;	/* inode version */
+	int8_t		di_format;	/* format of di_c data */
+	uint16_t	di_flushiter;	/* incremented on flush */
+	uint32_t	di_uid;		/* owner's user id */
+	uint32_t	di_gid;		/* owner's group id */
+	uint16_t	di_projid_lo;	/* lower part of owner's project id */
+	uint16_t	di_projid_hi;	/* higher part of owner's project id */
 	xfs_fsize_t	di_size;	/* number of bytes in file */
 	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
 	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
 	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
-	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
-	__int8_t	di_aformat;	/* format of attr fork's data */
-	__uint32_t	di_dmevmask;	/* DMIG event mask */
-	__uint16_t	di_dmstate;	/* DMIG state info */
-	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
+	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
+	int8_t		di_aformat;	/* format of attr fork's data */
+	uint32_t	di_dmevmask;	/* DMIG event mask */
+	uint16_t	di_dmstate;	/* DMIG state info */
+	uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
 
-	__uint64_t	di_flags2;	/* more random flags */
-	__uint32_t	di_cowextsize;	/* basic cow extent size for file */
+	uint64_t	di_flags2;	/* more random flags */
+	uint32_t	di_cowextsize;	/* basic cow extent size for file */
 
 	xfs_ictimestamp_t di_crtime;	/* time created */
 };
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 7ae571f8e34ac..8372e9bcd7b6b 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -31,7 +31,7 @@ struct xfs_trans_res;
  * through all the log items definitions and everything they encode into the
  * log.
  */
-typedef __uint32_t xlog_tid_t;
+typedef uint32_t xlog_tid_t;
 
 #define XLOG_MIN_ICLOGS		2
 #define XLOG_MAX_ICLOGS		8
@@ -211,7 +211,7 @@ typedef struct xfs_log_iovec {
 typedef struct xfs_trans_header {
 	uint		th_magic;		/* magic number */
 	uint		th_type;		/* transaction type */
-	__int32_t	th_tid;			/* transaction id (unused) */
+	int32_t		th_tid;			/* transaction id (unused) */
 	uint		th_num_items;		/* num items logged by trans */
 } xfs_trans_header_t;
 
@@ -265,52 +265,52 @@ typedef struct xfs_trans_header {
  * must be added on to the end.
  */
 typedef struct xfs_inode_log_format {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint64_t		ilf_ino;	/* inode number */
+	uint16_t		ilf_type;	/* inode log item type */
+	uint16_t		ilf_size;	/* size of this item */
+	uint32_t		ilf_fields;	/* flags for fields logged */
+	uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	uint16_t		ilf_dsize;	/* size of data/ext/root */
+	uint64_t		ilf_ino;	/* inode number */
 	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
 		uuid_t		ilfu_uuid;	/* mount point value */
 	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
+	int64_t			ilf_blkno;	/* blkno of inode buffer */
+	int32_t			ilf_len;	/* len of inode buffer */
+	int32_t			ilf_boffset;	/* off of inode in buffer */
 } xfs_inode_log_format_t;
 
 typedef struct xfs_inode_log_format_32 {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint64_t		ilf_ino;	/* inode number */
+	uint16_t		ilf_type;	/* inode log item type */
+	uint16_t		ilf_size;	/* size of this item */
+	uint32_t		ilf_fields;	/* flags for fields logged */
+	uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	uint16_t		ilf_dsize;	/* size of data/ext/root */
+	uint64_t		ilf_ino;	/* inode number */
 	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
 		uuid_t		ilfu_uuid;	/* mount point value */
 	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
+	int64_t			ilf_blkno;	/* blkno of inode buffer */
+	int32_t			ilf_len;	/* len of inode buffer */
+	int32_t			ilf_boffset;	/* off of inode in buffer */
 } __attribute__((packed)) xfs_inode_log_format_32_t;
 
 typedef struct xfs_inode_log_format_64 {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
-	__uint64_t		ilf_ino;	/* inode number */
+	uint16_t		ilf_type;	/* inode log item type */
+	uint16_t		ilf_size;	/* size of this item */
+	uint32_t		ilf_fields;	/* flags for fields logged */
+	uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	uint16_t		ilf_dsize;	/* size of data/ext/root */
+	uint32_t		ilf_pad;	/* pad for 64 bit boundary */
+	uint64_t		ilf_ino;	/* inode number */
 	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
 		uuid_t		ilfu_uuid;	/* mount point value */
 	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
+	int64_t			ilf_blkno;	/* blkno of inode buffer */
+	int32_t			ilf_len;	/* len of inode buffer */
+	int32_t			ilf_boffset;	/* off of inode in buffer */
 } xfs_inode_log_format_64_t;
 
 
@@ -379,8 +379,8 @@ static inline int xfs_ilog_fdata(int w)
  * information.
  */
 typedef struct xfs_ictimestamp {
-	__int32_t	t_sec;		/* timestamp seconds */
-	__int32_t	t_nsec;		/* timestamp nanoseconds */
+	int32_t		t_sec;		/* timestamp seconds */
+	int32_t		t_nsec;		/* timestamp nanoseconds */
 } xfs_ictimestamp_t;
 
 /*
@@ -388,18 +388,18 @@ typedef struct xfs_ictimestamp {
  * kept identical to struct xfs_dinode except for the endianness annotations.
  */
 struct xfs_log_dinode {
-	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
-	__uint16_t	di_mode;	/* mode and type of file */
-	__int8_t	di_version;	/* inode version */
-	__int8_t	di_format;	/* format of di_c data */
-	__uint8_t	di_pad3[2];	/* unused in v2/3 inodes */
-	__uint32_t	di_uid;		/* owner's user id */
-	__uint32_t	di_gid;		/* owner's group id */
-	__uint32_t	di_nlink;	/* number of links to file */
-	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
-	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
-	__uint8_t	di_pad[6];	/* unused, zeroed space */
-	__uint16_t	di_flushiter;	/* incremented on flush */
+	uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
+	uint16_t	di_mode;	/* mode and type of file */
+	int8_t		di_version;	/* inode version */
+	int8_t		di_format;	/* format of di_c data */
+	uint8_t		di_pad3[2];	/* unused in v2/3 inodes */
+	uint32_t	di_uid;		/* owner's user id */
+	uint32_t	di_gid;		/* owner's group id */
+	uint32_t	di_nlink;	/* number of links to file */
+	uint16_t	di_projid_lo;	/* lower part of owner's project id */
+	uint16_t	di_projid_hi;	/* higher part of owner's project id */
+	uint8_t		di_pad[6];	/* unused, zeroed space */
+	uint16_t	di_flushiter;	/* incremented on flush */
 	xfs_ictimestamp_t di_atime;	/* time last accessed */
 	xfs_ictimestamp_t di_mtime;	/* time last modified */
 	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
@@ -408,23 +408,23 @@ struct xfs_log_dinode {
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
 	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
 	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
-	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
-	__int8_t	di_aformat;	/* format of attr fork's data */
-	__uint32_t	di_dmevmask;	/* DMIG event mask */
-	__uint16_t	di_dmstate;	/* DMIG state info */
-	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
-	__uint32_t	di_gen;		/* generation number */
+	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
+	int8_t		di_aformat;	/* format of attr fork's data */
+	uint32_t	di_dmevmask;	/* DMIG event mask */
+	uint16_t	di_dmstate;	/* DMIG state info */
+	uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
+	uint32_t	di_gen;		/* generation number */
 
 	/* di_next_unlinked is the only non-core field in the old dinode */
 	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
 
 	/* start of the extended dinode, writable fields */
-	__uint32_t	di_crc;		/* CRC of the inode */
-	__uint64_t	di_changecount;	/* number of attribute changes */
+	uint32_t	di_crc;		/* CRC of the inode */
+	uint64_t	di_changecount;	/* number of attribute changes */
 	xfs_lsn_t	di_lsn;		/* flush sequence */
-	__uint64_t	di_flags2;	/* more random flags */
-	__uint32_t	di_cowextsize;	/* basic cow extent size for file */
-	__uint8_t	di_pad2[12];	/* more padding for future expansion */
+	uint64_t	di_flags2;	/* more random flags */
+	uint32_t	di_cowextsize;	/* basic cow extent size for file */
+	uint8_t		di_pad2[12];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
 	xfs_ictimestamp_t di_crtime;	/* time created */
@@ -483,7 +483,7 @@ typedef struct xfs_buf_log_format {
 	unsigned short	blf_size;	/* size of this item */
 	unsigned short	blf_flags;	/* misc state */
 	unsigned short	blf_len;	/* number of blocks in this buf */
-	__int64_t	blf_blkno;	/* starting blkno of this buf */
+	int64_t		blf_blkno;	/* starting blkno of this buf */
 	unsigned int	blf_map_size;	/* used size of data bitmap in words */
 	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
 } xfs_buf_log_format_t;
@@ -533,7 +533,7 @@ xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
 	blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
 }
 
-static inline __uint16_t
+static inline uint16_t
 xfs_blft_from_flags(struct xfs_buf_log_format *blf)
 {
 	return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
@@ -554,14 +554,14 @@ typedef struct xfs_extent {
  * conversion routine.
  */
 typedef struct xfs_extent_32 {
-	__uint64_t	ext_start;
-	__uint32_t	ext_len;
+	uint64_t	ext_start;
+	uint32_t	ext_len;
 } __attribute__((packed)) xfs_extent_32_t;
 
 typedef struct xfs_extent_64 {
-	__uint64_t	ext_start;
-	__uint32_t	ext_len;
-	__uint32_t	ext_pad;
+	uint64_t	ext_start;
+	uint32_t	ext_len;
+	uint32_t	ext_pad;
 } xfs_extent_64_t;
 
 /*
@@ -570,26 +570,26 @@ typedef struct xfs_extent_64 {
  * size is given by efi_nextents.
  */
 typedef struct xfs_efi_log_format {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
+	uint16_t		efi_type;	/* efi log item type */
+	uint16_t		efi_size;	/* size of this item */
+	uint32_t		efi_nextents;	/* # extents to free */
+	uint64_t		efi_id;		/* efi identifier */
 	xfs_extent_t		efi_extents[1];	/* array of extents to free */
 } xfs_efi_log_format_t;
 
 typedef struct xfs_efi_log_format_32 {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
+	uint16_t		efi_type;	/* efi log item type */
+	uint16_t		efi_size;	/* size of this item */
+	uint32_t		efi_nextents;	/* # extents to free */
+	uint64_t		efi_id;		/* efi identifier */
 	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
 } __attribute__((packed)) xfs_efi_log_format_32_t;
 
 typedef struct xfs_efi_log_format_64 {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
+	uint16_t		efi_type;	/* efi log item type */
+	uint16_t		efi_size;	/* size of this item */
+	uint32_t		efi_nextents;	/* # extents to free */
+	uint64_t		efi_id;		/* efi identifier */
 	xfs_extent_64_t		efi_extents[1];	/* array of extents to free */
 } xfs_efi_log_format_64_t;
 
@@ -599,26 +599,26 @@ typedef struct xfs_efi_log_format_64 {
  * size is given by efd_nextents;
  */
 typedef struct xfs_efd_log_format {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	uint16_t		efd_type;	/* efd log item type */
+	uint16_t		efd_size;	/* size of this item */
+	uint32_t		efd_nextents;	/* # of extents freed */
+	uint64_t		efd_efi_id;	/* id of corresponding efi */
 	xfs_extent_t		efd_extents[1];	/* array of extents freed */
 } xfs_efd_log_format_t;
 
 typedef struct xfs_efd_log_format_32 {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	uint16_t		efd_type;	/* efd log item type */
+	uint16_t		efd_size;	/* size of this item */
+	uint32_t		efd_nextents;	/* # of extents freed */
+	uint64_t		efd_efi_id;	/* id of corresponding efi */
 	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
 } __attribute__((packed)) xfs_efd_log_format_32_t;
 
 typedef struct xfs_efd_log_format_64 {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	uint16_t		efd_type;	/* efd log item type */
+	uint16_t		efd_size;	/* size of this item */
+	uint32_t		efd_nextents;	/* # of extents freed */
+	uint64_t		efd_efi_id;	/* id of corresponding efi */
 	xfs_extent_64_t		efd_extents[1];	/* array of extents freed */
 } xfs_efd_log_format_64_t;
 
@@ -626,11 +626,11 @@ typedef struct xfs_efd_log_format_64 {
  * RUI/RUD (reverse mapping) log format definitions
  */
 struct xfs_map_extent {
-	__uint64_t		me_owner;
-	__uint64_t		me_startblock;
-	__uint64_t		me_startoff;
-	__uint32_t		me_len;
-	__uint32_t		me_flags;
+	uint64_t		me_owner;
+	uint64_t		me_startblock;
+	uint64_t		me_startoff;
+	uint32_t		me_len;
+	uint32_t		me_flags;
 };
 
 /* rmap me_flags: upper bits are flags, lower byte is type code */
@@ -659,10 +659,10 @@ struct xfs_map_extent {
  * size is given by rui_nextents.
  */
 struct xfs_rui_log_format {
-	__uint16_t		rui_type;	/* rui log item type */
-	__uint16_t		rui_size;	/* size of this item */
-	__uint32_t		rui_nextents;	/* # extents to free */
-	__uint64_t		rui_id;		/* rui identifier */
+	uint16_t		rui_type;	/* rui log item type */
+	uint16_t		rui_size;	/* size of this item */
+	uint32_t		rui_nextents;	/* # extents to free */
+	uint64_t		rui_id;		/* rui identifier */
 	struct xfs_map_extent	rui_extents[];	/* array of extents to rmap */
 };
 
@@ -680,19 +680,19 @@ xfs_rui_log_format_sizeof(
  * size is given by rud_nextents;
  */
 struct xfs_rud_log_format {
-	__uint16_t		rud_type;	/* rud log item type */
-	__uint16_t		rud_size;	/* size of this item */
-	__uint32_t		__pad;
-	__uint64_t		rud_rui_id;	/* id of corresponding rui */
+	uint16_t		rud_type;	/* rud log item type */
+	uint16_t		rud_size;	/* size of this item */
+	uint32_t		__pad;
+	uint64_t		rud_rui_id;	/* id of corresponding rui */
 };
 
 /*
  * CUI/CUD (refcount update) log format definitions
  */
 struct xfs_phys_extent {
-	__uint64_t		pe_startblock;
-	__uint32_t		pe_len;
-	__uint32_t		pe_flags;
+	uint64_t		pe_startblock;
+	uint32_t		pe_len;
+	uint32_t		pe_flags;
 };
 
 /* refcount pe_flags: upper bits are flags, lower byte is type code */
@@ -707,10 +707,10 @@ struct xfs_phys_extent {
  * size is given by cui_nextents.
  */
 struct xfs_cui_log_format {
-	__uint16_t		cui_type;	/* cui log item type */
-	__uint16_t		cui_size;	/* size of this item */
-	__uint32_t		cui_nextents;	/* # extents to free */
-	__uint64_t		cui_id;		/* cui identifier */
+	uint16_t		cui_type;	/* cui log item type */
+	uint16_t		cui_size;	/* size of this item */
+	uint32_t		cui_nextents;	/* # extents to free */
+	uint64_t		cui_id;		/* cui identifier */
 	struct xfs_phys_extent	cui_extents[];	/* array of extents */
 };
 
@@ -728,10 +728,10 @@ xfs_cui_log_format_sizeof(
  * size is given by cud_nextents;
  */
 struct xfs_cud_log_format {
-	__uint16_t		cud_type;	/* cud log item type */
-	__uint16_t		cud_size;	/* size of this item */
-	__uint32_t		__pad;
-	__uint64_t		cud_cui_id;	/* id of corresponding cui */
+	uint16_t		cud_type;	/* cud log item type */
+	uint16_t		cud_size;	/* size of this item */
+	uint32_t		__pad;
+	uint64_t		cud_cui_id;	/* id of corresponding cui */
 };
 
 /*
@@ -755,10 +755,10 @@ struct xfs_cud_log_format {
  * size is given by bui_nextents.
  */
 struct xfs_bui_log_format {
-	__uint16_t		bui_type;	/* bui log item type */
-	__uint16_t		bui_size;	/* size of this item */
-	__uint32_t		bui_nextents;	/* # extents to free */
-	__uint64_t		bui_id;		/* bui identifier */
+	uint16_t		bui_type;	/* bui log item type */
+	uint16_t		bui_size;	/* size of this item */
+	uint32_t		bui_nextents;	/* # extents to free */
+	uint64_t		bui_id;		/* bui identifier */
 	struct xfs_map_extent	bui_extents[];	/* array of extents to bmap */
 };
 
@@ -776,10 +776,10 @@ xfs_bui_log_format_sizeof(
  * size is given by bud_nextents;
  */
 struct xfs_bud_log_format {
-	__uint16_t		bud_type;	/* bud log item type */
-	__uint16_t		bud_size;	/* size of this item */
-	__uint32_t		__pad;
-	__uint64_t		bud_bui_id;	/* id of corresponding bui */
+	uint16_t		bud_type;	/* bud log item type */
+	uint16_t		bud_size;	/* size of this item */
+	uint32_t		__pad;
+	uint64_t		bud_bui_id;	/* id of corresponding bui */
 };
 
 /*
@@ -789,12 +789,12 @@ struct xfs_bud_log_format {
  * 32 bits : log_recovery code assumes that.
  */
 typedef struct xfs_dq_logformat {
-	__uint16_t		qlf_type;      /* dquot log item type */
-	__uint16_t		qlf_size;      /* size of this item */
+	uint16_t		qlf_type;      /* dquot log item type */
+	uint16_t		qlf_size;      /* size of this item */
 	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
-	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
-	__int32_t		qlf_len;       /* len of dquot buffer */
-	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
+	int64_t			qlf_blkno;     /* blkno of dquot buffer */
+	int32_t			qlf_len;       /* len of dquot buffer */
+	uint32_t		qlf_boffset;   /* off of dquot in buffer */
 } xfs_dq_logformat_t;
 
 /*
@@ -853,8 +853,8 @@ typedef struct xfs_qoff_logformat {
  * decoding can be done correctly.
  */
 struct xfs_icreate_log {
-	__uint16_t	icl_type;	/* type of log format structure */
-	__uint16_t	icl_size;	/* size of log format structure */
+	uint16_t	icl_type;	/* type of log format structure */
+	uint16_t	icl_size;	/* size of log format structure */
 	__be32		icl_ag;		/* ag being allocated in */
 	__be32		icl_agbno;	/* start block of inode range */
 	__be32		icl_count;	/* number of inodes to initialise */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 29a01ec89dd09..66948a9fd4861 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -26,7 +26,7 @@
 #define XLOG_RHASH_SIZE	16
 #define XLOG_RHASH_SHIFT 2
 #define XLOG_RHASH(tid)	\
-	((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
+	((((uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
 
 #define XLOG_MAX_REGIONS_IN_ITEM   (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1)
 
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 8eed51275bb39..d69c772271cb0 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -27,8 +27,8 @@
  * they may need 64-bit accounting. Hence, 64-bit quota-counters,
  * and quota-limits. This is a waste in the common case, but hey ...
  */
-typedef __uint64_t	xfs_qcnt_t;
-typedef __uint16_t	xfs_qwarncnt_t;
+typedef uint64_t	xfs_qcnt_t;
+typedef uint16_t	xfs_qwarncnt_t;
 
 /*
  * flags for q_flags field in the dquot.
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 50add5272807e..65c222ae5b048 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -202,7 +202,7 @@ xfs_refcountbt_init_ptr_from_cur(
 	ptr->s = agf->agf_refcount_root;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_refcountbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
@@ -210,16 +210,16 @@ xfs_refcountbt_key_diff(
 	struct xfs_refcount_irec	*rec = &cur->bc_rec.rc;
 	struct xfs_refcount_key		*kp = &key->refc;
 
-	return (__int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+	return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_refcountbt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	return (__int64_t)be32_to_cpu(k1->refc.rc_startblock) -
+	return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
 			  be32_to_cpu(k2->refc.rc_startblock);
 }
 
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 06cfb93c2ef95..1bcb41fe01567 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2061,7 +2061,7 @@ int
 xfs_rmap_finish_one(
 	struct xfs_trans		*tp,
 	enum xfs_rmap_intent_type	type,
-	__uint64_t			owner,
+	uint64_t			owner,
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,
@@ -2182,7 +2182,7 @@ __xfs_rmap_add(
 	struct xfs_mount		*mp,
 	struct xfs_defer_ops		*dfops,
 	enum xfs_rmap_intent_type	type,
-	__uint64_t			owner,
+	uint64_t			owner,
 	int				whichfork,
 	struct xfs_bmbt_irec		*bmap)
 {
@@ -2266,7 +2266,7 @@ xfs_rmap_alloc_extent(
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	__uint64_t		owner)
+	uint64_t		owner)
 {
 	struct xfs_bmbt_irec	bmap;
 
@@ -2290,7 +2290,7 @@ xfs_rmap_free_extent(
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	__uint64_t		owner)
+	uint64_t		owner)
 {
 	struct xfs_bmbt_irec	bmap;
 
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 98f908fea1035..265116d044f42 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -179,7 +179,7 @@ enum xfs_rmap_intent_type {
 struct xfs_rmap_intent {
 	struct list_head			ri_list;
 	enum xfs_rmap_intent_type		ri_type;
-	__uint64_t				ri_owner;
+	uint64_t				ri_owner;
 	int					ri_whichfork;
 	struct xfs_bmbt_irec			ri_bmap;
 };
@@ -196,15 +196,15 @@ int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		struct xfs_bmbt_irec *imap);
 int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
-		__uint64_t owner);
+		uint64_t owner);
 int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
-		__uint64_t owner);
+		uint64_t owner);
 
 void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
 		struct xfs_btree_cur *rcur, int error);
 int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
-		__uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+		uint64_t owner, int whichfork, xfs_fileoff_t startoff,
 		xfs_fsblock_t startblock, xfs_filblks_t blockcount,
 		xfs_exntst_t state, struct xfs_btree_cur **pcur);
 
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 74e5a54bc428f..c5b4a1c862b0c 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -199,7 +199,7 @@ xfs_rmapbt_init_high_key_from_rec(
 	union xfs_btree_key	*key,
 	union xfs_btree_rec	*rec)
 {
-	__uint64_t		off;
+	uint64_t		off;
 	int			adj;
 
 	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
@@ -241,7 +241,7 @@ xfs_rmapbt_init_ptr_from_cur(
 	ptr->s = agf->agf_roots[cur->bc_btnum];
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_rmapbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
@@ -249,9 +249,9 @@ xfs_rmapbt_key_diff(
 	struct xfs_rmap_irec	*rec = &cur->bc_rec.r;
 	struct xfs_rmap_key	*kp = &key->rmap;
 	__u64			x, y;
-	__int64_t		d;
+	int64_t			d;
 
-	d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
 	if (d)
 		return d;
 
@@ -271,7 +271,7 @@ xfs_rmapbt_key_diff(
 	return 0;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_rmapbt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
@@ -279,10 +279,10 @@ xfs_rmapbt_diff_two_keys(
 {
 	struct xfs_rmap_key	*kp1 = &k1->rmap;
 	struct xfs_rmap_key	*kp2 = &k2->rmap;
-	__int64_t		d;
+	int64_t			d;
 	__u64			x, y;
 
-	d = (__int64_t)be32_to_cpu(kp1->rm_startblock) -
+	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
 		       be32_to_cpu(kp2->rm_startblock);
 	if (d)
 		return d;
@@ -384,10 +384,10 @@ xfs_rmapbt_keys_inorder(
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	__uint32_t		x;
-	__uint32_t		y;
-	__uint64_t		a;
-	__uint64_t		b;
+	uint32_t		x;
+	uint32_t		y;
+	uint64_t		a;
+	uint64_t		b;
 
 	x = be32_to_cpu(k1->rmap.rm_startblock);
 	y = be32_to_cpu(k2->rmap.rm_startblock);
@@ -414,10 +414,10 @@ xfs_rmapbt_recs_inorder(
 	union xfs_btree_rec	*r1,
 	union xfs_btree_rec	*r2)
 {
-	__uint32_t		x;
-	__uint32_t		y;
-	__uint64_t		a;
-	__uint64_t		b;
+	uint32_t		x;
+	uint32_t		y;
+	uint64_t		a;
+	uint64_t		b;
 
 	x = be32_to_cpu(r1->rmap.rm_startblock);
 	y = be32_to_cpu(r2->rmap.rm_startblock);
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index e47b99e59f603..26bba7f90fdfd 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1011,7 +1011,7 @@ xfs_rtfree_extent(
 	    mp->m_sb.sb_rextents) {
 		if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
 			mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
-		*(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
+		*(uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
 		xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
 	}
 	return 0;
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 584ec896a5337..9b5aae2bcc0b7 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -448,7 +448,7 @@ xfs_sb_quota_to_disk(
 	struct xfs_dsb	*to,
 	struct xfs_sb	*from)
 {
-	__uint16_t	qflags = from->sb_qflags;
+	uint16_t	qflags = from->sb_qflags;
 
 	to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
 	if (xfs_sb_version_has_pquotino(from)) {
@@ -756,7 +756,7 @@ xfs_sb_mount_common(
 	mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
 
 	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
-	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
+	mp->m_ialloc_inos = (int)MAX((uint16_t)XFS_INODES_PER_CHUNK,
 					sbp->sb_inopblock);
 	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
 
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 717909f2f7b70..0220159bd4636 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -18,34 +18,34 @@
 #ifndef __XFS_TYPES_H__
 #define	__XFS_TYPES_H__
 
-typedef __uint32_t	prid_t;		/* project ID */
+typedef uint32_t	prid_t;		/* project ID */
 
-typedef __uint32_t	xfs_agblock_t;	/* blockno in alloc. group */
-typedef	__uint32_t	xfs_agino_t;	/* inode # within allocation grp */
-typedef	__uint32_t	xfs_extlen_t;	/* extent length in blocks */
-typedef	__uint32_t	xfs_agnumber_t;	/* allocation group number */
-typedef __int32_t	xfs_extnum_t;	/* # of extents in a file */
-typedef __int16_t	xfs_aextnum_t;	/* # extents in an attribute fork */
-typedef	__int64_t	xfs_fsize_t;	/* bytes in a file */
-typedef __uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
+typedef uint32_t	xfs_agblock_t;	/* blockno in alloc. group */
+typedef uint32_t	xfs_agino_t;	/* inode # within allocation grp */
+typedef uint32_t	xfs_extlen_t;	/* extent length in blocks */
+typedef uint32_t	xfs_agnumber_t;	/* allocation group number */
+typedef int32_t		xfs_extnum_t;	/* # of extents in a file */
+typedef int16_t		xfs_aextnum_t;	/* # extents in an attribute fork */
+typedef int64_t		xfs_fsize_t;	/* bytes in a file */
+typedef uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
 
-typedef	__int32_t	xfs_suminfo_t;	/* type of bitmap summary info */
-typedef	__int32_t	xfs_rtword_t;	/* word type for bitmap manipulations */
+typedef int32_t		xfs_suminfo_t;	/* type of bitmap summary info */
+typedef int32_t		xfs_rtword_t;	/* word type for bitmap manipulations */
 
-typedef	__int64_t	xfs_lsn_t;	/* log sequence number */
-typedef	__int32_t	xfs_tid_t;	/* transaction identifier */
+typedef int64_t		xfs_lsn_t;	/* log sequence number */
+typedef int32_t		xfs_tid_t;	/* transaction identifier */
 
-typedef	__uint32_t	xfs_dablk_t;	/* dir/attr block number (in file) */
-typedef	__uint32_t	xfs_dahash_t;	/* dir/attr hash value */
+typedef uint32_t	xfs_dablk_t;	/* dir/attr block number (in file) */
+typedef uint32_t	xfs_dahash_t;	/* dir/attr hash value */
 
-typedef	__uint64_t	xfs_fsblock_t;	/* blockno in filesystem (agno|agbno) */
-typedef __uint64_t	xfs_rfsblock_t;	/* blockno in filesystem (raw) */
-typedef __uint64_t	xfs_rtblock_t;	/* extent (block) in realtime area */
-typedef __uint64_t	xfs_fileoff_t;	/* block number in a file */
-typedef __uint64_t	xfs_filblks_t;	/* number of blocks in a file */
+typedef uint64_t	xfs_fsblock_t;	/* blockno in filesystem (agno|agbno) */
+typedef uint64_t	xfs_rfsblock_t;	/* blockno in filesystem (raw) */
+typedef uint64_t	xfs_rtblock_t;	/* extent (block) in realtime area */
+typedef uint64_t	xfs_fileoff_t;	/* block number in a file */
+typedef uint64_t	xfs_filblks_t;	/* number of blocks in a file */
 
-typedef	__int64_t	xfs_srtblock_t;	/* signed version of xfs_rtblock_t */
-typedef __int64_t	xfs_sfiloff_t;	/* signed block number in a file */
+typedef int64_t		xfs_srtblock_t;	/* signed version of xfs_rtblock_t */
+typedef int64_t		xfs_sfiloff_t;	/* signed block number in a file */
 
 /*
  * Null values for the types.
@@ -125,7 +125,7 @@ struct xfs_name {
  * uid_t and gid_t are hard-coded to 32 bits in the inode.
  * Hence, an 'id' in a dquot is 32 bits..
  */
-typedef __uint32_t	xfs_dqid_t;
+typedef uint32_t	xfs_dqid_t;
 
 /*
  * Constants for bit manipulations.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7cd55e2..6bab8c449b814 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -836,7 +836,7 @@ xfs_writepage_map(
 	struct inode		*inode,
 	struct page		*page,
 	loff_t			offset,
-	__uint64_t              end_offset)
+	uint64_t              end_offset)
 {
 	LIST_HEAD(submit_list);
 	struct xfs_ioend	*ioend, *next;
@@ -991,7 +991,7 @@ xfs_do_writepage(
 	struct xfs_writepage_ctx *wpc = data;
 	struct inode		*inode = page->mapping->host;
 	loff_t			offset;
-	__uint64_t              end_offset;
+	uint64_t              end_offset;
 	pgoff_t                 end_index;
 
 	trace_xfs_writepage(inode, page, 0, 0);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 97c45b6eb91ec..9bc1e12179899 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -279,7 +279,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	if (bp == NULL) {
 		cursor->blkno = 0;
 		for (;;) {
-			__uint16_t magic;
+			uint16_t magic;
 
 			error = xfs_da3_node_read(NULL, dp,
 						      cursor->blkno, -1, &bp,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 7493a64f5b998..0ea70a44c1a78 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -389,11 +389,11 @@ xfs_getbmapx_fix_eof_hole(
 	struct getbmapx		*out,		/* output structure */
 	int			prealloced,	/* this is a file with
 						 * preallocated data space */
-	__int64_t		end,		/* last block requested */
+	int64_t			end,		/* last block requested */
 	xfs_fsblock_t		startblock,
 	bool			moretocome)
 {
-	__int64_t		fixlen;
+	int64_t			fixlen;
 	xfs_mount_t		*mp;		/* file system mount point */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	xfs_extnum_t		lastx;		/* last extent pointer */
@@ -514,9 +514,9 @@ xfs_getbmap(
 	xfs_bmap_format_t	formatter,	/* format to user */
 	void			*arg)		/* formatter arg */
 {
-	__int64_t		bmvend;		/* last block requested */
+	int64_t			bmvend;		/* last block requested */
 	int			error = 0;	/* return value */
-	__int64_t		fixlen;		/* length for -1 case */
+	int64_t			fixlen;		/* length for -1 case */
 	int			i;		/* extent number */
 	int			lock;		/* lock state */
 	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
@@ -605,7 +605,7 @@ xfs_getbmap(
 	if (bmv->bmv_length == -1) {
 		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
 		bmv->bmv_length =
-			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
+			max_t(int64_t, fixlen - bmv->bmv_offset, 0);
 	} else if (bmv->bmv_length == 0) {
 		bmv->bmv_entries = 0;
 		return 0;
@@ -742,7 +742,7 @@ xfs_getbmap(
 				out[cur_ext].bmv_offset +
 				out[cur_ext].bmv_length;
 			bmv->bmv_length =
-				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+				max_t(int64_t, 0, bmvend - bmv->bmv_offset);
 
 			/*
 			 * In case we don't want to return the hole,
@@ -1676,7 +1676,7 @@ xfs_swap_extent_rmap(
 	xfs_filblks_t			ilen;
 	xfs_filblks_t			rlen;
 	int				nimaps;
-	__uint64_t			tip_flags2;
+	uint64_t			tip_flags2;
 
 	/*
 	 * If the source file has shared blocks, we must flag the donor
@@ -1792,7 +1792,7 @@ xfs_swap_extent_forks(
 	int			aforkblks = 0;
 	int			taforkblks = 0;
 	xfs_extnum_t		nextents;
-	__uint64_t		tmp;
+	uint64_t		tmp;
 	int			error;
 
 	/*
@@ -1850,15 +1850,15 @@ xfs_swap_extent_forks(
 	/*
 	 * Fix the on-disk inode values
 	 */
-	tmp = (__uint64_t)ip->i_d.di_nblocks;
+	tmp = (uint64_t)ip->i_d.di_nblocks;
 	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
 	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
 
-	tmp = (__uint64_t) ip->i_d.di_nextents;
+	tmp = (uint64_t) ip->i_d.di_nextents;
 	ip->i_d.di_nextents = tip->i_d.di_nextents;
 	tip->i_d.di_nextents = tmp;
 
-	tmp = (__uint64_t) ip->i_d.di_format;
+	tmp = (uint64_t) ip->i_d.di_format;
 	ip->i_d.di_format = tip->i_d.di_format;
 	tip->i_d.di_format = tmp;
 
@@ -1927,7 +1927,7 @@ xfs_swap_extents(
 	int			error = 0;
 	int			lock_flags;
 	struct xfs_ifork	*cowfp;
-	__uint64_t		f;
+	uint64_t		f;
 	int			resblks;
 
 	/*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a9640b136b32c..89b586f688b0b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1194,7 +1194,7 @@ xfs_buf_ioerror_alert(
 {
 	xfs_alert(bp->b_target->bt_mount,
 "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
-		(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
+		(uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
 }
 
 int
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 1bc7401ea1b25..ede4790753bc1 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -44,7 +44,7 @@ static unsigned char xfs_dir3_filetype_table[] = {
 static unsigned char
 xfs_dir3_get_dtype(
 	struct xfs_mount	*mp,
-	__uint8_t		filetype)
+	uint8_t			filetype)
 {
 	if (!xfs_sb_version_hasftype(&mp->m_sb))
 		return DT_UNKNOWN;
@@ -117,7 +117,7 @@ xfs_dir2_sf_getdents(
 	 */
 	sfep = xfs_dir2_sf_firstentry(sfp);
 	for (i = 0; i < sfp->count; i++) {
-		__uint8_t filetype;
+		uint8_t filetype;
 
 		off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
 				xfs_dir2_sf_get_offset(sfep));
@@ -194,7 +194,7 @@ xfs_dir2_block_getdents(
 	 * Each object is a real entry (dep) or an unused one (dup).
 	 */
 	while (ptr < endptr) {
-		__uint8_t filetype;
+		uint8_t filetype;
 
 		dup = (xfs_dir2_data_unused_t *)ptr;
 		/*
@@ -391,7 +391,7 @@ xfs_dir2_leaf_getdents(
 	 * Get more blocks and readahead as necessary.
 	 */
 	while (curoff < XFS_DIR2_LEAF_OFFSET) {
-		__uint8_t filetype;
+		uint8_t filetype;
 
 		/*
 		 * If we have no buffer, or we're off the end of the
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 6a05d278da64f..b2cde54261822 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -39,7 +39,7 @@ xfs_trim_extents(
 	xfs_daddr_t		start,
 	xfs_daddr_t		end,
 	xfs_daddr_t		minlen,
-	__uint64_t		*blocks_trimmed)
+	uint64_t		*blocks_trimmed)
 {
 	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
 	struct xfs_btree_cur	*cur;
@@ -166,7 +166,7 @@ xfs_ioc_trim(
 	struct fstrim_range	range;
 	xfs_daddr_t		start, end, minlen;
 	xfs_agnumber_t		start_agno, end_agno, agno;
-	__uint64_t		blocks_trimmed = 0;
+	uint64_t		blocks_trimmed = 0;
 	int			error, last_error = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9d06cc30e875e..e57c6cce91aab 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -276,7 +276,7 @@ xfs_qm_init_dquot_blk(
 void
 xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
 {
-	__uint64_t space;
+	uint64_t space;
 
 	dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
 	dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 6ccaae9eb0eea..8f22fc579dbba 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -602,7 +602,7 @@ xfs_growfs_data_private(
 	if (nagimax)
 		mp->m_maxagi = nagimax;
 	if (mp->m_sb.sb_imax_pct) {
-		__uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
+		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
 		do_div(icount, 100);
 		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
 	} else
@@ -793,17 +793,17 @@ xfs_fs_counts(
 int
 xfs_reserve_blocks(
 	xfs_mount_t             *mp,
-	__uint64_t              *inval,
+	uint64_t              *inval,
 	xfs_fsop_resblks_t      *outval)
 {
-	__int64_t		lcounter, delta;
-	__int64_t		fdblks_delta = 0;
-	__uint64_t		request;
-	__int64_t		free;
+	int64_t			lcounter, delta;
+	int64_t			fdblks_delta = 0;
+	uint64_t		request;
+	int64_t			free;
 	int			error = 0;
 
 	/* If inval is null, report current values and return */
-	if (inval == (__uint64_t *)NULL) {
+	if (inval == (uint64_t *)NULL) {
 		if (!outval)
 			return -EINVAL;
 		outval->resblks = mp->m_resblks;
@@ -904,7 +904,7 @@ xfs_reserve_blocks(
 int
 xfs_fs_goingdown(
 	xfs_mount_t	*mp,
-	__uint32_t	inflags)
+	uint32_t	inflags)
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f34915898fea2..2954c13a3acd2 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -22,9 +22,9 @@ extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
 extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
 extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
 extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
-extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
+extern int xfs_reserve_blocks(xfs_mount_t *mp, uint64_t *inval,
 				xfs_fsop_resblks_t *outval);
-extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern int xfs_fs_goingdown(xfs_mount_t *mp, uint32_t inflags);
 
 extern int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
 extern int xfs_fs_unreserve_ag_blocks(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c565009..ffbfe7d8bc6d9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -632,7 +632,7 @@ __xfs_iflock(
 
 STATIC uint
 _xfs_dic2xflags(
-	__uint16_t		di_flags,
+	uint16_t		di_flags,
 	uint64_t		di_flags2,
 	bool			has_attr)
 {
@@ -855,8 +855,8 @@ xfs_ialloc(
 		inode->i_version = 1;
 		ip->i_d.di_flags2 = 0;
 		ip->i_d.di_cowextsize = 0;
-		ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
-		ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
+		ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
+		ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
 	}
 
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 10e89fcb49d74..677d0bfe1c2d3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -192,8 +192,8 @@ static inline void
 xfs_set_projid(struct xfs_inode *ip,
 		prid_t projid)
 {
-	ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
-	ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
+	ip->i_d.di_projid_hi = (uint16_t) (projid >> 16);
+	ip->i_d.di_projid_lo = (uint16_t) (projid & 0xffff);
 }
 
 static inline prid_t
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f6af76975bc87..8ffe4eac0b483 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -443,8 +443,8 @@ xfs_attrmulti_attr_get(
 	struct inode		*inode,
 	unsigned char		*name,
 	unsigned char		__user *ubuf,
-	__uint32_t		*len,
-	__uint32_t		flags)
+	uint32_t		*len,
+	uint32_t		flags)
 {
 	unsigned char		*kbuf;
 	int			error = -EFAULT;
@@ -472,8 +472,8 @@ xfs_attrmulti_attr_set(
 	struct inode		*inode,
 	unsigned char		*name,
 	const unsigned char	__user *ubuf,
-	__uint32_t		len,
-	__uint32_t		flags)
+	uint32_t		len,
+	uint32_t		flags)
 {
 	unsigned char		*kbuf;
 	int			error;
@@ -498,7 +498,7 @@ int
 xfs_attrmulti_attr_remove(
 	struct inode		*inode,
 	unsigned char		*name,
-	__uint32_t		flags)
+	uint32_t		flags)
 {
 	int			error;
 
@@ -876,7 +876,7 @@ xfs_merge_ioc_xflags(
 
 STATIC unsigned int
 xfs_di2lxflags(
-	__uint16_t	di_flags)
+	uint16_t	di_flags)
 {
 	unsigned int	flags = 0;
 
@@ -1287,7 +1287,7 @@ xfs_ioctl_setattr_check_projid(
 	struct fsxattr		*fa)
 {
 	/* Disallow 32bit project ids if projid32bit feature is not enabled. */
-	if (fa->fsx_projid > (__uint16_t)-1 &&
+	if (fa->fsx_projid > (uint16_t)-1 &&
 	    !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
 		return -EINVAL;
 
@@ -1931,7 +1931,7 @@ xfs_file_ioctl(
 
 	case XFS_IOC_SET_RESBLKS: {
 		xfs_fsop_resblks_t inout;
-		__uint64_t	   in;
+		uint64_t	   in;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -2017,12 +2017,12 @@ xfs_file_ioctl(
 	}
 
 	case XFS_IOC_GOINGDOWN: {
-		__uint32_t in;
+		uint32_t in;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		if (get_user(in, (__uint32_t __user *)arg))
+		if (get_user(in, (uint32_t __user *)arg))
 			return -EFAULT;
 
 		return xfs_fs_goingdown(mp, in);
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 8b52881bfd901..e86c3ea137d2d 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -48,22 +48,22 @@ xfs_attrmulti_attr_get(
 	struct inode		*inode,
 	unsigned char		*name,
 	unsigned char		__user *ubuf,
-	__uint32_t		*len,
-	__uint32_t		flags);
+	uint32_t		*len,
+	uint32_t		flags);
 
 extern int
 xfs_attrmulti_attr_set(
 	struct inode		*inode,
 	unsigned char		*name,
 	const unsigned char	__user *ubuf,
-	__uint32_t		len,
-	__uint32_t		flags);
+	uint32_t		len,
+	uint32_t		flags);
 
 extern int
 xfs_attrmulti_attr_remove(
 	struct inode		*inode,
 	unsigned char		*name,
-	__uint32_t		flags);
+	uint32_t		flags);
 
 extern struct dentry *
 xfs_handle_to_dentry(
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index b1bb45444df8a..5492bcf6f442b 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -112,9 +112,9 @@ typedef struct compat_xfs_fsop_handlereq {
 
 /* The bstat field in the swapext struct needs translation */
 typedef struct compat_xfs_swapext {
-	__int64_t		sx_version;	/* version */
-	__int64_t		sx_fdtarget;	/* fd of target file */
-	__int64_t		sx_fdtmp;	/* fd of tmp file */
+	int64_t			sx_version;	/* version */
+	int64_t			sx_fdtarget;	/* fd of target file */
+	int64_t			sx_fdtmp;	/* fd of tmp file */
 	xfs_off_t		sx_offset;	/* offset into file */
 	xfs_off_t		sx_length;	/* leng from offset */
 	char			sx_pad[16];	/* pad space, unused */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 044fb0e15390c..ecdae42267d31 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -23,14 +23,6 @@
 /*
  * Kernel specific type declarations for XFS
  */
-typedef signed char		__int8_t;
-typedef unsigned char		__uint8_t;
-typedef signed short int	__int16_t;
-typedef unsigned short int	__uint16_t;
-typedef signed int		__int32_t;
-typedef unsigned int		__uint32_t;
-typedef signed long long int	__int64_t;
-typedef unsigned long long int	__uint64_t;
 
 typedef __s64			xfs_off_t;	/* <file offset> type */
 typedef unsigned long long	xfs_ino_t;	/* <inode> type */
@@ -186,22 +178,22 @@ extern struct xstats xfsstats;
  * are converting to the init_user_ns. The uid is later mapped to a particular
  * user namespace value when crossing the kernel/user boundary.
  */
-static inline __uint32_t xfs_kuid_to_uid(kuid_t uid)
+static inline uint32_t xfs_kuid_to_uid(kuid_t uid)
 {
 	return from_kuid(&init_user_ns, uid);
 }
 
-static inline kuid_t xfs_uid_to_kuid(__uint32_t uid)
+static inline kuid_t xfs_uid_to_kuid(uint32_t uid)
 {
 	return make_kuid(&init_user_ns, uid);
 }
 
-static inline __uint32_t xfs_kgid_to_gid(kgid_t gid)
+static inline uint32_t xfs_kgid_to_gid(kgid_t gid)
 {
 	return from_kgid(&init_user_ns, gid);
 }
 
-static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
+static inline kgid_t xfs_gid_to_kgid(uint32_t gid)
 {
 	return make_kgid(&init_user_ns, gid);
 }
@@ -231,14 +223,14 @@ static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
 
 #define do_mod(a, b)	xfs_do_mod(&(a), (b), sizeof(a))
 
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+static inline uint64_t roundup_64(uint64_t x, uint32_t y)
 {
 	x += y - 1;
 	do_div(x, y);
 	return x * y;
 }
 
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+static inline uint64_t howmany_64(uint64_t x, uint32_t y)
 {
 	x += y - 1;
 	do_div(x, y);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c8d0481033479..98b39cbae42dc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -434,7 +434,7 @@ xfs_log_reserve(
 	int		 	unit_bytes,
 	int		 	cnt,
 	struct xlog_ticket	**ticp,
-	__uint8_t	 	client,
+	uint8_t		 	client,
 	bool			permanent)
 {
 	struct xlog		*log = mp->m_log;
@@ -825,9 +825,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		if (!error) {
 			/* the data section must be 32 bit size aligned */
 			struct {
-			    __uint16_t magic;
-			    __uint16_t pad1;
-			    __uint32_t pad2; /* may as well make it 64 bits */
+			    uint16_t magic;
+			    uint16_t pad1;
+			    uint32_t pad2; /* may as well make it 64 bits */
 			} magic = {
 				.magic = XLOG_UNMOUNT_TYPE,
 			};
@@ -1665,7 +1665,7 @@ xlog_cksum(
 	char			*dp,
 	int			size)
 {
-	__uint32_t		crc;
+	uint32_t		crc;
 
 	/* first generate the crc for the record header ... */
 	crc = xfs_start_cksum_update((char *)rhead,
@@ -1828,7 +1828,7 @@ xlog_sync(
 		 */
 		dptr = (char *)&iclog->ic_header + count;
 		for (i = 0; i < split; i += BBSIZE) {
-			__uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
+			uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
 			if (++cycle == XLOG_HEADER_MAGIC_NUM)
 				cycle++;
 			*(__be32 *)dptr = cpu_to_be32(cycle);
@@ -2412,8 +2412,8 @@ xlog_write(
 			}
 
 			reg = &vecp[index];
-			ASSERT(reg->i_len % sizeof(__int32_t) == 0);
-			ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
+			ASSERT(reg->i_len % sizeof(int32_t) == 0);
+			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
 
 			start_rec_copy = xlog_write_start_rec(ptr, ticket);
 			if (start_rec_copy) {
@@ -3192,7 +3192,7 @@ xlog_state_switch_iclogs(
 	/* Round up to next log-sunit */
 	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
-		__uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
+		uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
 		log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
 	}
 
@@ -3820,7 +3820,7 @@ xlog_verify_iclog(
 	xlog_in_core_2_t	*xhdr;
 	void			*base_ptr, *ptr, *p;
 	ptrdiff_t		field_offset;
-	__uint8_t		clientid;
+	uint8_t			clientid;
 	int			len, i, j, k, op_len;
 	int			idx;
 
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index cc5a9f1574e75..bf212772595cc 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -159,7 +159,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
 			  int		   length,
 			  int		   count,
 			  struct xlog_ticket **ticket,
-			  __uint8_t	   clientid,
+			  uint8_t		   clientid,
 			  bool		   permanent);
 int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
 void      xfs_log_unmount(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 62113a5d25044..51bf7b8273871 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -419,7 +419,7 @@ struct xlog {
 };
 
 #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
-	((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+	((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
 
 #define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cd0b077deb354..e19b20c87f355 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2230,9 +2230,9 @@ xlog_recover_get_buf_lsn(
 	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	__uint32_t		magic32;
-	__uint16_t		magic16;
-	__uint16_t		magicda;
+	uint32_t		magic32;
+	uint16_t		magic16;
+	uint16_t		magicda;
 	void			*blk = bp->b_addr;
 	uuid_t			*uuid;
 	xfs_lsn_t		lsn = -1;
@@ -2381,9 +2381,9 @@ xlog_recover_validate_buf_type(
 	xfs_lsn_t		current_lsn)
 {
 	struct xfs_da_blkinfo	*info = bp->b_addr;
-	__uint32_t		magic32;
-	__uint16_t		magic16;
-	__uint16_t		magicda;
+	uint32_t		magic32;
+	uint16_t		magic16;
+	uint16_t		magicda;
 	char			*warnmsg = NULL;
 
 	/*
@@ -2852,7 +2852,7 @@ xlog_recover_buffer_pass2(
 	if (XFS_DINODE_MAGIC ==
 	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
 	    (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize,
-			(__uint32_t)log->l_mp->m_inode_cluster_size))) {
+			(uint32_t)log->l_mp->m_inode_cluster_size))) {
 		xfs_buf_stale(bp);
 		error = xfs_bwrite(bp);
 	} else {
@@ -3423,7 +3423,7 @@ xlog_recover_efd_pass2(
 	xfs_efd_log_format_t	*efd_formatp;
 	xfs_efi_log_item_t	*efip = NULL;
 	xfs_log_item_t		*lip;
-	__uint64_t		efi_id;
+	uint64_t		efi_id;
 	struct xfs_ail_cursor	cur;
 	struct xfs_ail		*ailp = log->l_ailp;
 
@@ -3519,7 +3519,7 @@ xlog_recover_rud_pass2(
 	struct xfs_rud_log_format	*rud_formatp;
 	struct xfs_rui_log_item		*ruip = NULL;
 	struct xfs_log_item		*lip;
-	__uint64_t			rui_id;
+	uint64_t			rui_id;
 	struct xfs_ail_cursor		cur;
 	struct xfs_ail			*ailp = log->l_ailp;
 
@@ -3635,7 +3635,7 @@ xlog_recover_cud_pass2(
 	struct xfs_cud_log_format	*cud_formatp;
 	struct xfs_cui_log_item		*cuip = NULL;
 	struct xfs_log_item		*lip;
-	__uint64_t			cui_id;
+	uint64_t			cui_id;
 	struct xfs_ail_cursor		cur;
 	struct xfs_ail			*ailp = log->l_ailp;
 
@@ -3754,7 +3754,7 @@ xlog_recover_bud_pass2(
 	struct xfs_bud_log_format	*bud_formatp;
 	struct xfs_bui_log_item		*buip = NULL;
 	struct xfs_log_item		*lip;
-	__uint64_t			bui_id;
+	uint64_t			bui_id;
 	struct xfs_ail_cursor		cur;
 	struct xfs_ail			*ailp = log->l_ailp;
 
@@ -5772,9 +5772,9 @@ xlog_recover_check_summary(
 	xfs_buf_t	*agfbp;
 	xfs_buf_t	*agibp;
 	xfs_agnumber_t	agno;
-	__uint64_t	freeblks;
-	__uint64_t	itotal;
-	__uint64_t	ifree;
+	uint64_t	freeblks;
+	uint64_t	itotal;
+	uint64_t	ifree;
 	int		error;
 
 	mp = log->l_mp;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2eaf818591661..cc6789d352320 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -174,7 +174,7 @@ xfs_free_perag(
 int
 xfs_sb_validate_fsb_count(
 	xfs_sb_t	*sbp,
-	__uint64_t	nblocks)
+	uint64_t	nblocks)
 {
 	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
 	ASSERT(sbp->sb_blocklog >= BBSHIFT);
@@ -436,7 +436,7 @@ STATIC void
 xfs_set_maxicount(xfs_mount_t *mp)
 {
 	xfs_sb_t	*sbp = &(mp->m_sb);
-	__uint64_t	icount;
+	uint64_t	icount;
 
 	if (sbp->sb_imax_pct) {
 		/*
@@ -502,7 +502,7 @@ xfs_set_low_space_thresholds(
 	int i;
 
 	for (i = 0; i < XFS_LOWSP_MAX; i++) {
-		__uint64_t space = mp->m_sb.sb_dblocks;
+		uint64_t space = mp->m_sb.sb_dblocks;
 
 		do_div(space, 100);
 		mp->m_low_space[i] = space * (i + 1);
@@ -598,10 +598,10 @@ xfs_mount_reset_sbqflags(
 	return xfs_sync_sb(mp, false);
 }
 
-__uint64_t
+uint64_t
 xfs_default_resblks(xfs_mount_t *mp)
 {
-	__uint64_t resblks;
+	uint64_t resblks;
 
 	/*
 	 * We default to 5% or 8192 fsbs of space reserved, whichever is
@@ -612,7 +612,7 @@ xfs_default_resblks(xfs_mount_t *mp)
 	 */
 	resblks = mp->m_sb.sb_dblocks;
 	do_div(resblks, 20);
-	resblks = min_t(__uint64_t, resblks, 8192);
+	resblks = min_t(uint64_t, resblks, 8192);
 	return resblks;
 }
 
@@ -632,7 +632,7 @@ xfs_mountfs(
 {
 	struct xfs_sb		*sbp = &(mp->m_sb);
 	struct xfs_inode	*rip;
-	__uint64_t		resblks;
+	uint64_t		resblks;
 	uint			quotamount = 0;
 	uint			quotaflags = 0;
 	int			error = 0;
@@ -1060,7 +1060,7 @@ void
 xfs_unmountfs(
 	struct xfs_mount	*mp)
 {
-	__uint64_t		resblks;
+	uint64_t		resblks;
 	int			error;
 
 	cancel_delayed_work_sync(&mp->m_eofblocks_work);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9fa312a41c930..305d95394e2d1 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -108,10 +108,10 @@ typedef struct xfs_mount {
 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
-	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */
-	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
-	__uint8_t		m_agno_log;	/* log #ag's */
-	__uint8_t		m_agino_log;	/* #bits for agino in inum */
+	uint8_t			m_blkbit_log;	/* blocklog + NBBY */
+	uint8_t			m_blkbb_log;	/* blocklog - BBSHIFT */
+	uint8_t			m_agno_log;	/* log #ag's */
+	uint8_t			m_agino_log;	/* #bits for agino in inum */
 	uint			m_inode_cluster_size;/* min inode buf size */
 	uint			m_blockmask;	/* sb_blocksize-1 */
 	uint			m_blockwsize;	/* sb_blocksize in words */
@@ -139,7 +139,7 @@ typedef struct xfs_mount {
 	struct mutex		m_growlock;	/* growfs mutex */
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
 	uint			m_dmevmask;	/* DMI events for this FS */
-	__uint64_t		m_flags;	/* global mount flags */
+	uint64_t		m_flags;	/* global mount flags */
 	bool			m_inotbt_nores; /* no per-AG finobt resv. */
 	int			m_ialloc_inos;	/* inodes in inode allocation */
 	int			m_ialloc_blks;	/* blocks in inode allocation */
@@ -148,14 +148,14 @@ typedef struct xfs_mount {
 	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
 	uint			m_qflags;	/* quota status flags */
 	struct xfs_trans_resv	m_resv;		/* precomputed res values */
-	__uint64_t		m_maxicount;	/* maximum inode count */
-	__uint64_t		m_resblks;	/* total reserved blocks */
-	__uint64_t		m_resblks_avail;/* available reserved blocks */
-	__uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
+	uint64_t		m_maxicount;	/* maximum inode count */
+	uint64_t		m_resblks;	/* total reserved blocks */
+	uint64_t		m_resblks_avail;/* available reserved blocks */
+	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
 	int			m_dalign;	/* stripe unit */
 	int			m_swidth;	/* stripe width */
 	int			m_sinoalign;	/* stripe unit inode alignment */
-	__uint8_t		m_sectbb_log;	/* sectlog - BBSHIFT */
+	uint8_t			m_sectbb_log;	/* sectlog - BBSHIFT */
 	const struct xfs_nameops *m_dirnameops;	/* vector of dir name ops */
 	const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
 	const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
@@ -194,7 +194,7 @@ typedef struct xfs_mount {
 	 * ever support shrinks it would have to be persisted in addition
 	 * to various other kinds of pain inflicted on the pNFS server.
 	 */
-	__uint32_t		m_generation;
+	uint32_t		m_generation;
 
 	bool			m_fail_unmount;
 #ifdef DEBUG
@@ -367,12 +367,12 @@ typedef struct xfs_perag {
 	char		pagi_init;	/* this agi's entry is initialized */
 	char		pagf_metadata;	/* the agf is preferred to be metadata */
 	char		pagi_inodeok;	/* The agi is ok for inodes */
-	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
+	uint8_t		pagf_levels[XFS_BTNUM_AGF];
 					/* # of levels in bno & cnt btree */
-	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
+	uint32_t	pagf_flcount;	/* count of blocks in freelist */
 	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
 	xfs_extlen_t	pagf_longest;	/* longest free space */
-	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
+	uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
 	xfs_agino_t	pagi_freecount;	/* number of free inodes */
 	xfs_agino_t	pagi_count;	/* number of allocated inodes */
 
@@ -411,7 +411,7 @@ typedef struct xfs_perag {
 	struct xfs_ag_resv	pag_agfl_resv;
 
 	/* reference count */
-	__uint8_t		pagf_refcount_level;
+	uint8_t			pagf_refcount_level;
 } xfs_perag_t;
 
 static inline struct xfs_ag_resv *
@@ -434,7 +434,7 @@ void xfs_buf_hash_destroy(xfs_perag_t *pag);
 
 extern void	xfs_uuid_table_free(void);
 extern int	xfs_log_sbcount(xfs_mount_t *);
-extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
+extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
 extern int	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
 				     xfs_agnumber_t *maxagi);
@@ -450,7 +450,7 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int	xfs_readsb(xfs_mount_t *, int);
 extern void	xfs_freesb(xfs_mount_t *);
 extern bool	xfs_fs_writable(struct xfs_mount *mp, int level);
-extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
+extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t);
 
 extern int	xfs_dev_is_read_only(struct xfs_mount *, char *);
 
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 3e52d5de7ae1e..2be6d2735ca9f 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -33,7 +33,7 @@ xfs_fill_statvfs_from_dquot(
 	struct kstatfs		*statp,
 	struct xfs_dquot	*dqp)
 {
-	__uint64_t		limit;
+	uint64_t		limit;
 
 	limit = dqp->q_core.d_blk_softlimit ?
 		be64_to_cpu(dqp->q_core.d_blk_softlimit) :
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c57aa7f180877..91472193643b1 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1256,13 +1256,13 @@ xfs_rtpick_extent(
 {
 	xfs_rtblock_t	b;		/* result block */
 	int		log2;		/* log of sequence number */
-	__uint64_t	resid;		/* residual after log removed */
-	__uint64_t	seq;		/* sequence number of file creation */
-	__uint64_t	*seqp;		/* pointer to seqno in inode */
+	uint64_t	resid;		/* residual after log removed */
+	uint64_t	seq;		/* sequence number of file creation */
+	uint64_t	*seqp;		/* pointer to seqno in inode */
 
 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
 
-	seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
+	seqp = (uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
 	if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
 		mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
 		*seqp = 0;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f11282c96887a..056e12b421ebc 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -33,9 +33,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
 {
 	int		i, j;
 	int		len = 0;
-	__uint64_t	xs_xstrat_bytes = 0;
-	__uint64_t	xs_write_bytes = 0;
-	__uint64_t	xs_read_bytes = 0;
+	uint64_t	xs_xstrat_bytes = 0;
+	uint64_t	xs_write_bytes = 0;
+	uint64_t	xs_read_bytes = 0;
 
 	static const struct xstats_entry {
 		char	*desc;
@@ -100,7 +100,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
 void xfs_stats_clearall(struct xfsstats __percpu *stats)
 {
 	int		c;
-	__uint32_t	vn_active;
+	uint32_t	vn_active;
 
 	xfs_notice(NULL, "Clearing xfsstats");
 	for_each_possible_cpu(c) {
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 375840f5a99aa..f64d0ae345c4d 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -54,125 +54,125 @@ enum {
  */
 struct __xfsstats {
 # define XFSSTAT_END_EXTENT_ALLOC	4
-	__uint32_t		xs_allocx;
-	__uint32_t		xs_allocb;
-	__uint32_t		xs_freex;
-	__uint32_t		xs_freeb;
+	uint32_t		xs_allocx;
+	uint32_t		xs_allocb;
+	uint32_t		xs_freex;
+	uint32_t		xs_freeb;
 # define XFSSTAT_END_ALLOC_BTREE	(XFSSTAT_END_EXTENT_ALLOC+4)
-	__uint32_t		xs_abt_lookup;
-	__uint32_t		xs_abt_compare;
-	__uint32_t		xs_abt_insrec;
-	__uint32_t		xs_abt_delrec;
+	uint32_t		xs_abt_lookup;
+	uint32_t		xs_abt_compare;
+	uint32_t		xs_abt_insrec;
+	uint32_t		xs_abt_delrec;
 # define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
-	__uint32_t		xs_blk_mapr;
-	__uint32_t		xs_blk_mapw;
-	__uint32_t		xs_blk_unmap;
-	__uint32_t		xs_add_exlist;
-	__uint32_t		xs_del_exlist;
-	__uint32_t		xs_look_exlist;
-	__uint32_t		xs_cmp_exlist;
+	uint32_t		xs_blk_mapr;
+	uint32_t		xs_blk_mapw;
+	uint32_t		xs_blk_unmap;
+	uint32_t		xs_add_exlist;
+	uint32_t		xs_del_exlist;
+	uint32_t		xs_look_exlist;
+	uint32_t		xs_cmp_exlist;
 # define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
-	__uint32_t		xs_bmbt_lookup;
-	__uint32_t		xs_bmbt_compare;
-	__uint32_t		xs_bmbt_insrec;
-	__uint32_t		xs_bmbt_delrec;
+	uint32_t		xs_bmbt_lookup;
+	uint32_t		xs_bmbt_compare;
+	uint32_t		xs_bmbt_insrec;
+	uint32_t		xs_bmbt_delrec;
 # define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
-	__uint32_t		xs_dir_lookup;
-	__uint32_t		xs_dir_create;
-	__uint32_t		xs_dir_remove;
-	__uint32_t		xs_dir_getdents;
+	uint32_t		xs_dir_lookup;
+	uint32_t		xs_dir_create;
+	uint32_t		xs_dir_remove;
+	uint32_t		xs_dir_getdents;
 # define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
-	__uint32_t		xs_trans_sync;
-	__uint32_t		xs_trans_async;
-	__uint32_t		xs_trans_empty;
+	uint32_t		xs_trans_sync;
+	uint32_t		xs_trans_async;
+	uint32_t		xs_trans_empty;
 # define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
-	__uint32_t		xs_ig_attempts;
-	__uint32_t		xs_ig_found;
-	__uint32_t		xs_ig_frecycle;
-	__uint32_t		xs_ig_missed;
-	__uint32_t		xs_ig_dup;
-	__uint32_t		xs_ig_reclaims;
-	__uint32_t		xs_ig_attrchg;
+	uint32_t		xs_ig_attempts;
+	uint32_t		xs_ig_found;
+	uint32_t		xs_ig_frecycle;
+	uint32_t		xs_ig_missed;
+	uint32_t		xs_ig_dup;
+	uint32_t		xs_ig_reclaims;
+	uint32_t		xs_ig_attrchg;
 # define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
-	__uint32_t		xs_log_writes;
-	__uint32_t		xs_log_blocks;
-	__uint32_t		xs_log_noiclogs;
-	__uint32_t		xs_log_force;
-	__uint32_t		xs_log_force_sleep;
+	uint32_t		xs_log_writes;
+	uint32_t		xs_log_blocks;
+	uint32_t		xs_log_noiclogs;
+	uint32_t		xs_log_force;
+	uint32_t		xs_log_force_sleep;
 # define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
-	__uint32_t		xs_try_logspace;
-	__uint32_t		xs_sleep_logspace;
-	__uint32_t		xs_push_ail;
-	__uint32_t		xs_push_ail_success;
-	__uint32_t		xs_push_ail_pushbuf;
-	__uint32_t		xs_push_ail_pinned;
-	__uint32_t		xs_push_ail_locked;
-	__uint32_t		xs_push_ail_flushing;
-	__uint32_t		xs_push_ail_restarts;
-	__uint32_t		xs_push_ail_flush;
+	uint32_t		xs_try_logspace;
+	uint32_t		xs_sleep_logspace;
+	uint32_t		xs_push_ail;
+	uint32_t		xs_push_ail_success;
+	uint32_t		xs_push_ail_pushbuf;
+	uint32_t		xs_push_ail_pinned;
+	uint32_t		xs_push_ail_locked;
+	uint32_t		xs_push_ail_flushing;
+	uint32_t		xs_push_ail_restarts;
+	uint32_t		xs_push_ail_flush;
 # define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
-	__uint32_t		xs_xstrat_quick;
-	__uint32_t		xs_xstrat_split;
+	uint32_t		xs_xstrat_quick;
+	uint32_t		xs_xstrat_split;
 # define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
-	__uint32_t		xs_write_calls;
-	__uint32_t		xs_read_calls;
+	uint32_t		xs_write_calls;
+	uint32_t		xs_read_calls;
 # define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
-	__uint32_t		xs_attr_get;
-	__uint32_t		xs_attr_set;
-	__uint32_t		xs_attr_remove;
-	__uint32_t		xs_attr_list;
+	uint32_t		xs_attr_get;
+	uint32_t		xs_attr_set;
+	uint32_t		xs_attr_remove;
+	uint32_t		xs_attr_list;
 # define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
-	__uint32_t		xs_iflush_count;
-	__uint32_t		xs_icluster_flushcnt;
-	__uint32_t		xs_icluster_flushinode;
+	uint32_t		xs_iflush_count;
+	uint32_t		xs_icluster_flushcnt;
+	uint32_t		xs_icluster_flushinode;
 # define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
-	__uint32_t		vn_active;	/* # vnodes not on free lists */
-	__uint32_t		vn_alloc;	/* # times vn_alloc called */
-	__uint32_t		vn_get;		/* # times vn_get called */
-	__uint32_t		vn_hold;	/* # times vn_hold called */
-	__uint32_t		vn_rele;	/* # times vn_rele called */
-	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
-	__uint32_t		vn_remove;	/* # times vn_remove called */
-	__uint32_t		vn_free;	/* # times vn_free called */
+	uint32_t		vn_active;	/* # vnodes not on free lists */
+	uint32_t		vn_alloc;	/* # times vn_alloc called */
+	uint32_t		vn_get;		/* # times vn_get called */
+	uint32_t		vn_hold;	/* # times vn_hold called */
+	uint32_t		vn_rele;	/* # times vn_rele called */
+	uint32_t		vn_reclaim;	/* # times vn_reclaim called */
+	uint32_t		vn_remove;	/* # times vn_remove called */
+	uint32_t		vn_free;	/* # times vn_free called */
 #define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
-	__uint32_t		xb_get;
-	__uint32_t		xb_create;
-	__uint32_t		xb_get_locked;
-	__uint32_t		xb_get_locked_waited;
-	__uint32_t		xb_busy_locked;
-	__uint32_t		xb_miss_locked;
-	__uint32_t		xb_page_retries;
-	__uint32_t		xb_page_found;
-	__uint32_t		xb_get_read;
+	uint32_t		xb_get;
+	uint32_t		xb_create;
+	uint32_t		xb_get_locked;
+	uint32_t		xb_get_locked_waited;
+	uint32_t		xb_busy_locked;
+	uint32_t		xb_miss_locked;
+	uint32_t		xb_page_retries;
+	uint32_t		xb_page_found;
+	uint32_t		xb_get_read;
 /* Version 2 btree counters */
 #define XFSSTAT_END_ABTB_V2		(XFSSTAT_END_BUF + __XBTS_MAX)
-	__uint32_t		xs_abtb_2[__XBTS_MAX];
+	uint32_t		xs_abtb_2[__XBTS_MAX];
 #define XFSSTAT_END_ABTC_V2		(XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
-	__uint32_t		xs_abtc_2[__XBTS_MAX];
+	uint32_t		xs_abtc_2[__XBTS_MAX];
 #define XFSSTAT_END_BMBT_V2		(XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
-	__uint32_t		xs_bmbt_2[__XBTS_MAX];
+	uint32_t		xs_bmbt_2[__XBTS_MAX];
 #define XFSSTAT_END_IBT_V2		(XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
-	__uint32_t		xs_ibt_2[__XBTS_MAX];
+	uint32_t		xs_ibt_2[__XBTS_MAX];
 #define XFSSTAT_END_FIBT_V2		(XFSSTAT_END_IBT_V2 + __XBTS_MAX)
-	__uint32_t		xs_fibt_2[__XBTS_MAX];
+	uint32_t		xs_fibt_2[__XBTS_MAX];
 #define XFSSTAT_END_RMAP_V2		(XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
-	__uint32_t		xs_rmap_2[__XBTS_MAX];
+	uint32_t		xs_rmap_2[__XBTS_MAX];
 #define XFSSTAT_END_REFCOUNT		(XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
-	__uint32_t		xs_refcbt_2[__XBTS_MAX];
+	uint32_t		xs_refcbt_2[__XBTS_MAX];
 #define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_REFCOUNT + 6)
-	__uint32_t		xs_qm_dqreclaims;
-	__uint32_t		xs_qm_dqreclaim_misses;
-	__uint32_t		xs_qm_dquot_dups;
-	__uint32_t		xs_qm_dqcachemisses;
-	__uint32_t		xs_qm_dqcachehits;
-	__uint32_t		xs_qm_dqwants;
+	uint32_t		xs_qm_dqreclaims;
+	uint32_t		xs_qm_dqreclaim_misses;
+	uint32_t		xs_qm_dquot_dups;
+	uint32_t		xs_qm_dqcachemisses;
+	uint32_t		xs_qm_dqcachehits;
+	uint32_t		xs_qm_dqwants;
 #define XFSSTAT_END_QM			(XFSSTAT_END_XQMSTAT+2)
-	__uint32_t		xs_qm_dquot;
-	__uint32_t		xs_qm_dquot_unused;
+	uint32_t		xs_qm_dquot;
+	uint32_t		xs_qm_dquot_unused;
 /* Extra precision counters */
-	__uint64_t		xs_xstrat_bytes;
-	__uint64_t		xs_write_bytes;
-	__uint64_t		xs_read_bytes;
+	uint64_t		xs_xstrat_bytes;
+	uint64_t		xs_write_bytes;
+	uint64_t		xs_read_bytes;
 };
 
 struct xfsstats {
@@ -186,7 +186,7 @@ struct xfsstats {
  * simple wrapper for getting the array index of s struct member offset
  */
 #define XFS_STATS_CALC_INDEX(member)	\
-	(offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t))
+	(offsetof(struct __xfsstats, member) / (int)sizeof(uint32_t))
 
 
 int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575f101db..a19aab897c9b3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -196,7 +196,7 @@ xfs_parseargs(
 	int			dsunit = 0;
 	int			dswidth = 0;
 	int			iosize = 0;
-	__uint8_t		iosizelog = 0;
+	uint8_t			iosizelog = 0;
 
 	/*
 	 * set up the mount name first so all the errors will refer to the
@@ -556,7 +556,7 @@ xfs_showargs(
 
 	return 0;
 }
-static __uint64_t
+static uint64_t
 xfs_max_file_offset(
 	unsigned int		blockshift)
 {
@@ -587,7 +587,7 @@ xfs_max_file_offset(
 # endif
 #endif
 
-	return (((__uint64_t)pagefactor) << bitshift) - 1;
+	return (((uint64_t)pagefactor) << bitshift) - 1;
 }
 
 /*
@@ -622,7 +622,7 @@ xfs_set_inode_alloc(
 	 * the max inode percentage.  Used only for inode32.
 	 */
 	if (mp->m_maxicount) {
-		__uint64_t	icount;
+		uint64_t	icount;
 
 		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 		do_div(icount, 100);
@@ -1088,12 +1088,12 @@ xfs_fs_statfs(
 	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
 	xfs_sb_t		*sbp = &mp->m_sb;
 	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
-	__uint64_t		fakeinos, id;
-	__uint64_t		icount;
-	__uint64_t		ifree;
-	__uint64_t		fdblocks;
+	uint64_t		fakeinos, id;
+	uint64_t		icount;
+	uint64_t		ifree;
+	uint64_t		fdblocks;
 	xfs_extlen_t		lsize;
-	__int64_t		ffree;
+	int64_t			ffree;
 
 	statp->f_type = XFS_SB_MAGIC;
 	statp->f_namelen = MAXNAMELEN - 1;
@@ -1116,7 +1116,7 @@ xfs_fs_statfs(
 	statp->f_bavail = statp->f_bfree;
 
 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-	statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+	statp->f_files = MIN(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
 	if (mp->m_maxicount)
 		statp->f_files = min_t(typeof(statp->f_files),
 					statp->f_files,
@@ -1129,7 +1129,7 @@ xfs_fs_statfs(
 
 	/* make sure statp->f_ffree does not underflow */
 	ffree = statp->f_files - (icount - ifree);
-	statp->f_ffree = max_t(__int64_t, ffree, 0);
+	statp->f_ffree = max_t(int64_t, ffree, 0);
 
 
 	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
@@ -1142,7 +1142,7 @@ xfs_fs_statfs(
 STATIC void
 xfs_save_resvblks(struct xfs_mount *mp)
 {
-	__uint64_t resblks = 0;
+	uint64_t resblks = 0;
 
 	mp->m_resblks_save = mp->m_resblks;
 	xfs_reserve_blocks(mp, &resblks, NULL);
@@ -1151,7 +1151,7 @@ xfs_save_resvblks(struct xfs_mount *mp)
 STATIC void
 xfs_restore_resvblks(struct xfs_mount *mp)
 {
-	__uint64_t resblks;
+	uint64_t resblks;
 
 	if (mp->m_resblks_save) {
 		resblks = mp->m_resblks_save;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a560bfd392c10..bcc3cdf8e1c5d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -251,7 +251,7 @@ TRACE_EVENT(xfs_iext_insert,
 		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
 		  (long)__entry->idx,
 		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
+		  (int64_t)__entry->startblock,
 		  __entry->blockcount,
 		  __entry->state,
 		  (char *)__entry->caller_ip)
@@ -295,7 +295,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
 		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
 		  (long)__entry->idx,
 		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
+		  (int64_t)__entry->startblock,
 		  __entry->blockcount,
 		  __entry->state,
 		  (char *)__entry->caller_ip)
@@ -1281,7 +1281,7 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
 		  __entry->count,
 		  __print_symbolic(__entry->type, XFS_IO_TYPES),
 		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
+		  (int64_t)__entry->startblock,
 		  __entry->blockcount)
 )
 
@@ -2039,7 +2039,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
 	TP_ARGS(log, buf_f),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
-		__field(__int64_t, blkno)
+		__field(int64_t, blkno)
 		__field(unsigned short, len)
 		__field(unsigned short, flags)
 		__field(unsigned short, size)
@@ -2088,7 +2088,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
 		__field(int, fields)
 		__field(unsigned short, asize)
 		__field(unsigned short, dsize)
-		__field(__int64_t, blkno)
+		__field(int64_t, blkno)
 		__field(int, len)
 		__field(int, boffset)
 	),
@@ -3238,8 +3238,8 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class,
 		__field(xfs_agnumber_t, agno)
 		__field(xfs_fsblock_t, bno)
 		__field(xfs_filblks_t, len)
-		__field(__uint64_t, owner)
-		__field(__uint64_t, offset)
+		__field(uint64_t, owner)
+		__field(uint64_t, offset)
 		__field(unsigned int, flags)
 	),
 	TP_fast_assign(
@@ -3279,9 +3279,9 @@ DECLARE_EVENT_CLASS(xfs_getfsmap_class,
 		__field(dev_t, keydev)
 		__field(xfs_daddr_t, block)
 		__field(xfs_daddr_t, len)
-		__field(__uint64_t, owner)
-		__field(__uint64_t, offset)
-		__field(__uint64_t, flags)
+		__field(uint64_t, owner)
+		__field(uint64_t, offset)
+		__field(uint64_t, flags)
 	),
 	TP_fast_assign(
 		__entry->dev = mp->m_super->s_dev;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 82bb61e330d91..6bdad6f589345 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -245,7 +245,7 @@ struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
 		struct xfs_rui_log_item *ruip);
 int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
 		struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
-		__uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+		uint64_t owner, int whichfork, xfs_fileoff_t startoff,
 		xfs_fsblock_t startblock, xfs_filblks_t blockcount,
 		xfs_exntst_t state, struct xfs_btree_cur **pcur);
 
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
index 9ead064b5e90a..9b577beb43d7c 100644
--- a/fs/xfs/xfs_trans_rmap.c
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -96,7 +96,7 @@ xfs_trans_log_finish_rmap_update(
 	struct xfs_trans		*tp,
 	struct xfs_rud_log_item		*rudp,
 	enum xfs_rmap_intent_type	type,
-	__uint64_t			owner,
+	uint64_t			owner,
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,
-- 
GitLab


From 38dee376d67047e9877a34e408013852c9729eb8 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:06 -0700
Subject: [PATCH 0283/1958] xfs: always compile the btree inorder check
 functions

The btree record and key inorder check functions will be used by the
btree scrubber code, so make sure they're always built.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_alloc_btree.c    | 6 ------
 fs/xfs/libxfs/xfs_bmap_btree.c     | 4 ----
 fs/xfs/libxfs/xfs_btree.h          | 2 --
 fs/xfs/libxfs/xfs_ialloc_btree.c   | 6 ------
 fs/xfs/libxfs/xfs_refcount_btree.c | 4 ----
 fs/xfs/libxfs/xfs_rmap_btree.c     | 4 ----
 6 files changed, 26 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 5020cbc02db85..cfde0a0f97061 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -395,7 +395,6 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
 };
 
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_bnobt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -442,7 +441,6 @@ xfs_cntbt_recs_inorder(
 		 be32_to_cpu(r1->alloc.ar_startblock) <
 		 be32_to_cpu(r2->alloc.ar_startblock));
 }
-#endif /* DEBUG */
 
 static const struct xfs_btree_ops xfs_bnobt_ops = {
 	.rec_len		= sizeof(xfs_alloc_rec_t),
@@ -462,10 +460,8 @@ static const struct xfs_btree_ops xfs_bnobt_ops = {
 	.key_diff		= xfs_bnobt_key_diff,
 	.buf_ops		= &xfs_allocbt_buf_ops,
 	.diff_two_keys		= xfs_bnobt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_bnobt_keys_inorder,
 	.recs_inorder		= xfs_bnobt_recs_inorder,
-#endif
 };
 
 static const struct xfs_btree_ops xfs_cntbt_ops = {
@@ -486,10 +482,8 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
 	.key_diff		= xfs_cntbt_key_diff,
 	.buf_ops		= &xfs_allocbt_buf_ops,
 	.diff_two_keys		= xfs_cntbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_cntbt_keys_inorder,
 	.recs_inorder		= xfs_cntbt_recs_inorder,
-#endif
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 5e2b3dc138ade..e23495e236209 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -687,7 +687,6 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
 };
 
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_bmbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -708,7 +707,6 @@ xfs_bmbt_recs_inorder(
 		xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
 		xfs_bmbt_disk_get_startoff(&r2->bmbt);
 }
-#endif	/* DEBUG */
 
 static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.rec_len		= sizeof(xfs_bmbt_rec_t),
@@ -726,10 +724,8 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.init_ptr_from_cur	= xfs_bmbt_init_ptr_from_cur,
 	.key_diff		= xfs_bmbt_key_diff,
 	.buf_ops		= &xfs_bmbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_bmbt_keys_inorder,
 	.recs_inorder		= xfs_bmbt_recs_inorder,
-#endif
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 0a931f6441032..177a364ce5cff 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -163,7 +163,6 @@ struct xfs_btree_ops {
 
 	const struct xfs_buf_ops	*buf_ops;
 
-#if defined(DEBUG) || defined(XFS_WARN)
 	/* check that k1 is lower than k2 */
 	int	(*keys_inorder)(struct xfs_btree_cur *cur,
 				union xfs_btree_key *k1,
@@ -173,7 +172,6 @@ struct xfs_btree_ops {
 	int	(*recs_inorder)(struct xfs_btree_cur *cur,
 				union xfs_btree_rec *r1,
 				union xfs_btree_rec *r2);
-#endif
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index ed52d99732b85..6b1ddeb7e5c14 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -302,7 +302,6 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
 	.verify_write = xfs_inobt_write_verify,
 };
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_inobt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -322,7 +321,6 @@ xfs_inobt_recs_inorder(
 	return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
 		be32_to_cpu(r2->inobt.ir_startino);
 }
-#endif	/* DEBUG */
 
 static const struct xfs_btree_ops xfs_inobt_ops = {
 	.rec_len		= sizeof(xfs_inobt_rec_t),
@@ -339,10 +337,8 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
 	.init_ptr_from_cur	= xfs_inobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
 	.buf_ops		= &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
-#endif
 };
 
 static const struct xfs_btree_ops xfs_finobt_ops = {
@@ -360,10 +356,8 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
 	.init_ptr_from_cur	= xfs_finobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
 	.buf_ops		= &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
-#endif
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 65c222ae5b048..3c59dd3d58d71 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -285,7 +285,6 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
 	.verify_write		= xfs_refcountbt_write_verify,
 };
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_refcountbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -306,7 +305,6 @@ xfs_refcountbt_recs_inorder(
 		be32_to_cpu(r1->refc.rc_blockcount) <=
 		be32_to_cpu(r2->refc.rc_startblock);
 }
-#endif
 
 static const struct xfs_btree_ops xfs_refcountbt_ops = {
 	.rec_len		= sizeof(struct xfs_refcount_rec),
@@ -325,10 +323,8 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = {
 	.key_diff		= xfs_refcountbt_key_diff,
 	.buf_ops		= &xfs_refcountbt_buf_ops,
 	.diff_two_keys		= xfs_refcountbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_refcountbt_keys_inorder,
 	.recs_inorder		= xfs_refcountbt_recs_inorder,
-#endif
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index c5b4a1c862b0c..9d9c9192584c9 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -377,7 +377,6 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
 	.verify_write		= xfs_rmapbt_write_verify,
 };
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_rmapbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -437,7 +436,6 @@ xfs_rmapbt_recs_inorder(
 		return 1;
 	return 0;
 }
-#endif	/* DEBUG */
 
 static const struct xfs_btree_ops xfs_rmapbt_ops = {
 	.rec_len		= sizeof(struct xfs_rmap_rec),
@@ -456,10 +454,8 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
 	.key_diff		= xfs_rmapbt_key_diff,
 	.buf_ops		= &xfs_rmapbt_buf_ops,
 	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_rmapbt_keys_inorder,
 	.recs_inorder		= xfs_rmapbt_recs_inorder,
-#endif
 };
 
 /*
-- 
GitLab


From 2678809799e6e37db0800725157f5ebfc03a9df7 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:07 -0700
Subject: [PATCH 0284/1958] xfs: export various function for the online
 scrubber

Export various internal functions so that the online scrubber can use
them to check the state of metadata.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_alloc.c     |  2 +-
 fs/xfs/libxfs/xfs_alloc.h     |  2 ++
 fs/xfs/libxfs/xfs_btree.c     | 12 ++++++------
 fs/xfs/libxfs/xfs_btree.h     | 13 +++++++++++++
 fs/xfs/libxfs/xfs_dir2_leaf.c |  2 +-
 fs/xfs/libxfs/xfs_dir2_priv.h |  2 ++
 fs/xfs/libxfs/xfs_inode_buf.c |  2 +-
 fs/xfs/libxfs/xfs_inode_buf.h |  3 +++
 fs/xfs/libxfs/xfs_rmap.c      |  3 ++-
 fs/xfs/libxfs/xfs_rmap.h      |  3 +++
 fs/xfs/libxfs/xfs_rtbitmap.c  |  2 +-
 fs/xfs/xfs_itable.c           |  2 +-
 fs/xfs/xfs_itable.h           |  2 ++
 fs/xfs/xfs_rtalloc.h          |  3 +++
 14 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 7486401ccbd36..fefa8daa1c36e 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -606,7 +606,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
 /*
  * Read in the allocation group free block array.
  */
-STATIC int				/* error */
+int					/* error */
 xfs_alloc_read_agfl(
 	xfs_mount_t	*mp,		/* mount point structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 77d9c27330abc..ef26edc2e9383 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -213,6 +213,8 @@ xfs_alloc_get_rec(
 
 int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
 			xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp,
+			xfs_agnumber_t agno, struct xfs_buf **bpp);
 int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
 int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
 		struct xfs_buf **agbp);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2aac3f499d97c..2f8075aa8725f 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -568,7 +568,7 @@ xfs_btree_ptr_offset(
 /*
  * Return a pointer to the n-th record in the btree block.
  */
-STATIC union xfs_btree_rec *
+union xfs_btree_rec *
 xfs_btree_rec_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -581,7 +581,7 @@ xfs_btree_rec_addr(
 /*
  * Return a pointer to the n-th key in the btree block.
  */
-STATIC union xfs_btree_key *
+union xfs_btree_key *
 xfs_btree_key_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -594,7 +594,7 @@ xfs_btree_key_addr(
 /*
  * Return a pointer to the n-th high key in the btree block.
  */
-STATIC union xfs_btree_key *
+union xfs_btree_key *
 xfs_btree_high_key_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -607,7 +607,7 @@ xfs_btree_high_key_addr(
 /*
  * Return a pointer to the n-th block pointer in the btree block.
  */
-STATIC union xfs_btree_ptr *
+union xfs_btree_ptr *
 xfs_btree_ptr_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -641,7 +641,7 @@ xfs_btree_get_iroot(
  * Retrieve the block pointer from the cursor at the given level.
  * This may be an inode btree root or from a buffer.
  */
-STATIC struct xfs_btree_block *		/* generic btree block pointer */
+struct xfs_btree_block *		/* generic btree block pointer */
 xfs_btree_get_block(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			level,	/* level in btree */
@@ -1756,7 +1756,7 @@ xfs_btree_decrement(
 	return error;
 }
 
-STATIC int
+int
 xfs_btree_lookup_get_block(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			level,	/* level in the btree */
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 177a364ce5cff..9c95e965cfe53 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -504,4 +504,17 @@ int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
 
 int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
 
+union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_key_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
+		union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
+struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
+		int level, struct xfs_buf **bpp);
+
 #endif	/* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 68bf3e860a90e..7002024a5d0d4 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -256,7 +256,7 @@ const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
 	.verify_write = xfs_dir3_leafn_write_verify,
 };
 
-static int
+int
 xfs_dir3_leaf_read(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*dp,
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 011df4da6cc23..576f2d267fa73 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -58,6 +58,8 @@ extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
 		struct xfs_buf **bpp);
 
 /* xfs_dir2_leaf.c */
+extern int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
+		xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
 extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
 		xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
 extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d887af940f097..0c970cf7ab63f 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -381,7 +381,7 @@ xfs_log_dinode_to_disk(
 	}
 }
 
-static bool
+bool
 xfs_dinode_verify(
 	struct xfs_mount	*mp,
 	xfs_ino_t		ino,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 0827d7def1ce2..a9c97a356c306 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -82,4 +82,7 @@ void	xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #define	xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
 
+bool	xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
+			  struct xfs_dinode *dip);
+
 #endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 1bcb41fe01567..eda275beebe0b 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -179,7 +179,8 @@ xfs_rmap_delete(
 	return error;
 }
 
-static int
+/* Convert an internal btree record to an rmap record. */
+int
 xfs_rmap_btrec_to_irec(
 	union xfs_btree_rec	*rec,
 	struct xfs_rmap_irec	*irec)
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 265116d044f42..466ede637080e 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -216,5 +216,8 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		struct xfs_rmap_irec *irec, int	*stat);
 int xfs_rmap_compare(const struct xfs_rmap_irec *a,
 		const struct xfs_rmap_irec *b);
+union xfs_btree_rec;
+int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
+		struct xfs_rmap_irec *irec);
 
 #endif	/* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 26bba7f90fdfd..5d4e43ef4eea2 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -70,7 +70,7 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
  * Get a buffer for the bitmap or summary file block specified.
  * The buffer is returned read and locked.
  */
-static int
+int
 xfs_rtbuf_get(
 	xfs_mount_t	*mp,		/* file system mount structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 26d67ce3c18d9..c393a2f6d8c3f 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -31,7 +31,7 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 
-STATIC int
+int
 xfs_internal_inum(
 	xfs_mount_t	*mp,
 	xfs_ino_t	ino)
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 6ea8b3912fa4f..17e86e0541af1 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -96,4 +96,6 @@ xfs_inumbers(
 	void			__user *buffer, /* buffer with inode info */
 	inumbers_fmt_pf		formatter);
 
+int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+
 #endif	/* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index f13133e6f19fa..79defa722bf1c 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -107,6 +107,8 @@ xfs_growfs_rt(
 /*
  * From xfs_rtbitmap.c
  */
+int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
+		  xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
 int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
 		      xfs_rtblock_t start, xfs_extlen_t len, int val,
 		      xfs_rtblock_t *new, int *stat);
@@ -143,6 +145,7 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
 # define xfs_growfs_rt(mp,in)                           (ENOSYS)
 # define xfs_rtalloc_query_range(t,l,h,f,p)             (ENOSYS)
 # define xfs_rtalloc_query_all(t,f,p)                   (ENOSYS)
+# define xfs_rtbuf_get(m,t,b,i,p)                       (ENOSYS)
 static inline int		/* error */
 xfs_rtmount_init(
 	xfs_mount_t	*mp)	/* file system mount structure */
-- 
GitLab


From 118bb47e281cde728608633f1a358fb9f2ac0adc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:08 -0700
Subject: [PATCH 0285/1958] xfs: plumb in needed functions for range querying
 of various btrees

Plumb in the pieces (init_high_key, diff_two_keys) necessary to call
query_range on the inode space and block mapping btrees and to extract
raw btree records.  This will eventually be used by the inobt and bmbt
scrubbers.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap_btree.c   | 22 ++++++++++++++++++++++
 fs/xfs/libxfs/xfs_ialloc_btree.c | 26 ++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index e23495e236209..85de225130145 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -572,6 +572,16 @@ xfs_bmbt_init_key_from_rec(
 		cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
 }
 
+STATIC void
+xfs_bmbt_init_high_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	key->bmbt.br_startoff = cpu_to_be64(
+			xfs_bmbt_disk_get_startoff(&rec->bmbt) +
+			xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1);
+}
+
 STATIC void
 xfs_bmbt_init_rec_from_cur(
 	struct xfs_btree_cur	*cur,
@@ -597,6 +607,16 @@ xfs_bmbt_key_diff(
 				      cur->bc_rec.b.br_startoff;
 }
 
+STATIC int64_t
+xfs_bmbt_diff_two_keys(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
+			  be64_to_cpu(k2->bmbt.br_startoff);
+}
+
 static bool
 xfs_bmbt_verify(
 	struct xfs_buf		*bp)
@@ -720,9 +740,11 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.get_minrecs		= xfs_bmbt_get_minrecs,
 	.get_dmaxrecs		= xfs_bmbt_get_dmaxrecs,
 	.init_key_from_rec	= xfs_bmbt_init_key_from_rec,
+	.init_high_key_from_rec	= xfs_bmbt_init_high_key_from_rec,
 	.init_rec_from_cur	= xfs_bmbt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_bmbt_init_ptr_from_cur,
 	.key_diff		= xfs_bmbt_key_diff,
+	.diff_two_keys		= xfs_bmbt_diff_two_keys,
 	.buf_ops		= &xfs_bmbt_buf_ops,
 	.keys_inorder		= xfs_bmbt_keys_inorder,
 	.recs_inorder		= xfs_bmbt_recs_inorder,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 6b1ddeb7e5c14..317caba9faa67 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -174,6 +174,18 @@ xfs_inobt_init_key_from_rec(
 	key->inobt.ir_startino = rec->inobt.ir_startino;
 }
 
+STATIC void
+xfs_inobt_init_high_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	__u32			x;
+
+	x = be32_to_cpu(rec->inobt.ir_startino);
+	x += XFS_INODES_PER_CHUNK - 1;
+	key->inobt.ir_startino = cpu_to_be32(x);
+}
+
 STATIC void
 xfs_inobt_init_rec_from_cur(
 	struct xfs_btree_cur	*cur,
@@ -228,6 +240,16 @@ xfs_inobt_key_diff(
 			  cur->bc_rec.i.ir_startino;
 }
 
+STATIC int64_t
+xfs_inobt_diff_two_keys(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	return (int64_t)be32_to_cpu(k1->inobt.ir_startino) -
+			  be32_to_cpu(k2->inobt.ir_startino);
+}
+
 static int
 xfs_inobt_verify(
 	struct xfs_buf		*bp)
@@ -333,10 +355,12 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
 	.get_minrecs		= xfs_inobt_get_minrecs,
 	.get_maxrecs		= xfs_inobt_get_maxrecs,
 	.init_key_from_rec	= xfs_inobt_init_key_from_rec,
+	.init_high_key_from_rec	= xfs_inobt_init_high_key_from_rec,
 	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_inobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
 	.buf_ops		= &xfs_inobt_buf_ops,
+	.diff_two_keys		= xfs_inobt_diff_two_keys,
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
 };
@@ -352,10 +376,12 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
 	.get_minrecs		= xfs_inobt_get_minrecs,
 	.get_maxrecs		= xfs_inobt_get_maxrecs,
 	.init_key_from_rec	= xfs_inobt_init_key_from_rec,
+	.init_high_key_from_rec	= xfs_inobt_init_high_key_from_rec,
 	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_finobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
 	.buf_ops		= &xfs_inobt_buf_ops,
+	.diff_two_keys		= xfs_inobt_diff_two_keys,
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
 };
-- 
GitLab


From e936945ee49693f40217db82a7db55c94e34ce4c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:08 -0700
Subject: [PATCH 0286/1958] xfs: export _inobt_btrec_to_irec and
 _ialloc_cluster_alignment for scrub

Create a function to extract an in-core inobt record from a generic
btree_rec union so that scrub will be able to check inobt records
and check inode block alignment.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_ialloc.c | 44 +++++++++++++++++++++++---------------
 fs/xfs/libxfs/xfs_ialloc.h |  5 +++++
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 1e5ed940b84d2..c514fe98bbabc 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -46,7 +46,7 @@
 /*
  * Allocation group level functions.
  */
-static inline int
+int
 xfs_ialloc_cluster_alignment(
 	struct xfs_mount	*mp)
 {
@@ -98,24 +98,15 @@ xfs_inobt_update(
 	return xfs_btree_update(cur, &rec);
 }
 
-/*
- * Get the data from the pointed-to record.
- */
-int					/* error */
-xfs_inobt_get_rec(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_inobt_rec_incore_t	*irec,	/* btree record */
-	int			*stat)	/* output: success/failure */
+/* Convert on-disk btree record to incore inobt record. */
+void
+xfs_inobt_btrec_to_irec(
+	struct xfs_mount		*mp,
+	union xfs_btree_rec		*rec,
+	struct xfs_inobt_rec_incore	*irec)
 {
-	union xfs_btree_rec	*rec;
-	int			error;
-
-	error = xfs_btree_get_rec(cur, &rec, stat);
-	if (error || *stat == 0)
-		return error;
-
 	irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
-	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+	if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
 		irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
 		irec->ir_count = rec->inobt.ir_u.sp.ir_count;
 		irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
@@ -130,6 +121,25 @@ xfs_inobt_get_rec(
 				be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
 	}
 	irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int
+xfs_inobt_get_rec(
+	struct xfs_btree_cur		*cur,
+	struct xfs_inobt_rec_incore	*irec,
+	int				*stat)
+{
+	union xfs_btree_rec		*rec;
+	int				error;
+
+	error = xfs_btree_get_rec(cur, &rec, stat);
+	if (error || *stat == 0)
+		return error;
+
+	xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec);
 
 	return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 0bb89669fc072..b32cfb5aeb5b3 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -168,5 +168,10 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
 int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
 		xfs_agnumber_t agno, struct xfs_buf **bpp);
 
+union xfs_btree_rec;
+void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
+		struct xfs_inobt_rec_incore *irec);
+
+int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 
 #endif	/* __XFS_IALLOC_H__ */
-- 
GitLab


From 378f681c4b588c0432236e5517bf47b7e54ff2dc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 19 Jun 2017 08:58:56 -0700
Subject: [PATCH 0287/1958] xfs: check if an inode is cached and allocated

Check the inode cache for a particular inode number.  If it's in the
cache, check that it's not currently being reclaimed.  If it's not being
reclaimed, return zero if the inode is allocated.  This function will be
used by various scrubbers to decide if the cache is more up to date
than the disk in terms of checking if an inode is allocated.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_icache.c | 52 +++++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_icache.h |  4 ++++
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 990210fcb9c32..d78c853265d4f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -368,6 +368,11 @@ xfs_iget_cache_hit(
 	if (ip->i_flags & XFS_IRECLAIMABLE) {
 		trace_xfs_iget_reclaim(ip);
 
+		if (flags & XFS_IGET_INCORE) {
+			error = -EAGAIN;
+			goto out_error;
+		}
+
 		/*
 		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
 		 * from stomping over us while we recycle the inode.  We can't
@@ -432,7 +437,8 @@ xfs_iget_cache_hit(
 	if (lock_flags != 0)
 		xfs_ilock(ip, lock_flags);
 
-	xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
+	if (!(flags & XFS_IGET_INCORE))
+		xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
 	XFS_STATS_INC(mp, xs_ig_found);
 
 	return 0;
@@ -603,6 +609,10 @@ xfs_iget(
 			goto out_error_or_again;
 	} else {
 		rcu_read_unlock();
+		if (flags & XFS_IGET_INCORE) {
+			error = -ENOENT;
+			goto out_error_or_again;
+		}
 		XFS_STATS_INC(mp, xs_ig_missed);
 
 		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
@@ -623,7 +633,7 @@ xfs_iget(
 	return 0;
 
 out_error_or_again:
-	if (error == -EAGAIN) {
+	if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) {
 		delay(1);
 		goto again;
 	}
@@ -631,6 +641,44 @@ xfs_iget(
 	return error;
 }
 
+/*
+ * "Is this a cached inode that's also allocated?"
+ *
+ * Look up an inode by number in the given file system.  If the inode is
+ * in cache and isn't in purgatory, return 1 if the inode is allocated
+ * and 0 if it is not.  For all other cases (not in cache, being torn
+ * down, etc.), return a negative error code.
+ *
+ * The caller has to prevent inode allocation and freeing activity,
+ * presumably by locking the AGI buffer.   This is to ensure that an
+ * inode cannot transition from allocated to freed until the caller is
+ * ready to allow that.  If the inode is in an intermediate state (new,
+ * reclaimable, or being reclaimed), -EAGAIN will be returned; if the
+ * inode is not in the cache, -ENOENT will be returned.  The caller must
+ * deal with these scenarios appropriately.
+ *
+ * This is a specialized use case for the online scrubber; if you're
+ * reading this, you probably want xfs_iget.
+ */
+int
+xfs_icache_inode_is_allocated(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	xfs_ino_t		ino,
+	bool			*inuse)
+{
+	struct xfs_inode	*ip;
+	int			error;
+
+	error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
+	if (error)
+		return error;
+
+	*inuse = !!(VFS_I(ip)->i_mode);
+	IRELE(ip);
+	return 0;
+}
+
 /*
  * The inode lookup is done in batches to keep the amount of lock traffic and
  * radix tree lookups to a minimum. The batch size is a trade off between
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9183f77958ef7..bff4d85e54984 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -47,6 +47,7 @@ struct xfs_eofblocks {
 #define XFS_IGET_CREATE		0x1
 #define XFS_IGET_UNTRUSTED	0x2
 #define XFS_IGET_DONTCACHE	0x4
+#define XFS_IGET_INCORE		0x8	/* don't read from disk or reinit */
 
 /*
  * flags for AG inode iterator
@@ -126,4 +127,7 @@ xfs_fs_eofblocks_from_user(
 	return 0;
 }
 
+int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
+				  xfs_ino_t ino, bool *inuse);
+
 #endif
-- 
GitLab


From 92ff7285f1df5590d53ab7031f0b86552b59121d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:10 -0700
Subject: [PATCH 0288/1958] xfs: reflink find shared should take a transaction

Adapt _reflink_find_shared to take an optional transaction pointer.  The
inode scrubber code will need to decide (within transaction context) if
a file has shared blocks.  To avoid buffer deadlocks, we must pass the
tp through to this function's utility calls.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_bmap_util.c |  4 ++--
 fs/xfs/xfs_reflink.c   | 15 ++++++++-------
 fs/xfs/xfs_reflink.h   |  6 +++---
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0ea70a44c1a78..863180c418584 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -455,8 +455,8 @@ xfs_getbmap_adjust_shared(
 
 	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
 	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
-	error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount,
-			&ebno, &elen, true);
+	error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+			map->br_blockcount, &ebno, &elen, true);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ffe6fe7a7eb54..e25c9953a7c98 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -155,6 +155,7 @@
 int
 xfs_reflink_find_shared(
 	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		agbno,
 	xfs_extlen_t		aglen,
@@ -166,18 +167,18 @@ xfs_reflink_find_shared(
 	struct xfs_btree_cur	*cur;
 	int			error;
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
 	if (error)
 		return error;
 
-	cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
 
 	error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
 			find_end_of_shared);
 
 	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
 
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
 	return error;
 }
 
@@ -217,7 +218,7 @@ xfs_reflink_trim_around_shared(
 	agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
 	aglen = irec->br_blockcount;
 
-	error = xfs_reflink_find_shared(ip->i_mount, agno, agbno,
+	error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno,
 			aglen, &fbno, &flen, true);
 	if (error)
 		return error;
@@ -1373,8 +1374,8 @@ xfs_reflink_dirty_extents(
 			agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
 			aglen = map[1].br_blockcount;
 
-			error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
-					&rbno, &rlen, true);
+			error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+					aglen, &rbno, &rlen, true);
 			if (error)
 				goto out;
 			if (rbno == NULLAGBLOCK)
@@ -1445,7 +1446,7 @@ xfs_reflink_clear_inode_flag(
 		agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
 		aglen = map.br_blockcount;
 
-		error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
+		error = xfs_reflink_find_shared(mp, *tpp, agno, agbno, aglen,
 				&rbno, &rlen, false);
 		if (error)
 			return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index d29a7967f0290..b8cc5c3e27244 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -20,9 +20,9 @@
 #ifndef __XFS_REFLINK_H
 #define __XFS_REFLINK_H 1
 
-extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
-		xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
-		xfs_extlen_t *flen, bool find_maximal);
+extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
+		xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
+		xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
 		struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
 
-- 
GitLab


From ea7cdd7b7baffc40d1e60de7c93e5b1aabb3d081 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:11 -0700
Subject: [PATCH 0289/1958] xfs: separate function to check if inode shares
 extents

Separate the "clear reflink flag" function into one function that checks
if the flag is needed, and a second function that checks and clears the
flag.  The inode scrub code will want to check the necessity of the flag
without clearing it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_reflink.c | 88 ++++++++++++++++++++++++++------------------
 fs/xfs/xfs_reflink.h |  2 +
 2 files changed, 54 insertions(+), 36 deletions(-)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e25c9953a7c98..ab2270a871969 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1406,57 +1406,73 @@ xfs_reflink_dirty_extents(
 	return error;
 }
 
-/* Clear the inode reflink flag if there are no shared extents. */
+/* Does this inode need the reflink flag? */
 int
-xfs_reflink_clear_inode_flag(
-	struct xfs_inode	*ip,
-	struct xfs_trans	**tpp)
+xfs_reflink_inode_has_shared_extents(
+	struct xfs_trans		*tp,
+	struct xfs_inode		*ip,
+	bool				*has_shared)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		fbno;
-	xfs_filblks_t		end;
-	xfs_agnumber_t		agno;
-	xfs_agblock_t		agbno;
-	xfs_extlen_t		aglen;
-	xfs_agblock_t		rbno;
-	xfs_extlen_t		rlen;
-	struct xfs_bmbt_irec	map;
-	int			nmaps;
-	int			error = 0;
-
-	ASSERT(xfs_is_reflink_inode(ip));
+	struct xfs_bmbt_irec		got;
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_ifork		*ifp;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	xfs_extlen_t			aglen;
+	xfs_agblock_t			rbno;
+	xfs_extlen_t			rlen;
+	xfs_extnum_t			idx;
+	bool				found;
+	int				error;
 
-	fbno = 0;
-	end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
-	while (end - fbno > 0) {
-		nmaps = 1;
-		/*
-		 * Look for extents in the file.  Skip holes, delalloc, or
-		 * unwritten extents; they can't be reflinked.
-		 */
-		error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0);
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
 		if (error)
 			return error;
-		if (nmaps == 0)
-			break;
-		if (!xfs_bmap_is_real_extent(&map))
-			goto next;
+	}
 
-		agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
-		agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
-		aglen = map.br_blockcount;
+	*has_shared = false;
+	found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
+	while (found) {
+		if (isnullstartblock(got.br_startblock) ||
+		    got.br_state != XFS_EXT_NORM)
+			goto next;
+		agno = XFS_FSB_TO_AGNO(mp, got.br_startblock);
+		agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock);
+		aglen = got.br_blockcount;
 
-		error = xfs_reflink_find_shared(mp, *tpp, agno, agbno, aglen,
+		error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen,
 				&rbno, &rlen, false);
 		if (error)
 			return error;
 		/* Is there still a shared block here? */
-		if (rbno != NULLAGBLOCK)
+		if (rbno != NULLAGBLOCK) {
+			*has_shared = true;
 			return 0;
+		}
 next:
-		fbno = map.br_startoff + map.br_blockcount;
+		found = xfs_iext_get_extent(ifp, ++idx, &got);
 	}
 
+	return 0;
+}
+
+/* Clear the inode reflink flag if there are no shared extents. */
+int
+xfs_reflink_clear_inode_flag(
+	struct xfs_inode	*ip,
+	struct xfs_trans	**tpp)
+{
+	bool			needs_flag;
+	int			error = 0;
+
+	ASSERT(xfs_is_reflink_inode(ip));
+
+	error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
+	if (error || needs_flag)
+		return error;
+
 	/*
 	 * We didn't find any shared blocks so turn off the reflink flag.
 	 * First, get rid of any leftover CoW mappings.
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index b8cc5c3e27244..701487bab468c 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -47,6 +47,8 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
+		struct xfs_inode *ip, bool *has_shared);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
 		struct xfs_trans **tpp);
 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
-- 
GitLab


From e297a783e41560b44e3c14f38e420cba518113b8 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 19:58:56 -0400
Subject: [PATCH 0290/1958] random: add wait_for_random_bytes() API

This enables users of get_random_{bytes,u32,u64,int,long} to wait until
the pool is ready before using this function, in case they actually want
to have reliable randomness.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 41 +++++++++++++++++++++++++++++++----------
 include/linux/random.h |  1 +
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 01a260f674374..3853dd4f92e79 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -851,11 +851,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	}
 }
 
-static inline void crng_wait_ready(void)
-{
-	wait_event_interruptible(crng_init_wait, crng_ready());
-}
-
 static void _extract_crng(struct crng_state *crng,
 			  __u8 out[CHACHA20_BLOCK_SIZE])
 {
@@ -1477,7 +1472,10 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
  * number of good random numbers, suitable for key generation, seeding
  * TCP sequence numbers, etc.  It does not rely on the hardware random
  * number generator.  For random bytes direct from the hardware RNG
- * (when available), use get_random_bytes_arch().
+ * (when available), use get_random_bytes_arch(). In order to ensure
+ * that the randomness provided by this function is okay, the function
+ * wait_for_random_bytes() should be called and return 0 at least once
+ * at any point prior.
  */
 void get_random_bytes(void *buf, int nbytes)
 {
@@ -1506,6 +1504,24 @@ void get_random_bytes(void *buf, int nbytes)
 }
 EXPORT_SYMBOL(get_random_bytes);
 
+/*
+ * Wait for the urandom pool to be seeded and thus guaranteed to supply
+ * cryptographically secure random numbers. This applies to: the /dev/urandom
+ * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
+ * family of functions. Using any of these functions without first calling
+ * this function forfeits the guarantee of security.
+ *
+ * Returns: 0 if the urandom pool has been seeded.
+ *          -ERESTARTSYS if the function was interrupted by a signal.
+ */
+int wait_for_random_bytes(void)
+{
+	if (likely(crng_ready()))
+		return 0;
+	return wait_event_interruptible(crng_init_wait, crng_ready());
+}
+EXPORT_SYMBOL(wait_for_random_bytes);
+
 /*
  * Add a callback function that will be invoked when the nonblocking
  * pool is initialised.
@@ -1860,6 +1876,8 @@ const struct file_operations urandom_fops = {
 SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 		unsigned int, flags)
 {
+	int ret;
+
 	if (flags & ~(GRND_NONBLOCK|GRND_RANDOM))
 		return -EINVAL;
 
@@ -1872,9 +1890,9 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 	if (!crng_ready()) {
 		if (flags & GRND_NONBLOCK)
 			return -EAGAIN;
-		crng_wait_ready();
-		if (signal_pending(current))
-			return -ERESTARTSYS;
+		ret = wait_for_random_bytes();
+		if (unlikely(ret))
+			return ret;
 	}
 	return urandom_read(NULL, buf, count, NULL);
 }
@@ -2035,7 +2053,10 @@ static rwlock_t batched_entropy_reset_lock = __RW_LOCK_UNLOCKED(batched_entropy_
 /*
  * Get a random word for internal kernel use only. The quality of the random
  * number is either as good as RDRAND or as good as /dev/urandom, with the
- * goal of being quite fast and not depleting entropy.
+ * goal of being quite fast and not depleting entropy. In order to ensure
+ * that the randomness provided by this function is okay, the function
+ * wait_for_random_bytes() should be called and return 0 at least once
+ * at any point prior.
  */
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 u64 get_random_u64(void)
diff --git a/include/linux/random.h b/include/linux/random.h
index ed5c3838780de..e29929347c95b 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,6 +34,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
 
 extern void get_random_bytes(void *buf, int nbytes);
+extern int wait_for_random_bytes(void);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern void get_random_bytes_arch(void *buf, int nbytes);
-- 
GitLab


From da9ba564bd683374b8d319756f312821b8265b06 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 20:05:02 -0400
Subject: [PATCH 0291/1958] random: add
 get_random_{bytes,u32,u64,int,long,once}_wait family

These functions are simple convenience wrappers that call
wait_for_random_bytes before calling the respective get_random_*
function.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/net.h    |  2 ++
 include/linux/once.h   |  2 ++
 include/linux/random.h | 25 +++++++++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index abcfa46a2bd9a..dda2cc939a531 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -274,6 +274,8 @@ do {									\
 
 #define net_get_random_once(buf, nbytes)			\
 	get_random_once((buf), (nbytes))
+#define net_get_random_once_wait(buf, nbytes)			\
+	get_random_once_wait((buf), (nbytes))
 
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len);
diff --git a/include/linux/once.h b/include/linux/once.h
index 285f12cb40e6a..9c98aaa87cbcd 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -53,5 +53,7 @@ void __do_once_done(bool *done, struct static_key *once_key,
 
 #define get_random_once(buf, nbytes)					     \
 	DO_ONCE(get_random_bytes, (buf), (nbytes))
+#define get_random_once_wait(buf, nbytes)                                    \
+	DO_ONCE(get_random_bytes_wait, (buf), (nbytes))                      \
 
 #endif /* _LINUX_ONCE_H */
diff --git a/include/linux/random.h b/include/linux/random.h
index e29929347c95b..4aecc339558d5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -58,6 +58,31 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
+ * Returns the result of the call to wait_for_random_bytes. */
+static inline int get_random_bytes_wait(void *buf, int nbytes)
+{
+	int ret = wait_for_random_bytes();
+	if (unlikely(ret))
+		return ret;
+	get_random_bytes(buf, nbytes);
+	return 0;
+}
+
+#define declare_get_random_var_wait(var) \
+	static inline int get_random_ ## var ## _wait(var *out) { \
+		int ret = wait_for_random_bytes(); \
+		if (unlikely(ret)) \
+			return ret; \
+		*out = get_random_ ## var(); \
+		return 0; \
+	}
+declare_get_random_var_wait(u32)
+declare_get_random_var_wait(u64)
+declare_get_random_var_wait(int)
+declare_get_random_var_wait(long)
+#undef declare_get_random_var
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
-- 
GitLab


From 51b0817b0d23d51223575eff14629b3602a93cd4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 20:42:50 -0400
Subject: [PATCH 0292/1958] cifs: use get_random_u32 for 32-bit lock random

Using get_random_u32 here is faster, more fitting of the use case, and
just as cryptographically secure. It also has the benefit of providing
better randomness at early boot, which is sometimes when this is used.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Steve French <sfrench@samba.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/cifs/cifsfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9a1667e0e8d60..fe0c8dcc7dc7c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1359,7 +1359,7 @@ init_cifs(void)
 	spin_lock_init(&cifs_tcp_ses_lock);
 	spin_lock_init(&GlobalMid_Lock);
 
-	get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
+	cifs_lock_secret = get_random_u32();
 
 	if (cifs_max_pending < 2) {
 		cifs_max_pending = 2;
-- 
GitLab


From 6787ab81b29115b6d2e7d17fe8a8017da66197d6 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 22:34:26 -0400
Subject: [PATCH 0293/1958] iscsi: ensure RNG is seeded before use

It's not safe to use weak random data here, especially for the challenge
response randomness. Since we're always in process context, it's safe to
simply wait until we have enough randomness to carry out the
authentication correctly.

While we're at it, we clean up a small memleak during an error
condition.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
Cc: Lee Duncan <lduncan@suse.com>
Cc: Chris Leech <cleech@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/target/iscsi/iscsi_target_auth.c  | 14 +++++++++++---
 drivers/target/iscsi/iscsi_target_login.c | 22 ++++++++++++++--------
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
index 903b667f8e013..f9bc8ec6fb6b5 100644
--- a/drivers/target/iscsi/iscsi_target_auth.c
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -47,18 +47,21 @@ static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len)
 	}
 }
 
-static void chap_gen_challenge(
+static int chap_gen_challenge(
 	struct iscsi_conn *conn,
 	int caller,
 	char *c_str,
 	unsigned int *c_len)
 {
+	int ret;
 	unsigned char challenge_asciihex[CHAP_CHALLENGE_LENGTH * 2 + 1];
 	struct iscsi_chap *chap = conn->auth_protocol;
 
 	memset(challenge_asciihex, 0, CHAP_CHALLENGE_LENGTH * 2 + 1);
 
-	get_random_bytes(chap->challenge, CHAP_CHALLENGE_LENGTH);
+	ret = get_random_bytes_wait(chap->challenge, CHAP_CHALLENGE_LENGTH);
+	if (unlikely(ret))
+		return ret;
 	chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge,
 				CHAP_CHALLENGE_LENGTH);
 	/*
@@ -69,6 +72,7 @@ static void chap_gen_challenge(
 
 	pr_debug("[%s] Sending CHAP_C=0x%s\n\n", (caller) ? "server" : "client",
 			challenge_asciihex);
+	return 0;
 }
 
 static int chap_check_algorithm(const char *a_str)
@@ -143,6 +147,7 @@ static struct iscsi_chap *chap_server_open(
 	case CHAP_DIGEST_UNKNOWN:
 	default:
 		pr_err("Unsupported CHAP_A value\n");
+		kfree(conn->auth_protocol);
 		return NULL;
 	}
 
@@ -156,7 +161,10 @@ static struct iscsi_chap *chap_server_open(
 	/*
 	 * Generate Challenge.
 	 */
-	chap_gen_challenge(conn, 1, aic_str, aic_len);
+	if (chap_gen_challenge(conn, 1, aic_str, aic_len) < 0) {
+		kfree(conn->auth_protocol);
+		return NULL;
+	}
 
 	return chap;
 }
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 66238477137bc..5ef028c117385 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -245,22 +245,26 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn)
 	return 0;
 }
 
-static void iscsi_login_set_conn_values(
+static int iscsi_login_set_conn_values(
 	struct iscsi_session *sess,
 	struct iscsi_conn *conn,
 	__be16 cid)
 {
+	int ret;
 	conn->sess		= sess;
 	conn->cid		= be16_to_cpu(cid);
 	/*
 	 * Generate a random Status sequence number (statsn) for the new
 	 * iSCSI connection.
 	 */
-	get_random_bytes(&conn->stat_sn, sizeof(u32));
+	ret = get_random_bytes_wait(&conn->stat_sn, sizeof(u32));
+	if (unlikely(ret))
+		return ret;
 
 	mutex_lock(&auth_id_lock);
 	conn->auth_id		= iscsit_global->auth_id++;
 	mutex_unlock(&auth_id_lock);
+	return 0;
 }
 
 __printf(2, 3) int iscsi_change_param_sprintf(
@@ -306,7 +310,11 @@ static int iscsi_login_zero_tsih_s1(
 		return -ENOMEM;
 	}
 
-	iscsi_login_set_conn_values(sess, conn, pdu->cid);
+	ret = iscsi_login_set_conn_values(sess, conn, pdu->cid);
+	if (unlikely(ret)) {
+		kfree(sess);
+		return ret;
+	}
 	sess->init_task_tag	= pdu->itt;
 	memcpy(&sess->isid, pdu->isid, 6);
 	sess->exp_cmd_sn	= be32_to_cpu(pdu->cmdsn);
@@ -497,8 +505,7 @@ static int iscsi_login_non_zero_tsih_s1(
 {
 	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
 
-	iscsi_login_set_conn_values(NULL, conn, pdu->cid);
-	return 0;
+	return iscsi_login_set_conn_values(NULL, conn, pdu->cid);
 }
 
 /*
@@ -554,9 +561,8 @@ static int iscsi_login_non_zero_tsih_s2(
 		atomic_set(&sess->session_continuation, 1);
 	spin_unlock_bh(&sess->conn_lock);
 
-	iscsi_login_set_conn_values(sess, conn, pdu->cid);
-
-	if (iscsi_copy_param_list(&conn->param_list,
+	if (iscsi_login_set_conn_values(sess, conn, pdu->cid) < 0 ||
+	    iscsi_copy_param_list(&conn->param_list,
 			conn->tpg->param_list, 0) < 0) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
-- 
GitLab


From ae5b806a3d3c23d9d3b57aecbe687a0bd82ab086 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 22:44:54 -0400
Subject: [PATCH 0294/1958] ceph: ensure RNG is seeded before using

Ceph uses the RNG for various nonce generations, and it shouldn't accept
using bad randomness. So, we wait for the RNG to be properly seeded. We
do this by calling wait_for_random_bytes() in a function that is
certainly called in process context, early on, so that all subsequent
calls to get_random_bytes are necessarily acceptable.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Ilya Dryomov <idryomov@gmail.com>
Cc: "Yan, Zheng" <zyan@redhat.com>
Cc: Sage Weil <sage@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 net/ceph/ceph_common.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4fd02831beed2..26ab58665f772 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -611,7 +611,11 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
 {
 	struct ceph_client *client;
 	struct ceph_entity_addr *myaddr = NULL;
-	int err = -ENOMEM;
+	int err;
+
+	err = wait_for_random_bytes();
+	if (err < 0)
+		return ERR_PTR(err);
 
 	client = kzalloc(sizeof(*client), GFP_KERNEL);
 	if (client == NULL)
-- 
GitLab


From d48ad080ec0101c2cca92926bed64993ab565c3d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 22:47:13 -0400
Subject: [PATCH 0295/1958] rhashtable: use get_random_u32 for hash_rnd

This is much faster and just as secure. It also has the added benefit of
probably returning better randomness at early-boot on systems with
architectural RNGs.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 lib/rhashtable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index d9e7274a04cd9..a1eb7c947f467 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -235,7 +235,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 
 	INIT_LIST_HEAD(&tbl->walkers);
 
-	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+	tbl->hash_rnd = get_random_u32();
 
 	for (i = 0; i < nbuckets; i++)
 		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
-- 
GitLab


From b3d0f7895d1251f0af0dd38410e7be3e52be4a1d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 23:00:05 -0400
Subject: [PATCH 0296/1958] net/neighbor: use get_random_u32 for 32-bit hash
 random

Using get_random_u32 here is faster, more fitting of the use case, and
just as cryptographically secure. It also has the benefit of providing
better randomness at early boot, which is when many of these structures
are assigned.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 net/core/neighbour.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d274f81fcc2c0..9784133b0cdb0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -312,8 +312,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
 
 static void neigh_get_hash_rnd(u32 *x)
 {
-	get_random_bytes(x, sizeof(*x));
-	*x |= 1;
+	*x = get_random_u32() | 1;
 }
 
 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
-- 
GitLab


From 7aed9f72c3c70bdc18e72ec086c1a0eafdcd273f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 23:01:20 -0400
Subject: [PATCH 0297/1958] net/route: use get_random_int for random counter

Using get_random_int here is faster, more fitting of the use case, and
just as cryptographically secure. It also has the benefit of providing
better randomness at early boot, which is when many of these structures
are assigned.

Also, semantically, it's not really proper to have been assigning an
atomic_t in this way before, even if in practice it works fine.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 net/ipv4/route.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9eebe43e1..11e001a420942 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2936,8 +2936,7 @@ static __net_init int rt_genid_init(struct net *net)
 {
 	atomic_set(&net->ipv4.rt_genid, 0);
 	atomic_set(&net->fnhe_genid, 0);
-	get_random_bytes(&net->ipv4.dev_addr_genid,
-			 sizeof(net->ipv4.dev_addr_genid));
+	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
 	return 0;
 }
 
-- 
GitLab


From d06bfd1989fe97623b32d6df4ffa6e4338c99dc8 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 7 Jun 2017 23:06:55 -0400
Subject: [PATCH 0298/1958] random: warn when kernel uses unseeded randomness

This enables an important dmesg notification about when drivers have
used the crng without it being seeded first. Prior, these errors would
occur silently, and so there hasn't been a great way of diagnosing these
types of bugs for obscure setups. By adding this as a config option, we
can leave it on by default, so that we learn where these issues happen,
in the field, will still allowing some people to turn it off, if they
really know what they're doing and do not want the log entries.

However, we don't leave it _completely_ by default. An earlier version
of this patch simply had `default y`. I'd really love that, but it turns
out, this problem with unseeded randomness being used is really quite
present and is going to take a long time to fix. Thus, as a compromise
between log-messages-for-all and nobody-knows, this is `default y`,
except it is also `depends on DEBUG_KERNEL`. This will ensure that the
curious see the messages while others don't have to.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 15 +++++++++++++--
 lib/Kconfig.debug     | 16 ++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 3853dd4f92e79..fa5bbd5a7ca0f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -288,7 +288,6 @@
 #define SEC_XFER_SIZE		512
 #define EXTRACT_SIZE		10
 
-#define DEBUG_RANDOM_BOOT 0
 
 #define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))
 
@@ -1481,7 +1480,7 @@ void get_random_bytes(void *buf, int nbytes)
 {
 	__u8 tmp[CHACHA20_BLOCK_SIZE];
 
-#if DEBUG_RANDOM_BOOT > 0
+#ifdef CONFIG_WARN_UNSEEDED_RANDOM
 	if (!crng_ready())
 		printk(KERN_NOTICE "random: %pF get_random_bytes called "
 		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
@@ -2075,6 +2074,12 @@ u64 get_random_u64(void)
 	    return ret;
 #endif
 
+#ifdef CONFIG_WARN_UNSEEDED_RANDOM
+	if (!crng_ready())
+		printk(KERN_NOTICE "random: %pF get_random_u64 called "
+		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
+#endif
+
 	batch = &get_cpu_var(batched_entropy_u64);
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
@@ -2101,6 +2106,12 @@ u32 get_random_u32(void)
 	if (arch_get_random_int(&ret))
 		return ret;
 
+#ifdef CONFIG_WARN_UNSEEDED_RANDOM
+	if (!crng_ready())
+		printk(KERN_NOTICE "random: %pF get_random_u32 called "
+		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
+#endif
+
 	batch = &get_cpu_var(batched_entropy_u32);
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4587ebe52c7e..c4159605bfbfc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1209,6 +1209,22 @@ config STACKTRACE
 	  It is also used by various kernel debugging features that require
 	  stack trace generation.
 
+config WARN_UNSEEDED_RANDOM
+	bool "Warn when kernel uses unseeded randomness"
+	default y
+	depends on DEBUG_KERNEL
+	help
+	  Some parts of the kernel contain bugs relating to their use of
+	  cryptographically secure random numbers before it's actually possible
+	  to generate those numbers securely. This setting ensures that these
+	  flaws don't go unnoticed, by enabling a message, should this ever
+	  occur. This will allow people with obscure setups to know when things
+	  are going wrong, so that they might contact developers about fixing
+	  it.
+
+	  Say Y here, unless you simply do not care about using unseeded
+	  randomness and do not want a potential warning message in your logs.
+
 config DEBUG_KOBJECT
 	bool "kobject debugging"
 	depends on DEBUG_KERNEL
-- 
GitLab


From b21ff825d6b7fa7122f9c4455f9a0f157b9fb225 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:35 +0900
Subject: [PATCH 0299/1958] mtd: nand: denali: set NAND_ECC_CUSTOM_PAGE_ACCESS

The denali_cmdfunc() actually does nothing valuable for
NAND_CMD_{PAGEPROG,READ0,SEQIN}.

For NAND_CMD_{READ0,SEQIN}, it copies "page" to "denali->page", then
denali_read_page(_raw) compares them just for the sanity check.
(Inconsistently, this check is missing from denali_write_page(_raw).)

The Denali controller is equipped with high level read/write interface,
so let's skip unneeded call of cmdfunc().

If NAND_ECC_CUSTOM_PAGE_ACCESS is set, nand_write_page() will not
call ->waitfunc hook.  So, ->write_page(_raw) hooks should directly
return -EIO on failure.  The error handling of page writes will be
much simpler.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 41 ++++++++++++---------------------------
 drivers/mtd/nand/denali.h |  1 -
 2 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 2b4618bb8d72f..7133a33b4ad36 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -998,13 +998,16 @@ static void denali_setup_dma(struct denali_nand_info *denali, int op)
  * configuration details.
  */
 static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, bool raw_xfer)
+			const uint8_t *buf, int page, bool raw_xfer)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_status;
 	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
+	int ret = 0;
+
+	denali->page = page;
 
 	/*
 	 * if it is a raw xfer, we want to disable ecc and send the spare area.
@@ -1036,13 +1039,13 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (irq_status == 0) {
 		dev_err(denali->dev, "timeout on write_page (type = %d)\n",
 			raw_xfer);
-		denali->status = NAND_STATUS_FAIL;
+		ret = -EIO;
 	}
 
 	denali_enable_dma(denali, false);
 	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_TO_DEVICE);
 
-	return 0;
+	return ret;
 }
 
 /* NAND core entry points */
@@ -1059,7 +1062,7 @@ static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	 * for regular page writes, we let HW handle all the ECC
 	 * data written to the device.
 	 */
-	return write_page(mtd, chip, buf, false);
+	return write_page(mtd, chip, buf, page, false);
 }
 
 /*
@@ -1075,7 +1078,7 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	 * for raw page writes, we want to disable ECC and simply write
 	 * whatever data is in the buffer.
 	 */
-	return write_page(mtd, chip, buf, true);
+	return write_page(mtd, chip, buf, page, true);
 }
 
 static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
@@ -1105,12 +1108,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	unsigned long uncor_ecc_flags = 0;
 	int stat = 0;
 
-	if (page != denali->page) {
-		dev_err(denali->dev,
-			"IN %s: page %d is not equal to denali->page %d",
-			__func__, page, denali->page);
-		BUG();
-	}
+	denali->page = page;
 
 	setup_ecc_for_xfer(denali, true, false);
 
@@ -1154,12 +1152,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_mask = INTR__DMA_CMD_COMP;
 
-	if (page != denali->page) {
-		dev_err(denali->dev,
-			"IN %s: page %d is not equal to denali->page %d",
-			__func__, page, denali->page);
-		BUG();
-	}
+	denali->page = page;
 
 	setup_ecc_for_xfer(denali, false, true);
 	denali_enable_dma(denali, true);
@@ -1204,12 +1197,7 @@ static void denali_select_chip(struct mtd_info *mtd, int chip)
 
 static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	int status = denali->status;
-
-	denali->status = 0;
-
-	return status;
+	return 0;
 }
 
 static int denali_erase(struct mtd_info *mtd, int page)
@@ -1238,8 +1226,6 @@ static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
 	int i;
 
 	switch (cmd) {
-	case NAND_CMD_PAGEPROG:
-		break;
 	case NAND_CMD_STATUS:
 		read_status(denali);
 		break;
@@ -1259,10 +1245,6 @@ static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
 			write_byte_to_buf(denali, id);
 		}
 		break;
-	case NAND_CMD_READ0:
-	case NAND_CMD_SEQIN:
-		denali->page = page;
-		break;
 	case NAND_CMD_RESET:
 		reset_bank(denali);
 		break;
@@ -1605,6 +1587,7 @@ int denali_init(struct denali_nand_info *denali)
 
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
 
+	chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS;
 	chip->ecc.read_page = denali_read_page;
 	chip->ecc.read_page_raw = denali_read_page_raw;
 	chip->ecc.write_page = denali_write_page;
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index a06ed741b550d..352d8328b94af 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -323,7 +323,6 @@ struct nand_buf {
 struct denali_nand_info {
 	struct nand_chip nand;
 	int flash_bank; /* currently selected chip */
-	int status;
 	int platform;
 	struct nand_buf buf;
 	struct device *dev;
-- 
GitLab


From 959e9f2ae9dac4821d7da354a37272650febebbe Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:36 +0900
Subject: [PATCH 0300/1958] mtd: nand: denali: remove unneeded
 find_valid_banks()

The function find_valid_banks() issues the Read ID (0x90) command,
then compares the first byte (Manufacturer ID) of each bank with
the one of bank0.

This is equivalent to what nand_scan_ident() does.  The number of
chips is detected there, so this is unneeded.

What is worse for find_valid_banks() is that, if multiple chips are
connected to INTEL_CE4100 platform, it crashes the kernel by BUG().
This is what we should avoid.  This function is just harmful and
unneeded.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 47 ---------------------------------------
 drivers/mtd/nand/denali.h |  1 -
 2 files changed, 48 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 7133a33b4ad36..122df4c6126d0 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -337,51 +337,6 @@ static void get_samsung_nand_para(struct denali_nand_info *denali,
 	}
 }
 
-/*
- * determines how many NAND chips are connected to the controller. Note for
- * Intel CE4100 devices we don't support more than one device.
- */
-static void find_valid_banks(struct denali_nand_info *denali)
-{
-	uint32_t id[denali->max_banks];
-	int i;
-
-	denali->total_used_banks = 1;
-	for (i = 0; i < denali->max_banks; i++) {
-		index_addr(denali, MODE_11 | (i << 24) | 0, 0x90);
-		index_addr(denali, MODE_11 | (i << 24) | 1, 0);
-		index_addr_read_data(denali, MODE_11 | (i << 24) | 2, &id[i]);
-
-		dev_dbg(denali->dev,
-			"Return 1st ID for bank[%d]: %x\n", i, id[i]);
-
-		if (i == 0) {
-			if (!(id[i] & 0x0ff))
-				break; /* WTF? */
-		} else {
-			if ((id[i] & 0x0ff) == (id[0] & 0x0ff))
-				denali->total_used_banks++;
-			else
-				break;
-		}
-	}
-
-	if (denali->platform == INTEL_CE4100) {
-		/*
-		 * Platform limitations of the CE4100 device limit
-		 * users to a single chip solution for NAND.
-		 * Multichip support is not enabled.
-		 */
-		if (denali->total_used_banks != 1) {
-			dev_err(denali->dev,
-				"Sorry, Intel CE4100 only supports a single NAND device.\n");
-			BUG();
-		}
-	}
-	dev_dbg(denali->dev,
-		"denali->total_used_banks: %d\n", denali->total_used_banks);
-}
-
 /*
  * Use the configuration feature register to determine the maximum number of
  * banks that the hardware supports.
@@ -439,8 +394,6 @@ static uint16_t denali_nand_timing_set(struct denali_nand_info *denali)
 			ioread32(denali->flash_reg + RDWR_EN_HI_CNT),
 			ioread32(denali->flash_reg + CS_SETUP_CNT));
 
-	find_valid_banks(denali);
-
 	/*
 	 * If the user specified to override the default timings
 	 * with a specific ONFI mode, we apply those changes here.
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 352d8328b94af..0e4a8965f6f1d 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -326,7 +326,6 @@ struct denali_nand_info {
 	int platform;
 	struct nand_buf buf;
 	struct device *dev;
-	int total_used_banks;
 	int page;
 	void __iomem *flash_reg;	/* Register Interface */
 	void __iomem *flash_mem;	/* Host Data/Command Interface */
-- 
GitLab


From 1bb88666775e50fdef1d915d395eca9267bd3ab1 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:37 +0900
Subject: [PATCH 0301/1958] mtd: nand: denali: handle timing parameters by
 setup_data_interface()

Handling timing parameters in a driver's own way should be avoided
because it duplicates efforts of drivers/mtd/nand/nand_timings.c
Besides, this driver hard-codes Intel specific parameters such as
CLK_X=5, CLK_MULTI=4.  Taking a certain device (Samsung K9WAG08U1A)
into account by get_samsung_nand_para() is weird as well.

Now, the core framework provides .setup_data_interface() hook, which
handles timing parameters in a generic manner.

While I am working on this, I found even more issues in the current
code, so fixed the following as well:

- In recent IP versions, WE_2_RE and TWHR2 share the same register.
  Likewise for ADDR_2_DATA and TCWAW, CS_SETUP_CNT and TWB.  When
  updating one, the other must be masked.  Otherwise, the other will
  be set to 0, then timing settings will be broken.

- The recent IP release expanded the ADDR_2_DATA to 7-bit wide.
  This register is related to tADL.  As commit 74a332e78e8f ("mtd:
  nand: timings: Fix tADL_min for ONFI 4.0 chips") addressed, the
  ONFi 4.0 increased the minimum of tADL to 400 nsec.  This may not
  fit in the 6-bit ADDR_2_DATA in older versions.  Check the IP
  revision and handle this correctly, otherwise the register value
  would wrap around.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c     | 345 ++++++++++++----------------------
 drivers/mtd/nand/denali.h     |  26 +--
 drivers/mtd/nand/denali_dt.c  |   3 +-
 drivers/mtd/nand/denali_pci.c |   6 +-
 4 files changed, 139 insertions(+), 241 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 122df4c6126d0..ca2b6b8850ba8 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -28,17 +28,6 @@
 
 MODULE_LICENSE("GPL");
 
-/*
- * We define a module parameter that allows the user to override
- * the hardware and decide what timing mode should be used.
- */
-#define NAND_DEFAULT_TIMINGS	-1
-
-static int onfi_timing_mode = NAND_DEFAULT_TIMINGS;
-module_param(onfi_timing_mode, int, S_IRUGO);
-MODULE_PARM_DESC(onfi_timing_mode,
-	   "Overrides default ONFI setting. -1 indicates use default timings");
-
 #define DENALI_NAND_NAME    "denali-nand"
 
 /*
@@ -63,10 +52,12 @@ MODULE_PARM_DESC(onfi_timing_mode,
 #define CHIP_SELECT_INVALID	-1
 
 /*
- * This macro divides two integers and rounds fractional values up
- * to the nearest integer value.
+ * The bus interface clock, clk_x, is phase aligned with the core clock.  The
+ * clk_x is an integral multiple N of the core clk.  The value N is configured
+ * at IP delivery time, and its available value is 4, 5, or 6.  We need to align
+ * to the largest value to make it work with any possible configuration.
  */
-#define CEIL_DIV(X, Y) (((X)%(Y)) ? ((X)/(Y)+1) : ((X)/(Y)))
+#define DENALI_CLK_X_MULT	6
 
 /*
  * this macro allows us to convert from an MTD structure to our own
@@ -195,148 +186,6 @@ static uint16_t denali_nand_reset(struct denali_nand_info *denali)
 	return PASS;
 }
 
-/*
- * this routine calculates the ONFI timing values for a given mode and
- * programs the clocking register accordingly. The mode is determined by
- * the get_onfi_nand_para routine.
- */
-static void nand_onfi_timing_set(struct denali_nand_info *denali,
-								uint16_t mode)
-{
-	uint16_t Trea[6] = {40, 30, 25, 20, 20, 16};
-	uint16_t Trp[6] = {50, 25, 17, 15, 12, 10};
-	uint16_t Treh[6] = {30, 15, 15, 10, 10, 7};
-	uint16_t Trc[6] = {100, 50, 35, 30, 25, 20};
-	uint16_t Trhoh[6] = {0, 15, 15, 15, 15, 15};
-	uint16_t Trloh[6] = {0, 0, 0, 0, 5, 5};
-	uint16_t Tcea[6] = {100, 45, 30, 25, 25, 25};
-	uint16_t Tadl[6] = {200, 100, 100, 100, 70, 70};
-	uint16_t Trhw[6] = {200, 100, 100, 100, 100, 100};
-	uint16_t Trhz[6] = {200, 100, 100, 100, 100, 100};
-	uint16_t Twhr[6] = {120, 80, 80, 60, 60, 60};
-	uint16_t Tcs[6] = {70, 35, 25, 25, 20, 15};
-
-	uint16_t data_invalid_rhoh, data_invalid_rloh, data_invalid;
-	uint16_t dv_window = 0;
-	uint16_t en_lo, en_hi;
-	uint16_t acc_clks;
-	uint16_t addr_2_data, re_2_we, re_2_re, we_2_re, cs_cnt;
-
-	en_lo = CEIL_DIV(Trp[mode], CLK_X);
-	en_hi = CEIL_DIV(Treh[mode], CLK_X);
-#if ONFI_BLOOM_TIME
-	if ((en_hi * CLK_X) < (Treh[mode] + 2))
-		en_hi++;
-#endif
-
-	if ((en_lo + en_hi) * CLK_X < Trc[mode])
-		en_lo += CEIL_DIV((Trc[mode] - (en_lo + en_hi) * CLK_X), CLK_X);
-
-	if ((en_lo + en_hi) < CLK_MULTI)
-		en_lo += CLK_MULTI - en_lo - en_hi;
-
-	while (dv_window < 8) {
-		data_invalid_rhoh = en_lo * CLK_X + Trhoh[mode];
-
-		data_invalid_rloh = (en_lo + en_hi) * CLK_X + Trloh[mode];
-
-		data_invalid = data_invalid_rhoh < data_invalid_rloh ?
-					data_invalid_rhoh : data_invalid_rloh;
-
-		dv_window = data_invalid - Trea[mode];
-
-		if (dv_window < 8)
-			en_lo++;
-	}
-
-	acc_clks = CEIL_DIV(Trea[mode], CLK_X);
-
-	while (acc_clks * CLK_X - Trea[mode] < 3)
-		acc_clks++;
-
-	if (data_invalid - acc_clks * CLK_X < 2)
-		dev_warn(denali->dev, "%s, Line %d: Warning!\n",
-			 __FILE__, __LINE__);
-
-	addr_2_data = CEIL_DIV(Tadl[mode], CLK_X);
-	re_2_we = CEIL_DIV(Trhw[mode], CLK_X);
-	re_2_re = CEIL_DIV(Trhz[mode], CLK_X);
-	we_2_re = CEIL_DIV(Twhr[mode], CLK_X);
-	cs_cnt = CEIL_DIV((Tcs[mode] - Trp[mode]), CLK_X);
-	if (cs_cnt == 0)
-		cs_cnt = 1;
-
-	if (Tcea[mode]) {
-		while (cs_cnt * CLK_X + Trea[mode] < Tcea[mode])
-			cs_cnt++;
-	}
-
-#if MODE5_WORKAROUND
-	if (mode == 5)
-		acc_clks = 5;
-#endif
-
-	/* Sighting 3462430: Temporary hack for MT29F128G08CJABAWP:B */
-	if (ioread32(denali->flash_reg + MANUFACTURER_ID) == 0 &&
-		ioread32(denali->flash_reg + DEVICE_ID) == 0x88)
-		acc_clks = 6;
-
-	iowrite32(acc_clks, denali->flash_reg + ACC_CLKS);
-	iowrite32(re_2_we, denali->flash_reg + RE_2_WE);
-	iowrite32(re_2_re, denali->flash_reg + RE_2_RE);
-	iowrite32(we_2_re, denali->flash_reg + WE_2_RE);
-	iowrite32(addr_2_data, denali->flash_reg + ADDR_2_DATA);
-	iowrite32(en_lo, denali->flash_reg + RDWR_EN_LO_CNT);
-	iowrite32(en_hi, denali->flash_reg + RDWR_EN_HI_CNT);
-	iowrite32(cs_cnt, denali->flash_reg + CS_SETUP_CNT);
-}
-
-/* queries the NAND device to see what ONFI modes it supports. */
-static uint16_t get_onfi_nand_para(struct denali_nand_info *denali)
-{
-	int i;
-
-	/*
-	 * we needn't to do a reset here because driver has already
-	 * reset all the banks before
-	 */
-	if (!(ioread32(denali->flash_reg + ONFI_TIMING_MODE) &
-		ONFI_TIMING_MODE__VALUE))
-		return FAIL;
-
-	for (i = 5; i > 0; i--) {
-		if (ioread32(denali->flash_reg + ONFI_TIMING_MODE) &
-			(0x01 << i))
-			break;
-	}
-
-	nand_onfi_timing_set(denali, i);
-
-	/*
-	 * By now, all the ONFI devices we know support the page cache
-	 * rw feature. So here we enable the pipeline_rw_ahead feature
-	 */
-	/* iowrite32(1, denali->flash_reg + CACHE_WRITE_ENABLE); */
-	/* iowrite32(1, denali->flash_reg + CACHE_READ_ENABLE);  */
-
-	return PASS;
-}
-
-static void get_samsung_nand_para(struct denali_nand_info *denali,
-							uint8_t device_id)
-{
-	if (device_id == 0xd3) { /* Samsung K9WAG08U1A */
-		/* Set timing register values according to datasheet */
-		iowrite32(5, denali->flash_reg + ACC_CLKS);
-		iowrite32(20, denali->flash_reg + RE_2_WE);
-		iowrite32(12, denali->flash_reg + WE_2_RE);
-		iowrite32(14, denali->flash_reg + ADDR_2_DATA);
-		iowrite32(3, denali->flash_reg + RDWR_EN_LO_CNT);
-		iowrite32(2, denali->flash_reg + RDWR_EN_HI_CNT);
-		iowrite32(2, denali->flash_reg + CS_SETUP_CNT);
-	}
-}
-
 /*
  * Use the configuration feature register to determine the maximum number of
  * banks that the hardware supports.
@@ -352,58 +201,6 @@ static void detect_max_banks(struct denali_nand_info *denali)
 		denali->max_banks <<= 1;
 }
 
-static uint16_t denali_nand_timing_set(struct denali_nand_info *denali)
-{
-	uint16_t status = PASS;
-	uint32_t id_bytes[8], addr;
-	uint8_t maf_id, device_id;
-	int i;
-
-	/*
-	 * Use read id method to get device ID and other params.
-	 * For some NAND chips, controller can't report the correct
-	 * device ID by reading from DEVICE_ID register
-	 */
-	addr = MODE_11 | BANK(denali->flash_bank);
-	index_addr(denali, addr | 0, 0x90);
-	index_addr(denali, addr | 1, 0);
-	for (i = 0; i < 8; i++)
-		index_addr_read_data(denali, addr | 2, &id_bytes[i]);
-	maf_id = id_bytes[0];
-	device_id = id_bytes[1];
-
-	if (ioread32(denali->flash_reg + ONFI_DEVICE_NO_OF_LUNS) &
-		ONFI_DEVICE_NO_OF_LUNS__ONFI_DEVICE) { /* ONFI 1.0 NAND */
-		if (FAIL == get_onfi_nand_para(denali))
-			return FAIL;
-	} else if (maf_id == 0xEC) { /* Samsung NAND */
-		get_samsung_nand_para(denali, device_id);
-	}
-
-	dev_info(denali->dev,
-			"Dump timing register values:\n"
-			"acc_clks: %d, re_2_we: %d, re_2_re: %d\n"
-			"we_2_re: %d, addr_2_data: %d, rdwr_en_lo_cnt: %d\n"
-			"rdwr_en_hi_cnt: %d, cs_setup_cnt: %d\n",
-			ioread32(denali->flash_reg + ACC_CLKS),
-			ioread32(denali->flash_reg + RE_2_WE),
-			ioread32(denali->flash_reg + RE_2_RE),
-			ioread32(denali->flash_reg + WE_2_RE),
-			ioread32(denali->flash_reg + ADDR_2_DATA),
-			ioread32(denali->flash_reg + RDWR_EN_LO_CNT),
-			ioread32(denali->flash_reg + RDWR_EN_HI_CNT),
-			ioread32(denali->flash_reg + CS_SETUP_CNT));
-
-	/*
-	 * If the user specified to override the default timings
-	 * with a specific ONFI mode, we apply those changes here.
-	 */
-	if (onfi_timing_mode != NAND_DEFAULT_TIMINGS)
-		nand_onfi_timing_set(denali, onfi_timing_mode);
-
-	return status;
-}
-
 static void denali_set_intr_modes(struct denali_nand_info *denali,
 					uint16_t INT_ENABLE)
 {
@@ -1209,7 +1006,121 @@ static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
 		break;
 	}
 }
-/* end NAND core entry points */
+
+#define DIV_ROUND_DOWN_ULL(ll, d) \
+	({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })
+
+static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
+				       const struct nand_data_interface *conf)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	const struct nand_sdr_timings *timings;
+	unsigned long t_clk;
+	int acc_clks, re_2_we, re_2_re, we_2_re, addr_2_data;
+	int rdwr_en_lo, rdwr_en_hi, rdwr_en_lo_hi, cs_setup;
+	int addr_2_data_mask;
+	uint32_t tmp;
+
+	timings = nand_get_sdr_timings(conf);
+	if (IS_ERR(timings))
+		return PTR_ERR(timings);
+
+	/* clk_x period in picoseconds */
+	t_clk = DIV_ROUND_DOWN_ULL(1000000000000ULL, denali->clk_x_rate);
+	if (!t_clk)
+		return -EINVAL;
+
+	if (chipnr == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	/* tREA -> ACC_CLKS */
+	acc_clks = DIV_ROUND_UP(timings->tREA_max, t_clk);
+	acc_clks = min_t(int, acc_clks, ACC_CLKS__VALUE);
+
+	tmp = ioread32(denali->flash_reg + ACC_CLKS);
+	tmp &= ~ACC_CLKS__VALUE;
+	tmp |= acc_clks;
+	iowrite32(tmp, denali->flash_reg + ACC_CLKS);
+
+	/* tRWH -> RE_2_WE */
+	re_2_we = DIV_ROUND_UP(timings->tRHW_min, t_clk);
+	re_2_we = min_t(int, re_2_we, RE_2_WE__VALUE);
+
+	tmp = ioread32(denali->flash_reg + RE_2_WE);
+	tmp &= ~RE_2_WE__VALUE;
+	tmp |= re_2_we;
+	iowrite32(tmp, denali->flash_reg + RE_2_WE);
+
+	/* tRHZ -> RE_2_RE */
+	re_2_re = DIV_ROUND_UP(timings->tRHZ_max, t_clk);
+	re_2_re = min_t(int, re_2_re, RE_2_RE__VALUE);
+
+	tmp = ioread32(denali->flash_reg + RE_2_RE);
+	tmp &= ~RE_2_RE__VALUE;
+	tmp |= re_2_re;
+	iowrite32(tmp, denali->flash_reg + RE_2_RE);
+
+	/* tWHR -> WE_2_RE */
+	we_2_re = DIV_ROUND_UP(timings->tWHR_min, t_clk);
+	we_2_re = min_t(int, we_2_re, TWHR2_AND_WE_2_RE__WE_2_RE);
+
+	tmp = ioread32(denali->flash_reg + TWHR2_AND_WE_2_RE);
+	tmp &= ~TWHR2_AND_WE_2_RE__WE_2_RE;
+	tmp |= we_2_re;
+	iowrite32(tmp, denali->flash_reg + TWHR2_AND_WE_2_RE);
+
+	/* tADL -> ADDR_2_DATA */
+
+	/* for older versions, ADDR_2_DATA is only 6 bit wide */
+	addr_2_data_mask = TCWAW_AND_ADDR_2_DATA__ADDR_2_DATA;
+	if (denali->revision < 0x0501)
+		addr_2_data_mask >>= 1;
+
+	addr_2_data = DIV_ROUND_UP(timings->tADL_min, t_clk);
+	addr_2_data = min_t(int, addr_2_data, addr_2_data_mask);
+
+	tmp = ioread32(denali->flash_reg + TCWAW_AND_ADDR_2_DATA);
+	tmp &= ~addr_2_data_mask;
+	tmp |= addr_2_data;
+	iowrite32(tmp, denali->flash_reg + TCWAW_AND_ADDR_2_DATA);
+
+	/* tREH, tWH -> RDWR_EN_HI_CNT */
+	rdwr_en_hi = DIV_ROUND_UP(max(timings->tREH_min, timings->tWH_min),
+				  t_clk);
+	rdwr_en_hi = min_t(int, rdwr_en_hi, RDWR_EN_HI_CNT__VALUE);
+
+	tmp = ioread32(denali->flash_reg + RDWR_EN_HI_CNT);
+	tmp &= ~RDWR_EN_HI_CNT__VALUE;
+	tmp |= rdwr_en_hi;
+	iowrite32(tmp, denali->flash_reg + RDWR_EN_HI_CNT);
+
+	/* tRP, tWP -> RDWR_EN_LO_CNT */
+	rdwr_en_lo = DIV_ROUND_UP(max(timings->tRP_min, timings->tWP_min),
+				  t_clk);
+	rdwr_en_lo_hi = DIV_ROUND_UP(max(timings->tRC_min, timings->tWC_min),
+				     t_clk);
+	rdwr_en_lo_hi = max(rdwr_en_lo_hi, DENALI_CLK_X_MULT);
+	rdwr_en_lo = max(rdwr_en_lo, rdwr_en_lo_hi - rdwr_en_hi);
+	rdwr_en_lo = min_t(int, rdwr_en_lo, RDWR_EN_LO_CNT__VALUE);
+
+	tmp = ioread32(denali->flash_reg + RDWR_EN_LO_CNT);
+	tmp &= ~RDWR_EN_LO_CNT__VALUE;
+	tmp |= rdwr_en_lo;
+	iowrite32(tmp, denali->flash_reg + RDWR_EN_LO_CNT);
+
+	/* tCS, tCEA -> CS_SETUP_CNT */
+	cs_setup = max3((int)DIV_ROUND_UP(timings->tCS_min, t_clk) - rdwr_en_lo,
+			(int)DIV_ROUND_UP(timings->tCEA_max, t_clk) - acc_clks,
+			0);
+	cs_setup = min_t(int, cs_setup, CS_SETUP_CNT__VALUE);
+
+	tmp = ioread32(denali->flash_reg + CS_SETUP_CNT);
+	tmp &= ~CS_SETUP_CNT__VALUE;
+	tmp |= cs_setup;
+	iowrite32(tmp, denali->flash_reg + CS_SETUP_CNT);
+
+	return 0;
+}
 
 /* Initialization code to bring the device up to a known good state */
 static void denali_hw_init(struct denali_nand_info *denali)
@@ -1241,7 +1152,6 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	/* Should set value for these registers when init */
 	iowrite32(0, denali->flash_reg + TWO_ROW_ADDR_CYCLES);
 	iowrite32(1, denali->flash_reg + ECC_ENABLE);
-	denali_nand_timing_set(denali);
 	denali_irq_init(denali);
 }
 
@@ -1416,17 +1326,6 @@ int denali_init(struct denali_nand_info *denali)
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	if (denali->platform == INTEL_CE4100) {
-		/*
-		 * Due to a silicon limitation, we can only support
-		 * ONFI timing mode 1 and below.
-		 */
-		if (onfi_timing_mode < -1 || onfi_timing_mode > 1) {
-			pr_err("Intel CE4100 only supports ONFI timing mode 1 or below\n");
-			return -EINVAL;
-		}
-	}
-
 	/* allocate a temporary buffer for nand_scan_ident() */
 	denali->buf.buf = devm_kzalloc(denali->dev, PAGE_SIZE,
 					GFP_DMA | GFP_KERNEL);
@@ -1460,6 +1359,10 @@ int denali_init(struct denali_nand_info *denali)
 	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
 	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
+	/* clk rate info is needed for setup_data_interface */
+	if (denali->clk_x_rate)
+		chip->setup_data_interface = denali_setup_data_interface;
+
 	/*
 	 * scan for NAND devices attached to the controller
 	 * this is the first stage in a two step process to register
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 0e4a8965f6f1d..fb473895a79dd 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -72,11 +72,14 @@
 #define GLOBAL_INT_ENABLE			0xf0
 #define     GLOBAL_INT_EN_FLAG				BIT(0)
 
-#define WE_2_RE					0x100
-#define     WE_2_RE__VALUE				GENMASK(5, 0)
+#define TWHR2_AND_WE_2_RE			0x100
+#define     TWHR2_AND_WE_2_RE__WE_2_RE			GENMASK(5, 0)
+#define     TWHR2_AND_WE_2_RE__TWHR2			GENMASK(13, 8)
 
-#define ADDR_2_DATA				0x110
-#define     ADDR_2_DATA__VALUE				GENMASK(5, 0)
+#define TCWAW_AND_ADDR_2_DATA			0x110
+/* The width of ADDR_2_DATA is 6 bit for old IP, 7 bit for new IP */
+#define     TCWAW_AND_ADDR_2_DATA__ADDR_2_DATA		GENMASK(6, 0)
+#define     TCWAW_AND_ADDR_2_DATA__TCWAW		GENMASK(13, 8)
 
 #define RE_2_WE					0x120
 #define     RE_2_WE__VALUE				GENMASK(5, 0)
@@ -128,6 +131,7 @@
 
 #define CS_SETUP_CNT				0x220
 #define     CS_SETUP_CNT__VALUE				GENMASK(4, 0)
+#define     CS_SETUP_CNT__TWB				GENMASK(17, 12)
 
 #define SPARE_AREA_SKIP_BYTES			0x230
 #define     SPARE_AREA_SKIP_BYTES__VALUE		GENMASK(5, 0)
@@ -294,16 +298,8 @@
 #define     CHNL_ACTIVE__CHANNEL2			BIT(2)
 #define     CHNL_ACTIVE__CHANNEL3			BIT(3)
 
-#define FAIL 1                  /*failed flag*/
 #define PASS 0                  /*success flag*/
 
-#define CLK_X  5
-#define CLK_MULTI 4
-
-#define ONFI_BLOOM_TIME         1
-#define MODE5_WORKAROUND        0
-
-
 #define MODE_00    0x00000000
 #define MODE_01    0x04000000
 #define MODE_10    0x08000000
@@ -316,14 +312,10 @@ struct nand_buf {
 	dma_addr_t dma_buf;
 };
 
-#define INTEL_CE4100	1
-#define INTEL_MRST	2
-#define DT		3
-
 struct denali_nand_info {
 	struct nand_chip nand;
+	unsigned long clk_x_rate;	/* bus interface clock rate */
 	int flash_bank; /* currently selected chip */
-	int platform;
 	struct nand_buf buf;
 	struct device *dev;
 	int page;
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index be598230c108c..ebcce50f4005e 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -96,7 +96,6 @@ static int denali_dt_probe(struct platform_device *pdev)
 		denali->ecc_caps = data->ecc_caps;
 	}
 
-	denali->platform = DT;
 	denali->dev = &pdev->dev;
 	denali->irq = platform_get_irq(pdev, 0);
 	if (denali->irq < 0) {
@@ -121,6 +120,8 @@ static int denali_dt_probe(struct platform_device *pdev)
 	}
 	clk_prepare_enable(dt->clk);
 
+	denali->clk_x_rate = clk_get_rate(dt->clk);
+
 	ret = denali_init(denali);
 	if (ret)
 		goto out_disable_clk;
diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c
index 37dc0934c24c1..6217525c10005 100644
--- a/drivers/mtd/nand/denali_pci.c
+++ b/drivers/mtd/nand/denali_pci.c
@@ -19,6 +19,9 @@
 
 #define DENALI_NAND_NAME    "denali-nand-pci"
 
+#define INTEL_CE4100	1
+#define INTEL_MRST	2
+
 /* List of platforms this NAND controller has be integrated into */
 static const struct pci_device_id denali_pci_ids[] = {
 	{ PCI_VDEVICE(INTEL, 0x0701), INTEL_CE4100 },
@@ -47,13 +50,11 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	}
 
 	if (id->driver_data == INTEL_CE4100) {
-		denali->platform = INTEL_CE4100;
 		mem_base = pci_resource_start(dev, 0);
 		mem_len = pci_resource_len(dev, 1);
 		csr_base = pci_resource_start(dev, 1);
 		csr_len = pci_resource_len(dev, 1);
 	} else {
-		denali->platform = INTEL_MRST;
 		csr_base = pci_resource_start(dev, 0);
 		csr_len = pci_resource_len(dev, 0);
 		mem_base = pci_resource_start(dev, 1);
@@ -69,6 +70,7 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	denali->irq = dev->irq;
 	denali->ecc_caps = &denali_pci_ecc_caps;
 	denali->nand.ecc.options |= NAND_ECC_MAXIMIZE;
+	denali->clk_x_rate = 200000000;		/* 200 MHz */
 
 	ret = pci_request_regions(dev, DENALI_NAND_NAME);
 	if (ret) {
-- 
GitLab


From c19e31d0a32dd3ee555a536414104acf86b9a884 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:38 +0900
Subject: [PATCH 0302/1958] mtd: nand: denali: rework interrupt handling

Simplify the interrupt handling and fix issues:

- The register field view of INTR_EN / INTR_STATUS is different
  among IP versions.  The global macro DENALI_IRQ_ALL is hard-coded
  for Intel platforms.  The interrupt mask should be determined at
  run-time depending on the running platform.

- wait_for_irq() loops do {} while() until interested flags are
  asserted.  The logic can be simplified.

- The spin_lock() guard seems too complex (and suspicious in a race
  condition if wait_for_completion_timeout() bails out by timeout).

- denali->complete is reused again and again, but reinit_completion()
  is missing.  Add it.

Re-work the code to make it more robust and easier to handle.

While we are here, also rename the jump label "failed_req_irq" to
more appropriate "disable_irq".

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 317 ++++++++++++++------------------------
 drivers/mtd/nand/denali.h |   1 +
 2 files changed, 117 insertions(+), 201 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index ca2b6b8850ba8..2acdc5f42a006 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -30,27 +30,14 @@ MODULE_LICENSE("GPL");
 
 #define DENALI_NAND_NAME    "denali-nand"
 
-/*
- * We define a macro here that combines all interrupts this driver uses into
- * a single constant value, for convenience.
- */
-#define DENALI_IRQ_ALL	(INTR__DMA_CMD_COMP | \
-			INTR__ECC_TRANSACTION_DONE | \
-			INTR__ECC_ERR | \
-			INTR__PROGRAM_FAIL | \
-			INTR__LOAD_COMP | \
-			INTR__PROGRAM_COMP | \
-			INTR__TIME_OUT | \
-			INTR__ERASE_FAIL | \
-			INTR__RST_COMP | \
-			INTR__ERASE_COMP)
-
 /*
  * indicates whether or not the internal value for the flash bank is
  * valid or not
  */
 #define CHIP_SELECT_INVALID	-1
 
+#define DENALI_NR_BANKS		4
+
 /*
  * The bus interface clock, clk_x, is phase aligned with the core clock.  The
  * clk_x is an integral multiple N of the core clk.  The value N is configured
@@ -85,14 +72,6 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
  */
 #define BANK(x) ((x) << 24)
 
-/* forward declarations */
-static void clear_interrupts(struct denali_nand_info *denali);
-static uint32_t wait_for_irq(struct denali_nand_info *denali,
-							uint32_t irq_mask);
-static void denali_irq_enable(struct denali_nand_info *denali,
-							uint32_t int_mask);
-static uint32_t read_interrupt_status(struct denali_nand_info *denali);
-
 /*
  * Certain operations for the denali NAND controller use an indexed mode to
  * read/write data. The operation is performed by writing the address value
@@ -143,22 +122,6 @@ static void read_status(struct denali_nand_info *denali)
 		write_byte_to_buf(denali, 0);
 }
 
-/* resets a specific device connected to the core */
-static void reset_bank(struct denali_nand_info *denali)
-{
-	uint32_t irq_status;
-	uint32_t irq_mask = INTR__RST_COMP | INTR__TIME_OUT;
-
-	clear_interrupts(denali);
-
-	iowrite32(1 << denali->flash_bank, denali->flash_reg + DEVICE_RESET);
-
-	irq_status = wait_for_irq(denali, irq_mask);
-
-	if (irq_status & INTR__TIME_OUT)
-		dev_err(denali->dev, "reset bank failed.\n");
-}
-
 /* Reset the flash controller */
 static uint16_t denali_nand_reset(struct denali_nand_info *denali)
 {
@@ -201,169 +164,124 @@ static void detect_max_banks(struct denali_nand_info *denali)
 		denali->max_banks <<= 1;
 }
 
-static void denali_set_intr_modes(struct denali_nand_info *denali,
-					uint16_t INT_ENABLE)
+static void denali_enable_irq(struct denali_nand_info *denali)
 {
-	if (INT_ENABLE)
-		iowrite32(1, denali->flash_reg + GLOBAL_INT_ENABLE);
-	else
-		iowrite32(0, denali->flash_reg + GLOBAL_INT_ENABLE);
-}
+	int i;
 
-/*
- * validation function to verify that the controlling software is making
- * a valid request
- */
-static inline bool is_flash_bank_valid(int flash_bank)
-{
-	return flash_bank >= 0 && flash_bank < 4;
+	for (i = 0; i < DENALI_NR_BANKS; i++)
+		iowrite32(U32_MAX, denali->flash_reg + INTR_EN(i));
+	iowrite32(GLOBAL_INT_EN_FLAG, denali->flash_reg + GLOBAL_INT_ENABLE);
 }
 
-static void denali_irq_init(struct denali_nand_info *denali)
+static void denali_disable_irq(struct denali_nand_info *denali)
 {
-	uint32_t int_mask;
 	int i;
 
-	/* Disable global interrupts */
-	denali_set_intr_modes(denali, false);
-
-	int_mask = DENALI_IRQ_ALL;
-
-	/* Clear all status bits */
-	for (i = 0; i < denali->max_banks; ++i)
-		iowrite32(0xFFFF, denali->flash_reg + INTR_STATUS(i));
-
-	denali_irq_enable(denali, int_mask);
+	for (i = 0; i < DENALI_NR_BANKS; i++)
+		iowrite32(0, denali->flash_reg + INTR_EN(i));
+	iowrite32(0, denali->flash_reg + GLOBAL_INT_ENABLE);
 }
 
-static void denali_irq_cleanup(int irqnum, struct denali_nand_info *denali)
+static void denali_clear_irq(struct denali_nand_info *denali,
+			     int bank, uint32_t irq_status)
 {
-	denali_set_intr_modes(denali, false);
+	/* write one to clear bits */
+	iowrite32(irq_status, denali->flash_reg + INTR_STATUS(bank));
 }
 
-static void denali_irq_enable(struct denali_nand_info *denali,
-							uint32_t int_mask)
+static void denali_clear_irq_all(struct denali_nand_info *denali)
 {
 	int i;
 
-	for (i = 0; i < denali->max_banks; ++i)
-		iowrite32(int_mask, denali->flash_reg + INTR_EN(i));
+	for (i = 0; i < DENALI_NR_BANKS; i++)
+		denali_clear_irq(denali, i, U32_MAX);
 }
 
-/*
- * This function only returns when an interrupt that this driver cares about
- * occurs. This is to reduce the overhead of servicing interrupts
- */
-static inline uint32_t denali_irq_detected(struct denali_nand_info *denali)
+static irqreturn_t denali_isr(int irq, void *dev_id)
 {
-	return read_interrupt_status(denali) & DENALI_IRQ_ALL;
-}
+	struct denali_nand_info *denali = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	uint32_t irq_status;
+	int i;
 
-/* Interrupts are cleared by writing a 1 to the appropriate status bit */
-static inline void clear_interrupt(struct denali_nand_info *denali,
-							uint32_t irq_mask)
-{
-	uint32_t intr_status_reg;
+	spin_lock(&denali->irq_lock);
 
-	intr_status_reg = INTR_STATUS(denali->flash_bank);
+	for (i = 0; i < DENALI_NR_BANKS; i++) {
+		irq_status = ioread32(denali->flash_reg + INTR_STATUS(i));
+		if (irq_status)
+			ret = IRQ_HANDLED;
 
-	iowrite32(irq_mask, denali->flash_reg + intr_status_reg);
-}
+		denali_clear_irq(denali, i, irq_status);
 
-static void clear_interrupts(struct denali_nand_info *denali)
-{
-	uint32_t status;
+		if (i != denali->flash_bank)
+			continue;
+
+		denali->irq_status |= irq_status;
 
-	spin_lock_irq(&denali->irq_lock);
+		if (denali->irq_status & denali->irq_mask)
+			complete(&denali->complete);
+	}
 
-	status = read_interrupt_status(denali);
-	clear_interrupt(denali, status);
+	spin_unlock(&denali->irq_lock);
 
-	denali->irq_status = 0x0;
-	spin_unlock_irq(&denali->irq_lock);
+	return ret;
 }
 
-static uint32_t read_interrupt_status(struct denali_nand_info *denali)
+static void denali_reset_irq(struct denali_nand_info *denali)
 {
-	uint32_t intr_status_reg;
-
-	intr_status_reg = INTR_STATUS(denali->flash_bank);
+	unsigned long flags;
 
-	return ioread32(denali->flash_reg + intr_status_reg);
+	spin_lock_irqsave(&denali->irq_lock, flags);
+	denali->irq_status = 0;
+	denali->irq_mask = 0;
+	spin_unlock_irqrestore(&denali->irq_lock, flags);
 }
 
-/*
- * This is the interrupt service routine. It handles all interrupts
- * sent to this device. Note that on CE4100, this is a shared interrupt.
- */
-static irqreturn_t denali_isr(int irq, void *dev_id)
+static uint32_t denali_wait_for_irq(struct denali_nand_info *denali,
+				    uint32_t irq_mask)
 {
-	struct denali_nand_info *denali = dev_id;
+	unsigned long time_left, flags;
 	uint32_t irq_status;
-	irqreturn_t result = IRQ_NONE;
 
-	spin_lock(&denali->irq_lock);
+	spin_lock_irqsave(&denali->irq_lock, flags);
 
-	/* check to see if a valid NAND chip has been selected. */
-	if (is_flash_bank_valid(denali->flash_bank)) {
-		/*
-		 * check to see if controller generated the interrupt,
-		 * since this is a shared interrupt
-		 */
-		irq_status = denali_irq_detected(denali);
-		if (irq_status != 0) {
-			/* handle interrupt */
-			/* first acknowledge it */
-			clear_interrupt(denali, irq_status);
-			/*
-			 * store the status in the device context for someone
-			 * to read
-			 */
-			denali->irq_status |= irq_status;
-			/* notify anyone who cares that it happened */
-			complete(&denali->complete);
-			/* tell the OS that we've handled this */
-			result = IRQ_HANDLED;
-		}
+	irq_status = denali->irq_status;
+
+	if (irq_mask & irq_status) {
+		/* return immediately if the IRQ has already happened. */
+		spin_unlock_irqrestore(&denali->irq_lock, flags);
+		return irq_status;
 	}
-	spin_unlock(&denali->irq_lock);
-	return result;
+
+	denali->irq_mask = irq_mask;
+	reinit_completion(&denali->complete);
+	spin_unlock_irqrestore(&denali->irq_lock, flags);
+
+	time_left = wait_for_completion_timeout(&denali->complete,
+						msecs_to_jiffies(1000));
+	if (!time_left) {
+		dev_err(denali->dev, "timeout while waiting for irq 0x%x\n",
+			denali->irq_mask);
+		return 0;
+	}
+
+	return denali->irq_status;
 }
 
-static uint32_t wait_for_irq(struct denali_nand_info *denali, uint32_t irq_mask)
+/* resets a specific device connected to the core */
+static void reset_bank(struct denali_nand_info *denali)
 {
-	unsigned long comp_res;
-	uint32_t intr_status;
-	unsigned long timeout = msecs_to_jiffies(1000);
+	uint32_t irq_status;
 
-	do {
-		comp_res =
-			wait_for_completion_timeout(&denali->complete, timeout);
-		spin_lock_irq(&denali->irq_lock);
-		intr_status = denali->irq_status;
-
-		if (intr_status & irq_mask) {
-			denali->irq_status &= ~irq_mask;
-			spin_unlock_irq(&denali->irq_lock);
-			/* our interrupt was detected */
-			break;
-		}
+	denali_reset_irq(denali);
 
-		/*
-		 * these are not the interrupts you are looking for -
-		 * need to wait again
-		 */
-		spin_unlock_irq(&denali->irq_lock);
-	} while (comp_res != 0);
+	iowrite32(1 << denali->flash_bank, denali->flash_reg + DEVICE_RESET);
 
-	if (comp_res == 0) {
-		/* timeout */
-		pr_err("timeout occurred, status = 0x%x, mask = 0x%x\n",
-				intr_status, irq_mask);
+	irq_status = denali_wait_for_irq(denali,
+					 INTR__RST_COMP | INTR__TIME_OUT);
 
-		intr_status = 0;
-	}
-	return intr_status;
+	if (!(irq_status & INTR__RST_COMP))
+		dev_err(denali->dev, "reset bank failed.\n");
 }
 
 /*
@@ -397,7 +315,7 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
 
 	setup_ecc_for_xfer(denali, ecc_en, transfer_spare);
 
-	clear_interrupts(denali);
+	denali_reset_irq(denali);
 
 	addr = BANK(denali->flash_bank) | denali->page;
 
@@ -479,9 +397,9 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 		write_data_to_flash_mem(denali, buf, mtd->oobsize);
 
 		/* wait for operation to complete */
-		irq_status = wait_for_irq(denali, irq_mask);
+		irq_status = denali_wait_for_irq(denali, irq_mask);
 
-		if (irq_status == 0) {
+		if (!(irq_status & INTR__PROGRAM_COMP)) {
 			dev_err(denali->dev, "OOB write failed\n");
 			status = -EIO;
 		}
@@ -510,9 +428,9 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 		 * can always use status0 bit as the
 		 * mask is identical for each bank.
 		 */
-		irq_status = wait_for_irq(denali, irq_mask);
+		irq_status = denali_wait_for_irq(denali, irq_mask);
 
-		if (irq_status == 0)
+		if (!(irq_status & INTR__LOAD_COMP))
 			dev_err(denali->dev, "page on OOB timeout %d\n",
 					denali->page);
 
@@ -620,9 +538,9 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 	unsigned int err_byte, err_sector, err_device;
 	uint8_t err_cor_value;
 	unsigned int prev_sector = 0;
+	uint32_t irq_status;
 
-	/* read the ECC errors. we'll ignore them for now */
-	denali_set_intr_modes(denali, false);
+	denali_reset_irq(denali);
 
 	do {
 		err_addr = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS);
@@ -674,10 +592,9 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 	 * ECC_TRANSACTION_DONE interrupt, so here just wait for
 	 * a while for this interrupt
 	 */
-	while (!(read_interrupt_status(denali) & INTR__ECC_TRANSACTION_DONE))
-		cpu_relax();
-	clear_interrupts(denali);
-	denali_set_intr_modes(denali, true);
+	irq_status = denali_wait_for_irq(denali, INTR__ECC_TRANSACTION_DONE);
+	if (!(irq_status & INTR__ECC_TRANSACTION_DONE))
+		return -EIO;
 
 	return max_bitflips;
 }
@@ -778,15 +695,14 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	dma_sync_single_for_device(denali->dev, addr, size, DMA_TO_DEVICE);
 
-	clear_interrupts(denali);
+	denali_reset_irq(denali);
 	denali_enable_dma(denali, true);
 
 	denali_setup_dma(denali, DENALI_WRITE);
 
 	/* wait for operation to complete */
-	irq_status = wait_for_irq(denali, irq_mask);
-
-	if (irq_status == 0) {
+	irq_status = denali_wait_for_irq(denali, irq_mask);
+	if (!(irq_status & INTR__DMA_CMD_COMP)) {
 		dev_err(denali->dev, "timeout on write_page (type = %d)\n",
 			raw_xfer);
 		ret = -EIO;
@@ -865,11 +781,11 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	denali_enable_dma(denali, true);
 	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
 
-	clear_interrupts(denali);
+	denali_reset_irq(denali);
 	denali_setup_dma(denali, DENALI_READ);
 
 	/* wait for operation to complete */
-	irq_status = wait_for_irq(denali, irq_mask);
+	irq_status = denali_wait_for_irq(denali, irq_mask);
 
 	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
 
@@ -901,6 +817,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	dma_addr_t addr = denali->buf.dma_buf;
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_mask = INTR__DMA_CMD_COMP;
+	uint32_t irq_status;
 
 	denali->page = page;
 
@@ -909,11 +826,13 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 
 	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
 
-	clear_interrupts(denali);
+	denali_reset_irq(denali);
 	denali_setup_dma(denali, DENALI_READ);
 
 	/* wait for operation to complete */
-	wait_for_irq(denali, irq_mask);
+	irq_status = denali_wait_for_irq(denali, irq_mask);
+	if (irq_status & INTR__DMA_CMD_COMP)
+		return -ETIMEDOUT;
 
 	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
 
@@ -940,9 +859,7 @@ static void denali_select_chip(struct mtd_info *mtd, int chip)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
-	spin_lock_irq(&denali->irq_lock);
 	denali->flash_bank = chip;
-	spin_unlock_irq(&denali->irq_lock);
 }
 
 static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
@@ -953,19 +870,19 @@ static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
 static int denali_erase(struct mtd_info *mtd, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-
 	uint32_t cmd, irq_status;
 
-	clear_interrupts(denali);
+	denali_reset_irq(denali);
 
 	/* setup page read request for access type */
 	cmd = MODE_10 | BANK(denali->flash_bank) | page;
 	index_addr(denali, cmd, 0x1);
 
 	/* wait for erase to complete or failure to occur */
-	irq_status = wait_for_irq(denali, INTR__ERASE_COMP | INTR__ERASE_FAIL);
+	irq_status = denali_wait_for_irq(denali,
+					 INTR__ERASE_COMP | INTR__ERASE_FAIL);
 
-	return irq_status & INTR__ERASE_FAIL ? NAND_STATUS_FAIL : PASS;
+	return irq_status & INTR__ERASE_COMP ? 0 : NAND_STATUS_FAIL;
 }
 
 static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
@@ -1152,7 +1069,6 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	/* Should set value for these registers when init */
 	iowrite32(0, denali->flash_reg + TWO_ROW_ADDR_CYCLES);
 	iowrite32(1, denali->flash_reg + ECC_ENABLE);
-	denali_irq_init(denali);
 }
 
 int denali_calc_ecc_bytes(int step_size, int strength)
@@ -1264,9 +1180,6 @@ static void denali_drv_init(struct denali_nand_info *denali)
 
 	/* indicate that MTD has not selected a valid bank yet */
 	denali->flash_bank = CHIP_SELECT_INVALID;
-
-	/* initialize our irq_status variable to indicate no interrupts */
-	denali->irq_status = 0;
 }
 
 static int denali_multidev_fixup(struct denali_nand_info *denali)
@@ -1336,6 +1249,8 @@ int denali_init(struct denali_nand_info *denali)
 	denali_hw_init(denali);
 	denali_drv_init(denali);
 
+	denali_clear_irq_all(denali);
+
 	/* Request IRQ after all the hardware initialization is finished */
 	ret = devm_request_irq(denali->dev, denali->irq, denali_isr,
 			       IRQF_SHARED, DENALI_NAND_NAME, denali);
@@ -1344,8 +1259,8 @@ int denali_init(struct denali_nand_info *denali)
 		return ret;
 	}
 
-	/* now that our ISR is registered, we can enable interrupts */
-	denali_set_intr_modes(denali, true);
+	denali_enable_irq(denali);
+
 	nand_set_flash_node(chip, denali->dev->of_node);
 	/* Fallback to the default name if DT did not give "label" property */
 	if (!mtd->name)
@@ -1370,7 +1285,7 @@ int denali_init(struct denali_nand_info *denali)
 	 */
 	ret = nand_scan_ident(mtd, denali->max_banks, NULL);
 	if (ret)
-		goto failed_req_irq;
+		goto disable_irq;
 
 	/* allocate the right size buffer now */
 	devm_kfree(denali->dev, denali->buf.buf);
@@ -1379,7 +1294,7 @@ int denali_init(struct denali_nand_info *denali)
 			     GFP_KERNEL);
 	if (!denali->buf.buf) {
 		ret = -ENOMEM;
-		goto failed_req_irq;
+		goto disable_irq;
 	}
 
 	ret = dma_set_mask(denali->dev,
@@ -1387,7 +1302,7 @@ int denali_init(struct denali_nand_info *denali)
 					64 : 32));
 	if (ret) {
 		dev_err(denali->dev, "No usable DMA configuration\n");
-		goto failed_req_irq;
+		goto disable_irq;
 	}
 
 	denali->buf.dma_buf = dma_map_single(denali->dev, denali->buf.buf,
@@ -1396,7 +1311,7 @@ int denali_init(struct denali_nand_info *denali)
 	if (dma_mapping_error(denali->dev, denali->buf.dma_buf)) {
 		dev_err(denali->dev, "Failed to map DMA buffer\n");
 		ret = -EIO;
-		goto failed_req_irq;
+		goto disable_irq;
 	}
 
 	/*
@@ -1420,7 +1335,7 @@ int denali_init(struct denali_nand_info *denali)
 	ret = denali_ecc_setup(mtd, chip, denali);
 	if (ret) {
 		dev_err(denali->dev, "Failed to setup ECC settings.\n");
-		goto failed_req_irq;
+		goto disable_irq;
 	}
 
 	dev_dbg(denali->dev,
@@ -1454,21 +1369,21 @@ int denali_init(struct denali_nand_info *denali)
 
 	ret = denali_multidev_fixup(denali);
 	if (ret)
-		goto failed_req_irq;
+		goto disable_irq;
 
 	ret = nand_scan_tail(mtd);
 	if (ret)
-		goto failed_req_irq;
+		goto disable_irq;
 
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret) {
 		dev_err(denali->dev, "Failed to register MTD: %d\n", ret);
-		goto failed_req_irq;
+		goto disable_irq;
 	}
 	return 0;
 
-failed_req_irq:
-	denali_irq_cleanup(denali->irq, denali);
+disable_irq:
+	denali_disable_irq(denali);
 
 	return ret;
 }
@@ -1486,7 +1401,7 @@ void denali_remove(struct denali_nand_info *denali)
 	int bufsize = mtd->writesize + mtd->oobsize;
 
 	nand_release(mtd);
-	denali_irq_cleanup(denali->irq, denali);
+	denali_disable_irq(denali);
 	dma_unmap_single(denali->dev, denali->buf.dma_buf, bufsize,
 			 DMA_BIDIRECTIONAL);
 }
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index fb473895a79dd..a0ac0f84f8b5a 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -325,6 +325,7 @@ struct denali_nand_info {
 	/* elements used by ISR */
 	struct completion complete;
 	spinlock_t irq_lock;
+	uint32_t irq_mask;
 	uint32_t irq_status;
 	int irq;
 
-- 
GitLab


From fa6134e5457d3177b82b35fa4004505f2571150a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:39 +0900
Subject: [PATCH 0303/1958] mtd: nand: denali: switch over to cmd_ctrl instead
 of cmdfunc

The NAND_CMD_SET_FEATURES support is missing from denali_cmdfunc().
We also see /* TODO: Read OOB data */ comment.

It would be possible to add more commands along with the current
implementation, but having ->cmd_ctrl() seems a better approach from
the discussion with Boris [1].

Rely on the default ->cmdfunc() from the framework and implement the
driver's own ->cmd_ctrl().

This transition also fixes NAND_CMD_STATUS and NAND_CMD_PARAM handling.
NAND_CMD_STATUS was just faked by the register read, so the only valid
bit was the WP bit.  NAND_CMD_PARAM was completely broken; not only the
command sent on the bus was NAND_CMD_STATUS instead of NAND_CMD_PARAM,
but also the driver was only reading 8 bytes, while the parameter page
contains several hundreds of bytes.

Also add ->write_byte(), which is needed for write direction commands,
->read/write_buf(16), which will be used some commits later.
->read_word() is not used for now, but the core may call it in the
future.

Now, this driver can drop nand_onfi_get_set_features_notsupp().

[1] https://lkml.org/lkml/2017/3/15/97

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 232 ++++++++++++++++++++------------------
 drivers/mtd/nand/denali.h |   2 -
 2 files changed, 123 insertions(+), 111 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 2acdc5f42a006..8091ba0916cc4 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -85,43 +85,6 @@ static void index_addr(struct denali_nand_info *denali,
 	iowrite32(data, denali->flash_mem + 0x10);
 }
 
-/* Perform an indexed read of the device */
-static void index_addr_read_data(struct denali_nand_info *denali,
-				 uint32_t address, uint32_t *pdata)
-{
-	iowrite32(address, denali->flash_mem);
-	*pdata = ioread32(denali->flash_mem + 0x10);
-}
-
-/*
- * We need to buffer some data for some of the NAND core routines.
- * The operations manage buffering that data.
- */
-static void reset_buf(struct denali_nand_info *denali)
-{
-	denali->buf.head = denali->buf.tail = 0;
-}
-
-static void write_byte_to_buf(struct denali_nand_info *denali, uint8_t byte)
-{
-	denali->buf.buf[denali->buf.tail++] = byte;
-}
-
-/* reads the status of the device */
-static void read_status(struct denali_nand_info *denali)
-{
-	uint32_t cmd;
-
-	/* initialize the data buffer to store status */
-	reset_buf(denali);
-
-	cmd = ioread32(denali->flash_reg + WRITE_PROTECT);
-	if (cmd)
-		write_byte_to_buf(denali, NAND_STATUS_WP);
-	else
-		write_byte_to_buf(denali, 0);
-}
-
 /* Reset the flash controller */
 static uint16_t denali_nand_reset(struct denali_nand_info *denali)
 {
@@ -268,20 +231,16 @@ static uint32_t denali_wait_for_irq(struct denali_nand_info *denali,
 	return denali->irq_status;
 }
 
-/* resets a specific device connected to the core */
-static void reset_bank(struct denali_nand_info *denali)
+static uint32_t denali_check_irq(struct denali_nand_info *denali)
 {
+	unsigned long flags;
 	uint32_t irq_status;
 
-	denali_reset_irq(denali);
-
-	iowrite32(1 << denali->flash_bank, denali->flash_reg + DEVICE_RESET);
-
-	irq_status = denali_wait_for_irq(denali,
-					 INTR__RST_COMP | INTR__TIME_OUT);
+	spin_lock_irqsave(&denali->irq_lock, flags);
+	irq_status = denali->irq_status;
+	spin_unlock_irqrestore(&denali->irq_lock, flags);
 
-	if (!(irq_status & INTR__RST_COMP))
-		dev_err(denali->dev, "reset bank failed.\n");
+	return irq_status;
 }
 
 /*
@@ -302,6 +261,105 @@ static void setup_ecc_for_xfer(struct denali_nand_info *denali, bool ecc_en,
 	iowrite32(transfer_spare_flag, denali->flash_reg + TRANSFER_SPARE_REG);
 }
 
+static void denali_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len; i++)
+		buf[i] = ioread32(denali->flash_mem + 0x10);
+}
+
+static void denali_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len; i++)
+		iowrite32(buf[i], denali->flash_mem + 0x10);
+}
+
+static void denali_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	uint16_t *buf16 = (uint16_t *)buf;
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len / 2; i++)
+		buf16[i] = ioread32(denali->flash_mem + 0x10);
+}
+
+static void denali_write_buf16(struct mtd_info *mtd, const uint8_t *buf,
+			       int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	const uint16_t *buf16 = (const uint16_t *)buf;
+	int i;
+
+	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+
+	for (i = 0; i < len / 2; i++)
+		iowrite32(buf16[i], denali->flash_mem + 0x10);
+}
+
+static uint8_t denali_read_byte(struct mtd_info *mtd)
+{
+	uint8_t byte;
+
+	denali_read_buf(mtd, &byte, 1);
+
+	return byte;
+}
+
+static void denali_write_byte(struct mtd_info *mtd, uint8_t byte)
+{
+	denali_write_buf(mtd, &byte, 1);
+}
+
+static uint16_t denali_read_word(struct mtd_info *mtd)
+{
+	uint16_t word;
+
+	denali_read_buf16(mtd, (uint8_t *)&word, 2);
+
+	return word;
+}
+
+static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	uint32_t type;
+
+	if (ctrl & NAND_CLE)
+		type = 0;
+	else if (ctrl & NAND_ALE)
+		type = 1;
+	else
+		return;
+
+	/*
+	 * Some commands are followed by chip->dev_ready or chip->waitfunc.
+	 * irq_status must be cleared here to catch the R/B# interrupt later.
+	 */
+	if (ctrl & NAND_CTRL_CHANGE)
+		denali_reset_irq(denali);
+
+	index_addr(denali, MODE_11 | BANK(denali->flash_bank) | type, dat);
+}
+
+static int denali_dev_ready(struct mtd_info *mtd)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+
+	return !!(denali_check_irq(denali) & INTR__INT_ACT);
+}
+
 /*
  * sends a pipeline command operation to the controller. See the Denali NAND
  * controller's user guide for more information (section 4.2.3.6).
@@ -844,17 +902,6 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	return 0;
 }
 
-static uint8_t denali_read_byte(struct mtd_info *mtd)
-{
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint8_t result = 0xff;
-
-	if (denali->buf.head < denali->buf.tail)
-		result = denali->buf.buf[denali->buf.head++];
-
-	return result;
-}
-
 static void denali_select_chip(struct mtd_info *mtd, int chip)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
@@ -864,7 +911,13 @@ static void denali_select_chip(struct mtd_info *mtd, int chip)
 
 static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
 {
-	return 0;
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	uint32_t irq_status;
+
+	/* R/B# pin transitioned from low to high? */
+	irq_status = denali_wait_for_irq(denali, INTR__INT_ACT);
+
+	return irq_status & INTR__INT_ACT ? 0 : NAND_STATUS_FAIL;
 }
 
 static int denali_erase(struct mtd_info *mtd, int page)
@@ -885,45 +938,6 @@ static int denali_erase(struct mtd_info *mtd, int page)
 	return irq_status & INTR__ERASE_COMP ? 0 : NAND_STATUS_FAIL;
 }
 
-static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
-			   int page)
-{
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t addr, id;
-	int i;
-
-	switch (cmd) {
-	case NAND_CMD_STATUS:
-		read_status(denali);
-		break;
-	case NAND_CMD_READID:
-	case NAND_CMD_PARAM:
-		reset_buf(denali);
-		/*
-		 * sometimes ManufactureId read from register is not right
-		 * e.g. some of Micron MT29F32G08QAA MLC NAND chips
-		 * So here we send READID cmd to NAND insteand
-		 */
-		addr = MODE_11 | BANK(denali->flash_bank);
-		index_addr(denali, addr | 0, 0x90);
-		index_addr(denali, addr | 1, col);
-		for (i = 0; i < 8; i++) {
-			index_addr_read_data(denali, addr | 2, &id);
-			write_byte_to_buf(denali, id);
-		}
-		break;
-	case NAND_CMD_RESET:
-		reset_bank(denali);
-		break;
-	case NAND_CMD_READOOB:
-		/* TODO: Read OOB data */
-		break;
-	default:
-		pr_err(": unsupported command received 0x%x\n", cmd);
-		break;
-	}
-}
-
 #define DIV_ROUND_DOWN_ULL(ll, d) \
 	({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })
 
@@ -1239,12 +1253,6 @@ int denali_init(struct denali_nand_info *denali)
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	/* allocate a temporary buffer for nand_scan_ident() */
-	denali->buf.buf = devm_kzalloc(denali->dev, PAGE_SIZE,
-					GFP_DMA | GFP_KERNEL);
-	if (!denali->buf.buf)
-		return -ENOMEM;
-
 	mtd->dev.parent = denali->dev;
 	denali_hw_init(denali);
 	denali_drv_init(denali);
@@ -1268,11 +1276,12 @@ int denali_init(struct denali_nand_info *denali)
 
 	/* register the driver with the NAND core subsystem */
 	chip->select_chip = denali_select_chip;
-	chip->cmdfunc = denali_cmdfunc;
 	chip->read_byte = denali_read_byte;
+	chip->write_byte = denali_write_byte;
+	chip->read_word = denali_read_word;
+	chip->cmd_ctrl = denali_cmd_ctrl;
+	chip->dev_ready = denali_dev_ready;
 	chip->waitfunc = denali_waitfunc;
-	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
-	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	/* clk rate info is needed for setup_data_interface */
 	if (denali->clk_x_rate)
@@ -1287,8 +1296,6 @@ int denali_init(struct denali_nand_info *denali)
 	if (ret)
 		goto disable_irq;
 
-	/* allocate the right size buffer now */
-	devm_kfree(denali->dev, denali->buf.buf);
 	denali->buf.buf = devm_kzalloc(denali->dev,
 			     mtd->writesize + mtd->oobsize,
 			     GFP_KERNEL);
@@ -1358,6 +1365,13 @@ int denali_init(struct denali_nand_info *denali)
 
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
 
+	if (chip->options & NAND_BUSWIDTH_16) {
+		chip->read_buf = denali_read_buf16;
+		chip->write_buf = denali_write_buf16;
+	} else {
+		chip->read_buf = denali_read_buf;
+		chip->write_buf = denali_write_buf;
+	}
 	chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS;
 	chip->ecc.read_page = denali_read_page;
 	chip->ecc.read_page_raw = denali_read_page_raw;
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index a0ac0f84f8b5a..a84d8784ee987 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -306,8 +306,6 @@
 #define MODE_11    0x0C000000
 
 struct nand_buf {
-	int head;
-	int tail;
 	uint8_t *buf;
 	dma_addr_t dma_buf;
 };
-- 
GitLab


From f486287d2372422d68768c88fe54e80a48aee8ec Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:40 +0900
Subject: [PATCH 0304/1958] mtd: nand: denali: fix bank reset function to
 detect the number of chips

The nand_scan_ident() iterates over maxchips, and calls nand_reset()
for each.  This driver currently passes the maximum number of banks
(=chip selects) supported by the controller as maxchips.  So, maxchips
is typically 4 or 8.  Usually, less number of NAND chips are connected
to the controller.

This can be a problem for ONFi devices.  Now, this driver implements
->setup_data_interface() hook, so nand_setup_data_interface() issues
Set Features (0xEF) command, which waits until the chip returns R/B#
response.  If no chip there, we know it never happens, but the driver
still ends up with waiting for a long time.  It will finally bail-out
with timeout error and the driver will work with existing chips, but
unnecessary wait will give a bad user experience.

The denali_nand_reset() polls the INTR__RST_COMP and INTR__TIME_OUT
bits, but they are always set even if not NAND chip is connected to
that bank.  To know the chip existence, INTR__INT_ACT bit must be
checked; this flag is set only when R/B# is toggled.  Since the Reset
(0xFF) command toggles the R/B# pin, this can be used to know the
actual number of chips, and update denali->max_banks.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 52 +++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 8091ba0916cc4..3169ba58c58af 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -85,33 +85,6 @@ static void index_addr(struct denali_nand_info *denali,
 	iowrite32(data, denali->flash_mem + 0x10);
 }
 
-/* Reset the flash controller */
-static uint16_t denali_nand_reset(struct denali_nand_info *denali)
-{
-	int i;
-
-	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
-		denali->flash_reg + INTR_STATUS(i));
-
-	for (i = 0; i < denali->max_banks; i++) {
-		iowrite32(1 << i, denali->flash_reg + DEVICE_RESET);
-		while (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			(INTR__RST_COMP | INTR__TIME_OUT)))
-			cpu_relax();
-		if (ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			INTR__TIME_OUT)
-			dev_dbg(denali->dev,
-			"NAND Reset operation timed out on bank %d\n", i);
-	}
-
-	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
-			  denali->flash_reg + INTR_STATUS(i));
-
-	return PASS;
-}
-
 /*
  * Use the configuration feature register to determine the maximum number of
  * banks that the hardware supports.
@@ -1053,7 +1026,28 @@ static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
 	return 0;
 }
 
-/* Initialization code to bring the device up to a known good state */
+static void denali_reset_banks(struct denali_nand_info *denali)
+{
+	int i;
+
+	denali_clear_irq_all(denali);
+
+	for (i = 0; i < denali->max_banks; i++) {
+		iowrite32(1 << i, denali->flash_reg + DEVICE_RESET);
+		while (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
+			(INTR__RST_COMP | INTR__TIME_OUT)))
+			cpu_relax();
+		if (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
+		      INTR__INT_ACT))
+			break;
+	}
+
+	dev_dbg(denali->dev, "%d chips connected\n", i);
+	denali->max_banks = i;
+
+	denali_clear_irq_all(denali);
+}
+
 static void denali_hw_init(struct denali_nand_info *denali)
 {
 	/*
@@ -1073,7 +1067,7 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	denali->bbtskipbytes = ioread32(denali->flash_reg +
 						SPARE_AREA_SKIP_BYTES);
 	detect_max_banks(denali);
-	denali_nand_reset(denali);
+	denali_reset_banks(denali);
 	iowrite32(0x0F, denali->flash_reg + RB_PIN_ENABLED);
 	iowrite32(CHIP_EN_DONT_CARE__FLAG,
 			denali->flash_reg + CHIP_ENABLE_DONT_CARE);
-- 
GitLab


From d49f5790278d18fd6b8e474397d9abfdf29a48f2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:41 +0900
Subject: [PATCH 0305/1958] mtd: nand: denali: use interrupt instead of polling
 for bank reset

The current bank reset implementation polls the INTR_STATUS register
until interested bits are set.  This is not good because:

- polling simply wastes time-slice of the thread

- The while() loop may continue eternally if no bit is set, for
  example, due to the controller problem.  The denali_wait_for_irq()
  uses wait_for_completion_timeout(), which is safer.

We can use interrupt by moving the denali_reset_bank() call below
the interrupt setup.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 3169ba58c58af..31d987d26e129 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1028,24 +1028,25 @@ static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
 
 static void denali_reset_banks(struct denali_nand_info *denali)
 {
+	u32 irq_status;
 	int i;
 
-	denali_clear_irq_all(denali);
-
 	for (i = 0; i < denali->max_banks; i++) {
-		iowrite32(1 << i, denali->flash_reg + DEVICE_RESET);
-		while (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			(INTR__RST_COMP | INTR__TIME_OUT)))
-			cpu_relax();
-		if (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
-		      INTR__INT_ACT))
+		denali->flash_bank = i;
+
+		denali_reset_irq(denali);
+
+		iowrite32(DEVICE_RESET__BANK(i),
+			  denali->flash_reg + DEVICE_RESET);
+
+		irq_status = denali_wait_for_irq(denali,
+			INTR__RST_COMP | INTR__INT_ACT | INTR__TIME_OUT);
+		if (!(irq_status & INTR__INT_ACT))
 			break;
 	}
 
 	dev_dbg(denali->dev, "%d chips connected\n", i);
 	denali->max_banks = i;
-
-	denali_clear_irq_all(denali);
 }
 
 static void denali_hw_init(struct denali_nand_info *denali)
@@ -1067,7 +1068,6 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	denali->bbtskipbytes = ioread32(denali->flash_reg +
 						SPARE_AREA_SKIP_BYTES);
 	detect_max_banks(denali);
-	denali_reset_banks(denali);
 	iowrite32(0x0F, denali->flash_reg + RB_PIN_ENABLED);
 	iowrite32(CHIP_EN_DONT_CARE__FLAG,
 			denali->flash_reg + CHIP_ENABLE_DONT_CARE);
@@ -1185,9 +1185,6 @@ static void denali_drv_init(struct denali_nand_info *denali)
 	 * element that might be access shared data (interrupt status)
 	 */
 	spin_lock_init(&denali->irq_lock);
-
-	/* indicate that MTD has not selected a valid bank yet */
-	denali->flash_bank = CHIP_SELECT_INVALID;
 }
 
 static int denali_multidev_fixup(struct denali_nand_info *denali)
@@ -1262,6 +1259,9 @@ int denali_init(struct denali_nand_info *denali)
 	}
 
 	denali_enable_irq(denali);
+	denali_reset_banks(denali);
+
+	denali->flash_bank = CHIP_SELECT_INVALID;
 
 	nand_set_flash_node(chip, denali->dev->of_node);
 	/* Fallback to the default name if DT did not give "label" property */
-- 
GitLab


From 2291cb89680637f8f965371973273be312f2cede Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:42 +0900
Subject: [PATCH 0306/1958] mtd: nand: denali: propagate page to helpers via
 function argument

This driver stores the currently addressed page into denali->page,
which is later read out by helper functions.  While I am tackling on
this driver, I often missed to insert "denali->page = page;" where
needed.  This makes page_read/write callbacks to get access to a
wrong page, which is a bug hard to figure out.

Instead, I'd rather pass the page via function argument because the
compiler's prototype checks will help to detect bugs.

For the same reason, propagate dma_addr to the DMA helpers instead
of denali->buf.dma_buf .

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 58 ++++++++++++++++-----------------------
 drivers/mtd/nand/denali.h |  1 -
 2 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 31d987d26e129..24849f6bb37b3 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -337,7 +337,7 @@ static int denali_dev_ready(struct mtd_info *mtd)
  * sends a pipeline command operation to the controller. See the Denali NAND
  * controller's user guide for more information (section 4.2.3.6).
  */
-static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
+static int denali_send_pipeline_cmd(struct denali_nand_info *denali, int page,
 				    bool ecc_en, bool transfer_spare,
 				    int access_type, int op)
 {
@@ -348,7 +348,7 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
 
 	denali_reset_irq(denali);
 
-	addr = BANK(denali->flash_bank) | denali->page;
+	addr = BANK(denali->flash_bank) | page;
 
 	if (op == DENALI_WRITE && access_type != SPARE_ACCESS) {
 		cmd = MODE_01 | addr;
@@ -421,9 +421,7 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 	uint32_t irq_mask = INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL;
 	int status = 0;
 
-	denali->page = page;
-
-	if (denali_send_pipeline_cmd(denali, false, false, SPARE_ACCESS,
+	if (denali_send_pipeline_cmd(denali, page, false, false, SPARE_ACCESS,
 							DENALI_WRITE) == PASS) {
 		write_data_to_flash_mem(denali, buf, mtd->oobsize);
 
@@ -448,9 +446,7 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 	uint32_t irq_mask = INTR__LOAD_COMP;
 	uint32_t irq_status, addr, cmd;
 
-	denali->page = page;
-
-	if (denali_send_pipeline_cmd(denali, false, true, SPARE_ACCESS,
+	if (denali_send_pipeline_cmd(denali, page, false, true, SPARE_ACCESS,
 							DENALI_READ) == PASS) {
 		read_data_from_flash_mem(denali, buf, mtd->oobsize);
 
@@ -462,8 +458,7 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 		irq_status = denali_wait_for_irq(denali, irq_mask);
 
 		if (!(irq_status & INTR__LOAD_COMP))
-			dev_err(denali->dev, "page on OOB timeout %d\n",
-					denali->page);
+			dev_err(denali->dev, "page on OOB timeout %d\n", page);
 
 		/*
 		 * We set the device back to MAIN_ACCESS here as I observed
@@ -472,7 +467,7 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 		 * is reliable (according to the MTD test infrastructure)
 		 * if you are in MAIN_ACCESS.
 		 */
-		addr = BANK(denali->flash_bank) | denali->page;
+		addr = BANK(denali->flash_bank) | page;
 		cmd = MODE_10 | addr;
 		index_addr(denali, cmd, MAIN_ACCESS);
 	}
@@ -637,13 +632,13 @@ static void denali_enable_dma(struct denali_nand_info *denali, bool en)
 	ioread32(denali->flash_reg + DMA_ENABLE);
 }
 
-static void denali_setup_dma64(struct denali_nand_info *denali, int op)
+static void denali_setup_dma64(struct denali_nand_info *denali,
+			       dma_addr_t dma_addr, int page, int op)
 {
 	uint32_t mode;
 	const int page_count = 1;
-	uint64_t addr = denali->buf.dma_buf;
 
-	mode = MODE_10 | BANK(denali->flash_bank) | denali->page;
+	mode = MODE_10 | BANK(denali->flash_bank) | page;
 
 	/* DMA is a three step process */
 
@@ -654,41 +649,42 @@ static void denali_setup_dma64(struct denali_nand_info *denali, int op)
 	index_addr(denali, mode, 0x01002000 | (64 << 16) | op | page_count);
 
 	/* 2. set memory low address */
-	index_addr(denali, mode, addr);
+	index_addr(denali, mode, dma_addr);
 
 	/* 3. set memory high address */
-	index_addr(denali, mode, addr >> 32);
+	index_addr(denali, mode, (uint64_t)dma_addr >> 32);
 }
 
-static void denali_setup_dma32(struct denali_nand_info *denali, int op)
+static void denali_setup_dma32(struct denali_nand_info *denali,
+			       dma_addr_t dma_addr, int page, int op)
 {
 	uint32_t mode;
 	const int page_count = 1;
-	uint32_t addr = denali->buf.dma_buf;
 
 	mode = MODE_10 | BANK(denali->flash_bank);
 
 	/* DMA is a four step process */
 
 	/* 1. setup transfer type and # of pages */
-	index_addr(denali, mode | denali->page, 0x2000 | op | page_count);
+	index_addr(denali, mode | page, 0x2000 | op | page_count);
 
 	/* 2. set memory high address bits 23:8 */
-	index_addr(denali, mode | ((addr >> 16) << 8), 0x2200);
+	index_addr(denali, mode | ((dma_addr >> 16) << 8), 0x2200);
 
 	/* 3. set memory low address bits 23:8 */
-	index_addr(denali, mode | ((addr & 0xffff) << 8), 0x2300);
+	index_addr(denali, mode | ((dma_addr & 0xffff) << 8), 0x2300);
 
 	/* 4. interrupt when complete, burst len = 64 bytes */
 	index_addr(denali, mode | 0x14000, 0x2400);
 }
 
-static void denali_setup_dma(struct denali_nand_info *denali, int op)
+static void denali_setup_dma(struct denali_nand_info *denali,
+			     dma_addr_t dma_addr, int page, int op)
 {
 	if (denali->caps & DENALI_CAP_DMA_64BIT)
-		denali_setup_dma64(denali, op);
+		denali_setup_dma64(denali, dma_addr, page, op);
 	else
-		denali_setup_dma32(denali, op);
+		denali_setup_dma32(denali, dma_addr, page, op);
 }
 
 /*
@@ -705,8 +701,6 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
 	int ret = 0;
 
-	denali->page = page;
-
 	/*
 	 * if it is a raw xfer, we want to disable ecc and send the spare area.
 	 * !raw_xfer - enable ecc
@@ -729,7 +723,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	denali_reset_irq(denali);
 	denali_enable_dma(denali, true);
 
-	denali_setup_dma(denali, DENALI_WRITE);
+	denali_setup_dma(denali, addr, page, DENALI_WRITE);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
@@ -805,15 +799,13 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	unsigned long uncor_ecc_flags = 0;
 	int stat = 0;
 
-	denali->page = page;
-
 	setup_ecc_for_xfer(denali, true, false);
 
 	denali_enable_dma(denali, true);
 	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
 
 	denali_reset_irq(denali);
-	denali_setup_dma(denali, DENALI_READ);
+	denali_setup_dma(denali, addr, page, DENALI_READ);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
@@ -832,7 +824,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 		return stat;
 
 	if (uncor_ecc_flags) {
-		read_oob_data(mtd, chip->oob_poi, denali->page);
+		read_oob_data(mtd, chip->oob_poi, page);
 
 		stat = denali_check_erased_page(mtd, chip, buf,
 						uncor_ecc_flags, stat);
@@ -850,15 +842,13 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	uint32_t irq_mask = INTR__DMA_CMD_COMP;
 	uint32_t irq_status;
 
-	denali->page = page;
-
 	setup_ecc_for_xfer(denali, false, true);
 	denali_enable_dma(denali, true);
 
 	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
 
 	denali_reset_irq(denali);
-	denali_setup_dma(denali, DENALI_READ);
+	denali_setup_dma(denali, addr, page, DENALI_READ);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index a84d8784ee987..ad2223d179d07 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -316,7 +316,6 @@ struct denali_nand_info {
 	int flash_bank; /* currently selected chip */
 	struct nand_buf buf;
 	struct device *dev;
-	int page;
 	void __iomem *flash_reg;	/* Register Interface */
 	void __iomem *flash_mem;	/* Host Data/Command Interface */
 
-- 
GitLab


From 00fc615fd671877cd6e52b556a9d097223b6804e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:43 +0900
Subject: [PATCH 0307/1958] mtd: nand: denali: merge struct nand_buf into
 struct denali_nand_info

Now struct nand_buf has only two members, so I see no reason for the
separation.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 29 ++++++++++++++---------------
 drivers/mtd/nand/denali.h |  8 ++------
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 24849f6bb37b3..4ba8ad6103817 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -695,7 +695,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 			const uint8_t *buf, int page, bool raw_xfer)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->buf.dma_buf;
+	dma_addr_t addr = denali->dma_addr;
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_status;
 	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
@@ -709,11 +709,11 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	setup_ecc_for_xfer(denali, !raw_xfer, raw_xfer);
 
 	/* copy buffer into DMA buffer */
-	memcpy(denali->buf.buf, buf, mtd->writesize);
+	memcpy(denali->buf, buf, mtd->writesize);
 
 	if (raw_xfer) {
 		/* transfer the data to the spare area */
-		memcpy(denali->buf.buf + mtd->writesize,
+		memcpy(denali->buf + mtd->writesize,
 			chip->oob_poi,
 			mtd->oobsize);
 	}
@@ -790,7 +790,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->buf.dma_buf;
+	dma_addr_t addr = denali->dma_addr;
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_status;
 	uint32_t irq_mask = denali->caps & DENALI_CAP_HW_ECC_FIXUP ?
@@ -812,7 +812,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
 
-	memcpy(buf, denali->buf.buf, mtd->writesize);
+	memcpy(buf, denali->buf, mtd->writesize);
 
 	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
 		stat = denali_hw_ecc_fixup(mtd, denali, &uncor_ecc_flags);
@@ -837,7 +837,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 				uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->buf.dma_buf;
+	dma_addr_t addr = denali->dma_addr;
 	size_t size = mtd->writesize + mtd->oobsize;
 	uint32_t irq_mask = INTR__DMA_CMD_COMP;
 	uint32_t irq_status;
@@ -859,8 +859,8 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 
 	denali_enable_dma(denali, false);
 
-	memcpy(buf, denali->buf.buf, mtd->writesize);
-	memcpy(chip->oob_poi, denali->buf.buf + mtd->writesize, mtd->oobsize);
+	memcpy(buf, denali->buf, mtd->writesize);
+	memcpy(chip->oob_poi, denali->buf + mtd->writesize, mtd->oobsize);
 
 	return 0;
 }
@@ -1280,10 +1280,9 @@ int denali_init(struct denali_nand_info *denali)
 	if (ret)
 		goto disable_irq;
 
-	denali->buf.buf = devm_kzalloc(denali->dev,
-			     mtd->writesize + mtd->oobsize,
-			     GFP_KERNEL);
-	if (!denali->buf.buf) {
+	denali->buf = devm_kzalloc(denali->dev, mtd->writesize + mtd->oobsize,
+				   GFP_KERNEL);
+	if (!denali->buf) {
 		ret = -ENOMEM;
 		goto disable_irq;
 	}
@@ -1296,10 +1295,10 @@ int denali_init(struct denali_nand_info *denali)
 		goto disable_irq;
 	}
 
-	denali->buf.dma_buf = dma_map_single(denali->dev, denali->buf.buf,
+	denali->dma_addr = dma_map_single(denali->dev, denali->buf,
 			     mtd->writesize + mtd->oobsize,
 			     DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(denali->dev, denali->buf.dma_buf)) {
+	if (dma_mapping_error(denali->dev, denali->dma_addr)) {
 		dev_err(denali->dev, "Failed to map DMA buffer\n");
 		ret = -EIO;
 		goto disable_irq;
@@ -1400,7 +1399,7 @@ void denali_remove(struct denali_nand_info *denali)
 
 	nand_release(mtd);
 	denali_disable_irq(denali);
-	dma_unmap_single(denali->dev, denali->buf.dma_buf, bufsize,
+	dma_unmap_single(denali->dev, denali->dma_addr, bufsize,
 			 DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(denali_remove);
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index ad2223d179d07..1b991d3016f83 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -305,16 +305,10 @@
 #define MODE_10    0x08000000
 #define MODE_11    0x0C000000
 
-struct nand_buf {
-	uint8_t *buf;
-	dma_addr_t dma_buf;
-};
-
 struct denali_nand_info {
 	struct nand_chip nand;
 	unsigned long clk_x_rate;	/* bus interface clock rate */
 	int flash_bank; /* currently selected chip */
-	struct nand_buf buf;
 	struct device *dev;
 	void __iomem *flash_reg;	/* Register Interface */
 	void __iomem *flash_mem;	/* Host Data/Command Interface */
@@ -326,6 +320,8 @@ struct denali_nand_info {
 	uint32_t irq_status;
 	int irq;
 
+	void *buf;
+	dma_addr_t dma_addr;
 	int devnum;	/* represent how many nands connected */
 	int bbtskipbytes;
 	int max_banks;
-- 
GitLab


From 96a376bd93bb76945fce61e2e17ddc7f152b31c6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:44 +0900
Subject: [PATCH 0308/1958] mtd: nand: denali: use flag instead of register
 macro for direction

It is not a good idea to re-use macros that represent a specific
register bit field for the transfer direction.

It is true that bit 8 indicates the direction for the MAP10 pipeline
operation and the data DMA operation, but this is not valid across
the IP.

Use a simple flag (write: 1, read: 0) for the direction.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 4ba8ad6103817..3b7d2f81aa5be 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -63,9 +63,6 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 #define MAIN_ACCESS		0x42
 #define MAIN_SPARE_ACCESS	0x43
 
-#define DENALI_READ	0
-#define DENALI_WRITE	0x100
-
 /*
  * this is a helper macro that allows us to
  * format the bank into the proper bits for the controller
@@ -339,7 +336,7 @@ static int denali_dev_ready(struct mtd_info *mtd)
  */
 static int denali_send_pipeline_cmd(struct denali_nand_info *denali, int page,
 				    bool ecc_en, bool transfer_spare,
-				    int access_type, int op)
+				    int access_type, int write)
 {
 	int status = PASS;
 	uint32_t addr, cmd;
@@ -350,17 +347,17 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali, int page,
 
 	addr = BANK(denali->flash_bank) | page;
 
-	if (op == DENALI_WRITE && access_type != SPARE_ACCESS) {
+	if (write && access_type != SPARE_ACCESS) {
 		cmd = MODE_01 | addr;
 		iowrite32(cmd, denali->flash_mem);
-	} else if (op == DENALI_WRITE && access_type == SPARE_ACCESS) {
+	} else if (write && access_type == SPARE_ACCESS) {
 		/* read spare area */
 		cmd = MODE_10 | addr;
 		index_addr(denali, cmd, access_type);
 
 		cmd = MODE_01 | addr;
 		iowrite32(cmd, denali->flash_mem);
-	} else if (op == DENALI_READ) {
+	} else {
 		/* setup page read request for access type */
 		cmd = MODE_10 | addr;
 		index_addr(denali, cmd, access_type);
@@ -422,7 +419,7 @@ static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 	int status = 0;
 
 	if (denali_send_pipeline_cmd(denali, page, false, false, SPARE_ACCESS,
-							DENALI_WRITE) == PASS) {
+							1) == PASS) {
 		write_data_to_flash_mem(denali, buf, mtd->oobsize);
 
 		/* wait for operation to complete */
@@ -447,7 +444,7 @@ static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
 	uint32_t irq_status, addr, cmd;
 
 	if (denali_send_pipeline_cmd(denali, page, false, true, SPARE_ACCESS,
-							DENALI_READ) == PASS) {
+							0) == PASS) {
 		read_data_from_flash_mem(denali, buf, mtd->oobsize);
 
 		/*
@@ -633,7 +630,7 @@ static void denali_enable_dma(struct denali_nand_info *denali, bool en)
 }
 
 static void denali_setup_dma64(struct denali_nand_info *denali,
-			       dma_addr_t dma_addr, int page, int op)
+			       dma_addr_t dma_addr, int page, int write)
 {
 	uint32_t mode;
 	const int page_count = 1;
@@ -646,7 +643,8 @@ static void denali_setup_dma64(struct denali_nand_info *denali,
 	 * 1. setup transfer type, interrupt when complete,
 	 *    burst len = 64 bytes, the number of pages
 	 */
-	index_addr(denali, mode, 0x01002000 | (64 << 16) | op | page_count);
+	index_addr(denali, mode,
+		   0x01002000 | (64 << 16) | (write << 8) | page_count);
 
 	/* 2. set memory low address */
 	index_addr(denali, mode, dma_addr);
@@ -656,7 +654,7 @@ static void denali_setup_dma64(struct denali_nand_info *denali,
 }
 
 static void denali_setup_dma32(struct denali_nand_info *denali,
-			       dma_addr_t dma_addr, int page, int op)
+			       dma_addr_t dma_addr, int page, int write)
 {
 	uint32_t mode;
 	const int page_count = 1;
@@ -666,7 +664,7 @@ static void denali_setup_dma32(struct denali_nand_info *denali,
 	/* DMA is a four step process */
 
 	/* 1. setup transfer type and # of pages */
-	index_addr(denali, mode | page, 0x2000 | op | page_count);
+	index_addr(denali, mode | page, 0x2000 | (write << 8) | page_count);
 
 	/* 2. set memory high address bits 23:8 */
 	index_addr(denali, mode | ((dma_addr >> 16) << 8), 0x2200);
@@ -679,12 +677,12 @@ static void denali_setup_dma32(struct denali_nand_info *denali,
 }
 
 static void denali_setup_dma(struct denali_nand_info *denali,
-			     dma_addr_t dma_addr, int page, int op)
+			     dma_addr_t dma_addr, int page, int write)
 {
 	if (denali->caps & DENALI_CAP_DMA_64BIT)
-		denali_setup_dma64(denali, dma_addr, page, op);
+		denali_setup_dma64(denali, dma_addr, page, write);
 	else
-		denali_setup_dma32(denali, dma_addr, page, op);
+		denali_setup_dma32(denali, dma_addr, page, write);
 }
 
 /*
@@ -723,7 +721,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	denali_reset_irq(denali);
 	denali_enable_dma(denali, true);
 
-	denali_setup_dma(denali, addr, page, DENALI_WRITE);
+	denali_setup_dma(denali, addr, page, 1);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
@@ -805,7 +803,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
 
 	denali_reset_irq(denali);
-	denali_setup_dma(denali, addr, page, DENALI_READ);
+	denali_setup_dma(denali, addr, page, 0);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
@@ -848,7 +846,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
 
 	denali_reset_irq(denali);
-	denali_setup_dma(denali, addr, page, DENALI_READ);
+	denali_setup_dma(denali, addr, page, 0);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
-- 
GitLab


From 26d266e10e5eb59cfbcc47922655dc3149e1bd59 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:45 +0900
Subject: [PATCH 0309/1958] mtd: nand: denali: fix raw and oob accessors for
 syndrome page layout

The Denali IP adopts the syndrome page layout; payload and ECC are
interleaved, with BBM area always placed at the beginning of OOB.

The figure below shows the page organization for ecc->steps == 2:

  |----------------|    |-----------|
  |                |    |           |
  |                |    |           |
  |    Payload0    |    |           |
  |                |    |           |
  |                |    |           |
  |                |    |           |
  |----------------|    |  in-band  |
  |      ECC0      |    |   area    |
  |----------------|    |           |
  |                |    |           |
  |                |    |           |
  |    Payload1    |    |           |
  |                |    |           |
  |                |    |           |
  |----------------|    |-----------|
  |      BBM       |    |           |
  |----------------|    |           |
  |Payload1 (cont.)|    |           |
  |----------------|    |out-of-band|
  |      ECC1      |    |    area   |
  |----------------|    |           |
  |    OOB free    |    |           |
  |----------------|    |-----------|

The current raw / oob accessors do not take that into consideration,
so in-band and out-of-band data are transferred as stored in the
device.  In the case above,

  in-band:      Payload0 + ECC0 + Payload1(partial)
  out-of-band:  BBM + Payload1(cont.) + ECC1 + OOB-free

This is wrong.  As the comment block of struct nand_ecc_ctrl says,
driver callbacks must hide the specific layout used by the hardware
and always return contiguous in-band and out-of-band data.

The current implementation is completely screwed-up, so read/write
callbacks must be re-worked.

Also, it is reasonable to support PIO transfer in case DMA may not
work for some reasons.  Actually, the Data DMA may not be equipped
depending on the configuration of the RTL.  This can be checked by
reading the bit 4 of the FEATURES register.  Even if the controller
has the DMA support, dma_set_mask() and dma_map_single() could fail.
In either case, the driver can fall back to the PIO transfer.  Slower
access would be better than giving up.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 610 +++++++++++++++++++++-----------------
 drivers/mtd/nand/denali.h |   3 +-
 2 files changed, 344 insertions(+), 269 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 3b7d2f81aa5be..ed0044c560e54 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -55,14 +55,6 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 	return container_of(mtd_to_nand(mtd), struct denali_nand_info, nand);
 }
 
-/*
- * These constants are defined by the driver to enable common driver
- * configuration options.
- */
-#define SPARE_ACCESS		0x41
-#define MAIN_ACCESS		0x42
-#define MAIN_SPARE_ACCESS	0x43
-
 /*
  * this is a helper macro that allows us to
  * format the bank into the proper bits for the controller
@@ -330,146 +322,6 @@ static int denali_dev_ready(struct mtd_info *mtd)
 	return !!(denali_check_irq(denali) & INTR__INT_ACT);
 }
 
-/*
- * sends a pipeline command operation to the controller. See the Denali NAND
- * controller's user guide for more information (section 4.2.3.6).
- */
-static int denali_send_pipeline_cmd(struct denali_nand_info *denali, int page,
-				    bool ecc_en, bool transfer_spare,
-				    int access_type, int write)
-{
-	int status = PASS;
-	uint32_t addr, cmd;
-
-	setup_ecc_for_xfer(denali, ecc_en, transfer_spare);
-
-	denali_reset_irq(denali);
-
-	addr = BANK(denali->flash_bank) | page;
-
-	if (write && access_type != SPARE_ACCESS) {
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	} else if (write && access_type == SPARE_ACCESS) {
-		/* read spare area */
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, access_type);
-
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	} else {
-		/* setup page read request for access type */
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, access_type);
-
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	}
-	return status;
-}
-
-/* helper function that simply writes a buffer to the flash */
-static int write_data_to_flash_mem(struct denali_nand_info *denali,
-				   const uint8_t *buf, int len)
-{
-	uint32_t *buf32;
-	int i;
-
-	/*
-	 * verify that the len is a multiple of 4.
-	 * see comment in read_data_from_flash_mem()
-	 */
-	BUG_ON((len % 4) != 0);
-
-	/* write the data to the flash memory */
-	buf32 = (uint32_t *)buf;
-	for (i = 0; i < len / 4; i++)
-		iowrite32(*buf32++, denali->flash_mem + 0x10);
-	return i * 4; /* intent is to return the number of bytes read */
-}
-
-/* helper function that simply reads a buffer from the flash */
-static int read_data_from_flash_mem(struct denali_nand_info *denali,
-				    uint8_t *buf, int len)
-{
-	uint32_t *buf32;
-	int i;
-
-	/*
-	 * we assume that len will be a multiple of 4, if not it would be nice
-	 * to know about it ASAP rather than have random failures...
-	 * This assumption is based on the fact that this function is designed
-	 * to be used to read flash pages, which are typically multiples of 4.
-	 */
-	BUG_ON((len % 4) != 0);
-
-	/* transfer the data from the flash */
-	buf32 = (uint32_t *)buf;
-	for (i = 0; i < len / 4; i++)
-		*buf32++ = ioread32(denali->flash_mem + 0x10);
-	return i * 4; /* intent is to return the number of bytes read */
-}
-
-/* writes OOB data to the device */
-static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
-{
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_status;
-	uint32_t irq_mask = INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL;
-	int status = 0;
-
-	if (denali_send_pipeline_cmd(denali, page, false, false, SPARE_ACCESS,
-							1) == PASS) {
-		write_data_to_flash_mem(denali, buf, mtd->oobsize);
-
-		/* wait for operation to complete */
-		irq_status = denali_wait_for_irq(denali, irq_mask);
-
-		if (!(irq_status & INTR__PROGRAM_COMP)) {
-			dev_err(denali->dev, "OOB write failed\n");
-			status = -EIO;
-		}
-	} else {
-		dev_err(denali->dev, "unable to send pipeline command\n");
-		status = -EIO;
-	}
-	return status;
-}
-
-/* reads OOB data from the device */
-static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
-{
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_mask = INTR__LOAD_COMP;
-	uint32_t irq_status, addr, cmd;
-
-	if (denali_send_pipeline_cmd(denali, page, false, true, SPARE_ACCESS,
-							0) == PASS) {
-		read_data_from_flash_mem(denali, buf, mtd->oobsize);
-
-		/*
-		 * wait for command to be accepted
-		 * can always use status0 bit as the
-		 * mask is identical for each bank.
-		 */
-		irq_status = denali_wait_for_irq(denali, irq_mask);
-
-		if (!(irq_status & INTR__LOAD_COMP))
-			dev_err(denali->dev, "page on OOB timeout %d\n", page);
-
-		/*
-		 * We set the device back to MAIN_ACCESS here as I observed
-		 * instability with the controller if you do a block erase
-		 * and the last transaction was a SPARE_ACCESS. Block erase
-		 * is reliable (according to the MTD test infrastructure)
-		 * if you are in MAIN_ACCESS.
-		 */
-		addr = BANK(denali->flash_bank) | page;
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, MAIN_ACCESS);
-	}
-}
-
 static int denali_check_erased_page(struct mtd_info *mtd,
 				    struct nand_chip *chip, uint8_t *buf,
 				    unsigned long uncor_ecc_flags,
@@ -685,144 +537,303 @@ static void denali_setup_dma(struct denali_nand_info *denali,
 		denali_setup_dma32(denali, dma_addr, page, write);
 }
 
-/*
- * writes a page. user specifies type, and this function handles the
- * configuration details.
- */
-static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int page, bool raw_xfer)
+static int denali_pio_read(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->dma_addr;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_status;
-	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
-	int ret = 0;
+	uint32_t addr = BANK(denali->flash_bank) | page;
+	uint32_t *buf32 = (uint32_t *)buf;
+	uint32_t irq_status, ecc_err_mask;
+	int i;
 
-	/*
-	 * if it is a raw xfer, we want to disable ecc and send the spare area.
-	 * !raw_xfer - enable ecc
-	 * raw_xfer - transfer spare
-	 */
-	setup_ecc_for_xfer(denali, !raw_xfer, raw_xfer);
+	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
+		ecc_err_mask = INTR__ECC_UNCOR_ERR;
+	else
+		ecc_err_mask = INTR__ECC_ERR;
 
-	/* copy buffer into DMA buffer */
-	memcpy(denali->buf, buf, mtd->writesize);
+	denali_reset_irq(denali);
 
-	if (raw_xfer) {
-		/* transfer the data to the spare area */
-		memcpy(denali->buf + mtd->writesize,
-			chip->oob_poi,
-			mtd->oobsize);
-	}
+	iowrite32(MODE_01 | addr, denali->flash_mem);
+	for (i = 0; i < size / 4; i++)
+		*buf32++ = ioread32(denali->flash_mem + 0x10);
+
+	irq_status = denali_wait_for_irq(denali, INTR__PAGE_XFER_INC);
+	if (!(irq_status & INTR__PAGE_XFER_INC))
+		return -EIO;
 
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_TO_DEVICE);
+	return irq_status & ecc_err_mask ? -EBADMSG : 0;
+}
+
+static int denali_pio_write(struct denali_nand_info *denali,
+			    const void *buf, size_t size, int page, int raw)
+{
+	uint32_t addr = BANK(denali->flash_bank) | page;
+	const uint32_t *buf32 = (uint32_t *)buf;
+	uint32_t irq_status;
+	int i;
 
 	denali_reset_irq(denali);
+
+	iowrite32(MODE_01 | addr, denali->flash_mem);
+	for (i = 0; i < size / 4; i++)
+		iowrite32(*buf32++, denali->flash_mem + 0x10);
+
+	irq_status = denali_wait_for_irq(denali,
+				INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL);
+	if (!(irq_status & INTR__PROGRAM_COMP))
+		return -EIO;
+
+	return 0;
+}
+
+static int denali_pio_xfer(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw, int write)
+{
+	if (write)
+		return denali_pio_write(denali, buf, size, page, raw);
+	else
+		return denali_pio_read(denali, buf, size, page, raw);
+}
+
+static int denali_dma_xfer(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw, int write)
+{
+	dma_addr_t dma_addr = denali->dma_addr;
+	uint32_t irq_mask, irq_status, ecc_err_mask;
+	enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	int ret = 0;
+
+	dma_sync_single_for_device(denali->dev, dma_addr, size, dir);
+
+	if (write) {
+		/*
+		 * INTR__PROGRAM_COMP is never asserted for the DMA transfer.
+		 * We can use INTR__DMA_CMD_COMP instead.  This flag is asserted
+		 * when the page program is completed.
+		 */
+		irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
+		ecc_err_mask = 0;
+	} else if (denali->caps & DENALI_CAP_HW_ECC_FIXUP) {
+		irq_mask = INTR__DMA_CMD_COMP;
+		ecc_err_mask = INTR__ECC_UNCOR_ERR;
+	} else {
+		irq_mask = INTR__DMA_CMD_COMP;
+		ecc_err_mask = INTR__ECC_ERR;
+	}
+
 	denali_enable_dma(denali, true);
 
-	denali_setup_dma(denali, addr, page, 1);
+	denali_reset_irq(denali);
+	denali_setup_dma(denali, dma_addr, page, write);
 
 	/* wait for operation to complete */
 	irq_status = denali_wait_for_irq(denali, irq_mask);
-	if (!(irq_status & INTR__DMA_CMD_COMP)) {
-		dev_err(denali->dev, "timeout on write_page (type = %d)\n",
-			raw_xfer);
+	if (!(irq_status & INTR__DMA_CMD_COMP))
 		ret = -EIO;
-	}
+	else if (irq_status & ecc_err_mask)
+		ret = -EBADMSG;
 
 	denali_enable_dma(denali, false);
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_TO_DEVICE);
+	dma_sync_single_for_cpu(denali->dev, dma_addr, size, dir);
 
 	return ret;
 }
 
-/* NAND core entry points */
-
-/*
- * this is the callback that the NAND core calls to write a page. Since
- * writing a page with ECC or without is similar, all the work is done
- * by write_page above.
- */
-static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf, int oob_required, int page)
+static int denali_data_xfer(struct denali_nand_info *denali, void *buf,
+			    size_t size, int page, int raw, int write)
 {
-	/*
-	 * for regular page writes, we let HW handle all the ECC
-	 * data written to the device.
-	 */
-	return write_page(mtd, chip, buf, page, false);
+	setup_ecc_for_xfer(denali, !raw, raw);
+
+	if (denali->dma_avail)
+		return denali_dma_xfer(denali, buf, size, page, raw, write);
+	else
+		return denali_pio_xfer(denali, buf, size, page, raw, write);
 }
 
-/*
- * This is the callback that the NAND core calls to write a page without ECC.
- * raw access is similar to ECC page writes, so all the work is done in the
- * write_page() function above.
- */
-static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 const uint8_t *buf, int oob_required,
-				 int page)
+static void denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip,
+			    int page, int write)
 {
-	/*
-	 * for raw page writes, we want to disable ECC and simply write
-	 * whatever data is in the buffer.
-	 */
-	return write_page(mtd, chip, buf, page, true);
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	unsigned int start_cmd = write ? NAND_CMD_SEQIN : NAND_CMD_READ0;
+	unsigned int rnd_cmd = write ? NAND_CMD_RNDIN : NAND_CMD_RNDOUT;
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	uint8_t *bufpoi = chip->oob_poi;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	int oob_skip = denali->bbtskipbytes;
+	size_t size = writesize + oobsize;
+	int i, pos, len;
+
+	/* BBM at the beginning of the OOB area */
+	chip->cmdfunc(mtd, start_cmd, writesize, page);
+	if (write)
+		chip->write_buf(mtd, bufpoi, oob_skip);
+	else
+		chip->read_buf(mtd, bufpoi, oob_skip);
+	bufpoi += oob_skip;
+
+	/* OOB ECC */
+	for (i = 0; i < ecc_steps; i++) {
+		pos = ecc_size + i * (ecc_size + ecc_bytes);
+		len = ecc_bytes;
+
+		if (pos >= writesize)
+			pos += oob_skip;
+		else if (pos + len > writesize)
+			len = writesize - pos;
+
+		chip->cmdfunc(mtd, rnd_cmd, pos, -1);
+		if (write)
+			chip->write_buf(mtd, bufpoi, len);
+		else
+			chip->read_buf(mtd, bufpoi, len);
+		bufpoi += len;
+		if (len < ecc_bytes) {
+			len = ecc_bytes - len;
+			chip->cmdfunc(mtd, rnd_cmd, writesize + oob_skip, -1);
+			if (write)
+				chip->write_buf(mtd, bufpoi, len);
+			else
+				chip->read_buf(mtd, bufpoi, len);
+			bufpoi += len;
+		}
+	}
+
+	/* OOB free */
+	len = oobsize - (bufpoi - chip->oob_poi);
+	chip->cmdfunc(mtd, rnd_cmd, size - len, -1);
+	if (write)
+		chip->write_buf(mtd, bufpoi, len);
+	else
+		chip->read_buf(mtd, bufpoi, len);
 }
 
-static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			    int page)
+static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
 {
-	return write_oob_data(mtd, chip->oob_poi, page);
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	void *dma_buf = denali->buf;
+	int oob_skip = denali->bbtskipbytes;
+	size_t size = writesize + oobsize;
+	int ret, i, pos, len;
+
+	ret = denali_data_xfer(denali, dma_buf, size, page, 1, 0);
+	if (ret)
+		return ret;
+
+	/* Arrange the buffer for syndrome payload/ecc layout */
+	if (buf) {
+		for (i = 0; i < ecc_steps; i++) {
+			pos = i * (ecc_size + ecc_bytes);
+			len = ecc_size;
+
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(buf, dma_buf + pos, len);
+			buf += len;
+			if (len < ecc_size) {
+				len = ecc_size - len;
+				memcpy(buf, dma_buf + writesize + oob_skip,
+				       len);
+				buf += len;
+			}
+		}
+	}
+
+	if (oob_required) {
+		uint8_t *oob = chip->oob_poi;
+
+		/* BBM at the beginning of the OOB area */
+		memcpy(oob, dma_buf + writesize, oob_skip);
+		oob += oob_skip;
+
+		/* OOB ECC */
+		for (i = 0; i < ecc_steps; i++) {
+			pos = ecc_size + i * (ecc_size + ecc_bytes);
+			len = ecc_bytes;
+
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(oob, dma_buf + pos, len);
+			oob += len;
+			if (len < ecc_bytes) {
+				len = ecc_bytes - len;
+				memcpy(oob, dma_buf + writesize + oob_skip,
+				       len);
+				oob += len;
+			}
+		}
+
+		/* OOB free */
+		len = oobsize - (oob - chip->oob_poi);
+		memcpy(oob, dma_buf + size - len, len);
+	}
+
+	return 0;
 }
 
 static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page)
 {
-	read_oob_data(mtd, chip->oob_poi, page);
+	denali_oob_xfer(mtd, chip, page, 0);
 
 	return 0;
 }
 
-static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			    uint8_t *buf, int oob_required, int page)
+static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
+			    int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->dma_addr;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_status;
-	uint32_t irq_mask = denali->caps & DENALI_CAP_HW_ECC_FIXUP ?
-				INTR__DMA_CMD_COMP | INTR__ECC_UNCOR_ERR :
-				INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
-	unsigned long uncor_ecc_flags = 0;
-	int stat = 0;
+	int status;
 
-	setup_ecc_for_xfer(denali, true, false);
+	denali_reset_irq(denali);
 
-	denali_enable_dma(denali, true);
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
+	denali_oob_xfer(mtd, chip, page, 1);
 
-	denali_reset_irq(denali);
-	denali_setup_dma(denali, addr, page, 0);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
 
-	/* wait for operation to complete */
-	irq_status = denali_wait_for_irq(denali, irq_mask);
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
+static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+			    uint8_t *buf, int oob_required, int page)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	unsigned long uncor_ecc_flags = 0;
+	int stat = 0;
+	int ret;
 
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
+	ret = denali_data_xfer(denali, denali->buf, mtd->writesize, page, 0, 0);
+	if (ret && ret != -EBADMSG)
+		return ret;
 
 	memcpy(buf, denali->buf, mtd->writesize);
 
 	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
 		stat = denali_hw_ecc_fixup(mtd, denali, &uncor_ecc_flags);
-	else if (irq_status & INTR__ECC_ERR)
+	else if (ret == -EBADMSG)
 		stat = denali_sw_ecc_fixup(mtd, denali, &uncor_ecc_flags, buf);
-	denali_enable_dma(denali, false);
 
 	if (stat < 0)
 		return stat;
 
 	if (uncor_ecc_flags) {
-		read_oob_data(mtd, chip->oob_poi, page);
+		ret = denali_read_oob(mtd, chip, page);
+		if (ret)
+			return ret;
 
 		stat = denali_check_erased_page(mtd, chip, buf,
 						uncor_ecc_flags, stat);
@@ -831,36 +842,93 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	return stat;
 }
 
-static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				 const uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->dma_addr;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_mask = INTR__DMA_CMD_COMP;
-	uint32_t irq_status;
-
-	setup_ecc_for_xfer(denali, false, true);
-	denali_enable_dma(denali, true);
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	void *dma_buf = denali->buf;
+	int oob_skip = denali->bbtskipbytes;
+	size_t size = writesize + oobsize;
+	int i, pos, len;
 
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
+	/*
+	 * Fill the buffer with 0xff first except the full page transfer.
+	 * This simplifies the logic.
+	 */
+	if (!buf || !oob_required)
+		memset(dma_buf, 0xff, size);
+
+	/* Arrange the buffer for syndrome payload/ecc layout */
+	if (buf) {
+		for (i = 0; i < ecc_steps; i++) {
+			pos = i * (ecc_size + ecc_bytes);
+			len = ecc_size;
+
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(dma_buf + pos, buf, len);
+			buf += len;
+			if (len < ecc_size) {
+				len = ecc_size - len;
+				memcpy(dma_buf + writesize + oob_skip, buf,
+				       len);
+				buf += len;
+			}
+		}
+	}
 
-	denali_reset_irq(denali);
-	denali_setup_dma(denali, addr, page, 0);
+	if (oob_required) {
+		const uint8_t *oob = chip->oob_poi;
+
+		/* BBM at the beginning of the OOB area */
+		memcpy(dma_buf + writesize, oob, oob_skip);
+		oob += oob_skip;
+
+		/* OOB ECC */
+		for (i = 0; i < ecc_steps; i++) {
+			pos = ecc_size + i * (ecc_size + ecc_bytes);
+			len = ecc_bytes;
+
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(dma_buf + pos, oob, len);
+			oob += len;
+			if (len < ecc_bytes) {
+				len = ecc_bytes - len;
+				memcpy(dma_buf + writesize + oob_skip, oob,
+				       len);
+				oob += len;
+			}
+		}
 
-	/* wait for operation to complete */
-	irq_status = denali_wait_for_irq(denali, irq_mask);
-	if (irq_status & INTR__DMA_CMD_COMP)
-		return -ETIMEDOUT;
+		/* OOB free */
+		len = oobsize - (oob - chip->oob_poi);
+		memcpy(dma_buf + size - len, oob, len);
+	}
 
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
+	return denali_data_xfer(denali, dma_buf, size, page, 1, 1);
+}
 
-	denali_enable_dma(denali, false);
+static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+			     const uint8_t *buf, int oob_required, int page)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
-	memcpy(buf, denali->buf, mtd->writesize);
-	memcpy(chip->oob_poi, denali->buf + mtd->writesize, mtd->oobsize);
+	memcpy(denali->buf, buf, mtd->writesize);
 
-	return 0;
+	return denali_data_xfer(denali, denali->buf, mtd->writesize, page,
+				0, 1);
 }
 
 static void denali_select_chip(struct mtd_info *mtd, int chip)
@@ -1285,21 +1353,29 @@ int denali_init(struct denali_nand_info *denali)
 		goto disable_irq;
 	}
 
-	ret = dma_set_mask(denali->dev,
-			   DMA_BIT_MASK(denali->caps & DENALI_CAP_DMA_64BIT ?
-					64 : 32));
-	if (ret) {
-		dev_err(denali->dev, "No usable DMA configuration\n");
-		goto disable_irq;
+	if (ioread32(denali->flash_reg + FEATURES) & FEATURES__DMA)
+		denali->dma_avail = 1;
+
+	if (denali->dma_avail) {
+		int dma_bit = denali->caps & DENALI_CAP_DMA_64BIT ? 64 : 32;
+
+		ret = dma_set_mask(denali->dev, DMA_BIT_MASK(dma_bit));
+		if (ret) {
+			dev_info(denali->dev,
+				 "Failed to set DMA mask. Disabling DMA.\n");
+			denali->dma_avail = 0;
+		}
 	}
 
-	denali->dma_addr = dma_map_single(denali->dev, denali->buf,
-			     mtd->writesize + mtd->oobsize,
-			     DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(denali->dev, denali->dma_addr)) {
-		dev_err(denali->dev, "Failed to map DMA buffer\n");
-		ret = -EIO;
-		goto disable_irq;
+	if (denali->dma_avail) {
+		denali->dma_addr = dma_map_single(denali->dev, denali->buf,
+						  mtd->writesize + mtd->oobsize,
+						  DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(denali->dev, denali->dma_addr)) {
+			dev_info(denali->dev,
+				 "Failed to map DMA buffer. Disabling DMA.\n");
+			denali->dma_avail = 0;
+		};
 	}
 
 	/*
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 1b991d3016f83..f5da52f09e34b 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -298,8 +298,6 @@
 #define     CHNL_ACTIVE__CHANNEL2			BIT(2)
 #define     CHNL_ACTIVE__CHANNEL3			BIT(3)
 
-#define PASS 0                  /*success flag*/
-
 #define MODE_00    0x00000000
 #define MODE_01    0x04000000
 #define MODE_10    0x08000000
@@ -322,6 +320,7 @@ struct denali_nand_info {
 
 	void *buf;
 	dma_addr_t dma_addr;
+	int dma_avail;
 	int devnum;	/* represent how many nands connected */
 	int bbtskipbytes;
 	int max_banks;
-- 
GitLab


From 57a4d8b5f6482718f17c08f2cdfea085b1d3c12a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:46 +0900
Subject: [PATCH 0310/1958] mtd: nand: denali: support hardware-assisted erased
 page detection

Recent versions of this IP support automatic erased page detection.
If an erased page is detected on reads, the controller does not set
INTR__ECC_UNCOR_ERR, but INTR__ERASED_PAGE.

The detection of erased pages is based on the number of zeros in a
page; if the number of zeros is less than the value in the field
ERASED_THRESHOLD, the page is assumed as erased.

Please note ERASED_THRESHOLD specifies the number of zeros in a _page_
instead of an ECC chunk.  Moreover, the controller does not provide a
way to know the actual number of bitflips.

Actually, an erased page (all 0xff) is not an ECC correctable pattern
on the Denali ECC engine.  In other words, there may be overlap between
the following two:

[1] a bit pattern reachable from a valid payload + ECC pattern within
    ecc.strength bitflips
[2] a bit pattern reachable from an erased state (all 0xff) within
    ecc.strength bitflips

So, this feature may intercept ECC correctable patterns, then replace
[1] with [2].

After all, this feature can work safely only when ECC_THRESHOLD == 1,
i.e. detect erased pages without any bitflips.  This should be the
case most of the time.  If there is a bitflip or more, the driver will
fallback to the software method by using nand_check_erased_ecc_chunk().

Strangely enough, the driver still has to fill the buffer with 0xff
in case of INTR__ERASED_PAGE because the ECC correction engine has
already manipulated the data in the buffer before it judges erased
pages.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 9 ++++++++-
 drivers/mtd/nand/denali.h | 5 +++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index ed0044c560e54..e8d8e6c6f45e5 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -560,6 +560,9 @@ static int denali_pio_read(struct denali_nand_info *denali, void *buf,
 	if (!(irq_status & INTR__PAGE_XFER_INC))
 		return -EIO;
 
+	if (irq_status & INTR__ERASED_PAGE)
+		memset(buf, 0xff, size);
+
 	return irq_status & ecc_err_mask ? -EBADMSG : 0;
 }
 
@@ -635,6 +638,9 @@ static int denali_dma_xfer(struct denali_nand_info *denali, void *buf,
 	denali_enable_dma(denali, false);
 	dma_sync_single_for_cpu(denali->dev, dma_addr, size, dir);
 
+	if (irq_status & INTR__ERASED_PAGE)
+		memset(buf, 0xff, size);
+
 	return ret;
 }
 
@@ -1406,7 +1412,8 @@ int denali_init(struct denali_nand_info *denali)
 		"chosen ECC settings: step=%d, strength=%d, bytes=%d\n",
 		chip->ecc.size, chip->ecc.strength, chip->ecc.bytes);
 
-	iowrite32(chip->ecc.strength, denali->flash_reg + ECC_CORRECTION);
+	iowrite32(MAKE_ECC_CORRECTION(chip->ecc.strength, 1),
+		  denali->flash_reg + ECC_CORRECTION);
 	iowrite32(mtd->erasesize / mtd->writesize,
 		  denali->flash_reg + PAGES_PER_BLOCK);
 	iowrite32(chip->options & NAND_BUSWIDTH_16 ? 1 : 0,
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index f5da52f09e34b..657a794af695e 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -110,6 +110,10 @@
 
 #define ECC_CORRECTION				0x1b0
 #define     ECC_CORRECTION__VALUE			GENMASK(4, 0)
+#define     ECC_CORRECTION__ERASE_THRESHOLD		GENMASK(31, 16)
+#define     MAKE_ECC_CORRECTION(val, thresh)		\
+			(((val) & (ECC_CORRECTION__VALUE)) | \
+			(((thresh) << 16) & (ECC_CORRECTION__ERASE_THRESHOLD)))
 
 #define READ_MODE				0x1c0
 #define     READ_MODE__VALUE				GENMASK(3, 0)
@@ -233,6 +237,7 @@
 #define     INTR__RST_COMP				BIT(13)
 #define     INTR__PIPE_CMD_ERR				BIT(14)
 #define     INTR__PAGE_XFER_INC				BIT(15)
+#define     INTR__ERASED_PAGE				BIT(16)
 
 #define PAGE_CNT(bank)				(0x430 + (bank) * 0x50)
 #define ERR_PAGE_ADDR(bank)			(0x440 + (bank) * 0x50)
-- 
GitLab


From 997cde2a222091270ce0c276e567b68e5615f577 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:47 +0900
Subject: [PATCH 0311/1958] mtd: nand: denali: skip driver internal bounce
 buffer when possible

For ecc->read_page() and ecc->write_page(), it is possible to call
dma_map_single() against the given buffer.  This bypasses the driver
internal bounce buffer and save the memcpy().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 38 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index e8d8e6c6f45e5..ec5fc8da5f9a3 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -600,12 +600,16 @@ static int denali_pio_xfer(struct denali_nand_info *denali, void *buf,
 static int denali_dma_xfer(struct denali_nand_info *denali, void *buf,
 			   size_t size, int page, int raw, int write)
 {
-	dma_addr_t dma_addr = denali->dma_addr;
+	dma_addr_t dma_addr;
 	uint32_t irq_mask, irq_status, ecc_err_mask;
 	enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
 	int ret = 0;
 
-	dma_sync_single_for_device(denali->dev, dma_addr, size, dir);
+	dma_addr = dma_map_single(denali->dev, buf, size, dir);
+	if (dma_mapping_error(denali->dev, dma_addr)) {
+		dev_dbg(denali->dev, "Failed to DMA-map buffer. Trying PIO.\n");
+		return denali_pio_xfer(denali, buf, size, page, raw, write);
+	}
 
 	if (write) {
 		/*
@@ -636,7 +640,7 @@ static int denali_dma_xfer(struct denali_nand_info *denali, void *buf,
 		ret = -EBADMSG;
 
 	denali_enable_dma(denali, false);
-	dma_sync_single_for_cpu(denali->dev, dma_addr, size, dir);
+	dma_unmap_single(denali->dev, dma_addr, size, dir);
 
 	if (irq_status & INTR__ERASED_PAGE)
 		memset(buf, 0xff, size);
@@ -822,12 +826,10 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	int stat = 0;
 	int ret;
 
-	ret = denali_data_xfer(denali, denali->buf, mtd->writesize, page, 0, 0);
+	ret = denali_data_xfer(denali, buf, mtd->writesize, page, 0, 0);
 	if (ret && ret != -EBADMSG)
 		return ret;
 
-	memcpy(buf, denali->buf, mtd->writesize);
-
 	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
 		stat = denali_hw_ecc_fixup(mtd, denali, &uncor_ecc_flags);
 	else if (ret == -EBADMSG)
@@ -931,10 +933,8 @@ static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
-	memcpy(denali->buf, buf, mtd->writesize);
-
-	return denali_data_xfer(denali, denali->buf, mtd->writesize, page,
-				0, 1);
+	return denali_data_xfer(denali, (void *)buf, mtd->writesize,
+				page, 0, 1);
 }
 
 static void denali_select_chip(struct mtd_info *mtd, int chip)
@@ -1374,14 +1374,8 @@ int denali_init(struct denali_nand_info *denali)
 	}
 
 	if (denali->dma_avail) {
-		denali->dma_addr = dma_map_single(denali->dev, denali->buf,
-						  mtd->writesize + mtd->oobsize,
-						  DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(denali->dev, denali->dma_addr)) {
-			dev_info(denali->dev,
-				 "Failed to map DMA buffer. Disabling DMA.\n");
-			denali->dma_avail = 0;
-		};
+		chip->options |= NAND_USE_BOUNCE_BUFFER;
+		chip->buf_align = 16;
 	}
 
 	/*
@@ -1471,16 +1465,8 @@ EXPORT_SYMBOL(denali_init);
 void denali_remove(struct denali_nand_info *denali)
 {
 	struct mtd_info *mtd = nand_to_mtd(&denali->nand);
-	/*
-	 * Pre-compute DMA buffer size to avoid any problems in case
-	 * nand_release() ever changes in a way that mtd->writesize and
-	 * mtd->oobsize are not reliable after this call.
-	 */
-	int bufsize = mtd->writesize + mtd->oobsize;
 
 	nand_release(mtd);
 	denali_disable_irq(denali);
-	dma_unmap_single(denali->dev, denali->dma_addr, bufsize,
-			 DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(denali_remove);
-- 
GitLab


From 7d370b2c255612569818134ae0a6d0e46eabfe8b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:48 +0900
Subject: [PATCH 0312/1958] mtd: nand: denali: use non-managed kmalloc() for
 DMA buffer

As Russell and Lars stated in the discussion [1], using
devm_k*alloc() with DMA is not a good idea.

Let's use kmalloc (not kzalloc because no need for zero-out).
Also, allocate the buffer as late as possible because it must be
freed for any error that follows.

[1] https://lkml.org/lkml/2017/3/8/693

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index ec5fc8da5f9a3..bb2da2fd069e8 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/mtd/mtd.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include "denali.h"
 
@@ -1352,13 +1353,6 @@ int denali_init(struct denali_nand_info *denali)
 	if (ret)
 		goto disable_irq;
 
-	denali->buf = devm_kzalloc(denali->dev, mtd->writesize + mtd->oobsize,
-				   GFP_KERNEL);
-	if (!denali->buf) {
-		ret = -ENOMEM;
-		goto disable_irq;
-	}
-
 	if (ioread32(denali->flash_reg + FEATURES) & FEATURES__DMA)
 		denali->dma_avail = 1;
 
@@ -1443,17 +1437,30 @@ int denali_init(struct denali_nand_info *denali)
 	if (ret)
 		goto disable_irq;
 
+	/*
+	 * This buffer is DMA-mapped by denali_{read,write}_page_raw.  Do not
+	 * use devm_kmalloc() because the memory allocated by devm_ does not
+	 * guarantee DMA-safe alignment.
+	 */
+	denali->buf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL);
+	if (!denali->buf) {
+		ret = -ENOMEM;
+		goto disable_irq;
+	}
+
 	ret = nand_scan_tail(mtd);
 	if (ret)
-		goto disable_irq;
+		goto free_buf;
 
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret) {
 		dev_err(denali->dev, "Failed to register MTD: %d\n", ret);
-		goto disable_irq;
+		goto free_buf;
 	}
 	return 0;
 
+free_buf:
+	kfree(denali->buf);
 disable_irq:
 	denali_disable_irq(denali);
 
@@ -1467,6 +1474,7 @@ void denali_remove(struct denali_nand_info *denali)
 	struct mtd_info *mtd = nand_to_mtd(&denali->nand);
 
 	nand_release(mtd);
+	kfree(denali->buf);
 	denali_disable_irq(denali);
 }
 EXPORT_SYMBOL(denali_remove);
-- 
GitLab


From 777f2d49e8e9fbb5d00f54c496877220b6e6ff6c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Jun 2017 22:45:49 +0900
Subject: [PATCH 0313/1958] mtd: nand: denali: enable bad block table scan

Now this driver is ready to remove NAND_SKIP_BBTSCAN.

The BBT descriptors in denali.c are equivalent to the ones in
nand_bbt.c.  There is no need to duplicate the equivalent structures.
The with-oob decriptors do not work for this driver anyway.

The bbt_pattern (offs = 8) and the version (veroffs = 12) area
overlaps the ECC area.  Set NAND_BBT_NO_OOB flag to use the no_oob
variant of the BBT descriptors.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c | 31 ++-----------------------------
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index bb2da2fd069e8..5ac3eb07c6d66 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1211,29 +1211,6 @@ static const struct mtd_ooblayout_ops denali_ooblayout_ops = {
 	.free = denali_ooblayout_free,
 };
 
-static uint8_t bbt_pattern[] = {'B', 'b', 't', '0' };
-static uint8_t mirror_pattern[] = {'1', 't', 'b', 'B' };
-
-static struct nand_bbt_descr bbt_main_descr = {
-	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE
-		| NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP,
-	.offs =	8,
-	.len = 4,
-	.veroffs = 12,
-	.maxblocks = 4,
-	.pattern = bbt_pattern,
-};
-
-static struct nand_bbt_descr bbt_mirror_descr = {
-	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE
-		| NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP,
-	.offs =	8,
-	.len = 4,
-	.veroffs = 12,
-	.maxblocks = 4,
-	.pattern = mirror_pattern,
-};
-
 /* initialize driver data structures */
 static void denali_drv_init(struct denali_nand_info *denali)
 {
@@ -1378,13 +1355,9 @@ int denali_init(struct denali_nand_info *denali)
 	 * bad block management.
 	 */
 
-	/* Bad block management */
-	chip->bbt_td = &bbt_main_descr;
-	chip->bbt_md = &bbt_mirror_descr;
-
-	/* skip the scan for now until we have OOB read and write support */
 	chip->bbt_options |= NAND_BBT_USE_FLASH;
-	chip->options |= NAND_SKIP_BBTSCAN;
+	chip->bbt_options |= NAND_BBT_NO_OOB;
+
 	chip->ecc.mode = NAND_ECC_HW_SYNDROME;
 
 	/* no subpage writes on denali */
-- 
GitLab


From 0d3a966d2b34f449df7859fa39e3db5b71da2bfa Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 16 Jun 2017 14:36:39 +0900
Subject: [PATCH 0314/1958] mtd: nand: denali: avoid magic numbers and rename
 for clarification

Introduce some macros and helpers to avoid magic numbers and
rename macros/functions for clarification.

- We see '| 2' in several places.  This means Data Cycle in MAP11 mode.
  The Denali User's Guide says bit[1:0] of MAP11 is like follows:

  b'00 = Command Cycle
  b'01 = Address Cycle
  b'10 = Data Cycle

  So, this commit added DENALI_MAP11_{CMD,ADDR,DATA} macros.

- We see 'denali->flash_mem + 0x10' in several places, but 0x10 is a
  magic number.  Actually, this accesses the data port of the Host
  Data/Command Interface.  So, this commit added DENALI_HOST_DATA.
  On the other hand, 'denali->flash_mem' gets access to the address
  port, so DENALI_HOST_ADDR was also added.

- We see 'index_addr(denali, cmd, 0x1)' in denali_erase(), but 0x1
  is a magic number.  0x1 means the erase operation.  Replace 0x1
  with DENALI_ERASE.

- Rename index_addr() to denali_host_write() for clarification

- Denali User's Guide says MAP{00,01,10,11} for access mode.  Match
  the macros with terminology in the IP document.

- Rename struct members as follows:
  flash_bank   -> active_bank    (currently selected bank)
  flash_reg    -> reg            (base address of registers)
  flash_mem    -> host           (base address of host interface)
  devnum       -> devs_per_cs    (devices connected in parallel)
  bbtskipbytes -> oob_skip_bytes (number of bytes to skip in OOB)

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/denali.c     | 247 +++++++++++++++++-----------------
 drivers/mtd/nand/denali.h     |  15 +--
 drivers/mtd/nand/denali_dt.c  |  12 +-
 drivers/mtd/nand/denali_pci.c |  16 +--
 4 files changed, 144 insertions(+), 146 deletions(-)

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 5ac3eb07c6d66..d723be3521482 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -31,12 +31,26 @@ MODULE_LICENSE("GPL");
 
 #define DENALI_NAND_NAME    "denali-nand"
 
-/*
- * indicates whether or not the internal value for the flash bank is
- * valid or not
- */
-#define CHIP_SELECT_INVALID	-1
+/* Host Data/Command Interface */
+#define DENALI_HOST_ADDR	0x00
+#define DENALI_HOST_DATA	0x10
+
+#define DENALI_MAP00		(0 << 26)	/* direct access to buffer */
+#define DENALI_MAP01		(1 << 26)	/* read/write pages in PIO */
+#define DENALI_MAP10		(2 << 26)	/* high-level control plane */
+#define DENALI_MAP11		(3 << 26)	/* direct controller access */
+
+/* MAP11 access cycle type */
+#define DENALI_MAP11_CMD	((DENALI_MAP11) | 0)	/* command cycle */
+#define DENALI_MAP11_ADDR	((DENALI_MAP11) | 1)	/* address cycle */
+#define DENALI_MAP11_DATA	((DENALI_MAP11) | 2)	/* data cycle */
+
+/* MAP10 commands */
+#define DENALI_ERASE		0x01
+
+#define DENALI_BANK(denali)	((denali)->active_bank << 24)
 
+#define DENALI_INVALID_BANK	-1
 #define DENALI_NR_BANKS		4
 
 /*
@@ -56,23 +70,11 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 	return container_of(mtd_to_nand(mtd), struct denali_nand_info, nand);
 }
 
-/*
- * this is a helper macro that allows us to
- * format the bank into the proper bits for the controller
- */
-#define BANK(x) ((x) << 24)
-
-/*
- * Certain operations for the denali NAND controller use an indexed mode to
- * read/write data. The operation is performed by writing the address value
- * of the command to the device memory followed by the data. This function
- * abstracts this common operation.
- */
-static void index_addr(struct denali_nand_info *denali,
-				uint32_t address, uint32_t data)
+static void denali_host_write(struct denali_nand_info *denali,
+			      uint32_t addr, uint32_t data)
 {
-	iowrite32(address, denali->flash_mem);
-	iowrite32(data, denali->flash_mem + 0x10);
+	iowrite32(addr, denali->host + DENALI_HOST_ADDR);
+	iowrite32(data, denali->host + DENALI_HOST_DATA);
 }
 
 /*
@@ -81,7 +83,7 @@ static void index_addr(struct denali_nand_info *denali,
  */
 static void detect_max_banks(struct denali_nand_info *denali)
 {
-	uint32_t features = ioread32(denali->flash_reg + FEATURES);
+	uint32_t features = ioread32(denali->reg + FEATURES);
 
 	denali->max_banks = 1 << (features & FEATURES__N_BANKS);
 
@@ -95,8 +97,8 @@ static void denali_enable_irq(struct denali_nand_info *denali)
 	int i;
 
 	for (i = 0; i < DENALI_NR_BANKS; i++)
-		iowrite32(U32_MAX, denali->flash_reg + INTR_EN(i));
-	iowrite32(GLOBAL_INT_EN_FLAG, denali->flash_reg + GLOBAL_INT_ENABLE);
+		iowrite32(U32_MAX, denali->reg + INTR_EN(i));
+	iowrite32(GLOBAL_INT_EN_FLAG, denali->reg + GLOBAL_INT_ENABLE);
 }
 
 static void denali_disable_irq(struct denali_nand_info *denali)
@@ -104,15 +106,15 @@ static void denali_disable_irq(struct denali_nand_info *denali)
 	int i;
 
 	for (i = 0; i < DENALI_NR_BANKS; i++)
-		iowrite32(0, denali->flash_reg + INTR_EN(i));
-	iowrite32(0, denali->flash_reg + GLOBAL_INT_ENABLE);
+		iowrite32(0, denali->reg + INTR_EN(i));
+	iowrite32(0, denali->reg + GLOBAL_INT_ENABLE);
 }
 
 static void denali_clear_irq(struct denali_nand_info *denali,
 			     int bank, uint32_t irq_status)
 {
 	/* write one to clear bits */
-	iowrite32(irq_status, denali->flash_reg + INTR_STATUS(bank));
+	iowrite32(irq_status, denali->reg + INTR_STATUS(bank));
 }
 
 static void denali_clear_irq_all(struct denali_nand_info *denali)
@@ -133,13 +135,13 @@ static irqreturn_t denali_isr(int irq, void *dev_id)
 	spin_lock(&denali->irq_lock);
 
 	for (i = 0; i < DENALI_NR_BANKS; i++) {
-		irq_status = ioread32(denali->flash_reg + INTR_STATUS(i));
+		irq_status = ioread32(denali->reg + INTR_STATUS(i));
 		if (irq_status)
 			ret = IRQ_HANDLED;
 
 		denali_clear_irq(denali, i, irq_status);
 
-		if (i != denali->flash_bank)
+		if (i != denali->active_bank)
 			continue;
 
 		denali->irq_status |= irq_status;
@@ -220,8 +222,8 @@ static void setup_ecc_for_xfer(struct denali_nand_info *denali, bool ecc_en,
 	transfer_spare_flag = transfer_spare ? TRANSFER_SPARE_REG__FLAG : 0;
 
 	/* Enable spare area/ECC per user's request. */
-	iowrite32(ecc_en_flag, denali->flash_reg + ECC_ENABLE);
-	iowrite32(transfer_spare_flag, denali->flash_reg + TRANSFER_SPARE_REG);
+	iowrite32(ecc_en_flag, denali->reg + ECC_ENABLE);
+	iowrite32(transfer_spare_flag, denali->reg + TRANSFER_SPARE_REG);
 }
 
 static void denali_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
@@ -229,10 +231,11 @@ static void denali_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	int i;
 
-	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
 
 	for (i = 0; i < len; i++)
-		buf[i] = ioread32(denali->flash_mem + 0x10);
+		buf[i] = ioread32(denali->host + DENALI_HOST_DATA);
 }
 
 static void denali_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
@@ -240,10 +243,11 @@ static void denali_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	int i;
 
-	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
 
 	for (i = 0; i < len; i++)
-		iowrite32(buf[i], denali->flash_mem + 0x10);
+		iowrite32(buf[i], denali->host + DENALI_HOST_DATA);
 }
 
 static void denali_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
@@ -252,10 +256,11 @@ static void denali_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
 	uint16_t *buf16 = (uint16_t *)buf;
 	int i;
 
-	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
 
 	for (i = 0; i < len / 2; i++)
-		buf16[i] = ioread32(denali->flash_mem + 0x10);
+		buf16[i] = ioread32(denali->host + DENALI_HOST_DATA);
 }
 
 static void denali_write_buf16(struct mtd_info *mtd, const uint8_t *buf,
@@ -265,10 +270,11 @@ static void denali_write_buf16(struct mtd_info *mtd, const uint8_t *buf,
 	const uint16_t *buf16 = (const uint16_t *)buf;
 	int i;
 
-	iowrite32(MODE_11 | BANK(denali->flash_bank) | 2, denali->flash_mem);
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
 
 	for (i = 0; i < len / 2; i++)
-		iowrite32(buf16[i], denali->flash_mem + 0x10);
+		iowrite32(buf16[i], denali->host + DENALI_HOST_DATA);
 }
 
 static uint8_t denali_read_byte(struct mtd_info *mtd)
@@ -300,9 +306,9 @@ static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
 	uint32_t type;
 
 	if (ctrl & NAND_CLE)
-		type = 0;
+		type = DENALI_MAP11_CMD;
 	else if (ctrl & NAND_ALE)
-		type = 1;
+		type = DENALI_MAP11_ADDR;
 	else
 		return;
 
@@ -313,7 +319,7 @@ static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
 	if (ctrl & NAND_CTRL_CHANGE)
 		denali_reset_irq(denali);
 
-	index_addr(denali, MODE_11 | BANK(denali->flash_bank) | type, dat);
+	denali_host_write(denali, DENALI_BANK(denali) | type, dat);
 }
 
 static int denali_dev_ready(struct mtd_info *mtd)
@@ -366,11 +372,11 @@ static int denali_hw_ecc_fixup(struct mtd_info *mtd,
 			       unsigned long *uncor_ecc_flags)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
-	int bank = denali->flash_bank;
+	int bank = denali->active_bank;
 	uint32_t ecc_cor;
 	unsigned int max_bitflips;
 
-	ecc_cor = ioread32(denali->flash_reg + ECC_COR_INFO(bank));
+	ecc_cor = ioread32(denali->reg + ECC_COR_INFO(bank));
 	ecc_cor >>= ECC_COR_INFO__SHIFT(bank);
 
 	if (ecc_cor & ECC_COR_INFO__UNCOR_ERR) {
@@ -419,11 +425,11 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 	denali_reset_irq(denali);
 
 	do {
-		err_addr = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS);
+		err_addr = ioread32(denali->reg + ECC_ERROR_ADDRESS);
 		err_sector = ECC_SECTOR(err_addr);
 		err_byte = ECC_BYTE(err_addr);
 
-		err_cor_info = ioread32(denali->flash_reg + ERR_CORRECTION_INFO);
+		err_cor_info = ioread32(denali->reg + ERR_CORRECTION_INFO);
 		err_cor_value = ECC_CORRECTION_VALUE(err_cor_info);
 		err_device = ECC_ERR_DEVICE(err_cor_info);
 
@@ -449,7 +455,7 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 			unsigned int flips_in_byte;
 
 			offset = (err_sector * ecc_size + err_byte) *
-						denali->devnum + err_device;
+					denali->devs_per_cs + err_device;
 
 			/* correct the ECC error */
 			flips_in_byte = hweight8(buf[offset] ^ err_cor_value);
@@ -478,8 +484,8 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 /* programs the controller to either enable/disable DMA transfers */
 static void denali_enable_dma(struct denali_nand_info *denali, bool en)
 {
-	iowrite32(en ? DMA_ENABLE__FLAG : 0, denali->flash_reg + DMA_ENABLE);
-	ioread32(denali->flash_reg + DMA_ENABLE);
+	iowrite32(en ? DMA_ENABLE__FLAG : 0, denali->reg + DMA_ENABLE);
+	ioread32(denali->reg + DMA_ENABLE);
 }
 
 static void denali_setup_dma64(struct denali_nand_info *denali,
@@ -488,7 +494,7 @@ static void denali_setup_dma64(struct denali_nand_info *denali,
 	uint32_t mode;
 	const int page_count = 1;
 
-	mode = MODE_10 | BANK(denali->flash_bank) | page;
+	mode = DENALI_MAP10 | DENALI_BANK(denali) | page;
 
 	/* DMA is a three step process */
 
@@ -496,14 +502,14 @@ static void denali_setup_dma64(struct denali_nand_info *denali,
 	 * 1. setup transfer type, interrupt when complete,
 	 *    burst len = 64 bytes, the number of pages
 	 */
-	index_addr(denali, mode,
-		   0x01002000 | (64 << 16) | (write << 8) | page_count);
+	denali_host_write(denali, mode,
+			  0x01002000 | (64 << 16) | (write << 8) | page_count);
 
 	/* 2. set memory low address */
-	index_addr(denali, mode, dma_addr);
+	denali_host_write(denali, mode, dma_addr);
 
 	/* 3. set memory high address */
-	index_addr(denali, mode, (uint64_t)dma_addr >> 32);
+	denali_host_write(denali, mode, (uint64_t)dma_addr >> 32);
 }
 
 static void denali_setup_dma32(struct denali_nand_info *denali,
@@ -512,21 +518,22 @@ static void denali_setup_dma32(struct denali_nand_info *denali,
 	uint32_t mode;
 	const int page_count = 1;
 
-	mode = MODE_10 | BANK(denali->flash_bank);
+	mode = DENALI_MAP10 | DENALI_BANK(denali);
 
 	/* DMA is a four step process */
 
 	/* 1. setup transfer type and # of pages */
-	index_addr(denali, mode | page, 0x2000 | (write << 8) | page_count);
+	denali_host_write(denali, mode | page,
+			  0x2000 | (write << 8) | page_count);
 
 	/* 2. set memory high address bits 23:8 */
-	index_addr(denali, mode | ((dma_addr >> 16) << 8), 0x2200);
+	denali_host_write(denali, mode | ((dma_addr >> 16) << 8), 0x2200);
 
 	/* 3. set memory low address bits 23:8 */
-	index_addr(denali, mode | ((dma_addr & 0xffff) << 8), 0x2300);
+	denali_host_write(denali, mode | ((dma_addr & 0xffff) << 8), 0x2300);
 
 	/* 4. interrupt when complete, burst len = 64 bytes */
-	index_addr(denali, mode | 0x14000, 0x2400);
+	denali_host_write(denali, mode | 0x14000, 0x2400);
 }
 
 static void denali_setup_dma(struct denali_nand_info *denali,
@@ -541,7 +548,7 @@ static void denali_setup_dma(struct denali_nand_info *denali,
 static int denali_pio_read(struct denali_nand_info *denali, void *buf,
 			   size_t size, int page, int raw)
 {
-	uint32_t addr = BANK(denali->flash_bank) | page;
+	uint32_t addr = DENALI_BANK(denali) | page;
 	uint32_t *buf32 = (uint32_t *)buf;
 	uint32_t irq_status, ecc_err_mask;
 	int i;
@@ -553,9 +560,9 @@ static int denali_pio_read(struct denali_nand_info *denali, void *buf,
 
 	denali_reset_irq(denali);
 
-	iowrite32(MODE_01 | addr, denali->flash_mem);
+	iowrite32(DENALI_MAP01 | addr, denali->host + DENALI_HOST_ADDR);
 	for (i = 0; i < size / 4; i++)
-		*buf32++ = ioread32(denali->flash_mem + 0x10);
+		*buf32++ = ioread32(denali->host + DENALI_HOST_DATA);
 
 	irq_status = denali_wait_for_irq(denali, INTR__PAGE_XFER_INC);
 	if (!(irq_status & INTR__PAGE_XFER_INC))
@@ -570,16 +577,16 @@ static int denali_pio_read(struct denali_nand_info *denali, void *buf,
 static int denali_pio_write(struct denali_nand_info *denali,
 			    const void *buf, size_t size, int page, int raw)
 {
-	uint32_t addr = BANK(denali->flash_bank) | page;
+	uint32_t addr = DENALI_BANK(denali) | page;
 	const uint32_t *buf32 = (uint32_t *)buf;
 	uint32_t irq_status;
 	int i;
 
 	denali_reset_irq(denali);
 
-	iowrite32(MODE_01 | addr, denali->flash_mem);
+	iowrite32(DENALI_MAP01 | addr, denali->host + DENALI_HOST_ADDR);
 	for (i = 0; i < size / 4; i++)
-		iowrite32(*buf32++, denali->flash_mem + 0x10);
+		iowrite32(*buf32++, denali->host + DENALI_HOST_DATA);
 
 	irq_status = denali_wait_for_irq(denali,
 				INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL);
@@ -672,7 +679,7 @@ static void denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip,
 	int ecc_steps = chip->ecc.steps;
 	int ecc_size = chip->ecc.size;
 	int ecc_bytes = chip->ecc.bytes;
-	int oob_skip = denali->bbtskipbytes;
+	int oob_skip = denali->oob_skip_bytes;
 	size_t size = writesize + oobsize;
 	int i, pos, len;
 
@@ -730,7 +737,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	int ecc_size = chip->ecc.size;
 	int ecc_bytes = chip->ecc.bytes;
 	void *dma_buf = denali->buf;
-	int oob_skip = denali->bbtskipbytes;
+	int oob_skip = denali->oob_skip_bytes;
 	size_t size = writesize + oobsize;
 	int ret, i, pos, len;
 
@@ -861,7 +868,7 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	int ecc_size = chip->ecc.size;
 	int ecc_bytes = chip->ecc.bytes;
 	void *dma_buf = denali->buf;
-	int oob_skip = denali->bbtskipbytes;
+	int oob_skip = denali->oob_skip_bytes;
 	size_t size = writesize + oobsize;
 	int i, pos, len;
 
@@ -942,7 +949,7 @@ static void denali_select_chip(struct mtd_info *mtd, int chip)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
-	denali->flash_bank = chip;
+	denali->active_bank = chip;
 }
 
 static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
@@ -959,13 +966,12 @@ static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
 static int denali_erase(struct mtd_info *mtd, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t cmd, irq_status;
+	uint32_t irq_status;
 
 	denali_reset_irq(denali);
 
-	/* setup page read request for access type */
-	cmd = MODE_10 | BANK(denali->flash_bank) | page;
-	index_addr(denali, cmd, 0x1);
+	denali_host_write(denali, DENALI_MAP10 | DENALI_BANK(denali) | page,
+			  DENALI_ERASE);
 
 	/* wait for erase to complete or failure to occur */
 	irq_status = denali_wait_for_irq(denali,
@@ -1004,37 +1010,37 @@ static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
 	acc_clks = DIV_ROUND_UP(timings->tREA_max, t_clk);
 	acc_clks = min_t(int, acc_clks, ACC_CLKS__VALUE);
 
-	tmp = ioread32(denali->flash_reg + ACC_CLKS);
+	tmp = ioread32(denali->reg + ACC_CLKS);
 	tmp &= ~ACC_CLKS__VALUE;
 	tmp |= acc_clks;
-	iowrite32(tmp, denali->flash_reg + ACC_CLKS);
+	iowrite32(tmp, denali->reg + ACC_CLKS);
 
 	/* tRWH -> RE_2_WE */
 	re_2_we = DIV_ROUND_UP(timings->tRHW_min, t_clk);
 	re_2_we = min_t(int, re_2_we, RE_2_WE__VALUE);
 
-	tmp = ioread32(denali->flash_reg + RE_2_WE);
+	tmp = ioread32(denali->reg + RE_2_WE);
 	tmp &= ~RE_2_WE__VALUE;
 	tmp |= re_2_we;
-	iowrite32(tmp, denali->flash_reg + RE_2_WE);
+	iowrite32(tmp, denali->reg + RE_2_WE);
 
 	/* tRHZ -> RE_2_RE */
 	re_2_re = DIV_ROUND_UP(timings->tRHZ_max, t_clk);
 	re_2_re = min_t(int, re_2_re, RE_2_RE__VALUE);
 
-	tmp = ioread32(denali->flash_reg + RE_2_RE);
+	tmp = ioread32(denali->reg + RE_2_RE);
 	tmp &= ~RE_2_RE__VALUE;
 	tmp |= re_2_re;
-	iowrite32(tmp, denali->flash_reg + RE_2_RE);
+	iowrite32(tmp, denali->reg + RE_2_RE);
 
 	/* tWHR -> WE_2_RE */
 	we_2_re = DIV_ROUND_UP(timings->tWHR_min, t_clk);
 	we_2_re = min_t(int, we_2_re, TWHR2_AND_WE_2_RE__WE_2_RE);
 
-	tmp = ioread32(denali->flash_reg + TWHR2_AND_WE_2_RE);
+	tmp = ioread32(denali->reg + TWHR2_AND_WE_2_RE);
 	tmp &= ~TWHR2_AND_WE_2_RE__WE_2_RE;
 	tmp |= we_2_re;
-	iowrite32(tmp, denali->flash_reg + TWHR2_AND_WE_2_RE);
+	iowrite32(tmp, denali->reg + TWHR2_AND_WE_2_RE);
 
 	/* tADL -> ADDR_2_DATA */
 
@@ -1046,20 +1052,20 @@ static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
 	addr_2_data = DIV_ROUND_UP(timings->tADL_min, t_clk);
 	addr_2_data = min_t(int, addr_2_data, addr_2_data_mask);
 
-	tmp = ioread32(denali->flash_reg + TCWAW_AND_ADDR_2_DATA);
+	tmp = ioread32(denali->reg + TCWAW_AND_ADDR_2_DATA);
 	tmp &= ~addr_2_data_mask;
 	tmp |= addr_2_data;
-	iowrite32(tmp, denali->flash_reg + TCWAW_AND_ADDR_2_DATA);
+	iowrite32(tmp, denali->reg + TCWAW_AND_ADDR_2_DATA);
 
 	/* tREH, tWH -> RDWR_EN_HI_CNT */
 	rdwr_en_hi = DIV_ROUND_UP(max(timings->tREH_min, timings->tWH_min),
 				  t_clk);
 	rdwr_en_hi = min_t(int, rdwr_en_hi, RDWR_EN_HI_CNT__VALUE);
 
-	tmp = ioread32(denali->flash_reg + RDWR_EN_HI_CNT);
+	tmp = ioread32(denali->reg + RDWR_EN_HI_CNT);
 	tmp &= ~RDWR_EN_HI_CNT__VALUE;
 	tmp |= rdwr_en_hi;
-	iowrite32(tmp, denali->flash_reg + RDWR_EN_HI_CNT);
+	iowrite32(tmp, denali->reg + RDWR_EN_HI_CNT);
 
 	/* tRP, tWP -> RDWR_EN_LO_CNT */
 	rdwr_en_lo = DIV_ROUND_UP(max(timings->tRP_min, timings->tWP_min),
@@ -1070,10 +1076,10 @@ static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
 	rdwr_en_lo = max(rdwr_en_lo, rdwr_en_lo_hi - rdwr_en_hi);
 	rdwr_en_lo = min_t(int, rdwr_en_lo, RDWR_EN_LO_CNT__VALUE);
 
-	tmp = ioread32(denali->flash_reg + RDWR_EN_LO_CNT);
+	tmp = ioread32(denali->reg + RDWR_EN_LO_CNT);
 	tmp &= ~RDWR_EN_LO_CNT__VALUE;
 	tmp |= rdwr_en_lo;
-	iowrite32(tmp, denali->flash_reg + RDWR_EN_LO_CNT);
+	iowrite32(tmp, denali->reg + RDWR_EN_LO_CNT);
 
 	/* tCS, tCEA -> CS_SETUP_CNT */
 	cs_setup = max3((int)DIV_ROUND_UP(timings->tCS_min, t_clk) - rdwr_en_lo,
@@ -1081,10 +1087,10 @@ static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
 			0);
 	cs_setup = min_t(int, cs_setup, CS_SETUP_CNT__VALUE);
 
-	tmp = ioread32(denali->flash_reg + CS_SETUP_CNT);
+	tmp = ioread32(denali->reg + CS_SETUP_CNT);
 	tmp &= ~CS_SETUP_CNT__VALUE;
 	tmp |= cs_setup;
-	iowrite32(tmp, denali->flash_reg + CS_SETUP_CNT);
+	iowrite32(tmp, denali->reg + CS_SETUP_CNT);
 
 	return 0;
 }
@@ -1095,12 +1101,12 @@ static void denali_reset_banks(struct denali_nand_info *denali)
 	int i;
 
 	for (i = 0; i < denali->max_banks; i++) {
-		denali->flash_bank = i;
+		denali->active_bank = i;
 
 		denali_reset_irq(denali);
 
 		iowrite32(DEVICE_RESET__BANK(i),
-			  denali->flash_reg + DEVICE_RESET);
+			  denali->reg + DEVICE_RESET);
 
 		irq_status = denali_wait_for_irq(denali,
 			INTR__RST_COMP | INTR__INT_ACT | INTR__TIME_OUT);
@@ -1119,8 +1125,7 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	 * override it.
 	 */
 	if (!denali->revision)
-		denali->revision =
-				swab16(ioread32(denali->flash_reg + REVISION));
+		denali->revision = swab16(ioread32(denali->reg + REVISION));
 
 	/*
 	 * tell driver how many bit controller will skip before
@@ -1128,18 +1133,16 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	 * set by firmware. So we read this value out.
 	 * if this value is 0, just let it be.
 	 */
-	denali->bbtskipbytes = ioread32(denali->flash_reg +
-						SPARE_AREA_SKIP_BYTES);
+	denali->oob_skip_bytes = ioread32(denali->reg + SPARE_AREA_SKIP_BYTES);
 	detect_max_banks(denali);
-	iowrite32(0x0F, denali->flash_reg + RB_PIN_ENABLED);
-	iowrite32(CHIP_EN_DONT_CARE__FLAG,
-			denali->flash_reg + CHIP_ENABLE_DONT_CARE);
+	iowrite32(0x0F, denali->reg + RB_PIN_ENABLED);
+	iowrite32(CHIP_EN_DONT_CARE__FLAG, denali->reg + CHIP_ENABLE_DONT_CARE);
 
-	iowrite32(0xffff, denali->flash_reg + SPARE_AREA_MARKER);
+	iowrite32(0xffff, denali->reg + SPARE_AREA_MARKER);
 
 	/* Should set value for these registers when init */
-	iowrite32(0, denali->flash_reg + TWO_ROW_ADDR_CYCLES);
-	iowrite32(1, denali->flash_reg + ECC_ENABLE);
+	iowrite32(0, denali->reg + TWO_ROW_ADDR_CYCLES);
+	iowrite32(1, denali->reg + ECC_ENABLE);
 }
 
 int denali_calc_ecc_bytes(int step_size, int strength)
@@ -1152,7 +1155,7 @@ EXPORT_SYMBOL(denali_calc_ecc_bytes);
 static int denali_ecc_setup(struct mtd_info *mtd, struct nand_chip *chip,
 			    struct denali_nand_info *denali)
 {
-	int oobavail = mtd->oobsize - denali->bbtskipbytes;
+	int oobavail = mtd->oobsize - denali->oob_skip_bytes;
 	int ret;
 
 	/*
@@ -1185,7 +1188,7 @@ static int denali_ooblayout_ecc(struct mtd_info *mtd, int section,
 	if (section)
 		return -ERANGE;
 
-	oobregion->offset = denali->bbtskipbytes;
+	oobregion->offset = denali->oob_skip_bytes;
 	oobregion->length = chip->ecc.total;
 
 	return 0;
@@ -1200,7 +1203,7 @@ static int denali_ooblayout_free(struct mtd_info *mtd, int section,
 	if (section)
 		return -ERANGE;
 
-	oobregion->offset = chip->ecc.total + denali->bbtskipbytes;
+	oobregion->offset = chip->ecc.total + denali->oob_skip_bytes;
 	oobregion->length = mtd->oobsize - oobregion->offset;
 
 	return 0;
@@ -1239,23 +1242,23 @@ static int denali_multidev_fixup(struct denali_nand_info *denali)
 	 * In this case, the core framework knows nothing about this fact,
 	 * so we should tell it the _logical_ pagesize and anything necessary.
 	 */
-	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
+	denali->devs_per_cs = ioread32(denali->reg + DEVICES_CONNECTED);
 
 	/*
 	 * On some SoCs, DEVICES_CONNECTED is not auto-detected.
 	 * For those, DEVICES_CONNECTED is left to 0.  Set 1 if it is the case.
 	 */
-	if (denali->devnum == 0) {
-		denali->devnum = 1;
-		iowrite32(1, denali->flash_reg + DEVICES_CONNECTED);
+	if (denali->devs_per_cs == 0) {
+		denali->devs_per_cs = 1;
+		iowrite32(1, denali->reg + DEVICES_CONNECTED);
 	}
 
-	if (denali->devnum == 1)
+	if (denali->devs_per_cs == 1)
 		return 0;
 
-	if (denali->devnum != 2) {
+	if (denali->devs_per_cs != 2) {
 		dev_err(denali->dev, "unsupported number of devices %d\n",
-			denali->devnum);
+			denali->devs_per_cs);
 		return -EINVAL;
 	}
 
@@ -1273,7 +1276,7 @@ static int denali_multidev_fixup(struct denali_nand_info *denali)
 	chip->ecc.size <<= 1;
 	chip->ecc.bytes <<= 1;
 	chip->ecc.strength <<= 1;
-	denali->bbtskipbytes <<= 1;
+	denali->oob_skip_bytes <<= 1;
 
 	return 0;
 }
@@ -1301,7 +1304,7 @@ int denali_init(struct denali_nand_info *denali)
 	denali_enable_irq(denali);
 	denali_reset_banks(denali);
 
-	denali->flash_bank = CHIP_SELECT_INVALID;
+	denali->active_bank = DENALI_INVALID_BANK;
 
 	nand_set_flash_node(chip, denali->dev->of_node);
 	/* Fallback to the default name if DT did not give "label" property */
@@ -1330,7 +1333,7 @@ int denali_init(struct denali_nand_info *denali)
 	if (ret)
 		goto disable_irq;
 
-	if (ioread32(denali->flash_reg + FEATURES) & FEATURES__DMA)
+	if (ioread32(denali->reg + FEATURES) & FEATURES__DMA)
 		denali->dma_avail = 1;
 
 	if (denali->dma_avail) {
@@ -1374,19 +1377,19 @@ int denali_init(struct denali_nand_info *denali)
 		chip->ecc.size, chip->ecc.strength, chip->ecc.bytes);
 
 	iowrite32(MAKE_ECC_CORRECTION(chip->ecc.strength, 1),
-		  denali->flash_reg + ECC_CORRECTION);
+		  denali->reg + ECC_CORRECTION);
 	iowrite32(mtd->erasesize / mtd->writesize,
-		  denali->flash_reg + PAGES_PER_BLOCK);
+		  denali->reg + PAGES_PER_BLOCK);
 	iowrite32(chip->options & NAND_BUSWIDTH_16 ? 1 : 0,
-		  denali->flash_reg + DEVICE_WIDTH);
-	iowrite32(mtd->writesize, denali->flash_reg + DEVICE_MAIN_AREA_SIZE);
-	iowrite32(mtd->oobsize, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
+		  denali->reg + DEVICE_WIDTH);
+	iowrite32(mtd->writesize, denali->reg + DEVICE_MAIN_AREA_SIZE);
+	iowrite32(mtd->oobsize, denali->reg + DEVICE_SPARE_AREA_SIZE);
 
-	iowrite32(chip->ecc.size, denali->flash_reg + CFG_DATA_BLOCK_SIZE);
-	iowrite32(chip->ecc.size, denali->flash_reg + CFG_LAST_DATA_BLOCK_SIZE);
+	iowrite32(chip->ecc.size, denali->reg + CFG_DATA_BLOCK_SIZE);
+	iowrite32(chip->ecc.size, denali->reg + CFG_LAST_DATA_BLOCK_SIZE);
 	/* chip->ecc.steps is set by nand_scan_tail(); not available here */
 	iowrite32(mtd->writesize / chip->ecc.size,
-		  denali->flash_reg + CFG_NUM_DATA_BLOCKS);
+		  denali->reg + CFG_NUM_DATA_BLOCKS);
 
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
 
diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index 657a794af695e..237cc706b0fb4 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h
@@ -303,18 +303,13 @@
 #define     CHNL_ACTIVE__CHANNEL2			BIT(2)
 #define     CHNL_ACTIVE__CHANNEL3			BIT(3)
 
-#define MODE_00    0x00000000
-#define MODE_01    0x04000000
-#define MODE_10    0x08000000
-#define MODE_11    0x0C000000
-
 struct denali_nand_info {
 	struct nand_chip nand;
 	unsigned long clk_x_rate;	/* bus interface clock rate */
-	int flash_bank; /* currently selected chip */
+	int active_bank;		/* currently selected bank */
 	struct device *dev;
-	void __iomem *flash_reg;	/* Register Interface */
-	void __iomem *flash_mem;	/* Host Data/Command Interface */
+	void __iomem *reg;		/* Register Interface */
+	void __iomem *host;		/* Host Data/Command Interface */
 
 	/* elements used by ISR */
 	struct completion complete;
@@ -326,8 +321,8 @@ struct denali_nand_info {
 	void *buf;
 	dma_addr_t dma_addr;
 	int dma_avail;
-	int devnum;	/* represent how many nands connected */
-	int bbtskipbytes;
+	int devs_per_cs;		/* devices connected in parallel */
+	int oob_skip_bytes;
 	int max_banks;
 	unsigned int revision;
 	unsigned int caps;
diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index ebcce50f4005e..47f398edf18f4 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c
@@ -104,14 +104,14 @@ static int denali_dt_probe(struct platform_device *pdev)
 	}
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "denali_reg");
-	denali->flash_reg = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(denali->flash_reg))
-		return PTR_ERR(denali->flash_reg);
+	denali->reg = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(denali->reg))
+		return PTR_ERR(denali->reg);
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
-	denali->flash_mem = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(denali->flash_mem))
-		return PTR_ERR(denali->flash_mem);
+	denali->host = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(denali->host))
+		return PTR_ERR(denali->host);
 
 	dt->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dt->clk)) {
diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c
index 6217525c10005..81370c79aa48a 100644
--- a/drivers/mtd/nand/denali_pci.c
+++ b/drivers/mtd/nand/denali_pci.c
@@ -78,14 +78,14 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return ret;
 	}
 
-	denali->flash_reg = ioremap_nocache(csr_base, csr_len);
-	if (!denali->flash_reg) {
+	denali->reg = ioremap_nocache(csr_base, csr_len);
+	if (!denali->reg) {
 		dev_err(&dev->dev, "Spectra: Unable to remap memory region\n");
 		return -ENOMEM;
 	}
 
-	denali->flash_mem = ioremap_nocache(mem_base, mem_len);
-	if (!denali->flash_mem) {
+	denali->host = ioremap_nocache(mem_base, mem_len);
+	if (!denali->host) {
 		dev_err(&dev->dev, "Spectra: ioremap_nocache failed!");
 		ret = -ENOMEM;
 		goto failed_remap_reg;
@@ -100,9 +100,9 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	return 0;
 
 failed_remap_mem:
-	iounmap(denali->flash_mem);
+	iounmap(denali->host);
 failed_remap_reg:
-	iounmap(denali->flash_reg);
+	iounmap(denali->reg);
 	return ret;
 }
 
@@ -112,8 +112,8 @@ static void denali_pci_remove(struct pci_dev *dev)
 	struct denali_nand_info *denali = pci_get_drvdata(dev);
 
 	denali_remove(denali);
-	iounmap(denali->flash_reg);
-	iounmap(denali->flash_mem);
+	iounmap(denali->reg);
+	iounmap(denali->host);
 }
 
 static struct pci_driver denali_pci_driver = {
-- 
GitLab


From d1ab0da84dcbac30a9039ccef72d69bf0a68bfc7 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>
Date: Fri, 9 Jun 2017 16:27:21 +0530
Subject: [PATCH 0315/1958] mtd: nand: ifc: Initialize SRAM for all version >=
 1.0

All IFC version >= 1.0 use 28nm technology for SRAM. Here SRAM has
a requirement to initialize before any read operation performed for
avoiding ECC Error.

So update condition check to initialize SRAM for all IFC version >= 1.0.0

Signed-off-by: Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/fsl_ifc_nand.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index d1c4538f870f9..59408ec2c69f2 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -913,7 +913,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 		chip->ecc.algo = NAND_ECC_HAMMING;
 	}
 
-	if (ctrl->version == FSL_IFC_VERSION_1_1_0)
+	if (ctrl->version >= FSL_IFC_VERSION_1_1_0)
 		fsl_ifc_sram_init(priv);
 
 	return 0;
-- 
GitLab


From d29cb3e45e923715f74d8a08d5c1ea996dce5a59 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:12 -0700
Subject: [PATCH 0316/1958] xfs: make _bmap_count_blocks consistent wrt
 delalloc extent behavior

There is an inconsistency in the way that _bmap_count_blocks deals with
delalloc reservations -- if the specified fork is in extents format,
*count is set to the total number of blocks referenced by the in-core
fork, including delalloc extents.  However, if the fork is in btree
format, *count is set to the number of blocks referenced by the on-disk
fork, which does /not/ include delalloc extents.

For the lone existing caller of _bmap_count_blocks this hasn't been an
issue because the function is only used to count xattr fork blocks
(where there aren't any delalloc reservations).  However, when scrub
comes along it will use this same function to check di_nblocks against
both on-disk extent maps, so we need this behavior to be consistent.

Therefore, fix _bmap_count_leaves not to include delalloc extents and
remove unnecessary parameters.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_bmap_util.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 863180c418584..424e3bd127004 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -219,20 +219,22 @@ xfs_bmap_eof(
  */
 
 /*
- * Count leaf blocks given a range of extent records.
+ * Count leaf blocks given a range of extent records.  Delayed allocation
+ * extents are not counted towards the totals.
  */
 STATIC void
 xfs_bmap_count_leaves(
-	xfs_ifork_t		*ifp,
-	xfs_extnum_t		idx,
-	int			numrecs,
+	struct xfs_ifork	*ifp,
 	int			*count)
 {
-	int		b;
+	xfs_extnum_t		i;
+	xfs_extnum_t		nr_exts = xfs_iext_count(ifp);
 
-	for (b = 0; b < numrecs; b++) {
-		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
-		*count += xfs_bmbt_get_blockcount(frp);
+	for (i = 0; i < nr_exts; i++) {
+		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i);
+		if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) {
+			*count += xfs_bmbt_get_blockcount(frp);
+		}
 	}
 }
 
@@ -334,7 +336,8 @@ xfs_bmap_count_tree(
 }
 
 /*
- * Count fsblocks of the given fork.
+ * Count fsblocks of the given fork.  Delayed allocation extents are
+ * not counted towards the totals.
  */
 static int					/* error */
 xfs_bmap_count_blocks(
@@ -354,7 +357,7 @@ xfs_bmap_count_blocks(
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
-		xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
+		xfs_bmap_count_leaves(ifp, count);
 		return 0;
 	}
 
-- 
GitLab


From e7f5d5ca36e29fa3c9702f236b0ab73f63336080 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:12 -0700
Subject: [PATCH 0317/1958] xfs: refactor the ifork block counting function

Refactor the inode fork block counting function to count extents for us
at the same time.  This will be used by the bmbt scrubber function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_bmap_util.c | 109 +++++++++++++++++++++++++----------------
 fs/xfs/xfs_bmap_util.h |   4 ++
 2 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 424e3bd127004..93e955262d07e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -225,7 +225,8 @@ xfs_bmap_eof(
 STATIC void
 xfs_bmap_count_leaves(
 	struct xfs_ifork	*ifp,
-	int			*count)
+	xfs_extnum_t		*numrecs,
+	xfs_filblks_t		*count)
 {
 	xfs_extnum_t		i;
 	xfs_extnum_t		nr_exts = xfs_iext_count(ifp);
@@ -233,6 +234,7 @@ xfs_bmap_count_leaves(
 	for (i = 0; i < nr_exts; i++) {
 		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i);
 		if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) {
+			(*numrecs)++;
 			*count += xfs_bmbt_get_blockcount(frp);
 		}
 	}
@@ -247,7 +249,7 @@ xfs_bmap_disk_count_leaves(
 	struct xfs_mount	*mp,
 	struct xfs_btree_block	*block,
 	int			numrecs,
-	int			*count)
+	xfs_filblks_t		*count)
 {
 	int		b;
 	xfs_bmbt_rec_t	*frp;
@@ -262,17 +264,18 @@ xfs_bmap_disk_count_leaves(
  * Recursively walks each level of a btree
  * to count total fsblocks in use.
  */
-STATIC int                                     /* error */
+STATIC int
 xfs_bmap_count_tree(
-	xfs_mount_t     *mp,            /* file system mount point */
-	xfs_trans_t     *tp,            /* transaction pointer */
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fsblock_t   blockno,	/* file system block number */
-	int             levelin,	/* level in btree */
-	int		*count)		/* Count of blocks */
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_ifork	*ifp,
+	xfs_fsblock_t		blockno,
+	int			levelin,
+	xfs_extnum_t		*nextents,
+	xfs_filblks_t		*count)
 {
 	int			error;
-	xfs_buf_t		*bp, *nbp;
+	struct xfs_buf		*bp, *nbp;
 	int			level = levelin;
 	__be64			*pp;
 	xfs_fsblock_t           bno = blockno;
@@ -305,8 +308,9 @@ xfs_bmap_count_tree(
 		/* Dive to the next level */
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
-		if (unlikely((error =
-		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
+		error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
+				count);
+		if (error) {
 			xfs_trans_brelse(tp, bp);
 			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
 					 XFS_ERRLEVEL_LOW, mp);
@@ -318,6 +322,7 @@ xfs_bmap_count_tree(
 		for (;;) {
 			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 			numrecs = be16_to_cpu(block->bb_numrecs);
+			(*nextents) += numrecs;
 			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
 			xfs_trans_brelse(tp, bp);
 			if (nextbno == NULLFSBLOCK)
@@ -339,44 +344,61 @@ xfs_bmap_count_tree(
  * Count fsblocks of the given fork.  Delayed allocation extents are
  * not counted towards the totals.
  */
-static int					/* error */
+int
 xfs_bmap_count_blocks(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_inode_t		*ip,		/* incore inode */
-	int			whichfork,	/* data or attr fork */
-	int			*count)		/* out: count of blocks */
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	int			whichfork,
+	xfs_extnum_t		*nextents,
+	xfs_filblks_t		*count)
 {
+	struct xfs_mount	*mp;	/* file system mount structure */
+	__be64			*pp;	/* pointer to block address */
 	struct xfs_btree_block	*block;	/* current btree block */
+	struct xfs_ifork	*ifp;	/* fork structure */
 	xfs_fsblock_t		bno;	/* block # of "block" */
-	xfs_ifork_t		*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
-	xfs_mount_t		*mp;	/* file system mount structure */
-	__be64			*pp;	/* pointer to block address */
+	int			error;
 
 	bno = NULLFSBLOCK;
 	mp = ip->i_mount;
+	*nextents = 0;
+	*count = 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
-		xfs_bmap_count_leaves(ifp, count);
+	if (!ifp)
 		return 0;
-	}
 
-	/*
-	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
-	 */
-	block = ifp->if_broot;
-	level = be16_to_cpu(block->bb_level);
-	ASSERT(level > 0);
-	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
-	bno = be64_to_cpu(*pp);
-	ASSERT(bno != NULLFSBLOCK);
-	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
-	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-
-	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
-		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
-				 mp);
-		return -EFSCORRUPTED;
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_EXTENTS:
+		xfs_bmap_count_leaves(ifp, nextents, count);
+		return 0;
+	case XFS_DINODE_FMT_BTREE:
+		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+			error = xfs_iread_extents(tp, ip, whichfork);
+			if (error)
+				return error;
+		}
+
+		/*
+		 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+		 */
+		block = ifp->if_broot;
+		level = be16_to_cpu(block->bb_level);
+		ASSERT(level > 0);
+		pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+		bno = be64_to_cpu(*pp);
+		ASSERT(bno != NULLFSBLOCK);
+		ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+		ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
+		error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
+				nextents, count);
+		if (error) {
+			XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
+					XFS_ERRLEVEL_LOW, mp);
+			return -EFSCORRUPTED;
+		}
+		return 0;
 	}
 
 	return 0;
@@ -1792,8 +1814,9 @@ xfs_swap_extent_forks(
 	int			*target_log_flags)
 {
 	struct xfs_ifork	tempifp, *ifp, *tifp;
-	int			aforkblks = 0;
-	int			taforkblks = 0;
+	xfs_filblks_t		aforkblks = 0;
+	xfs_filblks_t		taforkblks = 0;
+	xfs_extnum_t		junk;
 	xfs_extnum_t		nextents;
 	uint64_t		tmp;
 	int			error;
@@ -1803,14 +1826,14 @@ xfs_swap_extent_forks(
 	 */
 	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
 	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
-		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
+		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
 				&aforkblks);
 		if (error)
 			return error;
 	}
 	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
 	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
-		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
+		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
 				&taforkblks);
 		if (error)
 			return error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 135d8267e2846..0cede10435717 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -70,4 +70,8 @@ int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
 
 xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
 
+int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+			  int whichfork, xfs_extnum_t *nextents,
+			  xfs_filblks_t *count);
+
 #endif	/* __XFS_BMAP_UTIL_H__ */
-- 
GitLab


From 8e8877e6edf2b593fe488b6efa8b3b7cfba96738 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:13 -0700
Subject: [PATCH 0318/1958] xfs: return the hash value of a leaf1 directory
 block

Modify the existing dir leafn lasthash function to enable us to
calculate the highest hash value of a leaf1 block.  This will be used by
the directory scrubbing code to check the sanity of hashes in leaf1
directory blocks.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_da_btree.c  | 10 +++++-----
 fs/xfs/libxfs/xfs_dir2_node.c | 10 ++++++----
 fs/xfs/libxfs/xfs_dir2_priv.h |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 48f1136b80bf4..356f21d5cd357 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1282,7 +1282,7 @@ xfs_da3_fixhashpath(
 			return;
 		break;
 	case XFS_DIR2_LEAFN_MAGIC:
-		lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
+		lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
 		if (count == 0)
 			return;
 		break;
@@ -1502,8 +1502,8 @@ xfs_da3_node_lookup_int(
 		if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
 		    blk->magic == XFS_DIR3_LEAFN_MAGIC) {
 			blk->magic = XFS_DIR2_LEAFN_MAGIC;
-			blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
-							       blk->bp, NULL);
+			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+							      blk->bp, NULL);
 			break;
 		}
 
@@ -1929,8 +1929,8 @@ xfs_da3_path_shift(
 			blk->magic = XFS_DIR2_LEAFN_MAGIC;
 			ASSERT(level == path->active-1);
 			blk->index = 0;
-			blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
-							       blk->bp, NULL);
+			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+							      blk->bp, NULL);
 			break;
 		default:
 			ASSERT(0);
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index bbd1238852b3c..682e2bf370c72 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -528,7 +528,7 @@ xfs_dir2_free_hdr_check(
  * Stale entries are ok.
  */
 xfs_dahash_t					/* hash value */
-xfs_dir2_leafn_lasthash(
+xfs_dir2_leaf_lasthash(
 	struct xfs_inode *dp,
 	struct xfs_buf	*bp,			/* leaf buffer */
 	int		*count)			/* count of entries in leaf */
@@ -540,7 +540,9 @@ xfs_dir2_leafn_lasthash(
 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
 
 	ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
-	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
+	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
+	       leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
+	       leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
 
 	if (count)
 		*count = leafhdr.count;
@@ -1405,8 +1407,8 @@ xfs_dir2_leafn_split(
 	/*
 	 * Update last hashval in each block since we added the name.
 	 */
-	oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL);
-	newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL);
+	oldblk->hashval = xfs_dir2_leaf_lasthash(dp, oldblk->bp, NULL);
+	newblk->hashval = xfs_dir2_leaf_lasthash(dp, newblk->bp, NULL);
 	xfs_dir3_leaf_check(dp, oldblk->bp);
 	xfs_dir3_leaf_check(dp, newblk->bp);
 	return error;
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 576f2d267fa73..6d2420974d4f9 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -95,7 +95,7 @@ extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
 /* xfs_dir2_node.c */
 extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
 		struct xfs_buf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
+extern xfs_dahash_t xfs_dir2_leaf_lasthash(struct xfs_inode *dp,
 		struct xfs_buf *bp, int *count);
 extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
 		struct xfs_da_args *args, int *indexp,
-- 
GitLab


From acb9553cab552cf17154814f079f54401eefa474 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:14 -0700
Subject: [PATCH 0319/1958] xfs: pass along transaction context when reading
 directory block buffers

Teach the directory reading functions to pass along a transaction context
if one was supplied.  The directory scrub code will use transactions to
lock buffers and avoid deadlocking with itself in the case of loops.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_priv.h |  4 ++--
 fs/xfs/xfs_dir2_readdir.c     | 15 +++++++++++----
 fs/xfs/xfs_file.c             |  2 +-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 6d2420974d4f9..4badd26c47e64 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -130,7 +130,7 @@ extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
 extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
 
 /* xfs_dir2_readdir.c */
-extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
-		       size_t bufsize);
+extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
+		       struct dir_context *ctx, size_t bufsize);
 
 #endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index ede4790753bc1..ba2638d370315 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -170,7 +170,7 @@ xfs_dir2_block_getdents(
 		return 0;
 
 	lock_mode = xfs_ilock_data_map_shared(dp);
-	error = xfs_dir3_block_read(NULL, dp, &bp);
+	error = xfs_dir3_block_read(args->trans, dp, &bp);
 	xfs_iunlock(dp, lock_mode);
 	if (error)
 		return error;
@@ -228,7 +228,7 @@ xfs_dir2_block_getdents(
 		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
 			    be64_to_cpu(dep->inumber),
 			    xfs_dir3_get_dtype(dp->i_mount, filetype))) {
-			xfs_trans_brelse(NULL, bp);
+			xfs_trans_brelse(args->trans, bp);
 			return 0;
 		}
 	}
@@ -239,7 +239,7 @@ xfs_dir2_block_getdents(
 	 */
 	ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
 								0x7fffffff;
-	xfs_trans_brelse(NULL, bp);
+	xfs_trans_brelse(args->trans, bp);
 	return 0;
 }
 
@@ -495,15 +495,21 @@ xfs_dir2_leaf_getdents(
 	else
 		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
 	if (bp)
-		xfs_trans_brelse(NULL, bp);
+		xfs_trans_brelse(args->trans, bp);
 	return error;
 }
 
 /*
  * Read a directory.
+ *
+ * If supplied, the transaction collects locked dir buffers to avoid
+ * nested buffer deadlocks.  This function does not dirty the
+ * transaction.  The caller should ensure that the inode is locked
+ * before calling this function.
  */
 int
 xfs_readdir(
+	struct xfs_trans	*tp,
 	struct xfs_inode	*dp,
 	struct dir_context	*ctx,
 	size_t			bufsize)
@@ -522,6 +528,7 @@ xfs_readdir(
 
 	args.dp = dp;
 	args.geo = dp->i_mount->m_dir_geo;
+	args.trans = tp;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_getdents(&args, ctx);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5fb5a0958a148..36c129303fcfd 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -950,7 +950,7 @@ xfs_file_readdir(
 	 */
 	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
 
-	return xfs_readdir(ip, ctx, bufsize);
+	return xfs_readdir(NULL, ip, ctx, bufsize);
 }
 
 /*
-- 
GitLab


From ad017f6537dee30a67b89f937a16e2f6c82e3774 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:14 -0700
Subject: [PATCH 0320/1958] xfs: pass along transaction context when reading
 xattr block buffers

Teach the extended attribute reading functions to pass along a
transaction context if one was supplied.  The extended attribute scrub
code will use transactions to lock buffers and avoid deadlocking with
itself in the case of loops; since it will already have the inode
locked, also create xattr get/list helpers that don't take locks.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 26 ++++++++++-----
 fs/xfs/libxfs/xfs_attr_remote.c |  5 +--
 fs/xfs/xfs_attr.h               |  3 ++
 fs/xfs/xfs_attr_list.c          | 59 ++++++++++++++++++---------------
 4 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6622d46ddec38..ef8a1c75a467a 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -114,6 +114,23 @@ xfs_inode_hasattr(
  * Overall external interface routines.
  *========================================================================*/
 
+/* Retrieve an extended attribute and its value.  Must have iolock. */
+int
+xfs_attr_get_ilocked(
+	struct xfs_inode	*ip,
+	struct xfs_da_args	*args)
+{
+	if (!xfs_inode_hasattr(ip))
+		return -ENOATTR;
+	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+		return xfs_attr_shortform_getvalue(args);
+	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
+		return xfs_attr_leaf_get(args);
+	else
+		return xfs_attr_node_get(args);
+}
+
+/* Retrieve an extended attribute by name, and its value. */
 int
 xfs_attr_get(
 	struct xfs_inode	*ip,
@@ -141,14 +158,7 @@ xfs_attr_get(
 	args.op_flags = XFS_DA_OP_OKNOENT;
 
 	lock_mode = xfs_ilock_attr_map_shared(ip);
-	if (!xfs_inode_hasattr(ip))
-		error = -ENOATTR;
-	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
-		error = xfs_attr_shortform_getvalue(&args);
-	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
-		error = xfs_attr_leaf_get(&args);
-	else
-		error = xfs_attr_node_get(&args);
+	error = xfs_attr_get_ilocked(ip, &args);
 	xfs_iunlock(ip, lock_mode);
 
 	*valuelenp = args.valuelen;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index da72b16bef8ef..5236d8e451463 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -386,7 +386,8 @@ xfs_attr_rmtval_get(
 			       (map[i].br_startblock != HOLESTARTBLOCK));
 			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
 			dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+			error = xfs_trans_read_buf(mp, args->trans,
+						   mp->m_ddev_targp,
 						   dblkno, dblkcnt, 0, &bp,
 						   &xfs_attr3_rmt_buf_ops);
 			if (error)
@@ -395,7 +396,7 @@ xfs_attr_rmtval_get(
 			error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
 							&offset, &valuelen,
 							&dst);
-			xfs_buf_relse(bp);
+			xfs_trans_brelse(args->trans, bp);
 			if (error)
 				return error;
 
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index d14691aa02b44..5d5a5e277f35d 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -117,6 +117,7 @@ typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
 			      unsigned char *, int, int);
 
 typedef struct xfs_attr_list_context {
+	struct xfs_trans		*tp;
 	struct xfs_inode		*dp;		/* inode */
 	struct attrlist_cursor_kern	*cursor;	/* position in list */
 	char				*alist;		/* output buffer */
@@ -140,8 +141,10 @@ typedef struct xfs_attr_list_context {
  * Overall external interface routines.
  */
 int xfs_attr_inactive(struct xfs_inode *dp);
+int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *);
 int xfs_attr_list_int(struct xfs_attr_list_context *);
 int xfs_inode_hasattr(struct xfs_inode *ip);
+int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
 int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
 		 unsigned char *value, int *valuelenp, int flags);
 int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 9bc1e12179899..545eca508d42f 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -230,7 +230,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	 */
 	bp = NULL;
 	if (cursor->blkno > 0) {
-		error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
+		error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1,
 					      &bp, XFS_ATTR_FORK);
 		if ((error != 0) && (error != -EFSCORRUPTED))
 			return error;
@@ -242,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 			case XFS_DA_NODE_MAGIC:
 			case XFS_DA3_NODE_MAGIC:
 				trace_xfs_attr_list_wrong_blk(context);
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				bp = NULL;
 				break;
 			case XFS_ATTR_LEAF_MAGIC:
@@ -254,18 +254,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				if (cursor->hashval > be32_to_cpu(
 						entries[leafhdr.count - 1].hashval)) {
 					trace_xfs_attr_list_wrong_blk(context);
-					xfs_trans_brelse(NULL, bp);
+					xfs_trans_brelse(context->tp, bp);
 					bp = NULL;
 				} else if (cursor->hashval <= be32_to_cpu(
 						entries[0].hashval)) {
 					trace_xfs_attr_list_wrong_blk(context);
-					xfs_trans_brelse(NULL, bp);
+					xfs_trans_brelse(context->tp, bp);
 					bp = NULL;
 				}
 				break;
 			default:
 				trace_xfs_attr_list_wrong_blk(context);
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				bp = NULL;
 			}
 		}
@@ -281,7 +281,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 		for (;;) {
 			uint16_t magic;
 
-			error = xfs_da3_node_read(NULL, dp,
+			error = xfs_da3_node_read(context->tp, dp,
 						      cursor->blkno, -1, &bp,
 						      XFS_ATTR_FORK);
 			if (error)
@@ -297,7 +297,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 						     XFS_ERRLEVEL_LOW,
 						     context->dp->i_mount,
 						     node);
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				return -EFSCORRUPTED;
 			}
 
@@ -313,10 +313,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				}
 			}
 			if (i == nodehdr.count) {
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				return 0;
 			}
-			xfs_trans_brelse(NULL, bp);
+			xfs_trans_brelse(context->tp, bp);
 		}
 	}
 	ASSERT(bp != NULL);
@@ -333,12 +333,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 		if (context->seen_enough || leafhdr.forw == 0)
 			break;
 		cursor->blkno = leafhdr.forw;
-		xfs_trans_brelse(NULL, bp);
-		error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp);
+		xfs_trans_brelse(context->tp, bp);
+		error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp);
 		if (error)
 			return error;
 	}
-	xfs_trans_brelse(NULL, bp);
+	xfs_trans_brelse(context->tp, bp);
 	return 0;
 }
 
@@ -448,15 +448,33 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
 	trace_xfs_attr_leaf_list(context);
 
 	context->cursor->blkno = 0;
-	error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
+	error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp);
 	if (error)
 		return error;
 
 	xfs_attr3_leaf_list_int(bp, context);
-	xfs_trans_brelse(NULL, bp);
+	xfs_trans_brelse(context->tp, bp);
 	return 0;
 }
 
+int
+xfs_attr_list_int_ilocked(
+	struct xfs_attr_list_context	*context)
+{
+	struct xfs_inode		*dp = context->dp;
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (!xfs_inode_hasattr(dp))
+		return 0;
+	else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+		return xfs_attr_shortform_list(context);
+	else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+		return xfs_attr_leaf_list(context);
+	return xfs_attr_node_list(context);
+}
+
 int
 xfs_attr_list_int(
 	xfs_attr_list_context_t *context)
@@ -470,19 +488,8 @@ xfs_attr_list_int(
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return -EIO;
 
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
 	lock_mode = xfs_ilock_attr_map_shared(dp);
-	if (!xfs_inode_hasattr(dp)) {
-		error = 0;
-	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = xfs_attr_shortform_list(context);
-	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
-		error = xfs_attr_leaf_list(context);
-	} else {
-		error = xfs_attr_node_list(context);
-	}
+	error = xfs_attr_list_int_ilocked(context);
 	xfs_iunlock(dp, lock_mode);
 	return error;
 }
-- 
GitLab


From 5da8f2f8908aabcfa4113d37d470039c79962ba9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 16 Jun 2017 11:00:15 -0700
Subject: [PATCH 0321/1958] xfs: allow reading of already-locked remote
 symbolic link

Expose the readlink variant that doesn't take the inode lock so that
the scrubber can inspect symlink contents.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_symlink.c | 6 +++---
 fs/xfs/xfs_symlink.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index f2cb45ed1d54c..493804857d67a 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -43,8 +43,8 @@
 #include "xfs_log.h"
 
 /* ----- Kernel only functions below ----- */
-STATIC int
-xfs_readlink_bmap(
+int
+xfs_readlink_bmap_ilocked(
 	struct xfs_inode	*ip,
 	char			*link)
 {
@@ -153,7 +153,7 @@ xfs_readlink(
 	}
 
 
-	error = xfs_readlink_bmap(ip, link);
+	error = xfs_readlink_bmap_ilocked(ip, link);
 
  out:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index e75245d091161..aeaee89236171 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -21,6 +21,7 @@
 
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
 		const char *target_path, umode_t mode, struct xfs_inode **ipp);
+int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link);
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_inactive_symlink(struct xfs_inode *ip);
 
-- 
GitLab


From 61d819e7bcb7f33da710bf3f5dcb2bcf1e48203c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 19 Jun 2017 13:19:08 -0700
Subject: [PATCH 0322/1958] xfs: don't allow bmap on rt files

bmap returns a dumb LBA address but not the block device that goes with
that LBA.  Swapfiles don't care about this and will blindly assume that
the data volume is the correct blockdev, which is totally bogus for
files on the rt subvolume.  This results in the swap code doing IOs to
arbitrary locations on the data device(!) if the passed in mapping is a
realtime file, so just turn off bmap for rt files.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6bab8c449b814..716b5eeeed945 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1316,9 +1316,12 @@ xfs_vm_bmap(
 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
 	 * bypasseѕ the file system for actual I/O.  We really can't allow
 	 * that on reflinks inodes, so we have to skip out here.  And yes,
-	 * 0 is the magic code for a bmap error..
+	 * 0 is the magic code for a bmap error.
+	 *
+	 * Since we don't pass back blockdev info, we can't return bmap
+	 * information for rt files either.
 	 */
-	if (xfs_is_reflink_inode(ip))
+	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
 		return 0;
 
 	filemap_write_and_wait(mapping);
-- 
GitLab


From df1d178879f53d7f790d4bc372aa0fc5d5423328 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 23 May 2017 07:30:19 +0200
Subject: [PATCH 0323/1958] dt-bindings: mtd: make partitions doc a bit more
 generic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the only documented partitioning is "fixed-partitions" but
there are more methods in use that we may want to support in the future.
Mention them and make it clear Fixed Partitions are just a single case.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/mtd/partition.txt     | 32 +++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt
index 81a224da63be5..36f3b769a6267 100644
--- a/Documentation/devicetree/bindings/mtd/partition.txt
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
@@ -1,29 +1,49 @@
-Representing flash partitions in devicetree
+Flash partitions in device tree
+===============================
 
-Partitions can be represented by sub-nodes of an mtd device. This can be used
+Flash devices can be partitioned into one or more functional ranges (e.g. "boot
+code", "nvram", "kernel").
+
+Different devices may be partitioned in a different ways. Some may use a fixed
+flash layout set at production time. Some may use on-flash table that describes
+the geometry and naming/purpose of each functional region. It is also possible
+to see these methods mixed.
+
+To assist system software in locating partitions, we allow describing which
+method is used for a given flash device. To describe the method there should be
+a subnode of the flash device that is named 'partitions'. It must have a
+'compatible' property, which is used to identify the method to use.
+
+We currently only document a binding for fixed layouts.
+
+
+Fixed Partitions
+================
+
+Partitions can be represented by sub-nodes of a flash device. This can be used
 on platforms which have strong conventions about which portions of a flash are
 used for what purposes, but which don't use an on-flash partition table such
 as RedBoot.
 
-The partition table should be a subnode of the mtd node and should be named
+The partition table should be a subnode of the flash node and should be named
 'partitions'. This node should have the following property:
 - compatible : (required) must be "fixed-partitions"
 Partitions are then defined in subnodes of the partitions node.
 
-For backwards compatibility partitions as direct subnodes of the mtd device are
+For backwards compatibility partitions as direct subnodes of the flash device are
 supported. This use is discouraged.
 NOTE: also for backwards compatibility, direct subnodes that have a compatible
 string are not considered partitions, as they may be used for other bindings.
 
 #address-cells & #size-cells must both be present in the partitions subnode of the
-mtd device. There are two valid values for both:
+flash device. There are two valid values for both:
 <1>: for partitions that require a single 32-bit cell to represent their
      size/address (aka the value is below 4 GiB)
 <2>: for partitions that require two 32-bit cells to represent their
      size/address (aka the value is 4 GiB or greater).
 
 Required properties:
-- reg : The partition's offset and size within the mtd bank.
+- reg : The partition's offset and size within the flash
 
 Optional properties:
 - label : The label / name for this partition.  If omitted, the label is taken
-- 
GitLab


From 01f9c7240a900d5676a8496496f2974dd36996b1 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 23 May 2017 07:30:20 +0200
Subject: [PATCH 0324/1958] mtd: partitions: factor out code calling parser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This code is going to be reused for parsers matched using OF so let's
factor it out to make this easier.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 2e152e53ace0f..7ececd3d77993 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -807,6 +807,27 @@ static const char * const default_mtd_part_types[] = {
 	NULL
 };
 
+static int mtd_part_do_parse(struct mtd_part_parser *parser,
+			     struct mtd_info *master,
+			     struct mtd_partitions *pparts,
+			     struct mtd_part_parser_data *data)
+{
+	int ret;
+
+	ret = (*parser->parse_fn)(master, &pparts->parts, data);
+	pr_debug("%s: parser %s: %i\n", master->name, parser->name, ret);
+	if (ret <= 0)
+		return ret;
+
+	pr_notice("%d %s partitions found on MTD device %s\n", ret,
+		  parser->name, master->name);
+
+	pparts->nr_parts = ret;
+	pparts->parser = parser;
+
+	return ret;
+}
+
 /**
  * parse_mtd_partitions - parse MTD partitions
  * @master: the master partition (describes whole MTD device)
@@ -847,16 +868,10 @@ int parse_mtd_partitions(struct mtd_info *master, const char *const *types,
 			 parser ? parser->name : NULL);
 		if (!parser)
 			continue;
-		ret = (*parser->parse_fn)(master, &pparts->parts, data);
-		pr_debug("%s: parser %s: %i\n",
-			 master->name, parser->name, ret);
-		if (ret > 0) {
-			printk(KERN_NOTICE "%d %s partitions found on MTD device %s\n",
-			       ret, parser->name, master->name);
-			pparts->nr_parts = ret;
-			pparts->parser = parser;
+		ret = mtd_part_do_parse(parser, master, pparts, data);
+		/* Found partitions! */
+		if (ret > 0)
 			return 0;
-		}
 		mtd_part_parser_put(parser);
 		/*
 		 * Stash the first error we see; only report it if no parser
-- 
GitLab


From 0cbef932bd82857823c0e52b08218b0c9920a475 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 21 Jun 2017 00:18:43 +0930
Subject: [PATCH 0325/1958] spi-nor: Add Winbond w25m512jv

Similar to the other ones, different size. The "JV" suffix is in
the datasheet, I haven't seen mentions of a different one.

The datasheet indicates DUAL and QUAD are supported.

 http://www.winbond.com/resource-files/w25m512jv%20revc%2001062017.pdf

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/spi-nor.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index eef55b597ec73..43de5adafcb70 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1146,6 +1146,8 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
 	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
+	{ "w25m512jv", INFO(0xef7119, 0, 64 * 1024, 1024,
+			SECT_4K | SPI_NOR_QUAD_READ | SPI_NOR_DUAL_READ) },
 
 	/* Catalyst / On Semiconductor -- non-JEDEC */
 	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
-- 
GitLab


From f993c123b461ff47c10edfee65465fe651d0521f Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 16 Jun 2017 15:05:17 +0530
Subject: [PATCH 0326/1958] mtd: spi-nor: cqspi: make of_device_ids const

of_device_ids are not supposed to change at runtime. All functions
working with of_device_ids provided by <linux/of.h> work with const
of_device_ids. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/cadence-quadspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 40096d73536cf..d315c326e72f0 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -1283,7 +1283,7 @@ static const struct dev_pm_ops cqspi__dev_pm_ops = {
 #define CQSPI_DEV_PM_OPS	NULL
 #endif
 
-static struct of_device_id const cqspi_dt_ids[] = {
+static const struct of_device_id const cqspi_dt_ids[] = {
 	{.compatible = "cdns,qspi-nor",},
 	{ /* end of table */ }
 };
-- 
GitLab


From ce398a8141b485e4c97bd313c4c06cb6f64a22d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Wed, 19 Apr 2017 16:00:19 +0200
Subject: [PATCH 0327/1958] mtd: spi-nor: Add support for Macronix mx66l1g45g
 spi flash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These modules are used on the OpenPOWER Witherspoon systems to hold
the POWER9 host firmware image. The SPI_NOR_DUAL_READ flags is added
for the Aspeed SoCs which do not support QUAD reads.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/spi-nor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 43de5adafcb70..032cb13c8135b 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1012,6 +1012,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) },
 	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
 	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
+	{ "mx66l1g45g",  INFO(0xc2201b, 0, 64 * 1024, 2048, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
 
 	/* Micron */
-- 
GitLab


From d7c9ade2e31dbae4deaa8a5198107ecde4c56968 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Wed, 19 Apr 2017 16:00:20 +0200
Subject: [PATCH 0328/1958] mtd: spi-nor: add Dual and Quad read mode support
 to some flash devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These devices are used on OpenPOWER systems. The SPI_NOR_DUAL_READ
flags is added for the Aspeed SoCs which do not support QUAD reads.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/spi-nor.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 032cb13c8135b..040ab5d7ac052 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1008,10 +1008,10 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "mx25u6435f",  INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) },
 	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
 	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
-	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
+	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) },
 	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
-	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
+	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "mx66l1g45g",  INFO(0xc2201b, 0, 64 * 1024, 2048, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
 
@@ -1023,7 +1023,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "n25q064a",    INFO(0x20bb17, 0, 64 * 1024,  128, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
-	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
+	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "n25q256ax1",  INFO(0x20bb19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
@@ -1146,7 +1146,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
-	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
+	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "w25m512jv", INFO(0xef7119, 0, 64 * 1024, 1024,
 			SECT_4K | SPI_NOR_QUAD_READ | SPI_NOR_DUAL_READ) },
 
-- 
GitLab


From a9f127bb5f92379ecb135a66bb89f8403e998d96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Thu, 20 Apr 2017 13:56:34 +0200
Subject: [PATCH 0329/1958] mtd: spi-nor: aspeed: remove dummies from keep mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no need to keep the dummy bytes in the control register if
the command mode is not kept also. This could lead to an inconsistent
setting : normal read mode (command 0x3) and dummy bytes. It is to be
noted that the HW allows such a configuration.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/aspeed-smc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index 3f875c8d63398..d76663b369dbb 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c
@@ -180,8 +180,7 @@ struct aspeed_smc_controller {
 
 #define CONTROL_KEEP_MASK						\
 	(CONTROL_AAF_MODE | CONTROL_CE_INACTIVE_MASK | CONTROL_CLK_DIV4 | \
-	 CONTROL_IO_DUMMY_MASK | CONTROL_CLOCK_FREQ_SEL_MASK |		\
-	 CONTROL_LSB_FIRST | CONTROL_CLOCK_MODE_3)
+	 CONTROL_CLOCK_FREQ_SEL_MASK | CONTROL_LSB_FIRST | CONTROL_CLOCK_MODE_3)
 
 /*
  * The Segment Register uses a 8MB unit to encode the start address
-- 
GitLab


From e6f0acb2f0da89625aefc84b448f457b9161436b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 15 Jun 2017 10:54:23 -0700
Subject: [PATCH 0330/1958] bridge: Fix panel-bridge error return on !panel.

ERR_PTR() needs a negative errno argument.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Archit Taneja <architt@codeaurora.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170615175423.17954-1-eric@anholt.net
---
 drivers/gpu/drm/bridge/panel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index 99f9a4beb8592..67fe19e5a9c6e 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -161,7 +161,7 @@ struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel,
 	int ret;
 
 	if (!panel)
-		return ERR_PTR(EINVAL);
+		return ERR_PTR(-EINVAL);
 
 	panel_bridge = devm_kzalloc(panel->dev, sizeof(*panel_bridge),
 				    GFP_KERNEL);
-- 
GitLab


From 7ef0e5e16d18a38af489bea7193ba3b2be31ee58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Thu, 20 Apr 2017 13:56:36 +0200
Subject: [PATCH 0331/1958] mtd: spi-nor: aspeed: configure chip window on AHB
 bus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The segment registers of the SMC controller provide a way to configure
the mapping windows of the chips on the AHB bus. The settings are
required to be correct when the controller operates in Command mode,
which is the case for DMAs and the LPC mapping.

This tries to set the segment registers of each chip depending on the
size of the flash device and depending on the previous segment
settings, in order to have a contiguous window across multiple chips.

Unfortunately, the AST2500 SPI controller has a bug and it is not
possible to configure a full 128MB window for a chip of the same
size. The window size needs to be restricted to 120MB. This issue only
applies to CE0.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/aspeed-smc.c | 157 ++++++++++++++++++++++++++++++-
 1 file changed, 154 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index d76663b369dbb..0106357421bd3 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c
@@ -19,6 +19,7 @@
 #include <linux/mtd/spi-nor.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/sizes.h>
 #include <linux/sysfs.h>
 
 #define DEVICE_NAME	"aspeed-smc"
@@ -97,6 +98,7 @@ struct aspeed_smc_chip {
 	struct aspeed_smc_controller *controller;
 	void __iomem *ctl;			/* control register */
 	void __iomem *ahb_base;			/* base of chip window */
+	u32 ahb_window_size;			/* chip mapping window size */
 	u32 ctl_val[smc_max];			/* control settings */
 	enum aspeed_smc_flash_type type;	/* what type of flash */
 	struct spi_nor nor;
@@ -109,6 +111,7 @@ struct aspeed_smc_controller {
 	const struct aspeed_smc_info *info;	/* type info of controller */
 	void __iomem *regs;			/* controller registers */
 	void __iomem *ahb_base;			/* per-chip windows resource */
+	u32 ahb_window_size;			/* full mapping window size */
 
 	struct aspeed_smc_chip *chips[0];	/* pointers to attached chips */
 };
@@ -193,6 +196,10 @@ struct aspeed_smc_controller {
 #define SEGMENT_ADDR_REG0		0x30
 #define SEGMENT_ADDR_START(_r)		((((_r) >> 16) & 0xFF) << 23)
 #define SEGMENT_ADDR_END(_r)		((((_r) >> 24) & 0xFF) << 23)
+#define SEGMENT_ADDR_VALUE(start, end)					\
+	(((((start) >> 23) & 0xFF) << 16) | ((((end) >> 23) & 0xFF) << 24))
+#define SEGMENT_ADDR_REG(controller, cs)	\
+	((controller)->regs + SEGMENT_ADDR_REG0 + (cs) * 4)
 
 /*
  * In user mode all data bytes read or written to the chip decode address
@@ -438,8 +445,7 @@ static void __iomem *aspeed_smc_chip_base(struct aspeed_smc_chip *chip,
 	u32 reg;
 
 	if (controller->info->nce > 1) {
-		reg = readl(controller->regs + SEGMENT_ADDR_REG0 +
-			    chip->cs * 4);
+		reg = readl(SEGMENT_ADDR_REG(controller, chip->cs));
 
 		if (SEGMENT_ADDR_START(reg) >= SEGMENT_ADDR_END(reg))
 			return NULL;
@@ -450,6 +456,146 @@ static void __iomem *aspeed_smc_chip_base(struct aspeed_smc_chip *chip,
 	return controller->ahb_base + offset;
 }
 
+static u32 aspeed_smc_ahb_base_phy(struct aspeed_smc_controller *controller)
+{
+	u32 seg0_val = readl(SEGMENT_ADDR_REG(controller, 0));
+
+	return SEGMENT_ADDR_START(seg0_val);
+}
+
+static u32 chip_set_segment(struct aspeed_smc_chip *chip, u32 cs, u32 start,
+			    u32 size)
+{
+	struct aspeed_smc_controller *controller = chip->controller;
+	void __iomem *seg_reg;
+	u32 seg_oldval, seg_newval, ahb_base_phy, end;
+
+	ahb_base_phy = aspeed_smc_ahb_base_phy(controller);
+
+	seg_reg = SEGMENT_ADDR_REG(controller, cs);
+	seg_oldval = readl(seg_reg);
+
+	/*
+	 * If the chip size is not specified, use the default segment
+	 * size, but take into account the possible overlap with the
+	 * previous segment
+	 */
+	if (!size)
+		size = SEGMENT_ADDR_END(seg_oldval) - start;
+
+	/*
+	 * The segment cannot exceed the maximum window size of the
+	 * controller.
+	 */
+	if (start + size > ahb_base_phy + controller->ahb_window_size) {
+		size = ahb_base_phy + controller->ahb_window_size - start;
+		dev_warn(chip->nor.dev, "CE%d window resized to %dMB",
+			 cs, size >> 20);
+	}
+
+	end = start + size;
+	seg_newval = SEGMENT_ADDR_VALUE(start, end);
+	writel(seg_newval, seg_reg);
+
+	/*
+	 * Restore default value if something goes wrong. The chip
+	 * might have set some bogus value and we would loose access
+	 * to the chip.
+	 */
+	if (seg_newval != readl(seg_reg)) {
+		dev_err(chip->nor.dev, "CE%d window invalid", cs);
+		writel(seg_oldval, seg_reg);
+		start = SEGMENT_ADDR_START(seg_oldval);
+		end = SEGMENT_ADDR_END(seg_oldval);
+		size = end - start;
+	}
+
+	dev_info(chip->nor.dev, "CE%d window [ 0x%.8x - 0x%.8x ] %dMB",
+		 cs, start, end, size >> 20);
+
+	return size;
+}
+
+/*
+ * The segment register defines the mapping window on the AHB bus and
+ * it needs to be configured depending on the chip size. The segment
+ * register of the following CE also needs to be tuned in order to
+ * provide a contiguous window across multiple chips.
+ *
+ * This is expected to be called in increasing CE order
+ */
+static u32 aspeed_smc_chip_set_segment(struct aspeed_smc_chip *chip)
+{
+	struct aspeed_smc_controller *controller = chip->controller;
+	u32 ahb_base_phy, start;
+	u32 size = chip->nor.mtd.size;
+
+	/*
+	 * Each controller has a chip size limit for direct memory
+	 * access
+	 */
+	if (size > controller->info->maxsize)
+		size = controller->info->maxsize;
+
+	/*
+	 * The AST2400 SPI controller only handles one chip and does
+	 * not have segment registers. Let's use the chip size for the
+	 * AHB window.
+	 */
+	if (controller->info == &spi_2400_info)
+		goto out;
+
+	/*
+	 * The AST2500 SPI controller has a HW bug when the CE0 chip
+	 * size reaches 128MB. Enforce a size limit of 120MB to
+	 * prevent the controller from using bogus settings in the
+	 * segment register.
+	 */
+	if (chip->cs == 0 && controller->info == &spi_2500_info &&
+	    size == SZ_128M) {
+		size = 120 << 20;
+		dev_info(chip->nor.dev,
+			 "CE%d window resized to %dMB (AST2500 HW quirk)",
+			 chip->cs, size >> 20);
+	}
+
+	ahb_base_phy = aspeed_smc_ahb_base_phy(controller);
+
+	/*
+	 * As a start address for the current segment, use the default
+	 * start address if we are handling CE0 or use the previous
+	 * segment ending address
+	 */
+	if (chip->cs) {
+		u32 prev = readl(SEGMENT_ADDR_REG(controller, chip->cs - 1));
+
+		start = SEGMENT_ADDR_END(prev);
+	} else {
+		start = ahb_base_phy;
+	}
+
+	size = chip_set_segment(chip, chip->cs, start, size);
+
+	/* Update chip base address on the AHB bus */
+	chip->ahb_base = controller->ahb_base + (start - ahb_base_phy);
+
+	/*
+	 * Now, make sure the next segment does not overlap with the
+	 * current one we just configured, even if there is no
+	 * available chip. That could break access in Command Mode.
+	 */
+	if (chip->cs < controller->info->nce - 1)
+		chip_set_segment(chip, chip->cs + 1, start + size, 0);
+
+out:
+	if (size < chip->nor.mtd.size)
+		dev_warn(chip->nor.dev,
+			 "CE%d window too small for chip %dMB",
+			 chip->cs, (u32)chip->nor.mtd.size >> 20);
+
+	return size;
+}
+
 static void aspeed_smc_chip_enable_write(struct aspeed_smc_chip *chip)
 {
 	struct aspeed_smc_controller *controller = chip->controller;
@@ -523,7 +669,7 @@ static int aspeed_smc_chip_setup_init(struct aspeed_smc_chip *chip,
 	 */
 	chip->ahb_base = aspeed_smc_chip_base(chip, res);
 	if (!chip->ahb_base) {
-		dev_warn(chip->nor.dev, "CE segment window closed.\n");
+		dev_warn(chip->nor.dev, "CE%d window closed", chip->cs);
 		return -EINVAL;
 	}
 
@@ -570,6 +716,9 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 	if (chip->nor.addr_width == 4 && info->set_4b)
 		info->set_4b(chip);
 
+	/* This is for direct AHB access when using Command Mode. */
+	chip->ahb_window_size = aspeed_smc_chip_set_segment(chip);
+
 	/*
 	 * base mode has not been optimized yet. use it for writes.
 	 */
@@ -733,6 +882,8 @@ static int aspeed_smc_probe(struct platform_device *pdev)
 	if (IS_ERR(controller->ahb_base))
 		return PTR_ERR(controller->ahb_base);
 
+	controller->ahb_window_size = resource_size(res);
+
 	ret = aspeed_smc_setup_flash(controller, np, res);
 	if (ret)
 		dev_err(dev, "Aspeed SMC probe failed %d\n", ret);
-- 
GitLab


From cde1f95f407a593ad6baf1b7b01daa2c6cbd34fd Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:36 +0300
Subject: [PATCH 0332/1958] ACPI: Constify argument to acpi_device_is_present()

This will be needed in constifying the fwnode API.

The side effects the function had have been moved to the callers.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 4 +++-
 drivers/acpi/internal.h  | 2 +-
 drivers/acpi/scan.c      | 9 +++------
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 993fd31394c85..e565ed329f11d 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -261,8 +261,10 @@ int acpi_bus_init_power(struct acpi_device *device)
 		return -EINVAL;
 
 	device->power.state = ACPI_STATE_UNKNOWN;
-	if (!acpi_device_is_present(device))
+	if (!acpi_device_is_present(device)) {
+		device->flags.initialized = false;
 		return -ENXIO;
+	}
 
 	result = acpi_device_get_power(device, &state);
 	if (result)
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 66229ffa909b5..73002402c461c 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -111,7 +111,7 @@ int acpi_device_setup_files(struct acpi_device *dev);
 void acpi_device_remove_files(struct acpi_device *dev);
 void acpi_device_add_finalize(struct acpi_device *device);
 void acpi_free_pnp_ids(struct acpi_device_pnp *pnp);
-bool acpi_device_is_present(struct acpi_device *adev);
+bool acpi_device_is_present(const struct acpi_device *adev);
 bool acpi_device_is_battery(struct acpi_device *adev);
 bool acpi_device_is_first_physical_node(struct acpi_device *adev,
 					const struct device *dev);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 3a10d7573477e..c98f88b28948e 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1567,13 +1567,9 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type,
 	return 0;
 }
 
-bool acpi_device_is_present(struct acpi_device *adev)
+bool acpi_device_is_present(const struct acpi_device *adev)
 {
-	if (adev->status.present || adev->status.functional)
-		return true;
-
-	adev->flags.initialized = false;
-	return false;
+	return adev->status.present || adev->status.functional;
 }
 
 static bool acpi_scan_handler_matching(struct acpi_scan_handler *handler,
@@ -1831,6 +1827,7 @@ static void acpi_bus_attach(struct acpi_device *device)
 	acpi_bus_get_status(device);
 	/* Skip devices that are not present. */
 	if (!acpi_device_is_present(device)) {
+		device->flags.initialized = false;
 		acpi_device_clear_enumerated(device);
 		device->flags.power_manageable = 0;
 		return;
-- 
GitLab


From 3708184afc77bb67709a67a35d9f367ebd32cbc4 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:37 +0300
Subject: [PATCH 0333/1958] device property: Move FW type specific
 functionality to FW specific files

The device and fwnode property API supports Devicetree, ACPI and pset
properties. The implementation of this functionality for each firmware
type was embedded in the fwnode property core. Move it out to firmware
type specific locations, making it easier to maintain.

Depends-on: ("of: Move OF property and graph API from base.c to property.c")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c |  68 +++++++++++++
 drivers/acpi/scan.c     |   1 +
 drivers/base/property.c | 208 ++++++++++++++++------------------------
 drivers/of/property.c   |  90 +++++++++++++++++
 include/linux/acpi.h    |   4 +
 include/linux/fwnode.h  |  54 +++++++++++
 include/linux/of.h      |   2 +
 7 files changed, 302 insertions(+), 125 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 9364398204e91..14013f635db61 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -57,6 +57,7 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc,
 
 	dn->name = link->package.elements[0].string.pointer;
 	dn->fwnode.type = FWNODE_ACPI_DATA;
+	dn->fwnode.ops = &acpi_fwnode_ops;
 	dn->parent = parent;
 	INIT_LIST_HEAD(&dn->data.subnodes);
 
@@ -1119,3 +1120,70 @@ int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode,
 
 	return 0;
 }
+
+static bool acpi_fwnode_property_present(struct fwnode_handle *fwnode,
+					 const char *propname)
+{
+	return !acpi_node_prop_get(fwnode, propname, NULL);
+}
+
+static int acpi_fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					       const char *propname,
+					       unsigned int elem_size,
+					       void *val, size_t nval)
+{
+	enum dev_prop_type type;
+
+	switch (elem_size) {
+	case sizeof(u8):
+		type = DEV_PROP_U8;
+		break;
+	case sizeof(u16):
+		type = DEV_PROP_U16;
+		break;
+	case sizeof(u32):
+		type = DEV_PROP_U32;
+		break;
+	case sizeof(u64):
+		type = DEV_PROP_U64;
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	return acpi_node_prop_read(fwnode, propname, type, val, nval);
+}
+
+static int acpi_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						  const char *propname,
+						  const char **val, size_t nval)
+{
+	return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING,
+				   val, nval);
+}
+
+static struct fwnode_handle *
+acpi_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
+				 const char *childname)
+{
+	struct fwnode_handle *child;
+
+	/*
+	 * Find first matching named child node of this fwnode.
+	 * For ACPI this will be a data only sub-node.
+	 */
+	fwnode_for_each_child_node(fwnode, child)
+		if (acpi_data_node_match(child, childname))
+			return child;
+
+	return NULL;
+}
+
+const struct fwnode_operations acpi_fwnode_ops = {
+	.property_present = acpi_fwnode_property_present,
+	.property_read_int_array = acpi_fwnode_property_read_int_array,
+	.property_read_string_array = acpi_fwnode_property_read_string_array,
+	.get_parent = acpi_node_get_parent,
+	.get_next_child_node = acpi_get_next_subnode,
+	.get_named_child_node = acpi_fwnode_get_named_child_node,
+};
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index c98f88b28948e..bc2a525c495f7 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1436,6 +1436,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	device->handle = handle;
 	device->parent = acpi_bus_get_parent(handle);
 	device->fwnode.type = FWNODE_ACPI;
+	device->fwnode.ops = &acpi_fwnode_ops;
 	acpi_set_device_status(device, sta);
 	acpi_device_get_busid(device);
 	acpi_set_pnp_ids(handle, &device->pnp, type);
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 149de311a10e6..fee0705e40e9e 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -187,6 +187,50 @@ struct fwnode_handle *dev_fwnode(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_fwnode);
 
+static bool pset_fwnode_property_present(struct fwnode_handle *fwnode,
+					 const char *propname)
+{
+	return !!pset_prop_get(to_pset_node(fwnode), propname);
+}
+
+static int pset_fwnode_read_int_array(struct fwnode_handle *fwnode,
+				      const char *propname,
+				      unsigned int elem_size, void *val,
+				      size_t nval)
+{
+	struct property_set *node = to_pset_node(fwnode);
+
+	if (!val)
+		return pset_prop_count_elems_of_size(node, propname, elem_size);
+
+	switch (elem_size) {
+	case sizeof(u8):
+		return pset_prop_read_u8_array(node, propname, val, nval);
+	case sizeof(u16):
+		return pset_prop_read_u16_array(node, propname, val, nval);
+	case sizeof(u32):
+		return pset_prop_read_u32_array(node, propname, val, nval);
+	case sizeof(u64):
+		return pset_prop_read_u64_array(node, propname, val, nval);
+	}
+
+	return -ENXIO;
+}
+
+static int pset_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						  const char *propname,
+						  const char **val, size_t nval)
+{
+	return pset_prop_read_string_array(to_pset_node(fwnode), propname,
+					   val, nval);
+}
+
+static const struct fwnode_operations pset_fwnode_ops = {
+	.property_present = pset_fwnode_property_present,
+	.property_read_int_array = pset_fwnode_read_int_array,
+	.property_read_string_array = pset_fwnode_property_read_string_array,
+};
+
 /**
  * device_property_present - check if a property of a device is present
  * @dev: Device whose property is being checked
@@ -200,18 +244,6 @@ bool device_property_present(struct device *dev, const char *propname)
 }
 EXPORT_SYMBOL_GPL(device_property_present);
 
-static bool __fwnode_property_present(struct fwnode_handle *fwnode,
-				      const char *propname)
-{
-	if (is_of_node(fwnode))
-		return of_property_read_bool(to_of_node(fwnode), propname);
-	else if (is_acpi_node(fwnode))
-		return !acpi_node_prop_get(fwnode, propname, NULL);
-	else if (is_pset_node(fwnode))
-		return !!pset_prop_get(to_pset_node(fwnode), propname);
-	return false;
-}
-
 /**
  * fwnode_property_present - check if a property of a firmware node is present
  * @fwnode: Firmware node whose property to check
@@ -221,10 +253,11 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
 {
 	bool ret;
 
-	ret = __fwnode_property_present(fwnode, propname);
+	ret = fwnode_call_int_op(fwnode, property_present, propname);
 	if (ret == false && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = __fwnode_property_present(fwnode->secondary, propname);
+		ret = fwnode_call_int_op(fwnode->secondary, property_present,
+					 propname);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fwnode_property_present);
@@ -398,42 +431,23 @@ int device_property_match_string(struct device *dev, const char *propname,
 }
 EXPORT_SYMBOL_GPL(device_property_match_string);
 
-#define OF_DEV_PROP_READ_ARRAY(node, propname, type, val, nval)				\
-	(val) ? of_property_read_##type##_array((node), (propname), (val), (nval))	\
-	      : of_property_count_elems_of_size((node), (propname), sizeof(type))
-
-#define PSET_PROP_READ_ARRAY(node, propname, type, val, nval)				\
-	(val) ? pset_prop_read_##type##_array((node), (propname), (val), (nval))	\
-	      : pset_prop_count_elems_of_size((node), (propname), sizeof(type))
-
-#define FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_, _val_, _nval_)	\
-({											\
-	int _ret_;									\
-	if (is_of_node(_fwnode_))							\
-		_ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_,	\
-					       _type_, _val_, _nval_);			\
-	else if (is_acpi_node(_fwnode_))						\
-		_ret_ = acpi_node_prop_read(_fwnode_, _propname_, _proptype_,		\
-					    _val_, _nval_);				\
-	else if (is_pset_node(_fwnode_)) 						\
-		_ret_ = PSET_PROP_READ_ARRAY(to_pset_node(_fwnode_), _propname_,	\
-					     _type_, _val_, _nval_);			\
-	else										\
-		_ret_ = -ENXIO;								\
-	_ret_;										\
-})
-
-#define FWNODE_PROP_READ_ARRAY(_fwnode_, _propname_, _type_, _proptype_, _val_, _nval_)	\
-({											\
-	int _ret_;									\
-	_ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_,		\
-				 _val_, _nval_);					\
-	if (_ret_ == -EINVAL && !IS_ERR_OR_NULL(_fwnode_) &&				\
-	    !IS_ERR_OR_NULL(_fwnode_->secondary))					\
-		_ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_,	\
-				_proptype_, _val_, _nval_);				\
-	_ret_;										\
-})
+static int fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					  const char *propname,
+					  unsigned int elem_size, void *val,
+					  size_t nval)
+{
+	int ret;
+
+	ret = fwnode_call_int_op(fwnode, property_read_int_array, propname,
+				 elem_size, val, nval);
+	if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
+	    !IS_ERR_OR_NULL(fwnode->secondary))
+		ret = fwnode_call_int_op(
+			fwnode->secondary, property_read_int_array, propname,
+			elem_size, val, nval);
+
+	return ret;
+}
 
 /**
  * fwnode_property_read_u8_array - return a u8 array property of firmware node
@@ -456,8 +470,8 @@ EXPORT_SYMBOL_GPL(device_property_match_string);
 int fwnode_property_read_u8_array(struct fwnode_handle *fwnode,
 				  const char *propname, u8 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u8, DEV_PROP_U8,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u8),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array);
 
@@ -482,8 +496,8 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array);
 int fwnode_property_read_u16_array(struct fwnode_handle *fwnode,
 				   const char *propname, u16 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u16, DEV_PROP_U16,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u16),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array);
 
@@ -508,8 +522,8 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array);
 int fwnode_property_read_u32_array(struct fwnode_handle *fwnode,
 				   const char *propname, u32 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u32, DEV_PROP_U32,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u32),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array);
 
@@ -534,29 +548,11 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array);
 int fwnode_property_read_u64_array(struct fwnode_handle *fwnode,
 				   const char *propname, u64 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u64, DEV_PROP_U64,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u64),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u64_array);
 
-static int __fwnode_property_read_string_array(struct fwnode_handle *fwnode,
-					       const char *propname,
-					       const char **val, size_t nval)
-{
-	if (is_of_node(fwnode))
-		return val ?
-			of_property_read_string_array(to_of_node(fwnode),
-						      propname, val, nval) :
-			of_property_count_strings(to_of_node(fwnode), propname);
-	else if (is_acpi_node(fwnode))
-		return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING,
-					   val, nval);
-	else if (is_pset_node(fwnode))
-		return pset_prop_read_string_array(to_pset_node(fwnode),
-						   propname, val, nval);
-	return -ENXIO;
-}
-
 /**
  * fwnode_property_read_string_array - return string array property of a node
  * @fwnode: Firmware node to get the property of
@@ -581,11 +577,13 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode,
 {
 	int ret;
 
-	ret = __fwnode_property_read_string_array(fwnode, propname, val, nval);
+	ret = fwnode_call_int_op(fwnode, property_read_string_array, propname,
+				 val, nval);
 	if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = __fwnode_property_read_string_array(fwnode->secondary,
-							  propname, val, nval);
+		ret = fwnode_call_int_op(fwnode->secondary,
+					 property_read_string_array, propname,
+					 val, nval);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_string_array);
@@ -903,6 +901,7 @@ int device_add_properties(struct device *dev,
 		return PTR_ERR(p);
 
 	p->fwnode.type = FWNODE_PDATA;
+	p->fwnode.ops = &pset_fwnode_ops;
 	set_secondary_fwnode(dev, &p->fwnode);
 	return 0;
 }
@@ -938,19 +937,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_parent);
  */
 struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *parent = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_get_parent(to_of_node(fwnode));
-		if (node)
-			parent = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		parent = acpi_node_get_parent(fwnode);
-	}
-
-	return parent;
+	return fwnode_call_ptr_op(fwnode, get_parent);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_parent);
 
@@ -962,18 +949,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_parent);
 struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode,
 						 struct fwnode_handle *child)
 {
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_get_next_available_child(to_of_node(fwnode),
-						   to_of_node(child));
-		if (node)
-			return &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		return acpi_get_next_subnode(fwnode, child);
-	}
-
-	return NULL;
+	return fwnode_call_ptr_op(fwnode, get_next_child_node, child);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_next_child_node);
 
@@ -1005,23 +981,7 @@ EXPORT_SYMBOL_GPL(device_get_next_child_node);
 struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode,
 						  const char *childname)
 {
-	struct fwnode_handle *child;
-
-	/*
-	 * Find first matching named child node of this fwnode.
-	 * For ACPI this will be a data only sub-node.
-	 */
-	fwnode_for_each_child_node(fwnode, child) {
-		if (is_of_node(child)) {
-			if (!of_node_cmp(to_of_node(child)->name, childname))
-				return child;
-		} else if (is_acpi_data_node(child)) {
-			if (acpi_data_node_match(child, childname))
-				return child;
-		}
-	}
-
-	return NULL;
+	return fwnode_call_ptr_op(fwnode, get_named_child_node, childname);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_named_child_node);
 
@@ -1043,8 +1003,7 @@ EXPORT_SYMBOL_GPL(device_get_named_child_node);
  */
 void fwnode_handle_get(struct fwnode_handle *fwnode)
 {
-	if (is_of_node(fwnode))
-		of_node_get(to_of_node(fwnode));
+	fwnode_call_void_op(fwnode, get);
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_get);
 
@@ -1058,8 +1017,7 @@ EXPORT_SYMBOL_GPL(fwnode_handle_get);
  */
 void fwnode_handle_put(struct fwnode_handle *fwnode)
 {
-	if (is_of_node(fwnode))
-		of_node_put(to_of_node(fwnode));
+	fwnode_call_void_op(fwnode, put);
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_put);
 
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 457c313a89249..250859234fb02 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -764,3 +764,93 @@ struct device_node *of_graph_get_remote_node(const struct device_node *node,
 	return remote;
 }
 EXPORT_SYMBOL(of_graph_get_remote_node);
+
+static void of_fwnode_get(struct fwnode_handle *fwnode)
+{
+	of_node_get(to_of_node(fwnode));
+}
+
+static void of_fwnode_put(struct fwnode_handle *fwnode)
+{
+	of_node_put(to_of_node(fwnode));
+}
+
+static bool of_fwnode_property_present(struct fwnode_handle *fwnode,
+				       const char *propname)
+{
+	return of_property_read_bool(to_of_node(fwnode), propname);
+}
+
+static int of_fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					     const char *propname,
+					     unsigned int elem_size, void *val,
+					     size_t nval)
+{
+	struct device_node *node = to_of_node(fwnode);
+
+	if (!val)
+		return of_property_count_elems_of_size(node, propname,
+						       elem_size);
+
+	switch (elem_size) {
+	case sizeof(u8):
+		return of_property_read_u8_array(node, propname, val, nval);
+	case sizeof(u16):
+		return of_property_read_u16_array(node, propname, val, nval);
+	case sizeof(u32):
+		return of_property_read_u32_array(node, propname, val, nval);
+	case sizeof(u64):
+		return of_property_read_u64_array(node, propname, val, nval);
+	}
+
+	return -ENXIO;
+}
+
+static int of_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						const char *propname,
+						const char **val, size_t nval)
+{
+	struct device_node *node = to_of_node(fwnode);
+
+	return val ?
+		of_property_read_string_array(node, propname, val, nval) :
+		of_property_count_strings(node, propname);
+}
+
+static struct fwnode_handle *of_fwnode_get_parent(struct fwnode_handle *fwnode)
+{
+	return of_fwnode_handle(of_get_parent(to_of_node(fwnode)));
+}
+
+static struct fwnode_handle *
+of_fwnode_get_next_child_node(struct fwnode_handle *fwnode,
+			      struct fwnode_handle *child)
+{
+	return of_fwnode_handle(of_get_next_available_child(to_of_node(fwnode),
+							    to_of_node(child)));
+}
+
+static struct fwnode_handle *
+of_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
+			       const char *childname)
+{
+	struct device_node *node = to_of_node(fwnode);
+	struct device_node *child;
+
+	for_each_available_child_of_node(node, child)
+		if (!of_node_cmp(child->name, childname))
+			return of_fwnode_handle(child);
+
+	return NULL;
+}
+
+const struct fwnode_operations of_fwnode_ops = {
+	.get = of_fwnode_get,
+	.put = of_fwnode_put,
+	.property_present = of_fwnode_property_present,
+	.property_read_int_array = of_fwnode_property_read_int_array,
+	.property_read_string_array = of_fwnode_property_read_string_array,
+	.get_parent = of_fwnode_get_parent,
+	.get_next_child_node = of_fwnode_get_next_child_node,
+	.get_named_child_node = of_fwnode_get_named_child_node,
+};
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 137e4a3d89c52..b8f23c521b673 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -56,6 +56,9 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
 	acpi_fwnode_handle(adev) : NULL)
 #define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
 
+
+extern const struct fwnode_operations acpi_fwnode_ops;
+
 static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
 {
 	struct fwnode_handle *fwnode;
@@ -65,6 +68,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
 		return NULL;
 
 	fwnode->type = FWNODE_ACPI_STATIC;
+	fwnode->ops = &acpi_fwnode_ops;
 
 	return fwnode;
 }
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 3dff2398a5f0b..8f64b3ae9c579 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -12,6 +12,8 @@
 #ifndef _LINUX_FWNODE_H_
 #define _LINUX_FWNODE_H_
 
+#include <linux/types.h>
+
 enum fwnode_type {
 	FWNODE_INVALID = 0,
 	FWNODE_OF,
@@ -22,9 +24,12 @@ enum fwnode_type {
 	FWNODE_IRQCHIP
 };
 
+struct fwnode_operations;
+
 struct fwnode_handle {
 	enum fwnode_type type;
 	struct fwnode_handle *secondary;
+	const struct fwnode_operations *ops;
 };
 
 /**
@@ -39,4 +44,53 @@ struct fwnode_endpoint {
 	const struct fwnode_handle *local_fwnode;
 };
 
+/**
+ * struct fwnode_operations - Operations for fwnode interface
+ * @get: Get a reference to an fwnode.
+ * @put: Put a reference to an fwnode.
+ * @property_present: Return true if a property is present.
+ * @property_read_integer_array: Read an array of integer properties. Return
+ *				 zero on success, a negative error code
+ *				 otherwise.
+ * @property_read_string_array: Read an array of string properties. Return zero
+ *				on success, a negative error code otherwise.
+ * @get_parent: Return the parent of an fwnode.
+ * @get_next_child_node: Return the next child node in an iteration.
+ * @get_named_child_node: Return a child node with a given name.
+ */
+struct fwnode_operations {
+	void (*get)(struct fwnode_handle *fwnode);
+	void (*put)(struct fwnode_handle *fwnode);
+	bool (*property_present)(struct fwnode_handle *fwnode,
+				 const char *propname);
+	int (*property_read_int_array)(struct fwnode_handle *fwnode,
+				       const char *propname,
+				       unsigned int elem_size, void *val,
+				       size_t nval);
+	int (*property_read_string_array)(struct fwnode_handle *fwnode_handle,
+					  const char *propname,
+					  const char **val, size_t nval);
+	struct fwnode_handle *(*get_parent)(struct fwnode_handle *fwnode);
+	struct fwnode_handle *
+	(*get_next_child_node)(struct fwnode_handle *fwnode,
+			       struct fwnode_handle *child);
+	struct fwnode_handle *
+	(*get_named_child_node)(struct fwnode_handle *fwnode, const char *name);
+};
+
+#define fwnode_has_op(fwnode, op)				\
+	((fwnode) && (fwnode)->ops && (fwnode)->ops->op)
+#define fwnode_call_int_op(fwnode, op, ...)				\
+	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
+		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
+	 -EINVAL)
+#define fwnode_call_ptr_op(fwnode, op, ...)		\
+	(fwnode_has_op(fwnode, op) ?			\
+	 (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL)
+#define fwnode_call_void_op(fwnode, op, ...)				\
+	do {								\
+		if (fwnode_has_op(fwnode, op))				\
+			(fwnode)->ops->op(fwnode, ## __VA_ARGS__);	\
+	} while (false)
+
 #endif
diff --git a/include/linux/of.h b/include/linux/of.h
index 29b7b738b5090..cdbfa88c32cfa 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -100,10 +100,12 @@ struct of_reconfig_data {
 
 /* initialize a node */
 extern struct kobj_type of_node_ktype;
+extern const struct fwnode_operations of_fwnode_ops;
 static inline void of_node_init(struct device_node *node)
 {
 	kobject_init(&node->kobj, &of_node_ktype);
 	node->fwnode.type = FWNODE_OF;
+	node->fwnode.ops = &of_fwnode_ops;
 }
 
 /* true when node is initialized */
-- 
GitLab


From 3b27d00e7b6d7c889d87fd00df600c495b968e30 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:38 +0300
Subject: [PATCH 0334/1958] device property: Move fwnode graph ops to firmware
 specific locations

Move firmware specific implementations of the fwnode graph operations to
firmware specific locations.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 40 ++++++++++++++++++
 drivers/base/property.c | 91 ++++-------------------------------------
 drivers/of/property.c   | 52 +++++++++++++++++++++++
 include/linux/fwnode.h  | 14 +++++++
 4 files changed, 114 insertions(+), 83 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 14013f635db61..a24ca61294eb5 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1179,6 +1179,42 @@ acpi_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
 	return NULL;
 }
 
+static struct fwnode_handle *
+acpi_fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
+				    struct fwnode_handle *prev)
+{
+	struct fwnode_handle *endpoint;
+
+	endpoint = acpi_graph_get_next_endpoint(fwnode, prev);
+	if (IS_ERR(endpoint))
+		return NULL;
+
+	return endpoint;
+}
+
+static struct fwnode_handle *
+acpi_fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *endpoint = NULL;
+
+	acpi_graph_get_remote_endpoint(fwnode, NULL, NULL, &endpoint);
+
+	return endpoint;
+}
+
+static int acpi_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
+					    struct fwnode_endpoint *endpoint)
+{
+	struct fwnode_handle *port_fwnode = fwnode_get_parent(fwnode);
+
+	endpoint->local_fwnode = fwnode;
+
+	fwnode_property_read_u32(port_fwnode, "port", &endpoint->port);
+	fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id);
+
+	return 0;
+}
+
 const struct fwnode_operations acpi_fwnode_ops = {
 	.property_present = acpi_fwnode_property_present,
 	.property_read_int_array = acpi_fwnode_property_read_int_array,
@@ -1186,4 +1222,8 @@ const struct fwnode_operations acpi_fwnode_ops = {
 	.get_parent = acpi_node_get_parent,
 	.get_next_child_node = acpi_get_next_subnode,
 	.get_named_child_node = acpi_fwnode_get_named_child_node,
+	.graph_get_next_endpoint = acpi_fwnode_graph_get_next_endpoint,
+	.graph_get_remote_endpoint = acpi_fwnode_graph_get_remote_endpoint,
+	.graph_get_port_parent = acpi_node_get_parent,
+	.graph_parse_endpoint = acpi_fwnode_graph_parse_endpoint,
 };
diff --git a/drivers/base/property.c b/drivers/base/property.c
index fee0705e40e9e..e1a58ac8840ea 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1156,24 +1156,7 @@ struct fwnode_handle *
 fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
 			       struct fwnode_handle *prev)
 {
-	struct fwnode_handle *endpoint = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_graph_get_next_endpoint(to_of_node(fwnode),
-						  to_of_node(prev));
-
-		if (node)
-			endpoint = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		endpoint = acpi_graph_get_next_endpoint(fwnode, prev);
-		if (IS_ERR(endpoint))
-			endpoint = NULL;
-	}
-
-	return endpoint;
-
+	return fwnode_call_ptr_op(fwnode, graph_get_next_endpoint, prev);
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 
@@ -1186,22 +1169,12 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 struct fwnode_handle *
 fwnode_graph_get_remote_port_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *parent = NULL;
+	struct fwnode_handle *port, *parent;
 
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
+	port = fwnode_graph_get_remote_port(fwnode);
+	parent = fwnode_call_ptr_op(port, graph_get_port_parent);
 
-		node = of_graph_get_remote_port_parent(to_of_node(fwnode));
-		if (node)
-			parent = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, &parent, NULL,
-						     NULL);
-		if (ret)
-			return NULL;
-	}
+	fwnode_handle_put(port);
 
 	return parent;
 }
@@ -1215,23 +1188,7 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent);
  */
 struct fwnode_handle *fwnode_graph_get_remote_port(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *port = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_graph_get_remote_port(to_of_node(fwnode));
-		if (node)
-			port = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, NULL, &port, NULL);
-		if (ret)
-			return NULL;
-	}
-
-	return port;
+	return fwnode_get_next_parent(fwnode_graph_get_remote_endpoint(fwnode));
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port);
 
@@ -1244,25 +1201,7 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port);
 struct fwnode_handle *
 fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *endpoint = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_parse_phandle(to_of_node(fwnode), "remote-endpoint",
-					0);
-		if (node)
-			endpoint = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, NULL, NULL,
-						     &endpoint);
-		if (ret)
-			return NULL;
-	}
-
-	return endpoint;
+	return fwnode_call_ptr_op(fwnode, graph_get_remote_endpoint);
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 
@@ -1278,22 +1217,8 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 				struct fwnode_endpoint *endpoint)
 {
-	struct fwnode_handle *port_fwnode = fwnode_get_parent(fwnode);
-
 	memset(endpoint, 0, sizeof(*endpoint));
 
-	endpoint->local_fwnode = fwnode;
-
-	if (is_acpi_node(port_fwnode)) {
-		fwnode_property_read_u32(port_fwnode, "port", &endpoint->port);
-		fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id);
-	} else {
-		fwnode_property_read_u32(port_fwnode, "reg", &endpoint->port);
-		fwnode_property_read_u32(fwnode, "reg", &endpoint->id);
-	}
-
-	fwnode_handle_put(port_fwnode);
-
-	return 0;
+	return fwnode_call_int_op(fwnode, graph_parse_endpoint, endpoint);
 }
 EXPORT_SYMBOL(fwnode_graph_parse_endpoint);
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 250859234fb02..e859e41e33f3a 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -844,6 +844,54 @@ of_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
 	return NULL;
 }
 
+static struct fwnode_handle *
+of_fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
+				  struct fwnode_handle *prev)
+{
+	return of_fwnode_handle(of_graph_get_next_endpoint(to_of_node(fwnode),
+							   to_of_node(prev)));
+}
+
+static struct fwnode_handle *
+of_fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
+{
+	return of_fwnode_handle(of_parse_phandle(to_of_node(fwnode),
+						 "remote-endpoint", 0));
+}
+
+static struct fwnode_handle *
+of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode)
+{
+	struct device_node *np;
+
+	/* Get the parent of the port */
+	np = of_get_next_parent(to_of_node(fwnode));
+	if (!np)
+		return NULL;
+
+	/* Is this the "ports" node? If not, it's the port parent. */
+	if (of_node_cmp(np->name, "ports"))
+		return of_fwnode_handle(np);
+
+	return of_fwnode_handle(of_get_next_parent(np));
+}
+
+static int of_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
+					  struct fwnode_endpoint *endpoint)
+{
+	struct device_node *node = to_of_node(fwnode);
+	struct device_node *port_node = of_get_parent(node);
+
+	endpoint->local_fwnode = fwnode;
+
+	of_property_read_u32(port_node, "reg", &endpoint->port);
+	of_property_read_u32(node, "reg", &endpoint->id);
+
+	of_node_put(port_node);
+
+	return 0;
+}
+
 const struct fwnode_operations of_fwnode_ops = {
 	.get = of_fwnode_get,
 	.put = of_fwnode_put,
@@ -853,4 +901,8 @@ const struct fwnode_operations of_fwnode_ops = {
 	.get_parent = of_fwnode_get_parent,
 	.get_next_child_node = of_fwnode_get_next_child_node,
 	.get_named_child_node = of_fwnode_get_named_child_node,
+	.graph_get_next_endpoint = of_fwnode_graph_get_next_endpoint,
+	.graph_get_remote_endpoint = of_fwnode_graph_get_remote_endpoint,
+	.graph_get_port_parent = of_fwnode_graph_get_port_parent,
+	.graph_parse_endpoint = of_fwnode_graph_parse_endpoint,
 };
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 8f64b3ae9c579..e315d867d631c 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -57,6 +57,11 @@ struct fwnode_endpoint {
  * @get_parent: Return the parent of an fwnode.
  * @get_next_child_node: Return the next child node in an iteration.
  * @get_named_child_node: Return a child node with a given name.
+ * @graph_get_next_endpoint: Return an endpoint node in an iteration.
+ * @graph_get_remote_endpoint: Return the remote endpoint node of a local
+ *			       endpoint node.
+ * @graph_get_port_parent: Return the parent node of a port node.
+ * @graph_parse_endpoint: Parse endpoint for port and endpoint id.
  */
 struct fwnode_operations {
 	void (*get)(struct fwnode_handle *fwnode);
@@ -76,6 +81,15 @@ struct fwnode_operations {
 			       struct fwnode_handle *child);
 	struct fwnode_handle *
 	(*get_named_child_node)(struct fwnode_handle *fwnode, const char *name);
+	struct fwnode_handle *
+	(*graph_get_next_endpoint)(struct fwnode_handle *fwnode,
+				   struct fwnode_handle *prev);
+	struct fwnode_handle *
+	(*graph_get_remote_endpoint)(struct fwnode_handle *fwnode);
+	struct fwnode_handle *
+	(*graph_get_port_parent)(struct fwnode_handle *fwnode);
+	int (*graph_parse_endpoint)(struct fwnode_handle *fwnode,
+				    struct fwnode_endpoint *endpoint);
 };
 
 #define fwnode_has_op(fwnode, op)				\
-- 
GitLab


From 2294b3af05e9b3fe0b84a78971e709037bd7593c Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:39 +0300
Subject: [PATCH 0335/1958] device property: Introduce
 fwnode_device_is_available()

Add fwnode_device_is_available() to tell whether the device corresponding
to a certain fwnode_handle is available for use.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c  |  9 +++++++++
 drivers/base/property.c  | 10 ++++++++++
 drivers/of/property.c    |  6 ++++++
 include/linux/fwnode.h   |  1 +
 include/linux/property.h |  1 +
 5 files changed, 27 insertions(+)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index a24ca61294eb5..917c789f953df 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1121,6 +1121,14 @@ int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode,
 	return 0;
 }
 
+static bool acpi_fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	if (!is_acpi_device_node(fwnode))
+		return false;
+
+	return acpi_device_is_present(to_acpi_device_node(fwnode));
+}
+
 static bool acpi_fwnode_property_present(struct fwnode_handle *fwnode,
 					 const char *propname)
 {
@@ -1216,6 +1224,7 @@ static int acpi_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 }
 
 const struct fwnode_operations acpi_fwnode_ops = {
+	.device_is_available = acpi_fwnode_device_is_available,
 	.property_present = acpi_fwnode_property_present,
 	.property_read_int_array = acpi_fwnode_property_read_int_array,
 	.property_read_string_array = acpi_fwnode_property_read_string_array,
diff --git a/drivers/base/property.c b/drivers/base/property.c
index e1a58ac8840ea..b979f8a2f4fbc 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1021,6 +1021,16 @@ void fwnode_handle_put(struct fwnode_handle *fwnode)
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_put);
 
+/**
+ * fwnode_device_is_available - check if a device is available for use
+ * @fwnode: Pointer to the fwnode of the device.
+ */
+bool fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	return fwnode_call_int_op(fwnode, device_is_available);
+}
+EXPORT_SYMBOL_GPL(fwnode_device_is_available);
+
 /**
  * device_get_child_node_count - return the number of child nodes for device
  * @dev: Device to cound the child nodes for
diff --git a/drivers/of/property.c b/drivers/of/property.c
index e859e41e33f3a..c96389b7c6b32 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -775,6 +775,11 @@ static void of_fwnode_put(struct fwnode_handle *fwnode)
 	of_node_put(to_of_node(fwnode));
 }
 
+static bool of_fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	return of_device_is_available(to_of_node(fwnode));
+}
+
 static bool of_fwnode_property_present(struct fwnode_handle *fwnode,
 				       const char *propname)
 {
@@ -895,6 +900,7 @@ static int of_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 const struct fwnode_operations of_fwnode_ops = {
 	.get = of_fwnode_get,
 	.put = of_fwnode_put,
+	.device_is_available = of_fwnode_device_is_available,
 	.property_present = of_fwnode_property_present,
 	.property_read_int_array = of_fwnode_property_read_int_array,
 	.property_read_string_array = of_fwnode_property_read_string_array,
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index e315d867d631c..9ab3754191892 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -66,6 +66,7 @@ struct fwnode_endpoint {
 struct fwnode_operations {
 	void (*get)(struct fwnode_handle *fwnode);
 	void (*put)(struct fwnode_handle *fwnode);
+	bool (*device_is_available)(struct fwnode_handle *fwnode);
 	bool (*property_present)(struct fwnode_handle *fwnode,
 				 const char *propname);
 	int (*property_read_int_array)(struct fwnode_handle *fwnode,
diff --git a/include/linux/property.h b/include/linux/property.h
index 2f482616a2f22..7be014af78ed7 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -51,6 +51,7 @@ int device_property_read_string(struct device *dev, const char *propname,
 int device_property_match_string(struct device *dev,
 				 const char *propname, const char *string);
 
+bool fwnode_device_is_available(struct fwnode_handle *fwnode);
 bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname);
 int fwnode_property_read_u8_array(struct fwnode_handle *fwnode,
 				  const char *propname, u8 *val,
-- 
GitLab


From 125ee6b3b0fa920c730b0991e6f083a9f5b1e4c3 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 6 Jun 2017 12:37:40 +0300
Subject: [PATCH 0336/1958] device property: Add FW type agnostic
 fwnode_graph_get_remote_node

Add fwnode_graph_get_remote_node() function which is equivalent to
of_graph_get_remote_node() on OF.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c  | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/property.h |  2 ++
 2 files changed, 39 insertions(+)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index b979f8a2f4fbc..ac3590b1f93d0 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1215,6 +1215,43 @@ fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 
+/**
+ * fwnode_graph_get_remote_node - get remote parent node for given port/endpoint
+ * @fwnode: pointer to parent fwnode_handle containing graph port/endpoint
+ * @port_id: identifier of the parent port node
+ * @endpoint_id: identifier of the endpoint node
+ *
+ * Return: Remote fwnode handle associated with remote endpoint node linked
+ *	   to @node. Use fwnode_node_put() on it when done.
+ */
+struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode,
+						   u32 port_id, u32 endpoint_id)
+{
+	struct fwnode_handle *endpoint = NULL;
+
+	while ((endpoint = fwnode_graph_get_next_endpoint(fwnode, endpoint))) {
+		struct fwnode_endpoint fwnode_ep;
+		struct fwnode_handle *remote;
+		int ret;
+
+		ret = fwnode_graph_parse_endpoint(endpoint, &fwnode_ep);
+		if (ret < 0)
+			continue;
+
+		if (fwnode_ep.port != port_id || fwnode_ep.id != endpoint_id)
+			continue;
+
+		remote = fwnode_graph_get_remote_port_parent(endpoint);
+		if (!remote)
+			return NULL;
+
+		return fwnode_device_is_available(remote) ? remote : NULL;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_node);
+
 /**
  * fwnode_graph_parse_endpoint - parse common endpoint node properties
  * @fwnode: pointer to endpoint fwnode_handle
diff --git a/include/linux/property.h b/include/linux/property.h
index 7be014af78ed7..0597a743aa66f 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -281,6 +281,8 @@ struct fwnode_handle *fwnode_graph_get_remote_port(
 	struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_endpoint(
 	struct fwnode_handle *fwnode);
+struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode,
+						   u32 port, u32 endpoint);
 
 int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 				struct fwnode_endpoint *endpoint);
-- 
GitLab


From 6a71d8d77795e0f7d887baa95bfc0d1d2bc74899 Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Date: Tue, 6 Jun 2017 12:37:41 +0300
Subject: [PATCH 0337/1958] device property: Add fwnode_graph_get_port_parent

Provide a helper to obtain the parent device fwnode without first
parsing the remote-endpoint as per fwnode_graph_get_remote_port_parent.

Signed-off-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c  | 28 ++++++++++++++++++++++++----
 include/linux/property.h |  2 ++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index ac3590b1f93d0..692007e5a94b1 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1170,6 +1170,26 @@ fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 
+/**
+ * fwnode_graph_get_port_parent - Return the device fwnode of a port endpoint
+ * @endpoint: Endpoint firmware node of the port
+ *
+ * Return: the firmware node of the device the @endpoint belongs to.
+ */
+struct fwnode_handle *
+fwnode_graph_get_port_parent(struct fwnode_handle *endpoint)
+{
+	struct fwnode_handle *port, *parent;
+
+	port = fwnode_get_parent(endpoint);
+	parent = fwnode_call_ptr_op(port, graph_get_port_parent);
+
+	fwnode_handle_put(port);
+
+	return parent;
+}
+EXPORT_SYMBOL_GPL(fwnode_graph_get_port_parent);
+
 /**
  * fwnode_graph_get_remote_port_parent - Return fwnode of a remote device
  * @fwnode: Endpoint firmware node pointing to the remote endpoint
@@ -1179,12 +1199,12 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 struct fwnode_handle *
 fwnode_graph_get_remote_port_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *port, *parent;
+	struct fwnode_handle *endpoint, *parent;
 
-	port = fwnode_graph_get_remote_port(fwnode);
-	parent = fwnode_call_ptr_op(port, graph_get_port_parent);
+	endpoint = fwnode_graph_get_remote_endpoint(fwnode);
+	parent = fwnode_graph_get_port_parent(endpoint);
 
-	fwnode_handle_put(port);
+	fwnode_handle_put(endpoint);
 
 	return parent;
 }
diff --git a/include/linux/property.h b/include/linux/property.h
index 0597a743aa66f..7e77039e6b818 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -275,6 +275,8 @@ void *device_get_mac_address(struct device *dev, char *addr, int alen);
 
 struct fwnode_handle *fwnode_graph_get_next_endpoint(
 	struct fwnode_handle *fwnode, struct fwnode_handle *prev);
+struct fwnode_handle *
+fwnode_graph_get_port_parent(struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_port_parent(
 	struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_port(
-- 
GitLab


From 111237415393acdef9b4f700c747a2e92c9be62e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 16 Jun 2017 16:09:54 +0200
Subject: [PATCH 0338/1958] iommu/amd: Disable IOMMUs at boot if they are
 enabled

When booting, make sure the IOMMUs are disabled. They could
be previously enabled if we boot into a kexec or kdump
kernel. So make sure they are off.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index cf7896550e758..f123fa5a7adb3 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2318,6 +2318,9 @@ static int __init early_amd_iommu_init(void)
 	if (ret)
 		goto out;
 
+	/* Disable any previously enabled IOMMUs */
+	disable_iommus();
+
 	if (amd_iommu_irq_remap)
 		amd_iommu_irq_remap = check_ioapic_information();
 
-- 
GitLab


From 90b3eb03e1b2b2ece299c485ae9da0cd221e700d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 16 Jun 2017 16:09:55 +0200
Subject: [PATCH 0339/1958] iommu/amd: Rename free_on_init_error()

The function will also be used to free iommu resources when
amd_iommu=off was specified on the kernel command line. So
rename the function to reflect that.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index f123fa5a7adb3..74f4eea6be775 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2108,7 +2108,7 @@ static struct syscore_ops amd_iommu_syscore_ops = {
 	.resume = amd_iommu_resume,
 };
 
-static void __init free_on_init_error(void)
+static void __init free_iommu_resources(void)
 {
 	kmemleak_free(irq_lookup_table);
 	free_pages((unsigned long)irq_lookup_table,
@@ -2536,7 +2536,7 @@ static int __init amd_iommu_init(void)
 		free_dma_resources();
 		if (!irq_remapping_enabled) {
 			disable_iommus();
-			free_on_init_error();
+			free_iommu_resources();
 		} else {
 			struct amd_iommu *iommu;
 
-- 
GitLab


From 1b1e942e344d3d41de187c1275024e915cb15844 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 16 Jun 2017 16:09:56 +0200
Subject: [PATCH 0340/1958] iommu/amd: Add new init-state
 IOMMU_CMDLINE_DISABLED

This will be used when during initialization we detect that
the iommu should be disabled.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 74f4eea6be775..df9ec85271f56 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -237,6 +237,7 @@ enum iommu_init_state {
 	IOMMU_INITIALIZED,
 	IOMMU_NOT_FOUND,
 	IOMMU_INIT_ERROR,
+	IOMMU_CMDLINE_DISABLED,
 };
 
 /* Early ioapic and hpet maps from kernel command line */
@@ -2452,6 +2453,7 @@ static int __init state_next(void)
 		break;
 	case IOMMU_NOT_FOUND:
 	case IOMMU_INIT_ERROR:
+	case IOMMU_CMDLINE_DISABLED:
 		/* Error states => do nothing */
 		ret = -EINVAL;
 		break;
@@ -2469,8 +2471,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state)
 
 	while (init_state != state) {
 		ret = state_next();
-		if (init_state == IOMMU_NOT_FOUND ||
-		    init_state == IOMMU_INIT_ERROR)
+		if (init_state == IOMMU_NOT_FOUND         ||
+		    init_state == IOMMU_INIT_ERROR        ||
+		    init_state == IOMMU_CMDLINE_DISABLED)
 			break;
 	}
 
-- 
GitLab


From 151b09031a76ba6b6b83f94953074d6f10aa30b3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 16 Jun 2017 16:09:57 +0200
Subject: [PATCH 0341/1958] iommu/amd: Check for error states first in
 iommu_go_to_state()

Check if we are in an error state already before calling
into state_next().

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index df9ec85271f56..a6b81a05a0d1e 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2467,14 +2467,14 @@ static int __init state_next(void)
 
 static int __init iommu_go_to_state(enum iommu_init_state state)
 {
-	int ret = 0;
+	int ret = -EINVAL;
 
 	while (init_state != state) {
-		ret = state_next();
 		if (init_state == IOMMU_NOT_FOUND         ||
 		    init_state == IOMMU_INIT_ERROR        ||
 		    init_state == IOMMU_CMDLINE_DISABLED)
 			break;
+		ret = state_next();
 	}
 
 	return ret;
-- 
GitLab


From f601927136d69be49f0a14ae820b44c02fa591ba Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 16 Jun 2017 16:09:58 +0200
Subject: [PATCH 0342/1958] iommu/amd: Set global pointers to NULL after
 freeing them

Avoid any tries to double-free these pointers.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index a6b81a05a0d1e..8cc507f96f3a8 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2114,18 +2114,22 @@ static void __init free_iommu_resources(void)
 	kmemleak_free(irq_lookup_table);
 	free_pages((unsigned long)irq_lookup_table,
 		   get_order(rlookup_table_size));
+	irq_lookup_table = NULL;
 
 	kmem_cache_destroy(amd_iommu_irq_cache);
 	amd_iommu_irq_cache = NULL;
 
 	free_pages((unsigned long)amd_iommu_rlookup_table,
 		   get_order(rlookup_table_size));
+	amd_iommu_rlookup_table = NULL;
 
 	free_pages((unsigned long)amd_iommu_alias_table,
 		   get_order(alias_table_size));
+	amd_iommu_alias_table = NULL;
 
 	free_pages((unsigned long)amd_iommu_dev_table,
 		   get_order(dev_table_size));
+	amd_iommu_dev_table = NULL;
 
 	free_iommu_all();
 
@@ -2195,6 +2199,7 @@ static void __init free_dma_resources(void)
 {
 	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
 		   get_order(MAX_DOMAIN_ID/8));
+	amd_iommu_pd_alloc_bitmap = NULL;
 
 	free_unity_maps();
 }
-- 
GitLab


From 7ad820e43330bbf974792c5ab4e2c2355e08d597 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 16 Jun 2017 16:09:59 +0200
Subject: [PATCH 0343/1958] iommu/amd: Free IOMMU resources when disabled on
 command line

After we made sure that all IOMMUs have been disabled we
need to make sure that all resources we allocated are
released again.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 8cc507f96f3a8..128f9665c326a 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2430,6 +2430,13 @@ static int __init state_next(void)
 	case IOMMU_IVRS_DETECTED:
 		ret = early_amd_iommu_init();
 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
+		if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
+			pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n");
+			free_dma_resources();
+			free_iommu_resources();
+			init_state = IOMMU_CMDLINE_DISABLED;
+			ret = -EINVAL;
+		}
 		break;
 	case IOMMU_ACPI_FINISHED:
 		early_enable_iommus();
-- 
GitLab


From ffa080ebb5405b27b0b84a3a75c9cdf4ed3d28da Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 16 Jun 2017 16:10:00 +0200
Subject: [PATCH 0344/1958] iommu/amd: Remove amd_iommu_disabled check from
 amd_iommu_detect()

This check needs to happens later now, when all previously
enabled IOMMUs have been disabled.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 128f9665c326a..5cc597b383c72 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2578,9 +2578,6 @@ int __init amd_iommu_detect(void)
 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
 		return -ENODEV;
 
-	if (amd_iommu_disabled)
-		return -ENODEV;
-
 	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
 	if (ret)
 		return ret;
-- 
GitLab


From 9ce3a72cd7f7e0b9ba1c5952e4461b363824bca9 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 22 Jun 2017 12:16:33 +0200
Subject: [PATCH 0345/1958] iommu/amd: Free already flushed ring-buffer entries
 before full-check

To benefit from IOTLB flushes on other CPUs we have to free
the already flushed IOVAs from the ring-buffer before we do
the queue_ring_full() check.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 95ee360a51991..c618c26a3b334 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1910,15 +1910,21 @@ static void queue_add(struct dma_ops_domain *dom,
 	queue = get_cpu_ptr(dom->flush_queue);
 	spin_lock_irqsave(&queue->lock, flags);
 
+	/*
+	 * First remove the enries from the ring-buffer that are already
+	 * flushed to make the below queue_ring_full() check less likely
+	 */
+	queue_ring_free_flushed(dom, queue);
+
 	/*
 	 * When ring-queue is full, flush the entries from the IOTLB so
 	 * that we can free all entries with queue_ring_free_flushed()
 	 * below.
 	 */
-	if (queue_ring_full(queue))
+	if (queue_ring_full(queue)) {
 		dma_ops_domain_flush_tlb(dom);
-
-	queue_ring_free_flushed(dom, queue);
+		queue_ring_free_flushed(dom, queue);
+	}
 
 	idx = queue_ring_add(queue);
 
-- 
GitLab


From fe496e23b74852cbb2df7e0a6e26752131c41bb6 Mon Sep 17 00:00:00 2001
From: Tom Rini <trini@konsulko.com>
Date: Wed, 21 Jun 2017 08:22:06 -0400
Subject: [PATCH 0346/1958] dt-bindings: mtd: elm: Correct compatible string
 requirement

The binding says that the compatible string must be "ti,am33xx-elm"
but the code checks only for, and all functioning users set, this as
"ti,am3352-elm" so correct the binding.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Tom Rini <trini@konsulko.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 Documentation/devicetree/bindings/mtd/elm.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mtd/elm.txt b/Documentation/devicetree/bindings/mtd/elm.txt
index 8c1528c421d47..59ddc61c10768 100644
--- a/Documentation/devicetree/bindings/mtd/elm.txt
+++ b/Documentation/devicetree/bindings/mtd/elm.txt
@@ -1,7 +1,7 @@
 Error location module
 
 Required properties:
-- compatible: Must be "ti,am33xx-elm"
+- compatible: Must be "ti,am3352-elm"
 - reg: physical base address and size of the registers map.
 - interrupts: Interrupt number for the elm.
 
-- 
GitLab


From a7adb70a73b7cb220f4515745d2671d2226e0097 Mon Sep 17 00:00:00 2001
From: Tom Rini <trini@konsulko.com>
Date: Wed, 21 Jun 2017 08:14:54 -0400
Subject: [PATCH 0347/1958] dt-bindings: gpmc: Correct location of generic gpmc
 binding

The binding bus/ti-gpmc.txt has been moved to
memory-controllers/omap-gpmc.txt.  Update all references to this in
order to make reading and understanding a given binding easier.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc:Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Tom Rini <trini@konsulko.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 Documentation/devicetree/bindings/mtd/gpmc-nand.txt    | 2 +-
 Documentation/devicetree/bindings/mtd/gpmc-nor.txt     | 4 ++--
 Documentation/devicetree/bindings/mtd/gpmc-onenand.txt | 2 +-
 Documentation/devicetree/bindings/net/gpmc-eth.txt     | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
index 174f68c26c1b2..dd559045593d7 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
@@ -5,7 +5,7 @@ the GPMC controller with a name of "nand".
 
 All timing relevant properties as well as generic gpmc child properties are
 explained in a separate documents - please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 For NAND specific properties such as ECC modes or bus width, please refer to
 Documentation/devicetree/bindings/mtd/nand.txt
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
index 4828c17bb784b..131d3a74d0bd4 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
@@ -5,7 +5,7 @@ child nodes of the GPMC controller with a name of "nor".
 
 All timing relevant properties as well as generic GPMC child properties are
 explained in a separate documents. Please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Required properties:
 - bank-width: 		Width of NOR flash in bytes. GPMC supports 8-bit and
@@ -28,7 +28,7 @@ Required properties:
 
 Optional properties:
 - gpmc,XXX		Additional GPMC timings and settings parameters. See
-			Documentation/devicetree/bindings/bus/ti-gpmc.txt
+			Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Optional properties for partition table parsing:
 - #address-cells: should be set to 1
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
index 5d8fa527c496a..b6e8bfd024f46 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
@@ -5,7 +5,7 @@ the GPMC controller with a name of "onenand".
 
 All timing relevant properties as well as generic gpmc child properties are
 explained in a separate documents - please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Required properties:
 
diff --git a/Documentation/devicetree/bindings/net/gpmc-eth.txt b/Documentation/devicetree/bindings/net/gpmc-eth.txt
index ace4a64b36959..f7da3d73ca1b2 100644
--- a/Documentation/devicetree/bindings/net/gpmc-eth.txt
+++ b/Documentation/devicetree/bindings/net/gpmc-eth.txt
@@ -9,7 +9,7 @@ the GPMC controller with an "ethernet" name.
 
 All timing relevant properties as well as generic GPMC child properties are
 explained in a separate documents. Please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 For the properties relevant to the ethernet controller connected to the GPMC
 refer to the binding documentation of the device. For example, the documentation
@@ -43,7 +43,7 @@ Required properties:
 
 Optional properties:
 - gpmc,XXX		Additional GPMC timings and settings parameters. See
-			Documentation/devicetree/bindings/bus/ti-gpmc.txt
+			Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Example:
 
-- 
GitLab


From a57ce439505da3801e264656a3bdf746505c77ec Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 21 Jun 2017 18:23:11 +0900
Subject: [PATCH 0348/1958] MAINTAINERS: add entry for Denali NAND controller
 driver

The Denali NAND controller driver (drivers/mtd/nand/denali*) has been
largely reworked by me.  Add myself as its maintainer now.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f7d568b8f133d..4abec10fe6309 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3888,6 +3888,12 @@ M:	Pali Rohár <pali.rohar@gmail.com>
 S:	Maintained
 F:	drivers/platform/x86/dell-wmi.c
 
+DENALI NAND DRIVER
+M:	Masahiro Yamada <yamada.masahiro@socionext.com>
+L:	linux-mtd@lists.infradead.org
+S:	Supported
+F:	drivers/mtd/nand/denali*
+
 DESIGNWARE USB2 DRD IP DRIVER
 M:	John Youn <johnyoun@synopsys.com>
 L:	linux-usb@vger.kernel.org
-- 
GitLab


From 08263a9ae664b24fa777d20b365601534842b236 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 21 Jun 2017 08:26:42 +0200
Subject: [PATCH 0349/1958] mtd: partitions: add helper for deleting partition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are two similar functions handling deletion. One handles single
partition and another the whole MTD flash device. They share (duplicate)
some code so it makes sense to add a small helper for that part.

Function del_mtd_partitions has been moved a bit to keep all deleting
stuff together.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c | 75 +++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 32 deletions(-)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 7ececd3d77993..bcec72148d0b5 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -363,32 +363,6 @@ static inline void free_partition(struct mtd_part *p)
 	kfree(p);
 }
 
-/*
- * This function unregisters and destroy all slave MTD objects which are
- * attached to the given master MTD object.
- */
-
-int del_mtd_partitions(struct mtd_info *master)
-{
-	struct mtd_part *slave, *next;
-	int ret, err = 0;
-
-	mutex_lock(&mtd_partitions_mutex);
-	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
-		if (slave->master == master) {
-			ret = del_mtd_device(&slave->mtd);
-			if (ret < 0) {
-				err = ret;
-				continue;
-			}
-			list_del(&slave->list);
-			free_partition(slave);
-		}
-	mutex_unlock(&mtd_partitions_mutex);
-
-	return err;
-}
-
 static struct mtd_part *allocate_partition(struct mtd_info *master,
 			const struct mtd_partition *part, int partno,
 			uint64_t cur_offset)
@@ -675,6 +649,48 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
 }
 EXPORT_SYMBOL_GPL(mtd_add_partition);
 
+/**
+ * __mtd_del_partition - delete MTD partition
+ *
+ * @priv: internal MTD struct for partition to be deleted
+ *
+ * This function must be called with the partitions mutex locked.
+ */
+static int __mtd_del_partition(struct mtd_part *priv)
+{
+	int err;
+
+	err = del_mtd_device(&priv->mtd);
+	if (err)
+		return err;
+
+	list_del(&priv->list);
+	free_partition(priv);
+
+	return 0;
+}
+
+/*
+ * This function unregisters and destroy all slave MTD objects which are
+ * attached to the given master MTD object.
+ */
+int del_mtd_partitions(struct mtd_info *master)
+{
+	struct mtd_part *slave, *next;
+	int ret, err = 0;
+
+	mutex_lock(&mtd_partitions_mutex);
+	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
+		if (slave->master == master) {
+			ret = __mtd_del_partition(slave);
+			if (ret < 0)
+				err = ret;
+		}
+	mutex_unlock(&mtd_partitions_mutex);
+
+	return err;
+}
+
 int mtd_del_partition(struct mtd_info *master, int partno)
 {
 	struct mtd_part *slave, *next;
@@ -686,12 +702,7 @@ int mtd_del_partition(struct mtd_info *master, int partno)
 		    (slave->mtd.index == partno)) {
 			sysfs_remove_files(&slave->mtd.dev.kobj,
 					   mtd_partition_attrs);
-			ret = del_mtd_device(&slave->mtd);
-			if (ret < 0)
-				break;
-
-			list_del(&slave->list);
-			free_partition(slave);
+			ret = __mtd_del_partition(slave);
 			break;
 		}
 	mutex_unlock(&mtd_partitions_mutex);
-- 
GitLab


From c5ceaba74083daf619bdb34d4871e297a177eebf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 21 Jun 2017 08:26:43 +0200
Subject: [PATCH 0350/1958] mtd: partitions: remove sysfs files when deleting
 all master's partitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When support for sysfs "offset" file was added it missed to update the
del_mtd_partitions function. It deletes partitions just like
mtd_del_partition does so both should also take care of removing sysfs
files.

This change moves sysfs_remove_files call to the shared function to fix
this issue.

Fixes: a62c24d755291 ("mtd: part: Add sysfs variable for offset of partition")
Cc: Dan Ehrenberg <dehrenberg@chromium.org>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index bcec72148d0b5..9434050accc79 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -660,6 +660,8 @@ static int __mtd_del_partition(struct mtd_part *priv)
 {
 	int err;
 
+	sysfs_remove_files(&priv->mtd.dev.kobj, mtd_partition_attrs);
+
 	err = del_mtd_device(&priv->mtd);
 	if (err)
 		return err;
@@ -700,8 +702,6 @@ int mtd_del_partition(struct mtd_info *master, int partno)
 	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
 		if ((slave->master == master) &&
 		    (slave->mtd.index == partno)) {
-			sysfs_remove_files(&slave->mtd.dev.kobj,
-					   mtd_partition_attrs);
 			ret = __mtd_del_partition(slave);
 			break;
 		}
-- 
GitLab


From 0a9d72b69da6d8dae1abd7990c6c4c749846ef3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 21 Jun 2017 08:26:44 +0200
Subject: [PATCH 0351/1958] mtd: partitions: rename "master" to the "parent"
 where appropriate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This prepares mtd subsystem for the new feature: subpartitions. In some
cases flash device partition can be a container with extra subpartitions
(volumes).

So far there was a flat structure implemented. One master (flash device)
could be partitioned into few partitions. Every partition got its master
and it was enough to get things running.

To support subpartitions we need to store pointer to the parent for each
partition. This is required to implement more natural tree structure and
handle all recursion and offsets calculation.

To make code consistent this patch renamed "master" to the "parent" in
places where we can be dealing with subpartitions.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c | 204 ++++++++++++++++++++++--------------------
 1 file changed, 105 insertions(+), 99 deletions(-)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 9434050accc79..208822040735b 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -37,10 +37,16 @@
 static LIST_HEAD(mtd_partitions);
 static DEFINE_MUTEX(mtd_partitions_mutex);
 
-/* Our partition node structure */
+/**
+ * struct mtd_part - our partition node structure
+ *
+ * @mtd: struct holding partition details
+ * @parent: parent mtd - flash device or another partition
+ * @offset: partition offset relative to the *flash device*
+ */
 struct mtd_part {
 	struct mtd_info mtd;
-	struct mtd_info *master;
+	struct mtd_info *parent;
 	uint64_t offset;
 	struct list_head list;
 };
@@ -67,15 +73,15 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct mtd_ecc_stats stats;
 	int res;
 
-	stats = part->master->ecc_stats;
-	res = part->master->_read(part->master, from + part->offset, len,
+	stats = part->parent->ecc_stats;
+	res = part->parent->_read(part->parent, from + part->offset, len,
 				  retlen, buf);
 	if (unlikely(mtd_is_eccerr(res)))
 		mtd->ecc_stats.failed +=
-			part->master->ecc_stats.failed - stats.failed;
+			part->parent->ecc_stats.failed - stats.failed;
 	else
 		mtd->ecc_stats.corrected +=
-			part->master->ecc_stats.corrected - stats.corrected;
+			part->parent->ecc_stats.corrected - stats.corrected;
 	return res;
 }
 
@@ -84,7 +90,7 @@ static int part_point(struct mtd_info *mtd, loff_t from, size_t len,
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return part->master->_point(part->master, from + part->offset, len,
+	return part->parent->_point(part->parent, from + part->offset, len,
 				    retlen, virt, phys);
 }
 
@@ -92,7 +98,7 @@ static int part_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return part->master->_unpoint(part->master, from + part->offset, len);
+	return part->parent->_unpoint(part->parent, from + part->offset, len);
 }
 
 static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
@@ -103,7 +109,7 @@ static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
 	struct mtd_part *part = mtd_to_part(mtd);
 
 	offset += part->offset;
-	return part->master->_get_unmapped_area(part->master, len, offset,
+	return part->parent->_get_unmapped_area(part->parent, len, offset,
 						flags);
 }
 
@@ -132,7 +138,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from,
 			return -EINVAL;
 	}
 
-	res = part->master->_read_oob(part->master, from + part->offset, ops);
+	res = part->parent->_read_oob(part->parent, from + part->offset, ops);
 	if (unlikely(res)) {
 		if (mtd_is_bitflip(res))
 			mtd->ecc_stats.corrected++;
@@ -146,7 +152,7 @@ static int part_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_read_user_prot_reg(part->master, from, len,
+	return part->parent->_read_user_prot_reg(part->parent, from, len,
 						 retlen, buf);
 }
 
@@ -154,7 +160,7 @@ static int part_get_user_prot_info(struct mtd_info *mtd, size_t len,
 				   size_t *retlen, struct otp_info *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_get_user_prot_info(part->master, len, retlen,
+	return part->parent->_get_user_prot_info(part->parent, len, retlen,
 						 buf);
 }
 
@@ -162,7 +168,7 @@ static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_read_fact_prot_reg(part->master, from, len,
+	return part->parent->_read_fact_prot_reg(part->parent, from, len,
 						 retlen, buf);
 }
 
@@ -170,7 +176,7 @@ static int part_get_fact_prot_info(struct mtd_info *mtd, size_t len,
 				   size_t *retlen, struct otp_info *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_get_fact_prot_info(part->master, len, retlen,
+	return part->parent->_get_fact_prot_info(part->parent, len, retlen,
 						 buf);
 }
 
@@ -178,7 +184,7 @@ static int part_write(struct mtd_info *mtd, loff_t to, size_t len,
 		size_t *retlen, const u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_write(part->master, to + part->offset, len,
+	return part->parent->_write(part->parent, to + part->offset, len,
 				    retlen, buf);
 }
 
@@ -186,7 +192,7 @@ static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 		size_t *retlen, const u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_panic_write(part->master, to + part->offset, len,
+	return part->parent->_panic_write(part->parent, to + part->offset, len,
 					  retlen, buf);
 }
 
@@ -199,14 +205,14 @@ static int part_write_oob(struct mtd_info *mtd, loff_t to,
 		return -EINVAL;
 	if (ops->datbuf && to + ops->len > mtd->size)
 		return -EINVAL;
-	return part->master->_write_oob(part->master, to + part->offset, ops);
+	return part->parent->_write_oob(part->parent, to + part->offset, ops);
 }
 
 static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_write_user_prot_reg(part->master, from, len,
+	return part->parent->_write_user_prot_reg(part->parent, from, len,
 						  retlen, buf);
 }
 
@@ -214,14 +220,14 @@ static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_lock_user_prot_reg(part->master, from, len);
+	return part->parent->_lock_user_prot_reg(part->parent, from, len);
 }
 
 static int part_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		unsigned long count, loff_t to, size_t *retlen)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_writev(part->master, vecs, count,
+	return part->parent->_writev(part->parent, vecs, count,
 				     to + part->offset, retlen);
 }
 
@@ -231,7 +237,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
 	int ret;
 
 	instr->addr += part->offset;
-	ret = part->master->_erase(part->master, instr);
+	ret = part->parent->_erase(part->parent, instr);
 	if (ret) {
 		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
@@ -257,51 +263,51 @@ EXPORT_SYMBOL_GPL(mtd_erase_callback);
 static int part_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_lock(part->master, ofs + part->offset, len);
+	return part->parent->_lock(part->parent, ofs + part->offset, len);
 }
 
 static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_unlock(part->master, ofs + part->offset, len);
+	return part->parent->_unlock(part->parent, ofs + part->offset, len);
 }
 
 static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_is_locked(part->master, ofs + part->offset, len);
+	return part->parent->_is_locked(part->parent, ofs + part->offset, len);
 }
 
 static void part_sync(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	part->master->_sync(part->master);
+	part->parent->_sync(part->parent);
 }
 
 static int part_suspend(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_suspend(part->master);
+	return part->parent->_suspend(part->parent);
 }
 
 static void part_resume(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	part->master->_resume(part->master);
+	part->parent->_resume(part->parent);
 }
 
 static int part_block_isreserved(struct mtd_info *mtd, loff_t ofs)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 	ofs += part->offset;
-	return part->master->_block_isreserved(part->master, ofs);
+	return part->parent->_block_isreserved(part->parent, ofs);
 }
 
 static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 	ofs += part->offset;
-	return part->master->_block_isbad(part->master, ofs);
+	return part->parent->_block_isbad(part->parent, ofs);
 }
 
 static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
@@ -310,7 +316,7 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	int res;
 
 	ofs += part->offset;
-	res = part->master->_block_markbad(part->master, ofs);
+	res = part->parent->_block_markbad(part->parent, ofs);
 	if (!res)
 		mtd->ecc_stats.badblocks++;
 	return res;
@@ -319,13 +325,13 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
 static int part_get_device(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_get_device(part->master);
+	return part->parent->_get_device(part->parent);
 }
 
 static void part_put_device(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	part->master->_put_device(part->master);
+	part->parent->_put_device(part->parent);
 }
 
 static int part_ooblayout_ecc(struct mtd_info *mtd, int section,
@@ -333,7 +339,7 @@ static int part_ooblayout_ecc(struct mtd_info *mtd, int section,
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return mtd_ooblayout_ecc(part->master, section, oobregion);
+	return mtd_ooblayout_ecc(part->parent, section, oobregion);
 }
 
 static int part_ooblayout_free(struct mtd_info *mtd, int section,
@@ -341,7 +347,7 @@ static int part_ooblayout_free(struct mtd_info *mtd, int section,
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return mtd_ooblayout_free(part->master, section, oobregion);
+	return mtd_ooblayout_free(part->parent, section, oobregion);
 }
 
 static const struct mtd_ooblayout_ops part_ooblayout_ops = {
@@ -353,7 +359,7 @@ static int part_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return part->master->_max_bad_blocks(part->master,
+	return part->parent->_max_bad_blocks(part->parent,
 					     ofs + part->offset, len);
 }
 
@@ -363,12 +369,12 @@ static inline void free_partition(struct mtd_part *p)
 	kfree(p);
 }
 
-static struct mtd_part *allocate_partition(struct mtd_info *master,
+static struct mtd_part *allocate_partition(struct mtd_info *parent,
 			const struct mtd_partition *part, int partno,
 			uint64_t cur_offset)
 {
-	int wr_alignment = (master->flags & MTD_NO_ERASE) ? master->writesize:
-							    master->erasesize;
+	int wr_alignment = (parent->flags & MTD_NO_ERASE) ? parent->writesize:
+							    parent->erasesize;
 	struct mtd_part *slave;
 	u32 remainder;
 	char *name;
@@ -379,25 +385,25 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	name = kstrdup(part->name, GFP_KERNEL);
 	if (!name || !slave) {
 		printk(KERN_ERR"memory allocation error while creating partitions for \"%s\"\n",
-		       master->name);
+		       parent->name);
 		kfree(name);
 		kfree(slave);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	/* set up the MTD object for this partition */
-	slave->mtd.type = master->type;
-	slave->mtd.flags = master->flags & ~part->mask_flags;
+	slave->mtd.type = parent->type;
+	slave->mtd.flags = parent->flags & ~part->mask_flags;
 	slave->mtd.size = part->size;
-	slave->mtd.writesize = master->writesize;
-	slave->mtd.writebufsize = master->writebufsize;
-	slave->mtd.oobsize = master->oobsize;
-	slave->mtd.oobavail = master->oobavail;
-	slave->mtd.subpage_sft = master->subpage_sft;
-	slave->mtd.pairing = master->pairing;
+	slave->mtd.writesize = parent->writesize;
+	slave->mtd.writebufsize = parent->writebufsize;
+	slave->mtd.oobsize = parent->oobsize;
+	slave->mtd.oobavail = parent->oobavail;
+	slave->mtd.subpage_sft = parent->subpage_sft;
+	slave->mtd.pairing = parent->pairing;
 
 	slave->mtd.name = name;
-	slave->mtd.owner = master->owner;
+	slave->mtd.owner = parent->owner;
 
 	/* NOTE: Historically, we didn't arrange MTDs as a tree out of
 	 * concern for showing the same data in multiple partitions.
@@ -408,70 +414,70 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	 * distinguish between the master and the partition in sysfs.
 	 */
 	slave->mtd.dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ?
-				&master->dev :
-				master->dev.parent;
+				&parent->dev :
+				parent->dev.parent;
 	slave->mtd.dev.of_node = part->of_node;
 
 	slave->mtd._read = part_read;
 	slave->mtd._write = part_write;
 
-	if (master->_panic_write)
+	if (parent->_panic_write)
 		slave->mtd._panic_write = part_panic_write;
 
-	if (master->_point && master->_unpoint) {
+	if (parent->_point && parent->_unpoint) {
 		slave->mtd._point = part_point;
 		slave->mtd._unpoint = part_unpoint;
 	}
 
-	if (master->_get_unmapped_area)
+	if (parent->_get_unmapped_area)
 		slave->mtd._get_unmapped_area = part_get_unmapped_area;
-	if (master->_read_oob)
+	if (parent->_read_oob)
 		slave->mtd._read_oob = part_read_oob;
-	if (master->_write_oob)
+	if (parent->_write_oob)
 		slave->mtd._write_oob = part_write_oob;
-	if (master->_read_user_prot_reg)
+	if (parent->_read_user_prot_reg)
 		slave->mtd._read_user_prot_reg = part_read_user_prot_reg;
-	if (master->_read_fact_prot_reg)
+	if (parent->_read_fact_prot_reg)
 		slave->mtd._read_fact_prot_reg = part_read_fact_prot_reg;
-	if (master->_write_user_prot_reg)
+	if (parent->_write_user_prot_reg)
 		slave->mtd._write_user_prot_reg = part_write_user_prot_reg;
-	if (master->_lock_user_prot_reg)
+	if (parent->_lock_user_prot_reg)
 		slave->mtd._lock_user_prot_reg = part_lock_user_prot_reg;
-	if (master->_get_user_prot_info)
+	if (parent->_get_user_prot_info)
 		slave->mtd._get_user_prot_info = part_get_user_prot_info;
-	if (master->_get_fact_prot_info)
+	if (parent->_get_fact_prot_info)
 		slave->mtd._get_fact_prot_info = part_get_fact_prot_info;
-	if (master->_sync)
+	if (parent->_sync)
 		slave->mtd._sync = part_sync;
-	if (!partno && !master->dev.class && master->_suspend &&
-	    master->_resume) {
+	if (!partno && !parent->dev.class && parent->_suspend &&
+	    parent->_resume) {
 			slave->mtd._suspend = part_suspend;
 			slave->mtd._resume = part_resume;
 	}
-	if (master->_writev)
+	if (parent->_writev)
 		slave->mtd._writev = part_writev;
-	if (master->_lock)
+	if (parent->_lock)
 		slave->mtd._lock = part_lock;
-	if (master->_unlock)
+	if (parent->_unlock)
 		slave->mtd._unlock = part_unlock;
-	if (master->_is_locked)
+	if (parent->_is_locked)
 		slave->mtd._is_locked = part_is_locked;
-	if (master->_block_isreserved)
+	if (parent->_block_isreserved)
 		slave->mtd._block_isreserved = part_block_isreserved;
-	if (master->_block_isbad)
+	if (parent->_block_isbad)
 		slave->mtd._block_isbad = part_block_isbad;
-	if (master->_block_markbad)
+	if (parent->_block_markbad)
 		slave->mtd._block_markbad = part_block_markbad;
-	if (master->_max_bad_blocks)
+	if (parent->_max_bad_blocks)
 		slave->mtd._max_bad_blocks = part_max_bad_blocks;
 
-	if (master->_get_device)
+	if (parent->_get_device)
 		slave->mtd._get_device = part_get_device;
-	if (master->_put_device)
+	if (parent->_put_device)
 		slave->mtd._put_device = part_put_device;
 
 	slave->mtd._erase = part_erase;
-	slave->master = master;
+	slave->parent = parent;
 	slave->offset = part->offset;
 
 	if (slave->offset == MTDPART_OFS_APPEND)
@@ -489,25 +495,25 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	}
 	if (slave->offset == MTDPART_OFS_RETAIN) {
 		slave->offset = cur_offset;
-		if (master->size - slave->offset >= slave->mtd.size) {
-			slave->mtd.size = master->size - slave->offset
+		if (parent->size - slave->offset >= slave->mtd.size) {
+			slave->mtd.size = parent->size - slave->offset
 							- slave->mtd.size;
 		} else {
 			printk(KERN_ERR "mtd partition \"%s\" doesn't have enough space: %#llx < %#llx, disabled\n",
-				part->name, master->size - slave->offset,
+				part->name, parent->size - slave->offset,
 				slave->mtd.size);
 			/* register to preserve ordering */
 			goto out_register;
 		}
 	}
 	if (slave->mtd.size == MTDPART_SIZ_FULL)
-		slave->mtd.size = master->size - slave->offset;
+		slave->mtd.size = parent->size - slave->offset;
 
 	printk(KERN_NOTICE "0x%012llx-0x%012llx : \"%s\"\n", (unsigned long long)slave->offset,
 		(unsigned long long)(slave->offset + slave->mtd.size), slave->mtd.name);
 
 	/* let's do some sanity checks */
-	if (slave->offset >= master->size) {
+	if (slave->offset >= parent->size) {
 		/* let's register it anyway to preserve ordering */
 		slave->offset = 0;
 		slave->mtd.size = 0;
@@ -515,16 +521,16 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 			part->name);
 		goto out_register;
 	}
-	if (slave->offset + slave->mtd.size > master->size) {
-		slave->mtd.size = master->size - slave->offset;
+	if (slave->offset + slave->mtd.size > parent->size) {
+		slave->mtd.size = parent->size - slave->offset;
 		printk(KERN_WARNING"mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#llx\n",
-			part->name, master->name, (unsigned long long)slave->mtd.size);
+			part->name, parent->name, (unsigned long long)slave->mtd.size);
 	}
-	if (master->numeraseregions > 1) {
+	if (parent->numeraseregions > 1) {
 		/* Deal with variable erase size stuff */
-		int i, max = master->numeraseregions;
+		int i, max = parent->numeraseregions;
 		u64 end = slave->offset + slave->mtd.size;
-		struct mtd_erase_region_info *regions = master->eraseregions;
+		struct mtd_erase_region_info *regions = parent->eraseregions;
 
 		/* Find the first erase regions which is part of this
 		 * partition. */
@@ -543,7 +549,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 		BUG_ON(slave->mtd.erasesize == 0);
 	} else {
 		/* Single erase size */
-		slave->mtd.erasesize = master->erasesize;
+		slave->mtd.erasesize = parent->erasesize;
 	}
 
 	tmp = slave->offset;
@@ -566,17 +572,17 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	}
 
 	mtd_set_ooblayout(&slave->mtd, &part_ooblayout_ops);
-	slave->mtd.ecc_step_size = master->ecc_step_size;
-	slave->mtd.ecc_strength = master->ecc_strength;
-	slave->mtd.bitflip_threshold = master->bitflip_threshold;
+	slave->mtd.ecc_step_size = parent->ecc_step_size;
+	slave->mtd.ecc_strength = parent->ecc_strength;
+	slave->mtd.bitflip_threshold = parent->bitflip_threshold;
 
-	if (master->_block_isbad) {
+	if (parent->_block_isbad) {
 		uint64_t offs = 0;
 
 		while (offs < slave->mtd.size) {
-			if (mtd_block_isreserved(master, offs + slave->offset))
+			if (mtd_block_isreserved(parent, offs + slave->offset))
 				slave->mtd.ecc_stats.bbtblocks++;
-			else if (mtd_block_isbad(master, offs + slave->offset))
+			else if (mtd_block_isbad(parent, offs + slave->offset))
 				slave->mtd.ecc_stats.badblocks++;
 			offs += slave->mtd.erasesize;
 		}
@@ -610,7 +616,7 @@ static int mtd_add_partition_attrs(struct mtd_part *new)
 	return ret;
 }
 
-int mtd_add_partition(struct mtd_info *master, const char *name,
+int mtd_add_partition(struct mtd_info *parent, const char *name,
 		      long long offset, long long length)
 {
 	struct mtd_partition part;
@@ -623,7 +629,7 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
 		return -EINVAL;
 
 	if (length == MTDPART_SIZ_FULL)
-		length = master->size - offset;
+		length = parent->size - offset;
 
 	if (length <= 0)
 		return -EINVAL;
@@ -633,7 +639,7 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
 	part.size = length;
 	part.offset = offset;
 
-	new = allocate_partition(master, &part, -1, offset);
+	new = allocate_partition(parent, &part, -1, offset);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
@@ -683,7 +689,7 @@ int del_mtd_partitions(struct mtd_info *master)
 
 	mutex_lock(&mtd_partitions_mutex);
 	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
-		if (slave->master == master) {
+		if (slave->parent == master) {
 			ret = __mtd_del_partition(slave);
 			if (ret < 0)
 				err = ret;
@@ -700,7 +706,7 @@ int mtd_del_partition(struct mtd_info *master, int partno)
 
 	mutex_lock(&mtd_partitions_mutex);
 	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
-		if ((slave->master == master) &&
+		if ((slave->parent == master) &&
 		    (slave->mtd.index == partno)) {
 			ret = __mtd_del_partition(slave);
 			break;
@@ -933,6 +939,6 @@ uint64_t mtd_get_device_size(const struct mtd_info *mtd)
 	if (!mtd_is_partition(mtd))
 		return mtd->size;
 
-	return mtd_to_part(mtd)->master->size;
+	return mtd_to_part(mtd)->parent->size;
 }
 EXPORT_SYMBOL_GPL(mtd_get_device_size);
-- 
GitLab


From 97519dc52b44af054d7654776e78eaa211cf1842 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 21 Jun 2017 08:26:45 +0200
Subject: [PATCH 0352/1958] mtd: partitions: add support for subpartitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some flash device partitions can be containers with extra subpartitions
(volumes). All callbacks are already capable of this additional level of
indirection.

This patch makes sure we always display subpartitions using a tree
structure and takes care of deleting subpartitions when parent gets
removed.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 208822040735b..c0d464d192ee9 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -413,7 +413,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent,
 	 * parent conditional on that option. Note, this is a way to
 	 * distinguish between the master and the partition in sysfs.
 	 */
-	slave->mtd.dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ?
+	slave->mtd.dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd_is_partition(parent) ?
 				&parent->dev :
 				parent->dev.parent;
 	slave->mtd.dev.of_node = part->of_node;
@@ -664,8 +664,17 @@ EXPORT_SYMBOL_GPL(mtd_add_partition);
  */
 static int __mtd_del_partition(struct mtd_part *priv)
 {
+	struct mtd_part *child, *next;
 	int err;
 
+	list_for_each_entry_safe(child, next, &mtd_partitions, list) {
+		if (child->parent == &priv->mtd) {
+			err = __mtd_del_partition(child);
+			if (err)
+				return err;
+		}
+	}
+
 	sysfs_remove_files(&priv->mtd.dev.kobj, mtd_partition_attrs);
 
 	err = del_mtd_device(&priv->mtd);
@@ -680,16 +689,16 @@ static int __mtd_del_partition(struct mtd_part *priv)
 
 /*
  * This function unregisters and destroy all slave MTD objects which are
- * attached to the given master MTD object.
+ * attached to the given MTD object.
  */
-int del_mtd_partitions(struct mtd_info *master)
+int del_mtd_partitions(struct mtd_info *mtd)
 {
 	struct mtd_part *slave, *next;
 	int ret, err = 0;
 
 	mutex_lock(&mtd_partitions_mutex);
 	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
-		if (slave->parent == master) {
+		if (slave->parent == mtd) {
 			ret = __mtd_del_partition(slave);
 			if (ret < 0)
 				err = ret;
@@ -699,14 +708,14 @@ int del_mtd_partitions(struct mtd_info *master)
 	return err;
 }
 
-int mtd_del_partition(struct mtd_info *master, int partno)
+int mtd_del_partition(struct mtd_info *mtd, int partno)
 {
 	struct mtd_part *slave, *next;
 	int ret = -EINVAL;
 
 	mutex_lock(&mtd_partitions_mutex);
 	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
-		if ((slave->parent == master) &&
+		if ((slave->parent == mtd) &&
 		    (slave->mtd.index == partno)) {
 			ret = __mtd_del_partition(slave);
 			break;
@@ -939,6 +948,6 @@ uint64_t mtd_get_device_size(const struct mtd_info *mtd)
 	if (!mtd_is_partition(mtd))
 		return mtd->size;
 
-	return mtd_to_part(mtd)->parent->size;
+	return mtd_get_device_size(mtd_to_part(mtd)->parent);
 }
 EXPORT_SYMBOL_GPL(mtd_get_device_size);
-- 
GitLab


From 1a0915be192606fee64830b9c5d70b7ed59426b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 21 Jun 2017 08:26:46 +0200
Subject: [PATCH 0353/1958] mtd: partitions: add support for partition parsers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some devices have partitions that are kind of containers with extra
subpartitions / volumes instead of e.g. a simple filesystem data. To
support such cases we need to first create normal flash device
partitions and then take care of these special ones.

It's very common case for home routers. Depending on the vendor there
are formats like TRX, Seama, TP-Link, WRGG & more. All of them are used
to embed few partitions into a single one / single firmware file.

Ideally all vendors would use some well documented / standardized format
like UBI (and some probably start doing so), but there are still
countless devices on the market using these poor vendor specific
formats.

This patch extends MTD subsystem by allowing to specify list of parsers
that should be tried for a given partition. Supporting such poor formats
is highly unlikely to be the top priority so these changes try to
minimize maintenance cost to the minimum. It reuses existing code for
these new parsers and just adds a one property and one new function.

This implementation requires setting partition parsers in a flash
parser. A proper change of bcm47xxpart will follow and in the future we
will hopefully also find a solution for doing it with ofpart
("fixed-partitions").

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c          | 31 +++++++++++++++++++++++++++++++
 include/linux/mtd/partitions.h |  7 +++++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index c0d464d192ee9..2ad9493703f98 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -369,6 +369,35 @@ static inline void free_partition(struct mtd_part *p)
 	kfree(p);
 }
 
+/**
+ * mtd_parse_part - parse MTD partition looking for subpartitions
+ *
+ * @slave: part that is supposed to be a container and should be parsed
+ * @types: NULL-terminated array with names of partition parsers to try
+ *
+ * Some partitions are kind of containers with extra subpartitions (volumes).
+ * There can be various formats of such containers. This function tries to use
+ * specified parsers to analyze given partition and registers found
+ * subpartitions on success.
+ */
+static int mtd_parse_part(struct mtd_part *slave, const char *const *types)
+{
+	struct mtd_partitions parsed;
+	int err;
+
+	err = parse_mtd_partitions(&slave->mtd, types, &parsed, NULL);
+	if (err)
+		return err;
+	else if (!parsed.nr_parts)
+		return -ENOENT;
+
+	err = add_mtd_partitions(&slave->mtd, parsed.parts, parsed.nr_parts);
+
+	mtd_part_parser_cleanup(&parsed);
+
+	return err;
+}
+
 static struct mtd_part *allocate_partition(struct mtd_info *parent,
 			const struct mtd_partition *part, int partno,
 			uint64_t cur_offset)
@@ -758,6 +787,8 @@ int add_mtd_partitions(struct mtd_info *master,
 
 		add_mtd_device(&slave->mtd);
 		mtd_add_partition_attrs(slave);
+		if (parts[i].types)
+			mtd_parse_part(slave, parts[i].types);
 
 		cur_offset = slave->offset + slave->mtd.size;
 	}
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 06df1e06b6e03..c4beb70dacbd6 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -20,6 +20,12 @@
  *
  * For each partition, these fields are available:
  * name: string that will be used to label the partition's MTD device.
+ * types: some partitions can be containers using specific format to describe
+ *	embedded subpartitions / volumes. E.g. many home routers use "firmware"
+ *	partition that contains at least kernel and rootfs. In such case an
+ *	extra parser is needed that will detect these dynamic partitions and
+ *	report them to the MTD subsystem. If set this property stores an array
+ *	of parser names to use when looking for subpartitions.
  * size: the partition size; if defined as MTDPART_SIZ_FULL, the partition
  * 	will extend to the end of the master MTD device.
  * offset: absolute starting position within the master MTD device; if
@@ -38,6 +44,7 @@
 
 struct mtd_partition {
 	const char *name;		/* identifier string */
+	const char *const *types;	/* names of parsers to use if any */
 	uint64_t size;			/* partition size */
 	uint64_t offset;		/* offset within the master MTD space */
 	uint32_t mask_flags;		/* master MTD flags to mask out for this partition */
-- 
GitLab


From 99352afe8f169c95b294b6b9a8d0e18cd9e3c2a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 21 Jun 2017 08:26:47 +0200
Subject: [PATCH 0354/1958] mtd: extract TRX parser out of bcm47xxpart into a
 separated module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes TRX parsing code reusable with other platforms and parsers.

Please note this patch doesn't really change anything in the existing
code, just moves it. There is still some place for improvement (e.g.
working on non-hacky method of checking rootfs format) but it's not
really a subject of this change.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/Kconfig              |   4 +
 drivers/mtd/Makefile             |   1 +
 drivers/mtd/bcm47xxpart.c        |  99 ++----------------------
 drivers/mtd/parsers/Kconfig      |   8 ++
 drivers/mtd/parsers/Makefile     |   1 +
 drivers/mtd/parsers/parser_trx.c | 126 +++++++++++++++++++++++++++++++
 6 files changed, 145 insertions(+), 94 deletions(-)
 create mode 100644 drivers/mtd/parsers/Kconfig
 create mode 100644 drivers/mtd/parsers/Makefile
 create mode 100644 drivers/mtd/parsers/parser_trx.c

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index e83a279f1217e..5a2d71729b9ac 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -155,6 +155,10 @@ config MTD_BCM47XX_PARTS
 	  This provides partitions parser for devices based on BCM47xx
 	  boards.
 
+menu "Partition parsers"
+source "drivers/mtd/parsers/Kconfig"
+endmenu
+
 comment "User Modules And Translation Layers"
 
 #
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 99bb9a1f6e16f..151d60df303ac 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_MTD_AFS_PARTS)	+= afs.o
 obj-$(CONFIG_MTD_AR7_PARTS)	+= ar7part.o
 obj-$(CONFIG_MTD_BCM63XX_PARTS)	+= bcm63xxpart.o
 obj-$(CONFIG_MTD_BCM47XX_PARTS)	+= bcm47xxpart.o
+obj-y				+= parsers/
 
 # 'Users' - code which presents functionality to userspace.
 obj-$(CONFIG_MTD_BLKDEVS)	+= mtd_blkdevs.o
diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c
index d10fa6c8f0746..fe2581d9d882f 100644
--- a/drivers/mtd/bcm47xxpart.c
+++ b/drivers/mtd/bcm47xxpart.c
@@ -43,7 +43,8 @@
 #define ML_MAGIC2			0x26594131
 #define TRX_MAGIC			0x30524448
 #define SHSQ_MAGIC			0x71736873	/* shsq (weird ZTE H218N endianness) */
-#define UBI_EC_MAGIC			0x23494255	/* UBI# */
+
+static const char * const trx_types[] = { "trx", NULL };
 
 struct trx_header {
 	uint32_t magic;
@@ -62,89 +63,6 @@ static void bcm47xxpart_add_part(struct mtd_partition *part, const char *name,
 	part->mask_flags = mask_flags;
 }
 
-static const char *bcm47xxpart_trx_data_part_name(struct mtd_info *master,
-						  size_t offset)
-{
-	uint32_t buf;
-	size_t bytes_read;
-	int err;
-
-	err  = mtd_read(master, offset, sizeof(buf), &bytes_read,
-			(uint8_t *)&buf);
-	if (err && !mtd_is_bitflip(err)) {
-		pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
-			offset, err);
-		goto out_default;
-	}
-
-	if (buf == UBI_EC_MAGIC)
-		return "ubi";
-
-out_default:
-	return "rootfs";
-}
-
-static int bcm47xxpart_parse_trx(struct mtd_info *master,
-				 struct mtd_partition *trx,
-				 struct mtd_partition *parts,
-				 size_t parts_len)
-{
-	struct trx_header header;
-	size_t bytes_read;
-	int curr_part = 0;
-	int i, err;
-
-	if (parts_len < 3) {
-		pr_warn("No enough space to add TRX partitions!\n");
-		return -ENOMEM;
-	}
-
-	err = mtd_read(master, trx->offset, sizeof(header), &bytes_read,
-		       (uint8_t *)&header);
-	if (err && !mtd_is_bitflip(err)) {
-		pr_err("mtd_read error while reading TRX header: %d\n", err);
-		return err;
-	}
-
-	i = 0;
-
-	/* We have LZMA loader if offset[2] points to sth */
-	if (header.offset[2]) {
-		bcm47xxpart_add_part(&parts[curr_part++], "loader",
-				     trx->offset + header.offset[i], 0);
-		i++;
-	}
-
-	if (header.offset[i]) {
-		bcm47xxpart_add_part(&parts[curr_part++], "linux",
-				     trx->offset + header.offset[i], 0);
-		i++;
-	}
-
-	if (header.offset[i]) {
-		size_t offset = trx->offset + header.offset[i];
-		const char *name = bcm47xxpart_trx_data_part_name(master,
-								  offset);
-
-		bcm47xxpart_add_part(&parts[curr_part++], name, offset, 0);
-		i++;
-	}
-
-	/*
-	 * Assume that every partition ends at the beginning of the one it is
-	 * followed by.
-	 */
-	for (i = 0; i < curr_part; i++) {
-		u64 next_part_offset = (i < curr_part - 1) ?
-					parts[i + 1].offset :
-					trx->offset + trx->size;
-
-		parts[i].size = next_part_offset - parts[i].offset;
-	}
-
-	return curr_part;
-}
-
 /**
  * bcm47xxpart_bootpartition - gets index of TRX partition used by bootloader
  *
@@ -362,17 +280,10 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 	for (i = 0; i < trx_num; i++) {
 		struct mtd_partition *trx = &parts[trx_parts[i]];
 
-		if (i == bcm47xxpart_bootpartition()) {
-			int num_parts;
-
-			num_parts = bcm47xxpart_parse_trx(master, trx,
-							  parts + curr_part,
-							  BCM47XXPART_MAX_PARTS - curr_part);
-			if (num_parts > 0)
-				curr_part += num_parts;
-		} else {
+		if (i == bcm47xxpart_bootpartition())
+			trx->types = trx_types;
+		else
 			trx->name = "failsafe";
-		}
 	}
 
 	*pparts = parts;
diff --git a/drivers/mtd/parsers/Kconfig b/drivers/mtd/parsers/Kconfig
new file mode 100644
index 0000000000000..d206b3c533bcb
--- /dev/null
+++ b/drivers/mtd/parsers/Kconfig
@@ -0,0 +1,8 @@
+config MTD_PARSER_TRX
+	tristate "Parser for TRX format partitions"
+	depends on MTD && (BCM47XX || ARCH_BCM_5301X || COMPILE_TEST)
+	help
+	  TRX is a firmware format used by Broadcom on their devices. It
+	  may contain up to 3/4 partitions (depending on the version).
+	  This driver will parse TRX header and report at least two partitions:
+	  kernel and rootfs.
diff --git a/drivers/mtd/parsers/Makefile b/drivers/mtd/parsers/Makefile
new file mode 100644
index 0000000000000..4d9024e0be3bf
--- /dev/null
+++ b/drivers/mtd/parsers/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTD_PARSER_TRX)		+= parser_trx.o
diff --git a/drivers/mtd/parsers/parser_trx.c b/drivers/mtd/parsers/parser_trx.c
new file mode 100644
index 0000000000000..e805108afd317
--- /dev/null
+++ b/drivers/mtd/parsers/parser_trx.c
@@ -0,0 +1,126 @@
+/*
+ * Parser for TRX format partitions
+ *
+ * Copyright (C) 2012 - 2017 Rafał Miłecki <rafal@milecki.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+
+#define TRX_PARSER_MAX_PARTS		4
+
+/* Magics */
+#define TRX_MAGIC			0x30524448
+#define UBI_EC_MAGIC			0x23494255	/* UBI# */
+
+struct trx_header {
+	uint32_t magic;
+	uint32_t length;
+	uint32_t crc32;
+	uint16_t flags;
+	uint16_t version;
+	uint32_t offset[3];
+} __packed;
+
+static const char *parser_trx_data_part_name(struct mtd_info *master,
+					     size_t offset)
+{
+	uint32_t buf;
+	size_t bytes_read;
+	int err;
+
+	err  = mtd_read(master, offset, sizeof(buf), &bytes_read,
+			(uint8_t *)&buf);
+	if (err && !mtd_is_bitflip(err)) {
+		pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+			offset, err);
+		goto out_default;
+	}
+
+	if (buf == UBI_EC_MAGIC)
+		return "ubi";
+
+out_default:
+	return "rootfs";
+}
+
+static int parser_trx_parse(struct mtd_info *mtd,
+			    const struct mtd_partition **pparts,
+			    struct mtd_part_parser_data *data)
+{
+	struct mtd_partition *parts;
+	struct mtd_partition *part;
+	struct trx_header trx;
+	size_t bytes_read;
+	uint8_t curr_part = 0, i = 0;
+	int err;
+
+	parts = kzalloc(sizeof(struct mtd_partition) * TRX_PARSER_MAX_PARTS,
+			GFP_KERNEL);
+	if (!parts)
+		return -ENOMEM;
+
+	err = mtd_read(mtd, 0, sizeof(trx), &bytes_read, (uint8_t *)&trx);
+	if (err) {
+		pr_err("MTD reading error: %d\n", err);
+		kfree(parts);
+		return err;
+	}
+
+	if (trx.magic != TRX_MAGIC) {
+		kfree(parts);
+		return -ENOENT;
+	}
+
+	/* We have LZMA loader if there is address in offset[2] */
+	if (trx.offset[2]) {
+		part = &parts[curr_part++];
+		part->name = "loader";
+		part->offset = trx.offset[i];
+		i++;
+	}
+
+	if (trx.offset[i]) {
+		part = &parts[curr_part++];
+		part->name = "linux";
+		part->offset = trx.offset[i];
+		i++;
+	}
+
+	if (trx.offset[i]) {
+		part = &parts[curr_part++];
+		part->name = parser_trx_data_part_name(mtd, trx.offset[i]);
+		part->offset = trx.offset[i];
+		i++;
+	}
+
+	/*
+	 * Assume that every partition ends at the beginning of the one it is
+	 * followed by.
+	 */
+	for (i = 0; i < curr_part; i++) {
+		u64 next_part_offset = (i < curr_part - 1) ?
+				       parts[i + 1].offset : mtd->size;
+
+		parts[i].size = next_part_offset - parts[i].offset;
+	}
+
+	*pparts = parts;
+	return i;
+};
+
+static struct mtd_part_parser mtd_parser_trx = {
+	.parse_fn = parser_trx_parse,
+	.name = "trx",
+};
+module_mtd_part_parser(mtd_parser_trx);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Parser for TRX format partitions");
-- 
GitLab


From 6eab81e6358730fd566e2c069d7be2309713ac3a Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Date: Tue, 13 Jun 2017 21:46:24 +0200
Subject: [PATCH 0355/1958] MAINTAINERS: Update SPI NOR subsystem git
 repositories

SPI NOR branches are now hosted on MTD repos, spi-nor/next is on l2-mtd
and spi-nor/fixes is on linux-mtd.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9e984645c4b08..16d536082a59a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12122,7 +12122,8 @@ M:	Marek Vasut <marek.vasut@gmail.com>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
-T:	git git://github.com/spi-nor/linux.git
+T:	git git://git.infradead.org/linux-mtd.git spi-nor/fixes
+T:	git git://git.infradead.org/l2-mtd.git spi-nor/next
 S:	Maintained
 F:	drivers/mtd/spi-nor/
 F:	include/linux/mtd/spi-nor.h
-- 
GitLab


From 54930dfeb46e978b447af0fb8ab4e181c1bf9d7a Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 22 Jun 2017 16:35:04 -0400
Subject: [PATCH 0356/1958] reiserfs: don't preallocate blocks for extended
 attributes

Most extended attributes will fit in a single block.  More importantly,
we drop the reference to the inode while holding the transaction open
so the preallocated blocks aren't released.  As a result, the inode
may be evicted before it's removed from the transaction's prealloc list
which can cause memory corruption.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index dc198bc64c61d..a6f39fe2b6643 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1128,7 +1128,7 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
 	hint->prealloc_size = 0;
 
 	if (!hint->formatted_node && hint->preallocate) {
-		if (S_ISREG(hint->inode->i_mode)
+		if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
 		    && hint->inode->i_size >=
 		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
 		    preallocmin * hint->inode->i_sb->s_blocksize)
-- 
GitLab


From 08db141b5313ac2f64b844fb5725b8d81744b417 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 22 Jun 2017 16:47:34 -0400
Subject: [PATCH 0357/1958] reiserfs: fix race in prealloc discard

The main loop in __discard_prealloc is protected by the reiserfs write lock
which is dropped across schedules like the BKL it replaced.  The problem is
that it checks the value, calls a routine that schedules, and then adjusts
the state.  As a result, two threads that are calling
reiserfs_prealloc_discard at the same time can race when one calls
reiserfs_free_prealloc_block, the lock is dropped, and the other calls
reiserfs_free_prealloc_block with the same block number.  In the right
circumstances, it can cause the prealloc count to go negative.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/bitmap.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index a6f39fe2b6643..edc8ef78b63fc 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -513,9 +513,17 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 			       "inode has negative prealloc blocks count.");
 #endif
 	while (ei->i_prealloc_count > 0) {
-		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
-		ei->i_prealloc_block++;
+		b_blocknr_t block_to_free;
+
+		/*
+		 * reiserfs_free_prealloc_block can drop the write lock,
+		 * which could allow another caller to free the same block.
+		 * We can protect against it by modifying the prealloc
+		 * state before calling it.
+		 */
+		block_to_free = ei->i_prealloc_block++;
 		ei->i_prealloc_count--;
+		reiserfs_free_prealloc_block(th, inode, block_to_free);
 		dirty = 1;
 	}
 	if (dirty)
-- 
GitLab


From b847de4e5087fc8577c38a697d14fd2a5ce93352 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Fri, 5 May 2017 16:47:46 +0530
Subject: [PATCH 0358/1958] iommu/arm-smmu-v3: Increase CMDQ drain timeout
 value

Waiting for a CMD_SYNC to be processed involves waiting for the command
queue to drain, which can take an awful lot longer than waiting for a
single entry to become available. Consequently, the common timeout value
of 100us has been observed to be too short on some platforms when a
CMD_SYNC is issued into a queued full of TLBI commands.

This patch resolves the issue by using a different (1s) timeout when
waiting for the CMDQ to drain and using a simple back-off mechanism
when polling the cons pointer in the absence of WFE support.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
[will: rewrote commit message and cosmetic changes]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 380969aa60d5a..6a06be7626dbe 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -408,6 +408,7 @@
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
+#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US	1000000 /* 1s! */
 
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
@@ -737,7 +738,13 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
  */
 static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
-	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+	ktime_t timeout;
+	unsigned int delay = 1;
+
+	/* Wait longer if it's queue drain */
+	timeout = ktime_add_us(ktime_get(), drain ?
+					    ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
+					    ARM_SMMU_POLL_TIMEOUT_US);
 
 	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
 		if (ktime_compare(ktime_get(), timeout) > 0)
@@ -747,7 +754,8 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 			wfe();
 		} else {
 			cpu_relax();
-			udelay(1);
+			udelay(delay);
+			delay *= 2;
 		}
 	}
 
-- 
GitLab


From 60ab7a75c8d83049b0e6189b4128247513880b19 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 13 Jun 2017 15:58:30 +0530
Subject: [PATCH 0359/1958] iommu/io-pgtable-arm-v7s: constify dummy_tlb_ops.

File size before:
   text	   data	    bss	    dec	    hex	filename
   6146	     56	      9	   6211	   1843	drivers/iommu/io-pgtable-arm-v7s.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   6170	     24	      9	   6203	   183b	drivers/iommu/io-pgtable-arm-v7s.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 8d6ca28c3e1f1..f8869951610c0 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -749,7 +749,7 @@ static void dummy_tlb_sync(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static struct iommu_gather_ops dummy_tlb_ops = {
+static const struct iommu_gather_ops dummy_tlb_ops = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_add_flush	= dummy_tlb_add_flush,
 	.tlb_sync	= dummy_tlb_sync,
-- 
GitLab


From 84c24379a783c514e5ff7c8fc8a21cf8d64fd05f Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 19 Jun 2017 16:41:56 +0100
Subject: [PATCH 0360/1958] iommu/arm-smmu: Plumb in new ACPI identifiers

Revision C of IORT now allows us to identify ARM MMU-401 and the Cavium
ThunderX implementation. Wire them up so that we can probe these models
once firmware starts using the new codes in place of generic ones, and
so that the appropriate features and quirks get enabled when we do.

For the sake of backports and mitigating sychronisation problems with
the ACPICA headers, we'll carry a backup copy of the new definitions
locally for the short term to make life simpler.

CC: stable@vger.kernel.org # 4.10
Acked-by: Robert Richter <rrichter@cavium.com>
Tested-by: Robert Richter <rrichter@cavium.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7ec30b08b3bdc..7ecd1a0b84192 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -312,6 +312,14 @@ enum arm_smmu_implementation {
 	CAVIUM_SMMUV2,
 };
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
+#define ACPI_IORT_SMMU_CORELINK_MMU401	0x4
+#endif
+#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
+#define ACPI_IORT_SMMU_CAVIUM_THUNDERX	0x5
+#endif
+
 struct arm_smmu_s2cr {
 	struct iommu_group		*group;
 	int				count;
@@ -2073,6 +2081,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
 		smmu->version = ARM_SMMU_V1;
 		smmu->model = GENERIC_SMMU;
 		break;
+	case ACPI_IORT_SMMU_CORELINK_MMU401:
+		smmu->version = ARM_SMMU_V1_64K;
+		smmu->model = GENERIC_SMMU;
+		break;
 	case ACPI_IORT_SMMU_V2:
 		smmu->version = ARM_SMMU_V2;
 		smmu->model = GENERIC_SMMU;
@@ -2081,6 +2093,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
 		smmu->version = ARM_SMMU_V2;
 		smmu->model = ARM_MMU500;
 		break;
+	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
+		smmu->version = ARM_SMMU_V2;
+		smmu->model = CAVIUM_SMMUV2;
+		break;
 	default:
 		ret = -ENODEV;
 	}
-- 
GitLab


From ebdd13c93f8e878afcdba642f48cd1bd85619e2a Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 22 Jun 2017 12:51:00 +0530
Subject: [PATCH 0361/1958] iommu: arm-smmu-v3: make of_device_ids const

of_device_ids are not supposed to change at runtime. All functions
working with of_device_ids provided by <linux/of.h> work with const
of_device_ids. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 6a06be7626dbe..0fd09745822f5 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2776,7 +2776,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct of_device_id arm_smmu_of_match[] = {
+static const struct of_device_id arm_smmu_of_match[] = {
 	{ .compatible = "arm,smmu-v3", },
 	{ },
 };
-- 
GitLab


From 5c2d0218290afa3c335f38583bf4f8e8adad4c76 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 22 Jun 2017 12:57:42 +0530
Subject: [PATCH 0362/1958] iommu: arm-smmu: Handle return of
 iommu_device_register.

iommu_device_register returns an error code and, although it currently
never fails, we should check its return value anyway.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
[will: adjusted to follow arm-smmu.c]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 0fd09745822f5..029fe0cffee76 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2744,6 +2744,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
 
 	ret = iommu_device_register(&smmu->iommu);
+	if (ret) {
+		dev_err(dev, "Failed to register iommu\n");
+		return ret;
+	}
 
 #ifdef CONFIG_PCI
 	if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
-- 
GitLab


From 9db829d2818501f07583542c05d01513b9161e14 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:50 +0100
Subject: [PATCH 0363/1958] iommu/io-pgtable-arm-v7s: Check table PTEs more
 precisely

Whilst we don't support the PXN bit at all, so should never encounter a
level 1 section or supersection PTE with it set, it would still be wise
to check both table type bits to resolve any theoretical ambiguity.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index f8869951610c0..46da7aa7c7d0f 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -92,7 +92,8 @@
 #define ARM_V7S_PTE_TYPE_CONT_PAGE	0x1
 
 #define ARM_V7S_PTE_IS_VALID(pte)	(((pte) & 0x3) != 0)
-#define ARM_V7S_PTE_IS_TABLE(pte, lvl)	(lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE))
+#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \
+	((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE))
 
 /* Page table bits */
 #define ARM_V7S_ATTR_XN(lvl)		BIT(4 * (2 - (lvl)))
-- 
GitLab


From fb3a95795da53d05a4fc5fcdc0d3ec69e7163355 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:51 +0100
Subject: [PATCH 0364/1958] iommu/io-pgtable-arm: Improve split_blk_unmap

The current split_blk_unmap implementation suffers from some inscrutable
pointer trickery for creating the tables to replace the block entry, but
more than that it also suffers from hideous inefficiency. For example,
the most pathological case of unmapping a level 3 page from a level 1
block will allocate 513 lower-level tables to remap the entire block at
page granularity, when only 2 are actually needed (the rest can be
covered by level 2 block entries).

Also, we would like to be able to relax the spinlock requirement in
future, for which the roll-back-and-try-again logic for race resolution
would be pretty hideous under the current paradigm.

Both issues can be resolved most neatly by turning things sideways:
instead of repeatedly recursing into __arm_lpae_map() map to build up an
entire new sub-table depth-first, we can directly replace the block
entry with a next-level table of block/page entries, then repeat by
unmapping at the next level if necessary. With a little refactoring of
some helper functions, the code ends up not much bigger than before, but
considerably easier to follow and to adapt in future.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm.c | 114 +++++++++++++++++++--------------
 1 file changed, 67 insertions(+), 47 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 6e5df5e0a3bdc..dd7477010291b 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -264,19 +264,38 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			    unsigned long iova, size_t size, int lvl,
 			    arm_lpae_iopte *ptep);
 
+static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
+				phys_addr_t paddr, arm_lpae_iopte prot,
+				int lvl, arm_lpae_iopte *ptep)
+{
+	arm_lpae_iopte pte = prot;
+
+	if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+		pte |= ARM_LPAE_PTE_NS;
+
+	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
+		pte |= ARM_LPAE_PTE_TYPE_PAGE;
+	else
+		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
+
+	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
+	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+
+	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+}
+
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 			     unsigned long iova, phys_addr_t paddr,
 			     arm_lpae_iopte prot, int lvl,
 			     arm_lpae_iopte *ptep)
 {
-	arm_lpae_iopte pte = prot;
-	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_lpae_iopte pte = *ptep;
 
-	if (iopte_leaf(*ptep, lvl)) {
+	if (iopte_leaf(pte, lvl)) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
-	} else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) {
+	} else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) {
 		/*
 		 * We need to unmap and free the old table before
 		 * overwriting it with a block entry.
@@ -289,19 +308,22 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 			return -EINVAL;
 	}
 
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-		pte |= ARM_LPAE_PTE_NS;
+	__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+	return 0;
+}
 
-	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
-		pte |= ARM_LPAE_PTE_TYPE_PAGE;
-	else
-		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
+static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
+					     arm_lpae_iopte *ptep,
+					     struct io_pgtable_cfg *cfg)
+{
+	arm_lpae_iopte new;
 
-	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
-	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+	new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+		new |= ARM_LPAE_PTE_NSTABLE;
 
-	__arm_lpae_set_pte(ptep, pte, cfg);
-	return 0;
+	__arm_lpae_set_pte(ptep, new, cfg);
+	return new;
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
@@ -331,10 +353,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		if (!cptep)
 			return -ENOMEM;
 
-		pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
-		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-			pte |= ARM_LPAE_PTE_NSTABLE;
-		__arm_lpae_set_pte(ptep, pte, cfg);
+		arm_lpae_install_table(cptep, ptep, cfg);
 	} else if (!iopte_leaf(pte, lvl)) {
 		cptep = iopte_deref(pte, data);
 	} else {
@@ -452,40 +471,43 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 
 static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 				    unsigned long iova, size_t size,
-				    arm_lpae_iopte prot, int lvl,
-				    arm_lpae_iopte *ptep, size_t blk_size)
+				    arm_lpae_iopte blk_pte, int lvl,
+				    arm_lpae_iopte *ptep)
 {
-	unsigned long blk_start, blk_end;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_lpae_iopte pte, *tablep;
 	phys_addr_t blk_paddr;
-	arm_lpae_iopte table = 0;
+	size_t tablesz = ARM_LPAE_GRANULE(data);
+	size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+	int i, unmap_idx = -1;
+
+	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+		return 0;
 
-	blk_start = iova & ~(blk_size - 1);
-	blk_end = blk_start + blk_size;
-	blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift;
+	tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg);
+	if (!tablep)
+		return 0; /* Bytes unmapped */
 
-	for (; blk_start < blk_end; blk_start += size, blk_paddr += size) {
-		arm_lpae_iopte *tablep;
+	if (size == split_sz)
+		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
+	blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
+	pte = iopte_prot(blk_pte);
+
+	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
 		/* Unmap! */
-		if (blk_start == iova)
+		if (i == unmap_idx)
 			continue;
 
-		/* __arm_lpae_map expects a pointer to the start of the table */
-		tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data);
-		if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl,
-				   tablep) < 0) {
-			if (table) {
-				/* Free the table we allocated */
-				tablep = iopte_deref(table, data);
-				__arm_lpae_free_pgtable(data, lvl + 1, tablep);
-			}
-			return 0; /* Bytes unmapped */
-		}
+		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
 	}
 
-	__arm_lpae_set_pte(ptep, table, &data->iop.cfg);
-	iova &= ~(blk_size - 1);
-	io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
+	arm_lpae_install_table(tablep, ptep, cfg);
+
+	if (unmap_idx < 0)
+		return __arm_lpae_unmap(data, iova, size, lvl, tablep);
+
+	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
 	return size;
 }
 
@@ -495,7 +517,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 {
 	arm_lpae_iopte pte;
 	struct io_pgtable *iop = &data->iop;
-	size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
 	/* Something went horribly wrong and we ran out of page table */
 	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
@@ -507,7 +528,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 		return 0;
 
 	/* If the size matches this level, we're in the right place */
-	if (size == blk_size) {
+	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
 		__arm_lpae_set_pte(ptep, 0, &iop->cfg);
 
 		if (!iopte_leaf(pte, lvl)) {
@@ -527,9 +548,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
 		 */
-		return arm_lpae_split_blk_unmap(data, iova, size,
-						iopte_prot(pte), lvl, ptep,
-						blk_size);
+		return arm_lpae_split_blk_unmap(data, iova, size, pte,
+						lvl + 1, ptep);
 	}
 
 	/* Keep on walkin' */
-- 
GitLab


From b9f1ef30ac2e9942b8628d551f4a21e8cec1415c Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:52 +0100
Subject: [PATCH 0365/1958] iommu/io-pgtable-arm-v7s: Refactor split_blk_unmap

Whilst the short-descriptor format's split_blk_unmap implementation has
no need to be recursive, it followed the pattern of the LPAE version
anyway for the sake of consistency. With the latter now reworked for
both efficiency and future scalability improvements, tweak the former
similarly, not least to make it less obtuse.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 85 ++++++++++++++++--------------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 46da7aa7c7d0f..dc74631322e4e 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -281,6 +281,13 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
 	else if (prot & IOMMU_CACHE)
 		pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;
 
+	pte |= ARM_V7S_PTE_TYPE_PAGE;
+	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
+		pte |= ARM_V7S_ATTR_NS_SECTION;
+
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
+		pte |= ARM_V7S_ATTR_MTK_4GB;
+
 	return pte;
 }
 
@@ -353,7 +360,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 			    int lvl, int num_entries, arm_v7s_iopte *ptep)
 {
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
-	arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
+	arm_v7s_iopte pte;
 	int i;
 
 	for (i = 0; i < num_entries; i++)
@@ -375,13 +382,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 			return -EEXIST;
 		}
 
-	pte |= ARM_V7S_PTE_TYPE_PAGE;
-	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
-		pte |= ARM_V7S_ATTR_NS_SECTION;
-
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
-		pte |= ARM_V7S_ATTR_MTK_4GB;
-
+	pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
 	if (num_entries > 1)
 		pte = arm_v7s_pte_to_cont(pte, lvl);
 
@@ -391,6 +392,20 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 	return 0;
 }
 
+static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
+					   arm_v7s_iopte *ptep,
+					   struct io_pgtable_cfg *cfg)
+{
+	arm_v7s_iopte new;
+
+	new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+		new |= ARM_V7S_ATTR_NS_TABLE;
+
+	__arm_v7s_set_pte(ptep, new, 1, cfg);
+	return new;
+}
+
 static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 			 phys_addr_t paddr, size_t size, int prot,
 			 int lvl, arm_v7s_iopte *ptep)
@@ -418,11 +433,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 		if (!cptep)
 			return -ENOMEM;
 
-		pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE;
-		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-			pte |= ARM_V7S_ATTR_NS_TABLE;
-
-		__arm_v7s_set_pte(ptep, pte, 1, cfg);
+		arm_v7s_install_table(cptep, ptep, cfg);
 	} else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
 		cptep = iopte_deref(pte, lvl);
 	} else {
@@ -503,41 +514,35 @@ static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
 
 static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 				   unsigned long iova, size_t size,
-				   arm_v7s_iopte *ptep)
+				   arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep)
 {
-	unsigned long blk_start, blk_end, blk_size;
-	phys_addr_t blk_paddr;
-	arm_v7s_iopte table = 0;
-	int prot = arm_v7s_pte_to_prot(*ptep, 1);
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_v7s_iopte pte, *tablep;
+	int i, unmap_idx, num_entries, num_ptes;
+
+	tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
+	if (!tablep)
+		return 0; /* Bytes unmapped */
 
-	blk_size = ARM_V7S_BLOCK_SIZE(1);
-	blk_start = iova & ARM_V7S_LVL_MASK(1);
-	blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1);
-	blk_paddr = *ptep & ARM_V7S_LVL_MASK(1);
+	num_ptes = ARM_V7S_PTES_PER_LVL(2);
+	num_entries = size >> ARM_V7S_LVL_SHIFT(2);
+	unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
 
-	for (; blk_start < blk_end; blk_start += size, blk_paddr += size) {
-		arm_v7s_iopte *tablep;
+	pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
+	if (num_entries > 1)
+		pte = arm_v7s_pte_to_cont(pte, 2);
 
+	for (i = 0; i < num_ptes; i += num_entries, pte += size) {
 		/* Unmap! */
-		if (blk_start == iova)
+		if (i == unmap_idx)
 			continue;
 
-		/* __arm_v7s_map expects a pointer to the start of the table */
-		tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1);
-		if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1,
-				  tablep) < 0) {
-			if (table) {
-				/* Free the table we allocated */
-				tablep = iopte_deref(table, 1);
-				__arm_v7s_free_table(tablep, 2, data);
-			}
-			return 0; /* Bytes unmapped */
-		}
+		__arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
 	}
 
-	__arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg);
-	iova &= ~(blk_size - 1);
-	io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
+	arm_v7s_install_table(tablep, ptep, cfg);
+
+	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
 	return size;
 }
 
@@ -594,7 +599,7 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
 		 */
-		return arm_v7s_split_blk_unmap(data, iova, size, ptep);
+		return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
 	}
 
 	/* Keep on walkin' */
-- 
GitLab


From 81b3c25218447c65f93adf08b099a322b6803536 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:53 +0100
Subject: [PATCH 0366/1958] iommu/io-pgtable: Introduce explicit coherency

Once we remove the serialising spinlock, a potential race opens up for
non-coherent IOMMUs whereby a caller of .map() can be sure that cache
maintenance has been performed on their new PTE, but will have no
guarantee that such maintenance for table entries above it has actually
completed (e.g. if another CPU took an interrupt immediately after
writing the table entry, but before initiating the DMA sync).

Handling this race safely will add some potentially non-trivial overhead
to installing a table entry, which we would much rather avoid on
coherent systems where it will be unnecessary, and where we are stirivng
to minimise latency by removing the locking in the first place.

To that end, let's introduce an explicit notion of cache-coherency to
io-pgtable, such that we will be able to avoid penalising IOMMUs which
know enough to know when they are coherent.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c        |  3 +++
 drivers/iommu/arm-smmu.c           |  3 +++
 drivers/iommu/io-pgtable-arm-v7s.c | 17 ++++++++++-------
 drivers/iommu/io-pgtable-arm.c     | 11 ++++++-----
 drivers/iommu/io-pgtable.h         |  6 ++++++
 5 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 029fe0cffee76..d50c8d4b9af9f 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1563,6 +1563,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 		.iommu_dev	= smmu->dev,
 	};
 
+	if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
+		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7ecd1a0b84192..2bc5df8a490f0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1018,6 +1018,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.iommu_dev	= smmu->dev,
 	};
 
+	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
 	smmu_domain->smmu = smmu;
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops) {
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index dc74631322e4e..ec024c75a09e1 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -187,7 +187,8 @@ static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl)
 static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
 				   struct arm_v7s_io_pgtable *data)
 {
-	struct device *dev = data->iop.cfg.iommu_dev;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	struct device *dev = cfg->iommu_dev;
 	dma_addr_t dma;
 	size_t size = ARM_V7S_TABLE_SIZE(lvl);
 	void *table = NULL;
@@ -196,7 +197,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
 		table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
 	else if (lvl == 2)
 		table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
-	if (table && !selftest_running) {
+	if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
 		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, dma))
 			goto out_free;
@@ -225,10 +226,11 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
 static void __arm_v7s_free_table(void *table, int lvl,
 				 struct arm_v7s_io_pgtable *data)
 {
-	struct device *dev = data->iop.cfg.iommu_dev;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	struct device *dev = cfg->iommu_dev;
 	size_t size = ARM_V7S_TABLE_SIZE(lvl);
 
-	if (!selftest_running)
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
 		dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
 				 DMA_TO_DEVICE);
 	if (lvl == 1)
@@ -240,7 +242,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
 			       struct io_pgtable_cfg *cfg)
 {
-	if (selftest_running)
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
 		return;
 
 	dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
@@ -657,7 +659,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_NO_PERMS |
 			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
-			    IO_PGTABLE_QUIRK_ARM_MTK_4GB))
+			    IO_PGTABLE_QUIRK_ARM_MTK_4GB |
+			    IO_PGTABLE_QUIRK_NO_DMA))
 		return NULL;
 
 	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
@@ -774,7 +777,7 @@ static int __init arm_v7s_do_selftests(void)
 		.tlb = &dummy_tlb_ops,
 		.oas = 32,
 		.ias = 32,
-		.quirks = IO_PGTABLE_QUIRK_ARM_NS,
+		.quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
 		.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
 	};
 	unsigned int iova, size, iova_start;
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index dd7477010291b..6334f51912ea7 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -217,7 +217,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 	if (!pages)
 		return NULL;
 
-	if (!selftest_running) {
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
 		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, dma))
 			goto out_free;
@@ -243,7 +243,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 static void __arm_lpae_free_pages(void *pages, size_t size,
 				  struct io_pgtable_cfg *cfg)
 {
-	if (!selftest_running)
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
 		dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
 				 size, DMA_TO_DEVICE);
 	free_pages_exact(pages, size);
@@ -254,7 +254,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 {
 	*ptep = pte;
 
-	if (!selftest_running)
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
 		dma_sync_single_for_device(cfg->iommu_dev,
 					   __arm_lpae_dma_addr(ptep),
 					   sizeof(pte), DMA_TO_DEVICE);
@@ -693,7 +693,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	u64 reg;
 	struct arm_lpae_io_pgtable *data;
 
-	if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS)
+	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
@@ -782,7 +782,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	struct arm_lpae_io_pgtable *data;
 
 	/* The NS quirk doesn't apply at stage 2 */
-	if (cfg->quirks)
+	if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
@@ -1086,6 +1086,7 @@ static int __init arm_lpae_do_selftests(void)
 	struct io_pgtable_cfg cfg = {
 		.tlb = &dummy_tlb_ops,
 		.oas = 48,
+		.quirks = IO_PGTABLE_QUIRK_NO_DMA,
 	};
 
 	for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 969d82cc92ca6..524263a7ae6f5 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -65,11 +65,17 @@ struct io_pgtable_cfg {
 	 *	PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
 	 *	when the SoC is in "4GB mode" and they can only access the high
 	 *	remap of DRAM (0x1_00000000 to 0x1_ffffffff).
+	 *
+	 * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
+	 *	be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
+	 *	software-emulated IOMMU), such that pagetable updates need not
+	 *	be treated as explicit DMA data.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
 	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
 	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
+	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
-- 
GitLab


From 2c3d273eabe8b1ed3b3cffe2c79643b1bf7e2d4a Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:54 +0100
Subject: [PATCH 0367/1958] iommu/io-pgtable-arm: Support lockless operation

For parallel I/O with multiple concurrent threads servicing the same
device (or devices, if several share a domain), serialising page table
updates becomes a massive bottleneck. On reflection, though, we don't
strictly need to do that - for valid IOMMU API usage, there are in fact
only two races that we need to guard against: multiple map requests for
different blocks within the same region, when the intermediate-level
table for that region does not yet exist; and multiple unmaps of
different parts of the same block entry. Both of those are fairly easily
solved by using a cmpxchg to install the new table, such that if we then
find that someone else's table got there first, we can simply free ours
and continue.

Make the requisite changes such that we can withstand being called
without the caller maintaining a lock. In theory, this opens up a few
corners in which wildly misbehaving callers making nonsensical
overlapping requests might lead to crashes instead of just unpredictable
results, but correct code really does not deserve to pay a significant
performance cost for the sake of masking bugs in theoretical broken code.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm.c | 72 +++++++++++++++++++++++++++-------
 1 file changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 6334f51912ea7..52700fa958c20 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -20,6 +20,7 @@
 
 #define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt
 
+#include <linux/atomic.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
@@ -99,6 +100,8 @@
 #define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
 #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
 					 ARM_LPAE_PTE_ATTR_HI_MASK)
+/* Software bit for solving coherency races */
+#define ARM_LPAE_PTE_SW_SYNC		(((arm_lpae_iopte)1) << 55)
 
 /* Stage-1 PTE */
 #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
@@ -249,15 +252,20 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
 	free_pages_exact(pages, size);
 }
 
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+				struct io_pgtable_cfg *cfg)
+{
+	dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
+				   sizeof(*ptep), DMA_TO_DEVICE);
+}
+
 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 			       struct io_pgtable_cfg *cfg)
 {
 	*ptep = pte;
 
 	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
-		dma_sync_single_for_device(cfg->iommu_dev,
-					   __arm_lpae_dma_addr(ptep),
-					   sizeof(pte), DMA_TO_DEVICE);
+		__arm_lpae_sync_pte(ptep, cfg);
 }
 
 static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -314,16 +322,30 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 
 static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 					     arm_lpae_iopte *ptep,
+					     arm_lpae_iopte curr,
 					     struct io_pgtable_cfg *cfg)
 {
-	arm_lpae_iopte new;
+	arm_lpae_iopte old, new;
 
 	new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 		new |= ARM_LPAE_PTE_NSTABLE;
 
-	__arm_lpae_set_pte(ptep, new, cfg);
-	return new;
+	/* Ensure the table itself is visible before its PTE can be */
+	wmb();
+
+	old = cmpxchg64_relaxed(ptep, curr, new);
+
+	if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) ||
+	    (old & ARM_LPAE_PTE_SW_SYNC))
+		return old;
+
+	/* Even if it's not ours, there's no point waiting; just kick it */
+	__arm_lpae_sync_pte(ptep, cfg);
+	if (old == curr)
+		WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
+
+	return old;
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
@@ -332,6 +354,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 {
 	arm_lpae_iopte *cptep, pte;
 	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+	size_t tblsz = ARM_LPAE_GRANULE(data);
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
 	/* Find our entry at the current level */
@@ -346,17 +369,23 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		return -EINVAL;
 
 	/* Grab a pointer to the next level */
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (!pte) {
-		cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
-					       GFP_ATOMIC, cfg);
+		cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg);
 		if (!cptep)
 			return -ENOMEM;
 
-		arm_lpae_install_table(cptep, ptep, cfg);
-	} else if (!iopte_leaf(pte, lvl)) {
+		pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
+		if (pte)
+			__arm_lpae_free_pages(cptep, tblsz, cfg);
+	} else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) &&
+		   !(pte & ARM_LPAE_PTE_SW_SYNC)) {
+		__arm_lpae_sync_pte(ptep, cfg);
+	}
+
+	if (pte && !iopte_leaf(pte, lvl)) {
 		cptep = iopte_deref(pte, data);
-	} else {
+	} else if (pte) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
@@ -502,7 +531,19 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
 	}
 
-	arm_lpae_install_table(tablep, ptep, cfg);
+	pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
+	if (pte != blk_pte) {
+		__arm_lpae_free_pages(tablep, tablesz, cfg);
+		/*
+		 * We may race against someone unmapping another part of this
+		 * block, but anything else is invalid. We can't misinterpret
+		 * a page entry here since we're never at the last level.
+		 */
+		if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE)
+			return 0;
+
+		tablep = iopte_deref(pte, data);
+	}
 
 	if (unmap_idx < 0)
 		return __arm_lpae_unmap(data, iova, size, lvl, tablep);
@@ -523,7 +564,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 		return 0;
 
 	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (WARN_ON(!pte))
 		return 0;
 
@@ -585,7 +626,8 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 			return 0;
 
 		/* Grab the IOPTE we're interested in */
-		pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data));
+		ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+		pte = READ_ONCE(*ptep);
 
 		/* Valid entry? */
 		if (!pte)
-- 
GitLab


From 119ff305b02793dc31eb1e921b85925ce10dc0a1 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:55 +0100
Subject: [PATCH 0368/1958] iommu/io-pgtable-arm-v7s: Support lockless
 operation

Mirroring the LPAE implementation, rework the v7s code to be robust
against concurrent operations. The same two potential races exist, and
are solved in the same manner, with the fixed 2-level structure making
life ever so slightly simpler.

What complicates matters compared to LPAE, however, is large page
entries, since we can't update a block of 16 PTEs atomically, nor assume
available software bits to do clever things with. As most users are
never likely to do partial unmaps anyway (due to DMA API rules), it
doesn't seem unreasonable for this case to remain behind a serialising
lock; we just pull said lock down into the bowels of the implementation
so it's well out of the way of the normal call paths.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 84 ++++++++++++++++++++++--------
 1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index ec024c75a09e1..6906294575656 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -32,6 +32,7 @@
 
 #define pr_fmt(fmt)	"arm-v7s io-pgtable: " fmt
 
+#include <linux/atomic.h>
 #include <linux/dma-mapping.h>
 #include <linux/gfp.h>
 #include <linux/iommu.h>
@@ -39,6 +40,7 @@
 #include <linux/kmemleak.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
@@ -168,6 +170,7 @@ struct arm_v7s_io_pgtable {
 
 	arm_v7s_iopte		*pgd;
 	struct kmem_cache	*l2_tables;
+	spinlock_t		split_lock;
 };
 
 static dma_addr_t __arm_v7s_dma_addr(void *pages)
@@ -396,16 +399,22 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 
 static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
 					   arm_v7s_iopte *ptep,
+					   arm_v7s_iopte curr,
 					   struct io_pgtable_cfg *cfg)
 {
-	arm_v7s_iopte new;
+	arm_v7s_iopte old, new;
 
 	new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 		new |= ARM_V7S_ATTR_NS_TABLE;
 
-	__arm_v7s_set_pte(ptep, new, 1, cfg);
-	return new;
+	/* Ensure the table itself is visible before its PTE can be */
+	wmb();
+
+	old = cmpxchg_relaxed(ptep, curr, new);
+	__arm_v7s_pte_sync(ptep, 1, cfg);
+
+	return old;
 }
 
 static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
@@ -429,16 +438,23 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 		return -EINVAL;
 
 	/* Grab a pointer to the next level */
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (!pte) {
 		cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
 		if (!cptep)
 			return -ENOMEM;
 
-		arm_v7s_install_table(cptep, ptep, cfg);
-	} else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
-		cptep = iopte_deref(pte, lvl);
+		pte = arm_v7s_install_table(cptep, ptep, 0, cfg);
+		if (pte)
+			__arm_v7s_free_table(cptep, lvl + 1, data);
 	} else {
+		/* We've no easy way of knowing if it's synced yet, so... */
+		__arm_v7s_pte_sync(ptep, 1, cfg);
+	}
+
+	if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
+		cptep = iopte_deref(pte, lvl);
+	} else if (pte) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
@@ -491,27 +507,31 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 	kfree(data);
 }
 
-static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
-			       unsigned long iova, int idx, int lvl,
-			       arm_v7s_iopte *ptep)
+static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
+					unsigned long iova, int idx, int lvl,
+					arm_v7s_iopte *ptep)
 {
 	struct io_pgtable *iop = &data->iop;
 	arm_v7s_iopte pte;
 	size_t size = ARM_V7S_BLOCK_SIZE(lvl);
 	int i;
 
+	/* Check that we didn't lose a race to get the lock */
+	pte = *ptep;
+	if (!arm_v7s_pte_is_cont(pte, lvl))
+		return pte;
+
 	ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
-	pte = arm_v7s_cont_to_pte(*ptep, lvl);
-	for (i = 0; i < ARM_V7S_CONT_PAGES; i++) {
-		ptep[i] = pte;
-		pte += size;
-	}
+	pte = arm_v7s_cont_to_pte(pte, lvl);
+	for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
+		ptep[i] = pte + i * size;
 
 	__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
 
 	size *= ARM_V7S_CONT_PAGES;
 	io_pgtable_tlb_add_flush(iop, iova, size, size, true);
 	io_pgtable_tlb_sync(iop);
+	return pte;
 }
 
 static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
@@ -542,7 +562,16 @@ static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 		__arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
 	}
 
-	arm_v7s_install_table(tablep, ptep, cfg);
+	pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
+	if (pte != blk_pte) {
+		__arm_v7s_free_table(tablep, 2, data);
+
+		if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
+			return 0;
+
+		tablep = iopte_deref(pte, 1);
+		return __arm_v7s_unmap(data, iova, size, 2, tablep);
+	}
 
 	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
 	return size;
@@ -563,17 +592,28 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 	idx = ARM_V7S_LVL_IDX(iova, lvl);
 	ptep += idx;
 	do {
-		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i])))
+		pte[i] = READ_ONCE(ptep[i]);
+		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i])))
 			return 0;
-		pte[i] = ptep[i];
 	} while (++i < num_entries);
 
 	/*
 	 * If we've hit a contiguous 'large page' entry at this level, it
 	 * needs splitting first, unless we're unmapping the whole lot.
+	 *
+	 * For splitting, we can't rewrite 16 PTEs atomically, and since we
+	 * can't necessarily assume TEX remap we don't have a software bit to
+	 * mark live entries being split. In practice (i.e. DMA API code), we
+	 * will never be splitting large pages anyway, so just wrap this edge
+	 * case in a lock for the sake of correctness and be done with it.
 	 */
-	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl))
-		arm_v7s_split_cont(data, iova, idx, lvl, ptep);
+	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&data->split_lock, flags);
+		pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
+		spin_unlock_irqrestore(&data->split_lock, flags);
+	}
 
 	/* If the size matches this level, we're in the right place */
 	if (num_entries) {
@@ -631,7 +671,8 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
 	u32 mask;
 
 	do {
-		pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)];
+		ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
+		pte = READ_ONCE(*ptep);
 		ptep = iopte_deref(pte, lvl);
 	} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
 
@@ -672,6 +713,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 	if (!data)
 		return NULL;
 
+	spin_lock_init(&data->split_lock);
 	data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
 					    ARM_V7S_TABLE_SIZE(2),
 					    ARM_V7S_TABLE_SIZE(2),
-- 
GitLab


From 523d7423e21bfe78452a640878d1de189ac45d91 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:56 +0100
Subject: [PATCH 0369/1958] iommu/arm-smmu: Remove io-pgtable spinlock

With the io-pgtable code now robust against (valid) races, we no longer
need to serialise all operations with a lock. This might make broken
callers who issue concurrent operations on overlapping addresses go even
more wrong than before, but hey, they already had little hope of useful
or deterministic results.

We do however still have to keep a lock around to serialise the ATS1*
translation ops, as parallel iova_to_phys() calls could lead to
unpredictable hardware behaviour otherwise.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 45 +++++++++++++---------------------------
 1 file changed, 14 insertions(+), 31 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 2bc5df8a490f0..bc89b4d6c043d 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -433,10 +433,10 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
 	struct io_pgtable_ops		*pgtbl_ops;
-	spinlock_t			pgtbl_lock;
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
 	struct mutex			init_mutex; /* Protects smmu pointer */
+	spinlock_t			cb_lock; /* Serialises ATS1* ops */
 	struct iommu_domain		domain;
 };
 
@@ -1113,7 +1113,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	}
 
 	mutex_init(&smmu_domain->init_mutex);
-	spin_lock_init(&smmu_domain->pgtbl_lock);
+	spin_lock_init(&smmu_domain->cb_lock);
 
 	return &smmu_domain->domain;
 }
@@ -1391,35 +1391,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
-	int ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return -ENODEV;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->map(ops, iova, paddr, size, prot);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->map(ops, iova, paddr, size, prot);
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 			     size_t size)
 {
-	size_t ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->unmap(ops, iova, size);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->unmap(ops, iova, size);
 }
 
 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1433,10 +1421,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	void __iomem *cb_base;
 	u32 tmp;
 	u64 phys;
-	unsigned long va;
+	unsigned long va, flags;
 
 	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 
+	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
 	/* ATS1 registers can only be written atomically */
 	va = iova & ~0xfffUL;
 	if (smmu->version == ARM_SMMU_V2)
@@ -1446,6 +1435,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 
 	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
 				      !(tmp & ATSR_ACTIVE), 5, 50)) {
+		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
 		dev_err(dev,
 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
 			&iova);
@@ -1453,6 +1443,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	}
 
 	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
+	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
 	if (phys & CB_PAR_F) {
 		dev_err(dev, "translation fault!\n");
 		dev_err(dev, "PAR = 0x%llx\n", phys);
@@ -1465,10 +1456,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
 					dma_addr_t iova)
 {
-	phys_addr_t ret;
-	unsigned long flags;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
 
 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
 		return iova;
@@ -1476,17 +1465,11 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
-			smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		ret = arm_smmu_iova_to_phys_hard(domain, iova);
-	} else {
-		ret = ops->iova_to_phys(ops, iova);
-	}
-
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
+			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+		return arm_smmu_iova_to_phys_hard(domain, iova);
 
-	return ret;
+	return ops->iova_to_phys(ops, iova);
 }
 
 static bool arm_smmu_capable(enum iommu_cap cap)
-- 
GitLab


From 58188afeb727e0f73706f1460707bd3ba6ccc221 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Jun 2017 16:53:57 +0100
Subject: [PATCH 0370/1958] iommu/arm-smmu-v3: Remove io-pgtable spinlock

As for SMMUv2, take advantage of io-pgtable's newfound tolerance for
concurrency. Unfortunately in this case the command queue lock remains a
point of serialisation for the unmap path, but there may be a little
more we can do to ameliorate that in future.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 33 ++++++---------------------------
 1 file changed, 6 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index d50c8d4b9af9f..884b1a49a52a2 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -646,7 +646,6 @@ struct arm_smmu_domain {
 	struct mutex			init_mutex; /* Protects smmu pointer */
 
 	struct io_pgtable_ops		*pgtbl_ops;
-	spinlock_t			pgtbl_lock;
 
 	enum arm_smmu_domain_stage	stage;
 	union {
@@ -1414,7 +1413,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	}
 
 	mutex_init(&smmu_domain->init_mutex);
-	spin_lock_init(&smmu_domain->pgtbl_lock);
 	return &smmu_domain->domain;
 }
 
@@ -1686,44 +1684,29 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
-	int ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return -ENODEV;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->map(ops, iova, paddr, size, prot);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->map(ops, iova, paddr, size, prot);
 }
 
 static size_t
 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
-	size_t ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->unmap(ops, iova, size);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->unmap(ops, iova, size);
 }
 
 static phys_addr_t
 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
-	phys_addr_t ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
 		return iova;
@@ -1731,11 +1714,7 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->iova_to_phys(ops, iova);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-
-	return ret;
+	return ops->iova_to_phys(ops, iova);
 }
 
 static struct platform_driver arm_smmu_driver;
-- 
GitLab


From c1004803b40596c1aabbbc78a6b1b33e4dfd96c6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 23 Jun 2017 11:45:57 +0100
Subject: [PATCH 0371/1958] iommu/io-pgtable: depend on !GENERIC_ATOMIC64 when
 using COMPILE_TEST with LPAE

The LPAE/ARMv8 page table format relies on the ability to read and write
64-bit page table entries in an atomic fashion. With the move to a lockless
implementation, we also need support for cmpxchg64 to resolve races when
installing table entries concurrently.

Unfortunately, not all architectures support cmpxchg64, so the code can
fail to compiler when building for these architectures using COMPILE_TEST.
Rather than disable COMPILE_TEST altogether, instead check that
GENERIC_ATOMIC64 is not selected, which is a reasonable indication that
the architecture has support for 64-bit cmpxchg.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6ee3a25ae7318..c88cfa7522b27 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE
 config IOMMU_IO_PGTABLE_LPAE
 	bool "ARMv7/v8 Long Descriptor Format"
 	select IOMMU_IO_PGTABLE
-	depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
+	depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64))
 	help
 	  Enable support for the ARM long descriptor pagetable format.
 	  This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
-- 
GitLab


From 77f3445866c39d8866b31d8d9fa47c7c20938e05 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 23 Jun 2017 12:02:38 +0100
Subject: [PATCH 0372/1958] iommu/io-pgtable-arm: Use dma_wmb() instead of
 wmb() when publishing table

When writing a new table entry, we must ensure that the contents of the
table is made visible to the SMMU page table walker before the updated
table entry itself.

This is currently achieved using wmb(), which expands to an expensive and
unnecessary DSB instruction. Ideally, we'd just use cmpxchg64_release when
writing the table entry, but this doesn't have memory ordering semantics
on !SMP systems.

Instead, use dma_wmb(), which emits DMB OSHST. Strictly speaking, this
does more than we require (since it targets the outer-shareable domain),
but it's likely to be significantly faster than the DSB approach.

Reported-by: Linu Cherian <linu.cherian@cavium.com>
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 8 ++++++--
 drivers/iommu/io-pgtable-arm.c     | 8 ++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 6906294575656..af330f513653d 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -408,8 +408,12 @@ static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 		new |= ARM_V7S_ATTR_NS_TABLE;
 
-	/* Ensure the table itself is visible before its PTE can be */
-	wmb();
+	/*
+	 * Ensure the table itself is visible before its PTE can be.
+	 * Whilst we could get away with cmpxchg64_release below, this
+	 * doesn't have any ordering semantics when !CONFIG_SMP.
+	 */
+	dma_wmb();
 
 	old = cmpxchg_relaxed(ptep, curr, new);
 	__arm_v7s_pte_sync(ptep, 1, cfg);
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 52700fa958c20..b182039862c50 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -331,8 +331,12 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 		new |= ARM_LPAE_PTE_NSTABLE;
 
-	/* Ensure the table itself is visible before its PTE can be */
-	wmb();
+	/*
+	 * Ensure the table itself is visible before its PTE can be.
+	 * Whilst we could get away with cmpxchg64_release below, this
+	 * doesn't have any ordering semantics when !CONFIG_SMP.
+	 */
+	dma_wmb();
 
 	old = cmpxchg64_relaxed(ptep, curr, new);
 
-- 
GitLab


From 12275bf0a4deb690a5dc9903d207060737b7bae6 Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@cavium.com>
Date: Thu, 22 Jun 2017 21:20:54 +0200
Subject: [PATCH 0373/1958] iommu/arm-smmu-v3, acpi: Add temporary Cavium
 SMMU-V3 IORT model number definitions

The model number is already defined in acpica and we are actually
waiting for the acpi maintainers to include it:

 https://github.com/acpica/acpica/commit/d00a4eb86e64

Adding those temporary definitions until the change makes it into
include/acpi/actbl2.h. Once that is done this patch can be reverted.

Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Robert Richter <rrichter@cavium.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/arm64/iort.c   | 5 +++++
 drivers/iommu/arm-smmu-v3.c | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index c5fecf97ee2f5..da225710b009e 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -31,6 +31,11 @@
 #define IORT_IOMMU_TYPE		((1 << ACPI_IORT_NODE_SMMU) |	\
 				(1 << ACPI_IORT_NODE_SMMU_V3))
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX		0x2
+#endif
+
 struct iort_its_msi_chip {
 	struct list_head	list;
 	struct fwnode_handle	*fw_node;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 884b1a49a52a2..da481d53e09b9 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -413,6 +413,11 @@
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX		0x2
+#endif
+
 static bool disable_bypass;
 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
-- 
GitLab


From 403e8c7c5bcaff3291a2c7012fe80f707a854d10 Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Thu, 22 Jun 2017 17:35:36 +0530
Subject: [PATCH 0374/1958] ACPI/IORT: Fixup SMMUv3 resource size for Cavium
 ThunderX2 SMMUv3 model

Cavium ThunderX2 implementation doesn't support second page in SMMU
register space. Hence, resource size is set as 64k for this model.

Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Geetha Sowjanya <geethasowjanya.akula@cavium.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/acpi/arm64/iort.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index da225710b009e..a8ebda9f7e974 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -833,6 +833,18 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node)
 	return num_res;
 }
 
+static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu)
+{
+	/*
+	 * Override the size, for Cavium ThunderX2 implementation
+	 * which doesn't support the page 1 SMMU register space.
+	 */
+	if (smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+		return SZ_64K;
+
+	return SZ_128K;
+}
+
 static void __init arm_smmu_v3_init_resources(struct resource *res,
 					      struct acpi_iort_node *node)
 {
@@ -843,7 +855,8 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
 	smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 
 	res[num_res].start = smmu->base_address;
-	res[num_res].end = smmu->base_address + SZ_128K - 1;
+	res[num_res].end = smmu->base_address +
+				arm_smmu_v3_resource_size(smmu) - 1;
 	res[num_res].flags = IORESOURCE_MEM;
 
 	num_res++;
-- 
GitLab


From e5b829de053d9994dfc8652ce558e90e3406c578 Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Thu, 22 Jun 2017 17:35:37 +0530
Subject: [PATCH 0375/1958] iommu/arm-smmu-v3: Add workaround for Cavium
 ThunderX2 erratum #74

Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
and PAGE0_REGS_ONLY option is enabled as an errata workaround.
This option when turned on, replaces all page 1 offsets used for
EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.

SMMU resource size checks are now based on SMMU option PAGE0_REGS_ONLY,
since resource size can be either 64k/128k.
For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
platform_get_resource call, so that SMMU options are set beforehand.

Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Geetha Sowjanya <geethasowjanya.akula@cavium.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 Documentation/arm64/silicon-errata.txt        |  1 +
 .../devicetree/bindings/iommu/arm,smmu-v3.txt |  6 ++
 drivers/iommu/arm-smmu-v3.c                   | 68 ++++++++++++++-----
 3 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 10f2dddbf4494..4693a328947a5 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -62,6 +62,7 @@ stable kernels.
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
 | Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #74             | N/A                         |
 |                |                 |                 |                             |
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 |                |                 |                 |                             |
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
index be57550e14e48..e7855cf6038eb 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
@@ -49,6 +49,12 @@ the PCIe specification.
 - hisilicon,broken-prefetch-cmd
                     : Avoid sending CMD_PREFETCH_* commands to the SMMU.
 
+- cavium,cn9900-broken-page1-regspace
+                    : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
+		      PRIQ_PROD/CONS register access with page 0 offsets.
+		      Set for Cavium ThunderX2 silicon that doesn't support
+		      SMMU page1 register space.
+
 ** Example
 
         smmu@2b400000 {
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index da481d53e09b9..2d5b48b4260ae 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -603,6 +603,7 @@ struct arm_smmu_device {
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
+#define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
 	u32				options;
 
 	struct arm_smmu_cmdq		cmdq;
@@ -668,9 +669,20 @@ struct arm_smmu_option_prop {
 
 static struct arm_smmu_option_prop arm_smmu_options[] = {
 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
+	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
 	{ 0, NULL},
 };
 
+static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
+						 struct arm_smmu_device *smmu)
+{
+	if ((offset > SZ_64K) &&
+	    (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
+		offset -= SZ_64K;
+
+	return smmu->base + offset;
+}
+
 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 {
 	return container_of(dom, struct arm_smmu_domain, domain);
@@ -1956,8 +1968,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 		return -ENOMEM;
 	}
 
-	q->prod_reg	= smmu->base + prod_off;
-	q->cons_reg	= smmu->base + cons_off;
+	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
+	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
 	q->ent_dwords	= dwords;
 
 	q->q_base  = Q_BASE_RWA;
@@ -2358,8 +2370,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
 	/* Event queue */
 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
-	writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
-	writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
+	writel_relaxed(smmu->evtq.q.prod,
+		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
+	writel_relaxed(smmu->evtq.q.cons,
+		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
 
 	enables |= CR0_EVTQEN;
 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2374,9 +2388,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 		writeq_relaxed(smmu->priq.q.q_base,
 			       smmu->base + ARM_SMMU_PRIQ_BASE);
 		writel_relaxed(smmu->priq.q.prod,
-			       smmu->base + ARM_SMMU_PRIQ_PROD);
+			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
 		writel_relaxed(smmu->priq.q.cons,
-			       smmu->base + ARM_SMMU_PRIQ_CONS);
+			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
 
 		enables |= CR0_PRIQEN;
 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2600,6 +2614,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 }
 
 #ifdef CONFIG_ACPI
+static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
+{
+	if (model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
+
+	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
+}
+
 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
 				      struct arm_smmu_device *smmu)
 {
@@ -2612,6 +2634,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
 	/* Retrieve SMMUv3 specific data */
 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 
+	acpi_smmu_get_options(iort_smmu->model, smmu);
+
 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
 
@@ -2647,6 +2671,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
 	return ret;
 }
 
+static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
+{
+	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
+		return SZ_64K;
+	else
+		return SZ_128K;
+}
+
 static int arm_smmu_device_probe(struct platform_device *pdev)
 {
 	int irq, ret;
@@ -2663,9 +2695,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	}
 	smmu->dev = dev;
 
+	if (dev->of_node) {
+		ret = arm_smmu_device_dt_probe(pdev, smmu);
+	} else {
+		ret = arm_smmu_device_acpi_probe(pdev, smmu);
+		if (ret == -ENODEV)
+			return ret;
+	}
+
+	/* Set bypass mode according to firmware probing result */
+	bypass = !!ret;
+
 	/* Base address */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (resource_size(res) + 1 < SZ_128K) {
+	if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
 		dev_err(dev, "MMIO region too small (%pr)\n", res);
 		return -EINVAL;
 	}
@@ -2692,17 +2735,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	if (irq > 0)
 		smmu->gerr_irq = irq;
 
-	if (dev->of_node) {
-		ret = arm_smmu_device_dt_probe(pdev, smmu);
-	} else {
-		ret = arm_smmu_device_acpi_probe(pdev, smmu);
-		if (ret == -ENODEV)
-			return ret;
-	}
-
-	/* Set bypass mode according to firmware probing result */
-	bypass = !!ret;
-
 	/* Probe the h/w */
 	ret = arm_smmu_device_hw_probe(smmu);
 	if (ret)
-- 
GitLab


From 99caf177f6fd3e67575f6ce05b36e8e041bcef60 Mon Sep 17 00:00:00 2001
From: shameer <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 17 May 2017 10:12:05 +0100
Subject: [PATCH 0376/1958] iommu/arm-smmu-v3: Enable ACPI based HiSilicon
 CMD_PREFETCH quirk(erratum 161010701)

HiSilicon SMMUv3 on Hip06/Hip07 platforms doesn't support CMD_PREFETCH
command. The dt based support for this quirk is already present in the
driver(hisilicon,broken-prefetch-cmd). This adds ACPI support for the
quirk using the IORT smmu model number.

Signed-off-by: shameer <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: hanjun <guohanjun@huawei.com>
[will: rewrote patch]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 Documentation/arm64/silicon-errata.txt |  1 +
 drivers/iommu/arm-smmu-v3.c            | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 4693a328947a5..ef4e43590685d 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -67,6 +67,7 @@ stable kernels.
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 |                |                 |                 |                             |
 | Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
+| Hisilicon      | Hip0{6,7}       | #161010701      | N/A                         |
 |                |                 |                 |                             |
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 2d5b48b4260ae..81fc1b5c91ee1 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -414,6 +414,10 @@
 #define MSI_IOVA_LENGTH			0x100000
 
 /* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_HISILICON_HI161X
+#define ACPI_IORT_SMMU_HISILICON_HI161X		0x1
+#endif
+
 #ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
 #define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX		0x2
 #endif
@@ -2616,8 +2620,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 #ifdef CONFIG_ACPI
 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
 {
-	if (model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+	switch (model) {
+	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
+		break;
+	case ACPI_IORT_SMMU_HISILICON_HI161X:
+		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
+		break;
+	}
 
 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
 }
-- 
GitLab


From f935448acf462c26142e8b04f1c8829b28d3b9d8 Mon Sep 17 00:00:00 2001
From: Geetha Sowjanya <geethasowjanya.akula@cavium.com>
Date: Fri, 23 Jun 2017 19:04:36 +0530
Subject: [PATCH 0377/1958] iommu/arm-smmu-v3: Add workaround for Cavium
 ThunderX2 erratum #126

Cavium ThunderX2 SMMU doesn't support MSI and also doesn't have unique irq
lines for gerror, eventq and cmdq-sync.

New named irq "combined" is set as a errata workaround, which allows to
share the irq line by register single irq handler for all the interrupts.

Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Geetha sowjanya <gakula@caviumnetworks.com>
[will: reworked irq equality checking and added SPI check]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 Documentation/arm64/silicon-errata.txt        |  1 +
 .../devicetree/bindings/iommu/arm,smmu-v3.txt |  6 ++
 drivers/acpi/arm64/iort.c                     | 63 +++++++++----
 drivers/iommu/arm-smmu-v3.c                   | 94 ++++++++++++++-----
 4 files changed, 121 insertions(+), 43 deletions(-)

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index ef4e43590685d..8564795257760 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -63,6 +63,7 @@ stable kernels.
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
 | Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
 | Cavium         | ThunderX2 SMMUv3| #74             | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #126            | N/A                         |
 |                |                 |                 |                             |
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 |                |                 |                 |                             |
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
index e7855cf6038eb..c9abbf3e4f682 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
@@ -26,6 +26,12 @@ the PCIe specification.
                       * "priq"      - PRI Queue not empty
                       * "cmdq-sync" - CMD_SYNC complete
                       * "gerror"    - Global Error activated
+                      * "combined"  - The combined interrupt is optional,
+				      and should only be provided if the
+				      hardware supports just a single,
+				      combined interrupt line.
+				      If provided, then the combined interrupt
+				      will be used in preference to any others.
 
 - #iommu-cells      : See the generic IOMMU binding described in
                         devicetree/bindings/pci/pci-iommu.txt
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a8ebda9f7e974..83d65d96676f1 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -833,6 +833,24 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node)
 	return num_res;
 }
 
+static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu)
+{
+	/*
+	 * Cavium ThunderX2 implementation doesn't not support unique
+	 * irq line. Use single irq line for all the SMMUv3 interrupts.
+	 */
+	if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+		return false;
+
+	/*
+	 * ThunderX2 doesn't support MSIs from the SMMU, so we're checking
+	 * SPI numbers here.
+	 */
+	return smmu->event_gsiv == smmu->pri_gsiv &&
+	       smmu->event_gsiv == smmu->gerr_gsiv &&
+	       smmu->event_gsiv == smmu->sync_gsiv;
+}
+
 static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu)
 {
 	/*
@@ -860,26 +878,33 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
 	res[num_res].flags = IORESOURCE_MEM;
 
 	num_res++;
+	if (arm_smmu_v3_is_combined_irq(smmu)) {
+		if (smmu->event_gsiv)
+			acpi_iort_register_irq(smmu->event_gsiv, "combined",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+	} else {
 
-	if (smmu->event_gsiv)
-		acpi_iort_register_irq(smmu->event_gsiv, "eventq",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
-
-	if (smmu->pri_gsiv)
-		acpi_iort_register_irq(smmu->pri_gsiv, "priq",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
-
-	if (smmu->gerr_gsiv)
-		acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
-
-	if (smmu->sync_gsiv)
-		acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
+		if (smmu->event_gsiv)
+			acpi_iort_register_irq(smmu->event_gsiv, "eventq",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+
+		if (smmu->pri_gsiv)
+			acpi_iort_register_irq(smmu->pri_gsiv, "priq",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+
+		if (smmu->gerr_gsiv)
+			acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+
+		if (smmu->sync_gsiv)
+			acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+	}
 }
 
 static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 81fc1b5c91ee1..568c400eeaed8 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -615,6 +615,7 @@ struct arm_smmu_device {
 	struct arm_smmu_priq		priq;
 
 	int				gerr_irq;
+	int				combined_irq;
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -1330,6 +1331,24 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
+{
+	struct arm_smmu_device *smmu = dev;
+
+	arm_smmu_evtq_thread(irq, dev);
+	if (smmu->features & ARM_SMMU_FEAT_PRI)
+		arm_smmu_priq_thread(irq, dev);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
+{
+	arm_smmu_gerror_handler(irq, dev);
+	arm_smmu_cmdq_sync_handler(irq, dev);
+	return IRQ_WAKE_THREAD;
+}
+
 /* IO_PGTABLE API */
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
@@ -2229,18 +2248,9 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
 	devm_add_action(dev, arm_smmu_free_msis, dev);
 }
 
-static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 {
-	int ret, irq;
-	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
-
-	/* Disable IRQs first */
-	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
-				      ARM_SMMU_IRQ_CTRLACK);
-	if (ret) {
-		dev_err(smmu->dev, "failed to disable irqs\n");
-		return ret;
-	}
+	int irq, ret;
 
 	arm_smmu_setup_msis(smmu);
 
@@ -2283,10 +2293,41 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 			if (ret < 0)
 				dev_warn(smmu->dev,
 					 "failed to enable priq irq\n");
-			else
-				irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
 		}
 	}
+}
+
+static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+{
+	int ret, irq;
+	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
+
+	/* Disable IRQs first */
+	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
+				      ARM_SMMU_IRQ_CTRLACK);
+	if (ret) {
+		dev_err(smmu->dev, "failed to disable irqs\n");
+		return ret;
+	}
+
+	irq = smmu->combined_irq;
+	if (irq) {
+		/*
+		 * Cavium ThunderX2 implementation doesn't not support unique
+		 * irq lines. Use single irq line for all the SMMUv3 interrupts.
+		 */
+		ret = devm_request_threaded_irq(smmu->dev, irq,
+					arm_smmu_combined_irq_handler,
+					arm_smmu_combined_irq_thread,
+					IRQF_ONESHOT,
+					"arm-smmu-v3-combined-irq", smmu);
+		if (ret < 0)
+			dev_warn(smmu->dev, "failed to enable combined irq\n");
+	} else
+		arm_smmu_setup_unique_irqs(smmu);
+
+	if (smmu->features & ARM_SMMU_FEAT_PRI)
+		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
 
 	/* Enable interrupt generation on the SMMU */
 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
@@ -2729,22 +2770,27 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 		return PTR_ERR(smmu->base);
 
 	/* Interrupt lines */
-	irq = platform_get_irq_byname(pdev, "eventq");
-	if (irq > 0)
-		smmu->evtq.q.irq = irq;
 
-	irq = platform_get_irq_byname(pdev, "priq");
+	irq = platform_get_irq_byname(pdev, "combined");
 	if (irq > 0)
-		smmu->priq.q.irq = irq;
+		smmu->combined_irq = irq;
+	else {
+		irq = platform_get_irq_byname(pdev, "eventq");
+		if (irq > 0)
+			smmu->evtq.q.irq = irq;
 
-	irq = platform_get_irq_byname(pdev, "cmdq-sync");
-	if (irq > 0)
-		smmu->cmdq.q.irq = irq;
+		irq = platform_get_irq_byname(pdev, "priq");
+		if (irq > 0)
+			smmu->priq.q.irq = irq;
 
-	irq = platform_get_irq_byname(pdev, "gerror");
-	if (irq > 0)
-		smmu->gerr_irq = irq;
+		irq = platform_get_irq_byname(pdev, "cmdq-sync");
+		if (irq > 0)
+			smmu->cmdq.q.irq = irq;
 
+		irq = platform_get_irq_byname(pdev, "gerror");
+		if (irq > 0)
+			smmu->gerr_irq = irq;
+	}
 	/* Probe the h/w */
 	ret = arm_smmu_device_hw_probe(smmu);
 	if (ret)
-- 
GitLab


From 58de51ca77e2166262b043b0ff93290b184a2364 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 23 Jun 2017 07:07:14 +0200
Subject: [PATCH 0378/1958] mtd: parsers: trx: fix pr_err format for printing
 offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes following warning:
include/linux/kern_levels.h:4:18: warning: format '%X' expects argument of type 'unsigned int', but argument 2 has type 'size_t {aka long unsigned int}' [-Wformat=]

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/parsers/parser_trx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/parsers/parser_trx.c b/drivers/mtd/parsers/parser_trx.c
index e805108afd317..df360a75e1eb1 100644
--- a/drivers/mtd/parsers/parser_trx.c
+++ b/drivers/mtd/parsers/parser_trx.c
@@ -39,7 +39,7 @@ static const char *parser_trx_data_part_name(struct mtd_info *master,
 	err  = mtd_read(master, offset, sizeof(buf), &bytes_read,
 			(uint8_t *)&buf);
 	if (err && !mtd_is_bitflip(err)) {
-		pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+		pr_err("mtd_read error while parsing (offset: 0x%zX): %d\n",
 			offset, err);
 		goto out_default;
 	}
-- 
GitLab


From c169e3d3c049d93929a92f909a4b80ba0f85a3c3 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Thu, 22 Jun 2017 14:09:18 -0700
Subject: [PATCH 0379/1958] mtd: partitions: fixup some allocate_partition()
 whitespace

Some recent patches caused churn around this area, and checkpatch
noticed the existing issues.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdpart.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 2ad9493703f98..5736b0c90b339 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -402,7 +402,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent,
 			const struct mtd_partition *part, int partno,
 			uint64_t cur_offset)
 {
-	int wr_alignment = (parent->flags & MTD_NO_ERASE) ? parent->writesize:
+	int wr_alignment = (parent->flags & MTD_NO_ERASE) ? parent->writesize :
 							    parent->erasesize;
 	struct mtd_part *slave;
 	u32 remainder;
@@ -480,8 +480,8 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent,
 		slave->mtd._sync = part_sync;
 	if (!partno && !parent->dev.class && parent->_suspend &&
 	    parent->_resume) {
-			slave->mtd._suspend = part_suspend;
-			slave->mtd._resume = part_resume;
+		slave->mtd._suspend = part_suspend;
+		slave->mtd._resume = part_resume;
 	}
 	if (parent->_writev)
 		slave->mtd._writev = part_writev;
-- 
GitLab


From b395ba21699dbbca2dbf05833a71f5fbf5245a3f Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 19 Jun 2017 23:41:46 +0200
Subject: [PATCH 0380/1958] i2c: rcar: document HW incapabilities

Add recent findings (IGNORE_NAK) and document in a bit more detail why
the feature is not possible.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-rcar.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 214a71cb64429..030952e2ddc7e 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -783,7 +783,12 @@ static int rcar_unreg_slave(struct i2c_client *slave)
 
 static u32 rcar_i2c_func(struct i2c_adapter *adap)
 {
-	/* This HW can't do SMBUS_QUICK and NOSTART */
+	/*
+	 * This HW can't do:
+	 * I2C_SMBUS_QUICK (setting FSB during START didn't work)
+	 * I2C_M_NOSTART (automatically sends address after START)
+	 * I2C_M_IGNORE_NAK (automatically sends STOP after NAK)
+	 */
 	return I2C_FUNC_I2C | I2C_FUNC_SLAVE |
 		(I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
 }
-- 
GitLab


From 10a6218e382d9fafd4c2add1b171d89a079c5a47 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Tue, 20 Jun 2017 14:15:14 -0700
Subject: [PATCH 0381/1958] i2c: aspeed: added documentation for Aspeed I2C
 driver

Added device tree binding documentation for Aspeed I2C busses.

Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 .../devicetree/bindings/i2c/i2c-aspeed.txt    | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/i2c-aspeed.txt

diff --git a/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
new file mode 100644
index 0000000000000..bd6480b19535f
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
@@ -0,0 +1,48 @@
+Device tree configuration for the I2C busses on the AST24XX and AST25XX SoCs.
+
+Required Properties:
+- #address-cells	: should be 1
+- #size-cells		: should be 0
+- reg			: address offset and range of bus
+- compatible		: should be "aspeed,ast2400-i2c-bus"
+			  or "aspeed,ast2500-i2c-bus"
+- clocks		: root clock of bus, should reference the APB
+			  clock
+- interrupts		: interrupt number
+- interrupt-parent	: interrupt controller for bus, should reference a
+			  aspeed,ast2400-i2c-ic or aspeed,ast2500-i2c-ic
+			  interrupt controller
+
+Optional Properties:
+- bus-frequency	: frequency of the bus clock in Hz defaults to 100 kHz when not
+		  specified
+- multi-master	: states that there is another master active on this bus.
+
+Example:
+
+i2c {
+	compatible = "simple-bus";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	ranges = <0 0x1e78a000 0x1000>;
+
+	i2c_ic: interrupt-controller@0 {
+		#interrupt-cells = <1>;
+		compatible = "aspeed,ast2400-i2c-ic";
+		reg = <0x0 0x40>;
+		interrupts = <12>;
+		interrupt-controller;
+	};
+
+	i2c0: i2c-bus@40 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x40 0x40>;
+		compatible = "aspeed,ast2400-i2c-bus";
+		clocks = <&clk_apb>;
+		bus-frequency = <100000>;
+		interrupts = <0>;
+		interrupt-parent = <&i2c_ic>;
+	};
+};
-- 
GitLab


From f327c686d3ba44eda79a2d9e02a6a242e0b75787 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Tue, 20 Jun 2017 14:15:15 -0700
Subject: [PATCH 0382/1958] i2c: aspeed: added driver for Aspeed I2C

Added initial master support for Aspeed I2C controller. Supports
fourteen busses present in AST24XX and AST25XX BMC SoCs by Aspeed.

Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig      |  10 +
 drivers/i2c/busses/Makefile     |   1 +
 drivers/i2c/busses/i2c-aspeed.c | 690 ++++++++++++++++++++++++++++++++
 3 files changed, 701 insertions(+)
 create mode 100644 drivers/i2c/busses/i2c-aspeed.c

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index ed7106e48ba4f..ceb4df4e92ab0 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -328,6 +328,16 @@ config I2C_POWERMAC
 
 comment "I2C system bus drivers (mostly embedded / system-on-chip)"
 
+config I2C_ASPEED
+	tristate "Aspeed I2C Controller"
+	depends on ARCH_ASPEED || COMPILE_TEST
+	help
+	  If you say yes to this option, support will be included for the
+	  Aspeed I2C controller.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called i2c-aspeed.
+
 config I2C_AT91
 	tristate "Atmel AT91 I2C Two-Wire interface (TWI)"
 	depends on ARCH_AT91
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index c89a4314c5633..a43f28819a8cc 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_I2C_HYDRA)		+= i2c-hydra.o
 obj-$(CONFIG_I2C_POWERMAC)	+= i2c-powermac.o
 
 # Embedded system I2C/SMBus host controller drivers
+obj-$(CONFIG_I2C_ASPEED)	+= i2c-aspeed.o
 obj-$(CONFIG_I2C_AT91)		+= i2c-at91.o
 obj-$(CONFIG_I2C_AU1550)	+= i2c-au1550.o
 obj-$(CONFIG_I2C_AXXIA)		+= i2c-axxia.o
diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
new file mode 100644
index 0000000000000..55f241f2cfcc5
--- /dev/null
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -0,0 +1,690 @@
+/*
+ *  Aspeed 24XX/25XX I2C Controller.
+ *
+ *  Copyright (C) 2012-2017 ASPEED Technology Inc.
+ *  Copyright 2017 IBM Corporation
+ *  Copyright 2017 Google, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* I2C Register */
+#define ASPEED_I2C_FUN_CTRL_REG				0x00
+#define ASPEED_I2C_AC_TIMING_REG1			0x04
+#define ASPEED_I2C_AC_TIMING_REG2			0x08
+#define ASPEED_I2C_INTR_CTRL_REG			0x0c
+#define ASPEED_I2C_INTR_STS_REG				0x10
+#define ASPEED_I2C_CMD_REG				0x14
+#define ASPEED_I2C_DEV_ADDR_REG				0x18
+#define ASPEED_I2C_BYTE_BUF_REG				0x20
+
+/* Global Register Definition */
+/* 0x00 : I2C Interrupt Status Register  */
+/* 0x08 : I2C Interrupt Target Assignment  */
+
+/* Device Register Definition */
+/* 0x00 : I2CD Function Control Register  */
+#define ASPEED_I2CD_MULTI_MASTER_DIS			BIT(15)
+#define ASPEED_I2CD_SDA_DRIVE_1T_EN			BIT(8)
+#define ASPEED_I2CD_M_SDA_DRIVE_1T_EN			BIT(7)
+#define ASPEED_I2CD_M_HIGH_SPEED_EN			BIT(6)
+#define ASPEED_I2CD_MASTER_EN				BIT(0)
+
+/* 0x04 : I2CD Clock and AC Timing Control Register #1 */
+#define ASPEED_I2CD_TIME_SCL_HIGH_SHIFT			16
+#define ASPEED_I2CD_TIME_SCL_HIGH_MASK			GENMASK(19, 16)
+#define ASPEED_I2CD_TIME_SCL_LOW_SHIFT			12
+#define ASPEED_I2CD_TIME_SCL_LOW_MASK			GENMASK(15, 12)
+#define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK		GENMASK(3, 0)
+#define ASPEED_I2CD_TIME_SCL_REG_MAX			GENMASK(3, 0)
+/* 0x08 : I2CD Clock and AC Timing Control Register #2 */
+#define ASPEED_NO_TIMEOUT_CTRL				0
+
+/* 0x0c : I2CD Interrupt Control Register &
+ * 0x10 : I2CD Interrupt Status Register
+ *
+ * These share bit definitions, so use the same values for the enable &
+ * status bits.
+ */
+#define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT			BIT(14)
+#define ASPEED_I2CD_INTR_BUS_RECOVER_DONE		BIT(13)
+#define ASPEED_I2CD_INTR_SCL_TIMEOUT			BIT(6)
+#define ASPEED_I2CD_INTR_ABNORMAL			BIT(5)
+#define ASPEED_I2CD_INTR_NORMAL_STOP			BIT(4)
+#define ASPEED_I2CD_INTR_ARBIT_LOSS			BIT(3)
+#define ASPEED_I2CD_INTR_RX_DONE			BIT(2)
+#define ASPEED_I2CD_INTR_TX_NAK				BIT(1)
+#define ASPEED_I2CD_INTR_TX_ACK				BIT(0)
+#define ASPEED_I2CD_INTR_ALL						       \
+		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
+		 ASPEED_I2CD_INTR_BUS_RECOVER_DONE |			       \
+		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
+		 ASPEED_I2CD_INTR_ABNORMAL |				       \
+		 ASPEED_I2CD_INTR_NORMAL_STOP |				       \
+		 ASPEED_I2CD_INTR_ARBIT_LOSS |				       \
+		 ASPEED_I2CD_INTR_RX_DONE |				       \
+		 ASPEED_I2CD_INTR_TX_NAK |				       \
+		 ASPEED_I2CD_INTR_TX_ACK)
+
+/* 0x14 : I2CD Command/Status Register   */
+#define ASPEED_I2CD_SCL_LINE_STS			BIT(18)
+#define ASPEED_I2CD_SDA_LINE_STS			BIT(17)
+#define ASPEED_I2CD_BUS_BUSY_STS			BIT(16)
+#define ASPEED_I2CD_BUS_RECOVER_CMD			BIT(11)
+
+/* Command Bit */
+#define ASPEED_I2CD_M_STOP_CMD				BIT(5)
+#define ASPEED_I2CD_M_S_RX_CMD_LAST			BIT(4)
+#define ASPEED_I2CD_M_RX_CMD				BIT(3)
+#define ASPEED_I2CD_S_TX_CMD				BIT(2)
+#define ASPEED_I2CD_M_TX_CMD				BIT(1)
+#define ASPEED_I2CD_M_START_CMD				BIT(0)
+
+enum aspeed_i2c_master_state {
+	ASPEED_I2C_MASTER_START,
+	ASPEED_I2C_MASTER_TX_FIRST,
+	ASPEED_I2C_MASTER_TX,
+	ASPEED_I2C_MASTER_RX_FIRST,
+	ASPEED_I2C_MASTER_RX,
+	ASPEED_I2C_MASTER_STOP,
+	ASPEED_I2C_MASTER_INACTIVE,
+};
+
+struct aspeed_i2c_bus {
+	struct i2c_adapter		adap;
+	struct device			*dev;
+	void __iomem			*base;
+	/* Synchronizes I/O mem access to base. */
+	spinlock_t			lock;
+	struct completion		cmd_complete;
+	unsigned long			parent_clk_frequency;
+	u32				bus_frequency;
+	/* Transaction state. */
+	enum aspeed_i2c_master_state	master_state;
+	struct i2c_msg			*msgs;
+	size_t				buf_index;
+	size_t				msgs_index;
+	size_t				msgs_count;
+	bool				send_stop;
+	int				cmd_err;
+	/* Protected only by i2c_lock_bus */
+	int				master_xfer_result;
+};
+
+static int aspeed_i2c_reset(struct aspeed_i2c_bus *bus);
+
+static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
+{
+	unsigned long time_left, flags;
+	int ret = 0;
+	u32 command;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	command = readl(bus->base + ASPEED_I2C_CMD_REG);
+
+	if (command & ASPEED_I2CD_SDA_LINE_STS) {
+		/* Bus is idle: no recovery needed. */
+		if (command & ASPEED_I2CD_SCL_LINE_STS)
+			goto out;
+		dev_dbg(bus->dev, "SCL hung (state %x), attempting recovery\n",
+			command);
+
+		reinit_completion(&bus->cmd_complete);
+		writel(ASPEED_I2CD_M_STOP_CMD, bus->base + ASPEED_I2C_CMD_REG);
+		spin_unlock_irqrestore(&bus->lock, flags);
+
+		time_left = wait_for_completion_timeout(
+				&bus->cmd_complete, bus->adap.timeout);
+
+		spin_lock_irqsave(&bus->lock, flags);
+		if (time_left == 0)
+			goto reset_out;
+		else if (bus->cmd_err)
+			goto reset_out;
+		/* Recovery failed. */
+		else if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
+			   ASPEED_I2CD_SCL_LINE_STS))
+			goto reset_out;
+	/* Bus error. */
+	} else {
+		dev_dbg(bus->dev, "SDA hung (state %x), attempting recovery\n",
+			command);
+
+		reinit_completion(&bus->cmd_complete);
+		/* Writes 1 to 8 SCL clock cycles until SDA is released. */
+		writel(ASPEED_I2CD_BUS_RECOVER_CMD,
+		       bus->base + ASPEED_I2C_CMD_REG);
+		spin_unlock_irqrestore(&bus->lock, flags);
+
+		time_left = wait_for_completion_timeout(
+				&bus->cmd_complete, bus->adap.timeout);
+
+		spin_lock_irqsave(&bus->lock, flags);
+		if (time_left == 0)
+			goto reset_out;
+		else if (bus->cmd_err)
+			goto reset_out;
+		/* Recovery failed. */
+		else if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
+			   ASPEED_I2CD_SDA_LINE_STS))
+			goto reset_out;
+	}
+
+out:
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return ret;
+
+reset_out:
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return aspeed_i2c_reset(bus);
+}
+
+/* precondition: bus.lock has been acquired. */
+static void aspeed_i2c_do_start(struct aspeed_i2c_bus *bus)
+{
+	u32 command = ASPEED_I2CD_M_START_CMD | ASPEED_I2CD_M_TX_CMD;
+	struct i2c_msg *msg = &bus->msgs[bus->msgs_index];
+	u8 slave_addr = msg->addr << 1;
+
+	bus->master_state = ASPEED_I2C_MASTER_START;
+	bus->buf_index = 0;
+
+	if (msg->flags & I2C_M_RD) {
+		slave_addr |= 1;
+		command |= ASPEED_I2CD_M_RX_CMD;
+		/* Need to let the hardware know to NACK after RX. */
+		if (msg->len == 1 && !(msg->flags & I2C_M_RECV_LEN))
+			command |= ASPEED_I2CD_M_S_RX_CMD_LAST;
+	}
+
+	writel(slave_addr, bus->base + ASPEED_I2C_BYTE_BUF_REG);
+	writel(command, bus->base + ASPEED_I2C_CMD_REG);
+}
+
+/* precondition: bus.lock has been acquired. */
+static void aspeed_i2c_do_stop(struct aspeed_i2c_bus *bus)
+{
+	bus->master_state = ASPEED_I2C_MASTER_STOP;
+	writel(ASPEED_I2CD_M_STOP_CMD, bus->base + ASPEED_I2C_CMD_REG);
+}
+
+/* precondition: bus.lock has been acquired. */
+static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
+{
+	if (bus->msgs_index + 1 < bus->msgs_count) {
+		bus->msgs_index++;
+		aspeed_i2c_do_start(bus);
+	} else {
+		aspeed_i2c_do_stop(bus);
+	}
+}
+
+static int aspeed_i2c_is_irq_error(u32 irq_status)
+{
+	if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
+		return -EAGAIN;
+	if (irq_status & (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |
+			  ASPEED_I2CD_INTR_SCL_TIMEOUT))
+		return -EBUSY;
+	if (irq_status & (ASPEED_I2CD_INTR_ABNORMAL))
+		return -EPROTO;
+
+	return 0;
+}
+
+static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
+{
+	u32 irq_status, status_ack = 0, command = 0;
+	struct i2c_msg *msg;
+	u8 recv_byte;
+	int ret;
+
+	spin_lock(&bus->lock);
+	irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
+	/* Ack all interrupt bits. */
+	writel(irq_status, bus->base + ASPEED_I2C_INTR_STS_REG);
+
+	if (irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		status_ack |= ASPEED_I2CD_INTR_BUS_RECOVER_DONE;
+		goto out_complete;
+	}
+
+	/*
+	 * We encountered an interrupt that reports an error: the hardware
+	 * should clear the command queue effectively taking us back to the
+	 * INACTIVE state.
+	 */
+	ret = aspeed_i2c_is_irq_error(irq_status);
+	if (ret < 0) {
+		dev_dbg(bus->dev, "received error interrupt: 0x%08x",
+			irq_status);
+		bus->cmd_err = ret;
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		goto out_complete;
+	}
+
+	/* We are in an invalid state; reset bus to a known state. */
+	if (!bus->msgs && bus->master_state != ASPEED_I2C_MASTER_STOP) {
+		dev_err(bus->dev, "bus in unknown state");
+		bus->cmd_err = -EIO;
+		aspeed_i2c_do_stop(bus);
+		goto out_no_complete;
+	}
+	msg = &bus->msgs[bus->msgs_index];
+
+	/*
+	 * START is a special case because we still have to handle a subsequent
+	 * TX or RX immediately after we handle it, so we handle it here and
+	 * then update the state and handle the new state below.
+	 */
+	if (bus->master_state == ASPEED_I2C_MASTER_START) {
+		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+			pr_devel("no slave present at %02x", msg->addr);
+			status_ack |= ASPEED_I2CD_INTR_TX_NAK;
+			bus->cmd_err = -ENXIO;
+			aspeed_i2c_do_stop(bus);
+			goto out_no_complete;
+		}
+		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+		if (msg->len == 0) { /* SMBUS_QUICK */
+			aspeed_i2c_do_stop(bus);
+			goto out_no_complete;
+		}
+		if (msg->flags & I2C_M_RD)
+			bus->master_state = ASPEED_I2C_MASTER_RX_FIRST;
+		else
+			bus->master_state = ASPEED_I2C_MASTER_TX_FIRST;
+	}
+
+	switch (bus->master_state) {
+	case ASPEED_I2C_MASTER_TX:
+		if (unlikely(irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
+			dev_dbg(bus->dev, "slave NACKed TX");
+			status_ack |= ASPEED_I2CD_INTR_TX_NAK;
+			goto error_and_stop;
+		} else if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+			dev_err(bus->dev, "slave failed to ACK TX");
+			goto error_and_stop;
+		}
+		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+		/* fallthrough intended */
+	case ASPEED_I2C_MASTER_TX_FIRST:
+		if (bus->buf_index < msg->len) {
+			bus->master_state = ASPEED_I2C_MASTER_TX;
+			writel(msg->buf[bus->buf_index++],
+			       bus->base + ASPEED_I2C_BYTE_BUF_REG);
+			writel(ASPEED_I2CD_M_TX_CMD,
+			       bus->base + ASPEED_I2C_CMD_REG);
+		} else {
+			aspeed_i2c_next_msg_or_stop(bus);
+		}
+		goto out_no_complete;
+	case ASPEED_I2C_MASTER_RX_FIRST:
+		/* RX may not have completed yet (only address cycle) */
+		if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE))
+			goto out_no_complete;
+		/* fallthrough intended */
+	case ASPEED_I2C_MASTER_RX:
+		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
+			dev_err(bus->dev, "master failed to RX");
+			goto error_and_stop;
+		}
+		status_ack |= ASPEED_I2CD_INTR_RX_DONE;
+
+		recv_byte = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
+		msg->buf[bus->buf_index++] = recv_byte;
+
+		if (msg->flags & I2C_M_RECV_LEN) {
+			if (unlikely(recv_byte > I2C_SMBUS_BLOCK_MAX)) {
+				bus->cmd_err = -EPROTO;
+				aspeed_i2c_do_stop(bus);
+				goto out_no_complete;
+			}
+			msg->len = recv_byte +
+					((msg->flags & I2C_CLIENT_PEC) ? 2 : 1);
+			msg->flags &= ~I2C_M_RECV_LEN;
+		}
+
+		if (bus->buf_index < msg->len) {
+			bus->master_state = ASPEED_I2C_MASTER_RX;
+			command = ASPEED_I2CD_M_RX_CMD;
+			if (bus->buf_index + 1 == msg->len)
+				command |= ASPEED_I2CD_M_S_RX_CMD_LAST;
+			writel(command, bus->base + ASPEED_I2C_CMD_REG);
+		} else {
+			aspeed_i2c_next_msg_or_stop(bus);
+		}
+		goto out_no_complete;
+	case ASPEED_I2C_MASTER_STOP:
+		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_NORMAL_STOP))) {
+			dev_err(bus->dev, "master failed to STOP");
+			bus->cmd_err = -EIO;
+			/* Do not STOP as we have already tried. */
+		} else {
+			status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
+		}
+
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		goto out_complete;
+	case ASPEED_I2C_MASTER_INACTIVE:
+		dev_err(bus->dev,
+			"master received interrupt 0x%08x, but is inactive",
+			irq_status);
+		bus->cmd_err = -EIO;
+		/* Do not STOP as we should be inactive. */
+		goto out_complete;
+	default:
+		WARN(1, "unknown master state\n");
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		bus->cmd_err = -EINVAL;
+		goto out_complete;
+	}
+error_and_stop:
+	bus->cmd_err = -EIO;
+	aspeed_i2c_do_stop(bus);
+	goto out_no_complete;
+out_complete:
+	bus->msgs = NULL;
+	if (bus->cmd_err)
+		bus->master_xfer_result = bus->cmd_err;
+	else
+		bus->master_xfer_result = bus->msgs_index + 1;
+	complete(&bus->cmd_complete);
+out_no_complete:
+	if (irq_status != status_ack)
+		dev_err(bus->dev,
+			"irq handled != irq. expected 0x%08x, but was 0x%08x\n",
+			irq_status, status_ack);
+	spin_unlock(&bus->lock);
+	return !!irq_status;
+}
+
+static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
+{
+	struct aspeed_i2c_bus *bus = dev_id;
+
+	return aspeed_i2c_master_irq(bus) ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
+				  struct i2c_msg *msgs, int num)
+{
+	struct aspeed_i2c_bus *bus = i2c_get_adapdata(adap);
+	unsigned long time_left, flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	bus->cmd_err = 0;
+
+	/* If bus is busy, attempt recovery. We assume a single master
+	 * environment.
+	 */
+	if (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS) {
+		spin_unlock_irqrestore(&bus->lock, flags);
+		ret = aspeed_i2c_recover_bus(bus);
+		if (ret)
+			return ret;
+		spin_lock_irqsave(&bus->lock, flags);
+	}
+
+	bus->cmd_err = 0;
+	bus->msgs = msgs;
+	bus->msgs_index = 0;
+	bus->msgs_count = num;
+
+	reinit_completion(&bus->cmd_complete);
+	aspeed_i2c_do_start(bus);
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	time_left = wait_for_completion_timeout(&bus->cmd_complete,
+						bus->adap.timeout);
+
+	if (time_left == 0)
+		return -ETIMEDOUT;
+	else
+		return bus->master_xfer_result;
+}
+
+static u32 aspeed_i2c_functionality(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_SMBUS_BLOCK_DATA;
+}
+
+static const struct i2c_algorithm aspeed_i2c_algo = {
+	.master_xfer	= aspeed_i2c_master_xfer,
+	.functionality	= aspeed_i2c_functionality,
+};
+
+static u32 aspeed_i2c_get_clk_reg_val(u32 divisor)
+{
+	u32 base_clk, clk_high, clk_low, tmp;
+
+	/*
+	 * The actual clock frequency of SCL is:
+	 *	SCL_freq = APB_freq / (base_freq * (SCL_high + SCL_low))
+	 *		 = APB_freq / divisor
+	 * where base_freq is a programmable clock divider; its value is
+	 *	base_freq = 1 << base_clk
+	 * SCL_high is the number of base_freq clock cycles that SCL stays high
+	 * and SCL_low is the number of base_freq clock cycles that SCL stays
+	 * low for a period of SCL.
+	 * The actual register has a minimum SCL_high and SCL_low minimum of 1;
+	 * thus, they start counting at zero. So
+	 *	SCL_high = clk_high + 1
+	 *	SCL_low	 = clk_low + 1
+	 * Thus,
+	 *	SCL_freq = APB_freq /
+	 *		((1 << base_clk) * (clk_high + 1 + clk_low + 1))
+	 * The documentation recommends clk_high >= 8 and clk_low >= 7 when
+	 * possible; this last constraint gives us the following solution:
+	 */
+	base_clk = divisor > 33 ? ilog2((divisor - 1) / 32) + 1 : 0;
+	tmp = divisor / (1 << base_clk);
+	clk_high = tmp / 2 + tmp % 2;
+	clk_low = tmp - clk_high;
+
+	clk_high -= 1;
+	clk_low -= 1;
+
+	return ((clk_high << ASPEED_I2CD_TIME_SCL_HIGH_SHIFT)
+		& ASPEED_I2CD_TIME_SCL_HIGH_MASK)
+			| ((clk_low << ASPEED_I2CD_TIME_SCL_LOW_SHIFT)
+			   & ASPEED_I2CD_TIME_SCL_LOW_MASK)
+			| (base_clk & ASPEED_I2CD_TIME_BASE_DIVISOR_MASK);
+}
+
+/* precondition: bus.lock has been acquired. */
+static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
+{
+	u32 divisor, clk_reg_val;
+
+	divisor = bus->parent_clk_frequency / bus->bus_frequency;
+	clk_reg_val = aspeed_i2c_get_clk_reg_val(divisor);
+	writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
+	writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
+
+	return 0;
+}
+
+/* precondition: bus.lock has been acquired. */
+static int aspeed_i2c_init(struct aspeed_i2c_bus *bus,
+			     struct platform_device *pdev)
+{
+	u32 fun_ctrl_reg = ASPEED_I2CD_MASTER_EN;
+	int ret;
+
+	/* Disable everything. */
+	writel(0, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+
+	ret = aspeed_i2c_init_clk(bus);
+	if (ret < 0)
+		return ret;
+
+	if (!of_property_read_bool(pdev->dev.of_node, "multi-master"))
+		fun_ctrl_reg |= ASPEED_I2CD_MULTI_MASTER_DIS;
+
+	/* Enable Master Mode */
+	writel(readl(bus->base + ASPEED_I2C_FUN_CTRL_REG) | fun_ctrl_reg,
+	       bus->base + ASPEED_I2C_FUN_CTRL_REG);
+
+	/* Set interrupt generation of I2C controller */
+	writel(ASPEED_I2CD_INTR_ALL, bus->base + ASPEED_I2C_INTR_CTRL_REG);
+
+	return 0;
+}
+
+static int aspeed_i2c_reset(struct aspeed_i2c_bus *bus)
+{
+	struct platform_device *pdev = to_platform_device(bus->dev);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&bus->lock, flags);
+
+	/* Disable and ack all interrupts. */
+	writel(0, bus->base + ASPEED_I2C_INTR_CTRL_REG);
+	writel(0xffffffff, bus->base + ASPEED_I2C_INTR_STS_REG);
+
+	ret = aspeed_i2c_init(bus, pdev);
+
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return ret;
+}
+
+static int aspeed_i2c_probe_bus(struct platform_device *pdev)
+{
+	struct aspeed_i2c_bus *bus;
+	struct clk *parent_clk;
+	struct resource *res;
+	int irq, ret;
+
+	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	bus->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(bus->base))
+		return PTR_ERR(bus->base);
+
+	parent_clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(parent_clk))
+		return PTR_ERR(parent_clk);
+	bus->parent_clk_frequency = clk_get_rate(parent_clk);
+	/* We just need the clock rate, we don't actually use the clk object. */
+	devm_clk_put(&pdev->dev, parent_clk);
+
+	ret = of_property_read_u32(pdev->dev.of_node,
+				   "bus-frequency", &bus->bus_frequency);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Could not read bus-frequency property\n");
+		bus->bus_frequency = 100000;
+	}
+
+	/* Initialize the I2C adapter */
+	spin_lock_init(&bus->lock);
+	init_completion(&bus->cmd_complete);
+	bus->adap.owner = THIS_MODULE;
+	bus->adap.retries = 0;
+	bus->adap.timeout = 5 * HZ;
+	bus->adap.algo = &aspeed_i2c_algo;
+	bus->adap.dev.parent = &pdev->dev;
+	bus->adap.dev.of_node = pdev->dev.of_node;
+	strlcpy(bus->adap.name, pdev->name, sizeof(bus->adap.name));
+	i2c_set_adapdata(&bus->adap, bus);
+
+	bus->dev = &pdev->dev;
+
+	/* Clean up any left over interrupt state. */
+	writel(0, bus->base + ASPEED_I2C_INTR_CTRL_REG);
+	writel(0xffffffff, bus->base + ASPEED_I2C_INTR_STS_REG);
+	/*
+	 * bus.lock does not need to be held because the interrupt handler has
+	 * not been enabled yet.
+	 */
+	ret = aspeed_i2c_init(bus, pdev);
+	if (ret < 0)
+		return ret;
+
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	ret = devm_request_irq(&pdev->dev, irq, aspeed_i2c_bus_irq,
+			       0, dev_name(&pdev->dev), bus);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_add_adapter(&bus->adap);
+	if (ret < 0)
+		return ret;
+
+	platform_set_drvdata(pdev, bus);
+
+	dev_info(bus->dev, "i2c bus %d registered, irq %d\n",
+		 bus->adap.nr, irq);
+
+	return 0;
+}
+
+static int aspeed_i2c_remove_bus(struct platform_device *pdev)
+{
+	struct aspeed_i2c_bus *bus = platform_get_drvdata(pdev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&bus->lock, flags);
+
+	/* Disable everything. */
+	writel(0, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+	writel(0, bus->base + ASPEED_I2C_INTR_CTRL_REG);
+
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	i2c_del_adapter(&bus->adap);
+
+	return 0;
+}
+
+static const struct of_device_id aspeed_i2c_bus_of_table[] = {
+	{ .compatible = "aspeed,ast2400-i2c-bus", },
+	{ .compatible = "aspeed,ast2500-i2c-bus", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, aspeed_i2c_bus_of_table);
+
+static struct platform_driver aspeed_i2c_bus_driver = {
+	.probe		= aspeed_i2c_probe_bus,
+	.remove		= aspeed_i2c_remove_bus,
+	.driver		= {
+		.name		= "aspeed-i2c-bus",
+		.of_match_table	= aspeed_i2c_bus_of_table,
+	},
+};
+module_platform_driver(aspeed_i2c_bus_driver);
+
+MODULE_AUTHOR("Brendan Higgins <brendanhiggins@google.com>");
+MODULE_DESCRIPTION("Aspeed I2C Bus Driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From f9eb91350bb20b3e881bc59158071e14ee4f0326 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Tue, 20 Jun 2017 14:15:16 -0700
Subject: [PATCH 0383/1958] i2c: aspeed: added slave support for Aspeed I2C
 driver

Added slave support for Aspeed I2C controller. Supports fourteen busses
present in AST24XX and AST25XX BMC SoCs by Aspeed.

Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-aspeed.c | 201 ++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 55f241f2cfcc5..f19348328a715 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -49,6 +49,7 @@
 #define ASPEED_I2CD_SDA_DRIVE_1T_EN			BIT(8)
 #define ASPEED_I2CD_M_SDA_DRIVE_1T_EN			BIT(7)
 #define ASPEED_I2CD_M_HIGH_SPEED_EN			BIT(6)
+#define ASPEED_I2CD_SLAVE_EN				BIT(1)
 #define ASPEED_I2CD_MASTER_EN				BIT(0)
 
 /* 0x04 : I2CD Clock and AC Timing Control Register #1 */
@@ -69,6 +70,7 @@
  */
 #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT			BIT(14)
 #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE		BIT(13)
+#define ASPEED_I2CD_INTR_SLAVE_MATCH			BIT(7)
 #define ASPEED_I2CD_INTR_SCL_TIMEOUT			BIT(6)
 #define ASPEED_I2CD_INTR_ABNORMAL			BIT(5)
 #define ASPEED_I2CD_INTR_NORMAL_STOP			BIT(4)
@@ -101,6 +103,9 @@
 #define ASPEED_I2CD_M_TX_CMD				BIT(1)
 #define ASPEED_I2CD_M_START_CMD				BIT(0)
 
+/* 0x18 : I2CD Slave Device Address Register   */
+#define ASPEED_I2CD_DEV_ADDR_MASK			GENMASK(6, 0)
+
 enum aspeed_i2c_master_state {
 	ASPEED_I2C_MASTER_START,
 	ASPEED_I2C_MASTER_TX_FIRST,
@@ -111,6 +116,15 @@ enum aspeed_i2c_master_state {
 	ASPEED_I2C_MASTER_INACTIVE,
 };
 
+enum aspeed_i2c_slave_state {
+	ASPEED_I2C_SLAVE_START,
+	ASPEED_I2C_SLAVE_READ_REQUESTED,
+	ASPEED_I2C_SLAVE_READ_PROCESSED,
+	ASPEED_I2C_SLAVE_WRITE_REQUESTED,
+	ASPEED_I2C_SLAVE_WRITE_RECEIVED,
+	ASPEED_I2C_SLAVE_STOP,
+};
+
 struct aspeed_i2c_bus {
 	struct i2c_adapter		adap;
 	struct device			*dev;
@@ -130,6 +144,10 @@ struct aspeed_i2c_bus {
 	int				cmd_err;
 	/* Protected only by i2c_lock_bus */
 	int				master_xfer_result;
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	struct i2c_client		*slave;
+	enum aspeed_i2c_slave_state	slave_state;
+#endif /* CONFIG_I2C_SLAVE */
 };
 
 static int aspeed_i2c_reset(struct aspeed_i2c_bus *bus);
@@ -202,6 +220,110 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
 	return aspeed_i2c_reset(bus);
 }
 
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
+{
+	u32 command, irq_status, status_ack = 0;
+	struct i2c_client *slave = bus->slave;
+	bool irq_handled = true;
+	u8 value;
+
+	spin_lock(&bus->lock);
+	if (!slave) {
+		irq_handled = false;
+		goto out;
+	}
+
+	command = readl(bus->base + ASPEED_I2C_CMD_REG);
+	irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
+
+	/* Slave was requested, restart state machine. */
+	if (irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
+		status_ack |= ASPEED_I2CD_INTR_SLAVE_MATCH;
+		bus->slave_state = ASPEED_I2C_SLAVE_START;
+	}
+
+	/* Slave is not currently active, irq was for someone else. */
+	if (bus->slave_state == ASPEED_I2C_SLAVE_STOP) {
+		irq_handled = false;
+		goto out;
+	}
+
+	dev_dbg(bus->dev, "slave irq status 0x%08x, cmd 0x%08x\n",
+		irq_status, command);
+
+	/* Slave was sent something. */
+	if (irq_status & ASPEED_I2CD_INTR_RX_DONE) {
+		value = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
+		/* Handle address frame. */
+		if (bus->slave_state == ASPEED_I2C_SLAVE_START) {
+			if (value & 0x1)
+				bus->slave_state =
+						ASPEED_I2C_SLAVE_READ_REQUESTED;
+			else
+				bus->slave_state =
+						ASPEED_I2C_SLAVE_WRITE_REQUESTED;
+		}
+		status_ack |= ASPEED_I2CD_INTR_RX_DONE;
+	}
+
+	/* Slave was asked to stop. */
+	if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
+		status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
+		bus->slave_state = ASPEED_I2C_SLAVE_STOP;
+	}
+	if (irq_status & ASPEED_I2CD_INTR_TX_NAK) {
+		status_ack |= ASPEED_I2CD_INTR_TX_NAK;
+		bus->slave_state = ASPEED_I2C_SLAVE_STOP;
+	}
+
+	switch (bus->slave_state) {
+	case ASPEED_I2C_SLAVE_READ_REQUESTED:
+		if (irq_status & ASPEED_I2CD_INTR_TX_ACK)
+			dev_err(bus->dev, "Unexpected ACK on read request.\n");
+		bus->slave_state = ASPEED_I2C_SLAVE_READ_PROCESSED;
+
+		i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value);
+		writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
+		writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
+		break;
+	case ASPEED_I2C_SLAVE_READ_PROCESSED:
+		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+		if (!(irq_status & ASPEED_I2CD_INTR_TX_ACK))
+			dev_err(bus->dev,
+				"Expected ACK after processed read.\n");
+		i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value);
+		writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
+		writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
+		break;
+	case ASPEED_I2C_SLAVE_WRITE_REQUESTED:
+		bus->slave_state = ASPEED_I2C_SLAVE_WRITE_RECEIVED;
+		i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value);
+		break;
+	case ASPEED_I2C_SLAVE_WRITE_RECEIVED:
+		i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, &value);
+		break;
+	case ASPEED_I2C_SLAVE_STOP:
+		i2c_slave_event(slave, I2C_SLAVE_STOP, &value);
+		break;
+	default:
+		dev_err(bus->dev, "unhandled slave_state: %d\n",
+			bus->slave_state);
+		break;
+	}
+
+	if (status_ack != irq_status)
+		dev_err(bus->dev,
+			"irq handled != irq. expected %x, but was %x\n",
+			irq_status, status_ack);
+	writel(status_ack, bus->base + ASPEED_I2C_INTR_STS_REG);
+
+out:
+	spin_unlock(&bus->lock);
+	return irq_handled;
+}
+#endif /* CONFIG_I2C_SLAVE */
+
 /* precondition: bus.lock has been acquired. */
 static void aspeed_i2c_do_start(struct aspeed_i2c_bus *bus)
 {
@@ -427,6 +549,13 @@ static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
 {
 	struct aspeed_i2c_bus *bus = dev_id;
 
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	if (aspeed_i2c_slave_irq(bus)) {
+		dev_dbg(bus->dev, "irq handled by slave.\n");
+		return IRQ_HANDLED;
+	}
+#endif /* CONFIG_I2C_SLAVE */
+
 	return aspeed_i2c_master_irq(bus) ? IRQ_HANDLED : IRQ_NONE;
 }
 
@@ -474,9 +603,75 @@ static u32 aspeed_i2c_functionality(struct i2c_adapter *adap)
 	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_SMBUS_BLOCK_DATA;
 }
 
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+/* precondition: bus.lock has been acquired. */
+static void __aspeed_i2c_reg_slave(struct aspeed_i2c_bus *bus, u16 slave_addr)
+{
+	u32 addr_reg_val, func_ctrl_reg_val;
+
+	/* Set slave addr. */
+	addr_reg_val = readl(bus->base + ASPEED_I2C_DEV_ADDR_REG);
+	addr_reg_val &= ~ASPEED_I2CD_DEV_ADDR_MASK;
+	addr_reg_val |= slave_addr & ASPEED_I2CD_DEV_ADDR_MASK;
+	writel(addr_reg_val, bus->base + ASPEED_I2C_DEV_ADDR_REG);
+
+	/* Turn on slave mode. */
+	func_ctrl_reg_val = readl(bus->base + ASPEED_I2C_FUN_CTRL_REG);
+	func_ctrl_reg_val |= ASPEED_I2CD_SLAVE_EN;
+	writel(func_ctrl_reg_val, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+}
+
+static int aspeed_i2c_reg_slave(struct i2c_client *client)
+{
+	struct aspeed_i2c_bus *bus = i2c_get_adapdata(client->adapter);
+	unsigned long flags;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	if (bus->slave) {
+		spin_unlock_irqrestore(&bus->lock, flags);
+		return -EINVAL;
+	}
+
+	__aspeed_i2c_reg_slave(bus, client->addr);
+
+	bus->slave = client;
+	bus->slave_state = ASPEED_I2C_SLAVE_STOP;
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return 0;
+}
+
+static int aspeed_i2c_unreg_slave(struct i2c_client *client)
+{
+	struct aspeed_i2c_bus *bus = i2c_get_adapdata(client->adapter);
+	u32 func_ctrl_reg_val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	if (!bus->slave) {
+		spin_unlock_irqrestore(&bus->lock, flags);
+		return -EINVAL;
+	}
+
+	/* Turn off slave mode. */
+	func_ctrl_reg_val = readl(bus->base + ASPEED_I2C_FUN_CTRL_REG);
+	func_ctrl_reg_val &= ~ASPEED_I2CD_SLAVE_EN;
+	writel(func_ctrl_reg_val, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+
+	bus->slave = NULL;
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return 0;
+}
+#endif /* CONFIG_I2C_SLAVE */
+
 static const struct i2c_algorithm aspeed_i2c_algo = {
 	.master_xfer	= aspeed_i2c_master_xfer,
 	.functionality	= aspeed_i2c_functionality,
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	.reg_slave	= aspeed_i2c_reg_slave,
+	.unreg_slave	= aspeed_i2c_unreg_slave,
+#endif /* CONFIG_I2C_SLAVE */
 };
 
 static u32 aspeed_i2c_get_clk_reg_val(u32 divisor)
@@ -551,6 +746,12 @@ static int aspeed_i2c_init(struct aspeed_i2c_bus *bus,
 	writel(readl(bus->base + ASPEED_I2C_FUN_CTRL_REG) | fun_ctrl_reg,
 	       bus->base + ASPEED_I2C_FUN_CTRL_REG);
 
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	/* If slave has already been registered, re-enable it. */
+	if (bus->slave)
+		__aspeed_i2c_reg_slave(bus, bus->slave->addr);
+#endif /* CONFIG_I2C_SLAVE */
+
 	/* Set interrupt generation of I2C controller */
 	writel(ASPEED_I2CD_INTR_ALL, bus->base + ASPEED_I2C_INTR_CTRL_REG);
 
-- 
GitLab


From 413dfbbfca54904760aa04af40ca26e2a70f3eda Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Tue, 20 Jun 2017 14:15:13 -0700
Subject: [PATCH 0384/1958] MAINTAINERS: add entry for Aspeed I2C driver

Added myself as maintainer of the Aspeed I2C driver and the associated
I2C interrupt controller and added Joel Stanley and Benjamin
Herrenschmidt as reviewers.

Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 34b0324d53c5c..e83769d59e5bc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1123,6 +1123,18 @@ F:	arch/arm/mach-aspeed/
 F:	arch/arm/boot/dts/aspeed-*
 F:	drivers/*/*aspeed*
 
+ARM/ASPEED I2C DRIVER
+M:	Brendan Higgins <brendanhiggins@google.com>
+R:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
+R:	Joel Stanley <joel@jms.id.au>
+L:	linux-i2c@vger.kernel.org
+L:	openbmc@lists.ozlabs.org
+S:	Maintained
+F:	drivers/irqchip/irq-aspeed-i2c-ic.c
+F:	drivers/i2c/busses/i2c-aspeed.c
+F:	Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.txt
+F:	Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
+
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
-- 
GitLab


From 606fd2788b0fce6f5138078a4fbe8979ecba5697 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Wed, 21 Jun 2017 09:24:02 +0200
Subject: [PATCH 0385/1958] i2c: algo-bit: add support for I2C_M_STOP

Support for enforced STOPs will allow us to use SCCB compatible devices.

Based on a preliminary patch by Wolfram Sang.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/algos/i2c-algo-bit.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index a8e89df665b90..1147bddb8b2c8 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -553,9 +553,16 @@ static int bit_xfer(struct i2c_adapter *i2c_adap,
 		nak_ok = pmsg->flags & I2C_M_IGNORE_NAK;
 		if (!(pmsg->flags & I2C_M_NOSTART)) {
 			if (i) {
-				bit_dbg(3, &i2c_adap->dev, "emitting "
-					"repeated start condition\n");
-				i2c_repstart(adap);
+				if (msgs[i - 1].flags & I2C_M_STOP) {
+					bit_dbg(3, &i2c_adap->dev,
+						"emitting enforced stop/start condition\n");
+					i2c_stop(adap);
+					i2c_start(adap);
+				} else {
+					bit_dbg(3, &i2c_adap->dev,
+						"emitting repeated start condition\n");
+					i2c_repstart(adap);
+				}
 			}
 			ret = bit_doAddress(i2c_adap, pmsg);
 			if ((ret != 0) && !nak_ok) {
-- 
GitLab


From c0022504b500c4394ff106f0f13acb879ab2c70f Mon Sep 17 00:00:00 2001
From: Baoyou Xie <baoyou.xie@linaro.org>
Date: Thu, 22 Jun 2017 20:56:54 +0800
Subject: [PATCH 0386/1958] dt: bindings: add documentation for zx2967 family
 i2c controller

This patch adds dt-binding documentation for zx2967 family
i2c controller.

Signed-off-by: Baoyou Xie <baoyou.xie@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 .../devicetree/bindings/i2c/i2c-zx2967.txt    | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/i2c-zx2967.txt

diff --git a/Documentation/devicetree/bindings/i2c/i2c-zx2967.txt b/Documentation/devicetree/bindings/i2c/i2c-zx2967.txt
new file mode 100644
index 0000000000000..cb806d1ae4c9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-zx2967.txt
@@ -0,0 +1,22 @@
+ZTE zx2967 I2C controller
+
+Required properties:
+ - compatible: must be "zte,zx296718-i2c"
+ - reg: physical address and length of the device registers
+ - interrupts: a single interrupt specifier
+ - clocks: clock for the device
+ - #address-cells: should be <1>
+ - #size-cells: should be <0>
+ - clock-frequency: the desired I2C bus clock frequency.
+
+Examples:
+
+	i2c@112000 {
+		compatible = "zte,zx296718-i2c";
+		reg = <0x00112000 0x1000>;
+		interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&osc24m>;
+		#address-cells = <1>
+		#size-cells = <0>;
+		clock-frequency = <1600000>;
+	};
-- 
GitLab


From 9615a01f71ca02858f5265b1b545280758562aa2 Mon Sep 17 00:00:00 2001
From: Baoyou Xie <baoyou.xie@linaro.org>
Date: Thu, 22 Jun 2017 20:56:55 +0800
Subject: [PATCH 0387/1958] i2c: zx2967: add i2c controller driver for ZTE's
 zx2967 family

This patch adds i2c controller driver for ZTE's zx2967 family.

Signed-off-by: Baoyou Xie <baoyou.xie@linaro.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Jun Nie <jun.nie@linaro.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig      |   9 +
 drivers/i2c/busses/Makefile     |   1 +
 drivers/i2c/busses/i2c-zx2967.c | 609 ++++++++++++++++++++++++++++++++
 3 files changed, 619 insertions(+)
 create mode 100644 drivers/i2c/busses/i2c-zx2967.c

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index ceb4df4e92ab0..5134b3cde14e2 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1270,4 +1270,13 @@ config I2C_OPAL
 	  This driver can also be built as a module. If so, the module will be
 	  called as i2c-opal.
 
+config I2C_ZX2967
+	tristate "ZTE ZX2967 I2C support"
+	depends on ARCH_ZX || COMPILE_TEST
+	default y
+	help
+	  Selecting this option will add ZX2967 I2C driver.
+	  This driver can also be built as a module. If so, the module will be
+	  called i2c-zx2967.
+
 endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index a43f28819a8cc..a846511340484 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_I2C_XILINX)	+= i2c-xiic.o
 obj-$(CONFIG_I2C_XLR)		+= i2c-xlr.o
 obj-$(CONFIG_I2C_XLP9XX)	+= i2c-xlp9xx.o
 obj-$(CONFIG_I2C_RCAR)		+= i2c-rcar.o
+obj-$(CONFIG_I2C_ZX2967)	+= i2c-zx2967.o
 
 # External I2C/SMBus adapter drivers
 obj-$(CONFIG_I2C_DIOLAN_U2C)	+= i2c-diolan-u2c.o
diff --git a/drivers/i2c/busses/i2c-zx2967.c b/drivers/i2c/busses/i2c-zx2967.c
new file mode 100644
index 0000000000000..3e381edc0f223
--- /dev/null
+++ b/drivers/i2c/busses/i2c-zx2967.c
@@ -0,0 +1,609 @@
+/*
+ * Copyright (C) 2017 Sanechips Technology Co., Ltd.
+ * Copyright 2017 Linaro Ltd.
+ *
+ * Author: Baoyou Xie <baoyou.xie@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#define REG_CMD				0x04
+#define REG_DEVADDR_H			0x0C
+#define REG_DEVADDR_L			0x10
+#define REG_CLK_DIV_FS			0x14
+#define REG_CLK_DIV_HS			0x18
+#define REG_WRCONF			0x1C
+#define REG_RDCONF			0x20
+#define REG_DATA			0x24
+#define REG_STAT			0x28
+
+#define I2C_STOP			0
+#define I2C_MASTER			BIT(0)
+#define I2C_ADDR_MODE_TEN		BIT(1)
+#define I2C_IRQ_MSK_ENABLE		BIT(3)
+#define I2C_RW_READ			BIT(4)
+#define I2C_CMB_RW_EN			BIT(5)
+#define I2C_START			BIT(6)
+
+#define I2C_ADDR_LOW_MASK		GENMASK(6, 0)
+#define I2C_ADDR_LOW_SHIFT		0
+#define I2C_ADDR_HI_MASK		GENMASK(2, 0)
+#define I2C_ADDR_HI_SHIFT		7
+
+#define I2C_WFIFO_RESET			BIT(7)
+#define I2C_RFIFO_RESET			BIT(7)
+
+#define I2C_IRQ_ACK_CLEAR		BIT(7)
+#define I2C_INT_MASK			GENMASK(6, 0)
+
+#define I2C_TRANS_DONE			BIT(0)
+#define I2C_SR_EDEVICE			BIT(1)
+#define I2C_SR_EDATA			BIT(2)
+
+#define I2C_FIFO_MAX			16
+
+#define I2C_TIMEOUT			msecs_to_jiffies(1000)
+
+#define DEV(i2c)			(&i2c->adap.dev)
+
+struct zx2967_i2c {
+	struct i2c_adapter	adap;
+	struct clk		*clk;
+	struct completion	complete;
+	u32			clk_freq;
+	void __iomem		*reg_base;
+	size_t			residue;
+	int			irq;
+	int			msg_rd;
+	u8			*cur_trans;
+	u8			access_cnt;
+	bool			is_suspended;
+	int			error;
+};
+
+static void zx2967_i2c_writel(struct zx2967_i2c *i2c,
+			      u32 val, unsigned long reg)
+{
+	writel_relaxed(val, i2c->reg_base + reg);
+}
+
+static u32 zx2967_i2c_readl(struct zx2967_i2c *i2c, unsigned long reg)
+{
+	return readl_relaxed(i2c->reg_base + reg);
+}
+
+static void zx2967_i2c_writesb(struct zx2967_i2c *i2c,
+			       void *data, unsigned long reg, int len)
+{
+	writesb(i2c->reg_base + reg, data, len);
+}
+
+static void zx2967_i2c_readsb(struct zx2967_i2c *i2c,
+			      void *data, unsigned long reg, int len)
+{
+	readsb(i2c->reg_base + reg, data, len);
+}
+
+static void zx2967_i2c_start_ctrl(struct zx2967_i2c *i2c)
+{
+	u32 status;
+	u32 ctl;
+
+	status = zx2967_i2c_readl(i2c, REG_STAT);
+	status |= I2C_IRQ_ACK_CLEAR;
+	zx2967_i2c_writel(i2c, status, REG_STAT);
+
+	ctl = zx2967_i2c_readl(i2c, REG_CMD);
+	if (i2c->msg_rd)
+		ctl |= I2C_RW_READ;
+	else
+		ctl &= ~I2C_RW_READ;
+	ctl &= ~I2C_CMB_RW_EN;
+	ctl |= I2C_START;
+	zx2967_i2c_writel(i2c, ctl, REG_CMD);
+}
+
+static void zx2967_i2c_flush_fifos(struct zx2967_i2c *i2c)
+{
+	u32 offset;
+	u32 val;
+
+	if (i2c->msg_rd) {
+		offset = REG_RDCONF;
+		val = I2C_RFIFO_RESET;
+	} else {
+		offset = REG_WRCONF;
+		val = I2C_WFIFO_RESET;
+	}
+
+	val |= zx2967_i2c_readl(i2c, offset);
+	zx2967_i2c_writel(i2c, val, offset);
+}
+
+static int zx2967_i2c_empty_rx_fifo(struct zx2967_i2c *i2c, u32 size)
+{
+	u8 val[I2C_FIFO_MAX] = {0};
+	int i;
+
+	if (size > I2C_FIFO_MAX) {
+		dev_err(DEV(i2c), "fifo size %d over the max value %d\n",
+			size, I2C_FIFO_MAX);
+		return -EINVAL;
+	}
+
+	zx2967_i2c_readsb(i2c, val, REG_DATA, size);
+	for (i = 0; i < size; i++) {
+		*i2c->cur_trans++ = val[i];
+		i2c->residue--;
+	}
+
+	barrier();
+
+	return 0;
+}
+
+static int zx2967_i2c_fill_tx_fifo(struct zx2967_i2c *i2c)
+{
+	size_t residue = i2c->residue;
+	u8 *buf = i2c->cur_trans;
+
+	if (residue == 0) {
+		dev_err(DEV(i2c), "residue is %d\n", (int)residue);
+		return -EINVAL;
+	}
+
+	if (residue <= I2C_FIFO_MAX) {
+		zx2967_i2c_writesb(i2c, buf, REG_DATA, residue);
+
+		/* Again update before writing to FIFO to make sure isr sees. */
+		i2c->residue = 0;
+		i2c->cur_trans = NULL;
+	} else {
+		zx2967_i2c_writesb(i2c, buf, REG_DATA, I2C_FIFO_MAX);
+		i2c->residue -= I2C_FIFO_MAX;
+		i2c->cur_trans += I2C_FIFO_MAX;
+	}
+
+	barrier();
+
+	return 0;
+}
+
+static int zx2967_i2c_reset_hardware(struct zx2967_i2c *i2c)
+{
+	u32 val;
+	u32 clk_div;
+
+	val = I2C_MASTER | I2C_IRQ_MSK_ENABLE;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	clk_div = clk_get_rate(i2c->clk) / i2c->clk_freq - 1;
+	zx2967_i2c_writel(i2c, clk_div, REG_CLK_DIV_FS);
+	zx2967_i2c_writel(i2c, clk_div, REG_CLK_DIV_HS);
+
+	zx2967_i2c_writel(i2c, I2C_FIFO_MAX - 1, REG_WRCONF);
+	zx2967_i2c_writel(i2c, I2C_FIFO_MAX - 1, REG_RDCONF);
+	zx2967_i2c_writel(i2c, 1, REG_RDCONF);
+
+	zx2967_i2c_flush_fifos(i2c);
+
+	return 0;
+}
+
+static void zx2967_i2c_isr_clr(struct zx2967_i2c *i2c)
+{
+	u32 status;
+
+	status = zx2967_i2c_readl(i2c, REG_STAT);
+	status |= I2C_IRQ_ACK_CLEAR;
+	zx2967_i2c_writel(i2c, status, REG_STAT);
+}
+
+static irqreturn_t zx2967_i2c_isr(int irq, void *dev_id)
+{
+	u32 status;
+	struct zx2967_i2c *i2c = (struct zx2967_i2c *)dev_id;
+
+	status = zx2967_i2c_readl(i2c, REG_STAT) & I2C_INT_MASK;
+	zx2967_i2c_isr_clr(i2c);
+
+	if (status & I2C_SR_EDEVICE)
+		i2c->error = -ENXIO;
+	else if (status & I2C_SR_EDATA)
+		i2c->error = -EIO;
+	else if (status & I2C_TRANS_DONE)
+		i2c->error = 0;
+	else
+		goto done;
+
+	complete(&i2c->complete);
+done:
+	return IRQ_HANDLED;
+}
+
+static void zx2967_set_addr(struct zx2967_i2c *i2c, u16 addr)
+{
+	u16 val;
+
+	val = (addr >> I2C_ADDR_LOW_SHIFT) & I2C_ADDR_LOW_MASK;
+	zx2967_i2c_writel(i2c, val, REG_DEVADDR_L);
+
+	val = (addr >> I2C_ADDR_HI_SHIFT) & I2C_ADDR_HI_MASK;
+	zx2967_i2c_writel(i2c, val, REG_DEVADDR_H);
+	if (val)
+		val = zx2967_i2c_readl(i2c, REG_CMD) | I2C_ADDR_MODE_TEN;
+	else
+		val = zx2967_i2c_readl(i2c, REG_CMD) & ~I2C_ADDR_MODE_TEN;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+}
+
+static int zx2967_i2c_xfer_bytes(struct zx2967_i2c *i2c, u32 bytes)
+{
+	unsigned long time_left;
+	int rd = i2c->msg_rd;
+	int ret;
+
+	reinit_completion(&i2c->complete);
+
+	if (rd) {
+		zx2967_i2c_writel(i2c, bytes - 1, REG_RDCONF);
+	} else {
+		ret = zx2967_i2c_fill_tx_fifo(i2c);
+		if (ret)
+			return ret;
+	}
+
+	zx2967_i2c_start_ctrl(i2c);
+
+	time_left = wait_for_completion_timeout(&i2c->complete,
+						I2C_TIMEOUT);
+	if (time_left == 0)
+		return -ETIMEDOUT;
+
+	if (i2c->error)
+		return i2c->error;
+
+	return rd ? zx2967_i2c_empty_rx_fifo(i2c, bytes) : 0;
+}
+
+static int zx2967_i2c_xfer_msg(struct zx2967_i2c *i2c,
+			       struct i2c_msg *msg)
+{
+	int ret;
+	int i;
+
+	if (msg->len == 0)
+		return -EINVAL;
+
+	zx2967_i2c_flush_fifos(i2c);
+
+	i2c->cur_trans = msg->buf;
+	i2c->residue = msg->len;
+	i2c->access_cnt = msg->len / I2C_FIFO_MAX;
+	i2c->msg_rd = msg->flags & I2C_M_RD;
+
+	for (i = 0; i < i2c->access_cnt; i++) {
+		ret = zx2967_i2c_xfer_bytes(i2c, I2C_FIFO_MAX);
+		if (ret)
+			return ret;
+	}
+
+	if (i2c->residue > 0) {
+		ret = zx2967_i2c_xfer_bytes(i2c, i2c->residue);
+		if (ret)
+			return ret;
+	}
+
+	i2c->residue = 0;
+	i2c->access_cnt = 0;
+
+	return 0;
+}
+
+static int zx2967_i2c_xfer(struct i2c_adapter *adap,
+			   struct i2c_msg *msgs, int num)
+{
+	struct zx2967_i2c *i2c = i2c_get_adapdata(adap);
+	int ret;
+	int i;
+
+	if (i2c->is_suspended)
+		return -EBUSY;
+
+	zx2967_set_addr(i2c, msgs->addr);
+
+	for (i = 0; i < num; i++) {
+		ret = zx2967_i2c_xfer_msg(i2c, &msgs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return num;
+}
+
+static void
+zx2967_smbus_xfer_prepare(struct zx2967_i2c *i2c, u16 addr,
+			  char read_write, u8 command, int size,
+			  union i2c_smbus_data *data)
+{
+	u32 val;
+
+	val = zx2967_i2c_readl(i2c, REG_RDCONF);
+	val |= I2C_RFIFO_RESET;
+	zx2967_i2c_writel(i2c, val, REG_RDCONF);
+	zx2967_set_addr(i2c, addr);
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val &= ~I2C_RW_READ;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	switch (size) {
+	case I2C_SMBUS_BYTE:
+		zx2967_i2c_writel(i2c, command, REG_DATA);
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		zx2967_i2c_writel(i2c, command, REG_DATA);
+		if (read_write == I2C_SMBUS_WRITE)
+			zx2967_i2c_writel(i2c, data->byte, REG_DATA);
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		zx2967_i2c_writel(i2c, command, REG_DATA);
+		if (read_write == I2C_SMBUS_WRITE) {
+			zx2967_i2c_writel(i2c, (data->word >> 8), REG_DATA);
+			zx2967_i2c_writel(i2c, (data->word & 0xff),
+					  REG_DATA);
+		}
+		break;
+	}
+}
+
+static int zx2967_smbus_xfer_read(struct zx2967_i2c *i2c, int size,
+				  union i2c_smbus_data *data)
+{
+	unsigned long time_left;
+	u8 buf[2];
+	u32 val;
+
+	reinit_completion(&i2c->complete);
+
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val |= I2C_CMB_RW_EN;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val |= I2C_START;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	time_left = wait_for_completion_timeout(&i2c->complete,
+						I2C_TIMEOUT);
+	if (time_left == 0)
+		return -ETIMEDOUT;
+
+	if (i2c->error)
+		return i2c->error;
+
+	switch (size) {
+	case I2C_SMBUS_BYTE:
+	case I2C_SMBUS_BYTE_DATA:
+		val = zx2967_i2c_readl(i2c, REG_DATA);
+		data->byte = val;
+		break;
+	case I2C_SMBUS_WORD_DATA:
+	case I2C_SMBUS_PROC_CALL:
+		buf[0] = zx2967_i2c_readl(i2c, REG_DATA);
+		buf[1] = zx2967_i2c_readl(i2c, REG_DATA);
+		data->word = (buf[0] << 8) | buf[1];
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int zx2967_smbus_xfer_write(struct zx2967_i2c *i2c)
+{
+	unsigned long time_left;
+	u32 val;
+
+	reinit_completion(&i2c->complete);
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val |= I2C_START;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	time_left = wait_for_completion_timeout(&i2c->complete,
+						I2C_TIMEOUT);
+	if (time_left == 0)
+		return -ETIMEDOUT;
+
+	if (i2c->error)
+		return i2c->error;
+
+	return 0;
+}
+
+static int zx2967_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+			     unsigned short flags, char read_write,
+			     u8 command, int size, union i2c_smbus_data *data)
+{
+	struct zx2967_i2c *i2c = i2c_get_adapdata(adap);
+
+	if (size == I2C_SMBUS_QUICK)
+		read_write = I2C_SMBUS_WRITE;
+
+	switch (size) {
+	case I2C_SMBUS_QUICK:
+	case I2C_SMBUS_BYTE:
+	case I2C_SMBUS_BYTE_DATA:
+	case I2C_SMBUS_WORD_DATA:
+		zx2967_smbus_xfer_prepare(i2c, addr, read_write,
+					  command, size, data);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (read_write == I2C_SMBUS_READ)
+		return zx2967_smbus_xfer_read(i2c, size, data);
+
+	return zx2967_smbus_xfer_write(i2c);
+}
+
+static u32 zx2967_i2c_func(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C |
+	       I2C_FUNC_SMBUS_QUICK |
+	       I2C_FUNC_SMBUS_BYTE |
+	       I2C_FUNC_SMBUS_BYTE_DATA |
+	       I2C_FUNC_SMBUS_WORD_DATA |
+	       I2C_FUNC_SMBUS_BLOCK_DATA |
+	       I2C_FUNC_SMBUS_PROC_CALL |
+	       I2C_FUNC_SMBUS_I2C_BLOCK;
+}
+
+static int __maybe_unused zx2967_i2c_suspend(struct device *dev)
+{
+	struct zx2967_i2c *i2c = dev_get_drvdata(dev);
+
+	i2c->is_suspended = true;
+	clk_disable_unprepare(i2c->clk);
+
+	return 0;
+}
+
+static int __maybe_unused zx2967_i2c_resume(struct device *dev)
+{
+	struct zx2967_i2c *i2c = dev_get_drvdata(dev);
+
+	i2c->is_suspended = false;
+	clk_prepare_enable(i2c->clk);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(zx2967_i2c_dev_pm_ops,
+			 zx2967_i2c_suspend, zx2967_i2c_resume);
+
+static const struct i2c_algorithm zx2967_i2c_algo = {
+	.master_xfer = zx2967_i2c_xfer,
+	.smbus_xfer = zx2967_smbus_xfer,
+	.functionality = zx2967_i2c_func,
+};
+
+static const struct of_device_id zx2967_i2c_of_match[] = {
+	{ .compatible = "zte,zx296718-i2c", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, zx2967_i2c_of_match);
+
+static int zx2967_i2c_probe(struct platform_device *pdev)
+{
+	struct zx2967_i2c *i2c;
+	void __iomem *reg_base;
+	struct resource *res;
+	struct clk *clk;
+	int ret;
+
+	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(reg_base))
+		return PTR_ERR(reg_base);
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "missing controller clock");
+		return PTR_ERR(clk);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable i2c_clk\n");
+		return ret;
+	}
+
+	ret = device_property_read_u32(&pdev->dev, "clock-frequency",
+				       &i2c->clk_freq);
+	if (ret) {
+		dev_err(&pdev->dev, "missing clock-frequency");
+		return ret;
+	}
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		return ret;
+
+	i2c->irq = ret;
+	i2c->reg_base = reg_base;
+	i2c->clk = clk;
+
+	init_completion(&i2c->complete);
+	platform_set_drvdata(pdev, i2c);
+
+	ret = zx2967_i2c_reset_hardware(i2c);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to initialize i2c controller\n");
+		goto err_clk_unprepare;
+	}
+
+	ret = devm_request_irq(&pdev->dev, i2c->irq,
+			zx2967_i2c_isr, 0, dev_name(&pdev->dev), i2c);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request irq %i\n", i2c->irq);
+		goto err_clk_unprepare;
+	}
+
+	i2c_set_adapdata(&i2c->adap, i2c);
+	strlcpy(i2c->adap.name, "zx2967 i2c adapter",
+		sizeof(i2c->adap.name));
+	i2c->adap.algo = &zx2967_i2c_algo;
+	i2c->adap.nr = pdev->id;
+	i2c->adap.dev.parent = &pdev->dev;
+	i2c->adap.dev.of_node = pdev->dev.of_node;
+
+	ret = i2c_add_numbered_adapter(&i2c->adap);
+	if (ret)
+		goto err_clk_unprepare;
+
+	return 0;
+
+err_clk_unprepare:
+	clk_disable_unprepare(i2c->clk);
+	return ret;
+}
+
+static int zx2967_i2c_remove(struct platform_device *pdev)
+{
+	struct zx2967_i2c *i2c = platform_get_drvdata(pdev);
+
+	i2c_del_adapter(&i2c->adap);
+	clk_disable_unprepare(i2c->clk);
+
+	return 0;
+}
+
+static struct platform_driver zx2967_i2c_driver = {
+	.probe	= zx2967_i2c_probe,
+	.remove	= zx2967_i2c_remove,
+	.driver	= {
+		.name  = "zx2967_i2c",
+		.of_match_table = zx2967_i2c_of_match,
+		.pm = &zx2967_i2c_dev_pm_ops,
+	},
+};
+module_platform_driver(zx2967_i2c_driver);
+
+MODULE_AUTHOR("Baoyou Xie <baoyou.xie@linaro.org>");
+MODULE_DESCRIPTION("ZTE ZX2967 I2C Bus Controller driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 8064c616984eaa015f018dba595d78cd24a0cc8c Mon Sep 17 00:00:00 2001
From: Matt Weber <matthew.weber@rockwellcollins.com>
Date: Thu, 22 Jun 2017 15:00:33 -0500
Subject: [PATCH 0388/1958] i2c: cadance: fix ctrl/addr reg write order

The driver was clearing the hold bit in the control register before
writing to the address register which resulted in a stop condition
being generated rather than a repeated start.

This issue was only observed when a system was running much
slower than a normal processor would execute.  The IP data sheet
mentions a ordering of writing to the address register before
clearing the hold.

Fixes: df8eb5691c4 ("i2c: Add driver for Cadence I2C controller")
Signed-off-by: John Linn <john.linn@xilinx.com>
Signed-off-by: Paresh Chaudhary <paresh.chaudhary@rockwellcollins.com>
Signed-off-by: Matthew Weber <matthew.weber@rockwellcollins.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-cadence.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 45d6771fac8ce..75d80161931f2 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -405,14 +405,14 @@ static void cdns_i2c_mrecv(struct cdns_i2c *id)
 		cdns_i2c_writereg(id->recv_count, CDNS_I2C_XFER_SIZE_OFFSET);
 	}
 
+	/* Set the slave address in address register - triggers operation */
+	cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
+						CDNS_I2C_ADDR_OFFSET);
 	/* Clear the bus hold flag if bytes to receive is less than FIFO size */
 	if (!id->bus_hold_flag &&
 		((id->p_msg->flags & I2C_M_RECV_LEN) != I2C_M_RECV_LEN) &&
 		(id->recv_count <= CDNS_I2C_FIFO_DEPTH))
 			cdns_i2c_clear_bus_hold(id);
-	/* Set the slave address in address register - triggers operation */
-	cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
-						CDNS_I2C_ADDR_OFFSET);
 	cdns_i2c_writereg(CDNS_I2C_ENABLED_INTR_MASK, CDNS_I2C_IER_OFFSET);
 }
 
-- 
GitLab


From bce70fef7279243d62adbf5f53998b8d3d016144 Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0389/1958] platform/chrome: cros_ec_lpc: Add R/W helpers to
 LPC protocol variants

Call common functions for read / write to prepare support for future
LPC protocol variants which use different I/O ops than inb / outb.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/Makefile          |  3 +-
 drivers/platform/chrome/cros_ec_lpc.c     | 88 ++++++++++-------------
 drivers/platform/chrome/cros_ec_lpc_reg.c | 64 +++++++++++++++++
 include/linux/mfd/cros_ec_lpc_reg.h       | 47 ++++++++++++
 4 files changed, 151 insertions(+), 51 deletions(-)
 create mode 100644 drivers/platform/chrome/cros_ec_lpc_reg.c
 create mode 100644 include/linux/mfd/cros_ec_lpc_reg.h

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 3870afeefcf42..61182fd8f597f 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -5,6 +5,7 @@ cros_ec_devs-objs			:= cros_ec_dev.o cros_ec_sysfs.o \
 					   cros_ec_lightbar.o cros_ec_vbc.o \
 					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_devs.o
-obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpc.o
+cros_ec_lpcs-objs			:= cros_ec_lpc.o cros_ec_lpc_reg.o
+obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index f9a245465fd09..6a782a695eb63 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -26,19 +26,22 @@
 #include <linux/io.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_reg.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 
-#define DRV_NAME "cros_ec_lpc"
+#define DRV_NAME "cros_ec_lpcs"
 
 static int ec_response_timed_out(void)
 {
 	unsigned long one_second = jiffies + HZ;
+	u8 data;
 
 	usleep_range(200, 300);
 	do {
-		if (!(inb(EC_LPC_ADDR_HOST_CMD) & EC_LPC_STATUS_BUSY_MASK))
+		if (!(cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_CMD, 1, &data) &
+		    EC_LPC_STATUS_BUSY_MASK))
 			return 0;
 		usleep_range(100, 200);
 	} while (time_before(jiffies, one_second));
@@ -51,21 +54,20 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 {
 	struct ec_host_request *request;
 	struct ec_host_response response;
-	u8 sum = 0;
-	int i;
+	u8 sum;
 	int ret = 0;
 	u8 *dout;
 
 	ret = cros_ec_prepare_tx(ec, msg);
 
 	/* Write buffer */
-	for (i = 0; i < ret; i++)
-		outb(ec->dout[i], EC_LPC_ADDR_HOST_PACKET + i);
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PACKET, ret, ec->dout);
 
 	request = (struct ec_host_request *)ec->dout;
 
 	/* Here we go */
-	outb(EC_COMMAND_PROTOCOL_3, EC_LPC_ADDR_HOST_CMD);
+	sum = EC_COMMAND_PROTOCOL_3;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum);
 
 	if (ec_response_timed_out()) {
 		dev_warn(ec->dev, "EC responsed timed out\n");
@@ -74,17 +76,15 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Check result */
-	msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+	msg->result = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_DATA, 1, &sum);
 	ret = cros_ec_check_result(ec, msg);
 	if (ret)
 		goto done;
 
 	/* Read back response */
 	dout = (u8 *)&response;
-	for (i = 0; i < sizeof(response); i++) {
-		dout[i] = inb(EC_LPC_ADDR_HOST_PACKET + i);
-		sum += dout[i];
-	}
+	sum = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PACKET, sizeof(response),
+				     dout);
 
 	msg->result = response.result;
 
@@ -97,11 +97,9 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Read response and process checksum */
-	for (i = 0; i < response.data_len; i++) {
-		msg->data[i] =
-			inb(EC_LPC_ADDR_HOST_PACKET + sizeof(response) + i);
-		sum += msg->data[i];
-	}
+	sum += cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PACKET +
+				      sizeof(response), response.data_len,
+				      msg->data);
 
 	if (sum) {
 		dev_err(ec->dev,
@@ -121,8 +119,7 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 				struct cros_ec_command *msg)
 {
 	struct ec_lpc_host_args args;
-	int csum;
-	int i;
+	u8 sum;
 	int ret = 0;
 
 	if (msg->outsize > EC_PROTO2_MAX_PARAM_SIZE ||
@@ -139,24 +136,20 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	args.data_size = msg->outsize;
 
 	/* Initialize checksum */
-	csum = msg->command + args.flags +
-		args.command_version + args.data_size;
+	sum = msg->command + args.flags + args.command_version + args.data_size;
 
 	/* Copy data and update checksum */
-	for (i = 0; i < msg->outsize; i++) {
-		outb(msg->data[i], EC_LPC_ADDR_HOST_PARAM + i);
-		csum += msg->data[i];
-	}
+	sum += cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PARAM, msg->outsize,
+				       msg->data);
 
 	/* Finalize checksum and write args */
-	args.checksum = csum & 0xFF;
-	outb(args.flags, EC_LPC_ADDR_HOST_ARGS);
-	outb(args.command_version, EC_LPC_ADDR_HOST_ARGS + 1);
-	outb(args.data_size, EC_LPC_ADDR_HOST_ARGS + 2);
-	outb(args.checksum, EC_LPC_ADDR_HOST_ARGS + 3);
+	args.checksum = sum;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_ARGS, sizeof(args),
+				(u8 *)&args);
 
 	/* Here we go */
-	outb(msg->command, EC_LPC_ADDR_HOST_CMD);
+	sum = msg->command;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum);
 
 	if (ec_response_timed_out()) {
 		dev_warn(ec->dev, "EC responsed timed out\n");
@@ -165,16 +158,14 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Check result */
-	msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+	msg->result = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_DATA, 1, &sum);
 	ret = cros_ec_check_result(ec, msg);
 	if (ret)
 		goto done;
 
 	/* Read back args */
-	args.flags = inb(EC_LPC_ADDR_HOST_ARGS);
-	args.command_version = inb(EC_LPC_ADDR_HOST_ARGS + 1);
-	args.data_size = inb(EC_LPC_ADDR_HOST_ARGS + 2);
-	args.checksum = inb(EC_LPC_ADDR_HOST_ARGS + 3);
+	cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_ARGS, sizeof(args),
+			       (u8 *)&args);
 
 	if (args.data_size > msg->insize) {
 		dev_err(ec->dev,
@@ -185,20 +176,17 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Start calculating response checksum */
-	csum = msg->command + args.flags +
-		args.command_version + args.data_size;
+	sum = msg->command + args.flags + args.command_version + args.data_size;
 
 	/* Read response and update checksum */
-	for (i = 0; i < args.data_size; i++) {
-		msg->data[i] = inb(EC_LPC_ADDR_HOST_PARAM + i);
-		csum += msg->data[i];
-	}
+	sum += cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PARAM, args.data_size,
+				      msg->data);
 
 	/* Verify checksum */
-	if (args.checksum != (csum & 0xFF)) {
+	if (args.checksum != sum) {
 		dev_err(ec->dev,
 			"bad packet checksum, expected %02x, got %02x\n",
-			args.checksum, csum & 0xFF);
+			args.checksum, sum);
 		ret = -EBADMSG;
 		goto done;
 	}
@@ -222,14 +210,13 @@ static int cros_ec_lpc_readmem(struct cros_ec_device *ec, unsigned int offset,
 
 	/* fixed length */
 	if (bytes) {
-		for (; cnt < bytes; i++, s++, cnt++)
-			*s = inb(EC_LPC_ADDR_MEMMAP + i);
-		return cnt;
+		cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + offset, bytes, s);
+		return bytes;
 	}
 
 	/* string */
 	for (; i < EC_MEMMAP_SIZE; i++, s++) {
-		*s = inb(EC_LPC_ADDR_MEMMAP + i);
+		cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + i, 1, s);
 		cnt++;
 		if (!*s)
 			break;
@@ -242,6 +229,7 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct cros_ec_device *ec_dev;
+	u8 buf[2];
 	int ret;
 
 	if (!devm_request_region(dev, EC_LPC_ADDR_MEMMAP, EC_MEMMAP_SIZE,
@@ -250,8 +238,8 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	if ((inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID) != 'E') ||
-	    (inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID + 1) != 'C')) {
+	cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID, 2, buf);
+	if (buf[0] != 'E' || buf[1] != 'C') {
 		dev_err(dev, "EC ID not detected\n");
 		return -ENODEV;
 	}
diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.c b/drivers/platform/chrome/cros_ec_lpc_reg.c
new file mode 100644
index 0000000000000..03c97813171ea
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_reg.c
@@ -0,0 +1,64 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/io.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+
+static u8 lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	int i;
+	int sum = 0;
+
+	for (i = 0; i < length; ++i) {
+		dest[i] = inb(offset + i);
+		sum += dest[i];
+	}
+
+	/* Return checksum of all bytes read */
+	return sum;
+}
+
+static u8 lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	int i;
+	int sum = 0;
+
+	for (i = 0; i < length; ++i) {
+		outb(msg[i], offset + i);
+		sum += msg[i];
+	}
+
+	/* Return checksum of all bytes written */
+	return sum;
+}
+
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	return lpc_read_bytes(offset, length, dest);
+}
+
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	return lpc_write_bytes(offset, length, msg);
+}
diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h
new file mode 100644
index 0000000000000..4089bd5c83138
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_reg.h
@@ -0,0 +1,47 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_REG_H
+#define __LINUX_MFD_CROS_EC_REG_H
+
+/**
+ * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes read.
+ *
+ * @offset: Base read address
+ * @length: Number of bytes to read
+ * @dest: Destination buffer
+ */
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
+
+/**
+ * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes written.
+ *
+ * @offset: Base write address
+ * @length: Number of bytes to write
+ * @msg: Write data buffer
+ */
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
+
+#endif /* __LINUX_MFD_CROS_EC_REG_H */
-- 
GitLab


From 8d4a3dc423a2695be51ac864eefb8ba7688b1240 Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0390/1958] platform/chrome: cros_ec_lpc: Add support for
 mec1322 EC

This adds support for the ChromeOS LPC Microchip Embedded Controller
(mec1322) variant.

mec1322 accesses I/O region [800h, 9ffh] through embedded memory
interface (EMI) rather than LPC.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/Kconfig           |  12 ++
 drivers/platform/chrome/Makefile          |   1 +
 drivers/platform/chrome/cros_ec_lpc.c     |   5 +
 drivers/platform/chrome/cros_ec_lpc_mec.c | 140 ++++++++++++++++++++++
 drivers/platform/chrome/cros_ec_lpc_reg.c |  69 +++++++++++
 include/linux/mfd/cros_ec_lpc_mec.h       |  90 ++++++++++++++
 include/linux/mfd/cros_ec_lpc_reg.h       |  14 +++
 7 files changed, 331 insertions(+)
 create mode 100644 drivers/platform/chrome/cros_ec_lpc_mec.c
 create mode 100644 include/linux/mfd/cros_ec_lpc_mec.h

diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 76bdae1a93bbc..6d80fb5076b69 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -59,6 +59,18 @@ config CROS_EC_LPC
           To compile this driver as a module, choose M here: the
           module will be called cros_ec_lpc.
 
+config CROS_EC_LPC_MEC
+	bool "ChromeOS Embedded Controller LPC Microchip EC (MEC) variant"
+	depends on CROS_EC_LPC
+	default n
+	help
+	  If you say Y here, a variant LPC protocol for the Microchip EC
+	  will be used. Note that this variant is not backward compatible
+	  with non-Microchip ECs.
+
+	  If you have a ChromeOS Embedded Controller Microchip EC variant
+	  choose Y here.
+
 config CROS_EC_PROTO
         bool
         help
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 61182fd8f597f..66c345ca35fcc 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -6,6 +6,7 @@ cros_ec_devs-objs			:= cros_ec_dev.o cros_ec_sysfs.o \
 					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_devs.o
 cros_ec_lpcs-objs			:= cros_ec_lpc.o cros_ec_lpc_reg.o
+cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC)	+= cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 6a782a695eb63..bc2dc6210d7bd 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -346,10 +346,13 @@ static int __init cros_ec_lpc_init(void)
 		return -ENODEV;
 	}
 
+	cros_ec_lpc_reg_init();
+
 	/* Register the driver */
 	ret = platform_driver_register(&cros_ec_lpc_driver);
 	if (ret) {
 		pr_err(DRV_NAME ": can't register driver: %d\n", ret);
+		cros_ec_lpc_reg_destroy();
 		return ret;
 	}
 
@@ -358,6 +361,7 @@ static int __init cros_ec_lpc_init(void)
 	if (ret) {
 		pr_err(DRV_NAME ": can't register device: %d\n", ret);
 		platform_driver_unregister(&cros_ec_lpc_driver);
+		cros_ec_lpc_reg_destroy();
 		return ret;
 	}
 
@@ -368,6 +372,7 @@ static void __exit cros_ec_lpc_exit(void)
 {
 	platform_device_unregister(&cros_ec_lpc_device);
 	platform_driver_unregister(&cros_ec_lpc_driver);
+	cros_ec_lpc_reg_destroy();
 }
 
 module_init(cros_ec_lpc_init);
diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.c b/drivers/platform/chrome/cros_ec_lpc_mec.c
new file mode 100644
index 0000000000000..2eda2c2fc210f
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.c
@@ -0,0 +1,140 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_mec.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/*
+ * This mutex must be held while accessing the EMI unit. We can't rely on the
+ * EC mutex because memmap data may be accessed without it being held.
+ */
+static struct mutex io_mutex;
+
+/*
+ * cros_ec_lpc_mec_emi_write_address
+ *
+ * Initialize EMI read / write at a given address.
+ *
+ * @addr:        Starting read / write address
+ * @access_type: Type of access, typically 32-bit auto-increment
+ */
+static void cros_ec_lpc_mec_emi_write_address(u16 addr,
+			enum cros_ec_lpc_mec_emi_access_mode access_type)
+{
+	/* Address relative to start of EMI range */
+	addr -= MEC_EMI_RANGE_START;
+	outb((addr & 0xfc) | access_type, MEC_EMI_EC_ADDRESS_B0);
+	outb((addr >> 8) & 0x7f, MEC_EMI_EC_ADDRESS_B1);
+}
+
+/*
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset:  Base read / write address
+ * @length:  Number of bytes to read / write
+ * @buf:     Destination / source buffer
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+			    unsigned int offset, unsigned int length,
+			    u8 *buf)
+{
+	int i = 0;
+	int io_addr;
+	u8 sum = 0;
+	enum cros_ec_lpc_mec_emi_access_mode access, new_access;
+
+	/*
+	 * Long access cannot be used on misaligned data since reading B0 loads
+	 * the data register and writing B3 flushes.
+	 */
+	if (offset & 0x3 || length < 4)
+		access = ACCESS_TYPE_BYTE;
+	else
+		access = ACCESS_TYPE_LONG_AUTO_INCREMENT;
+
+	mutex_lock(&io_mutex);
+
+	/* Initialize I/O at desired address */
+	cros_ec_lpc_mec_emi_write_address(offset, access);
+
+	/* Skip bytes in case of misaligned offset */
+	io_addr = MEC_EMI_EC_DATA_B0 + (offset & 0x3);
+	while (i < length) {
+		while (io_addr <= MEC_EMI_EC_DATA_B3) {
+			if (io_type == MEC_IO_READ)
+				buf[i] = inb(io_addr++);
+			else
+				outb(buf[i], io_addr++);
+
+			sum += buf[i++];
+			offset++;
+
+			/* Extra bounds check in case of misaligned length */
+			if (i == length)
+				goto done;
+		}
+
+		/*
+		 * Use long auto-increment access except for misaligned write,
+		 * since writing B3 triggers the flush.
+		 */
+		if (length - i < 4 && io_type == MEC_IO_WRITE)
+			new_access = ACCESS_TYPE_BYTE;
+		else
+			new_access = ACCESS_TYPE_LONG_AUTO_INCREMENT;
+
+		if (new_access != access ||
+		    access != ACCESS_TYPE_LONG_AUTO_INCREMENT) {
+			access = new_access;
+			cros_ec_lpc_mec_emi_write_address(offset, access);
+		}
+
+		/* Access [B0, B3] on each loop pass */
+		io_addr = MEC_EMI_EC_DATA_B0;
+	}
+
+done:
+	mutex_unlock(&io_mutex);
+
+	return sum;
+}
+EXPORT_SYMBOL(cros_ec_lpc_io_bytes_mec);
+
+void cros_ec_lpc_mec_init(void)
+{
+	mutex_init(&io_mutex);
+}
+EXPORT_SYMBOL(cros_ec_lpc_mec_init);
+
+void cros_ec_lpc_mec_destroy(void)
+{
+	mutex_destroy(&io_mutex);
+}
+EXPORT_SYMBOL(cros_ec_lpc_mec_destroy);
diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.c b/drivers/platform/chrome/cros_ec_lpc_reg.c
index 03c97813171ea..dcc7a3e30604b 100644
--- a/drivers/platform/chrome/cros_ec_lpc_reg.c
+++ b/drivers/platform/chrome/cros_ec_lpc_reg.c
@@ -24,6 +24,7 @@
 #include <linux/io.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_mec.h>
 
 static u8 lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
 {
@@ -53,12 +54,80 @@ static u8 lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
 	return sum;
 }
 
+#ifdef CONFIG_CROS_EC_LPC_MEC
+
 u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
 {
+	if (length == 0)
+		return 0;
+
+	/* Access desired range through EMI interface */
+	if (offset >= MEC_EMI_RANGE_START && offset <= MEC_EMI_RANGE_END) {
+		/* Ensure we don't straddle EMI region */
+		if (WARN_ON(offset + length - 1 > MEC_EMI_RANGE_END))
+			return 0;
+
+		return cros_ec_lpc_io_bytes_mec(MEC_IO_READ, offset, length,
+						dest);
+	}
+
+	if (WARN_ON(offset + length > MEC_EMI_RANGE_START &&
+		    offset < MEC_EMI_RANGE_START))
+		return 0;
+
 	return lpc_read_bytes(offset, length, dest);
 }
 
 u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
 {
+	if (length == 0)
+		return 0;
+
+	/* Access desired range through EMI interface */
+	if (offset >= MEC_EMI_RANGE_START && offset <= MEC_EMI_RANGE_END) {
+		/* Ensure we don't straddle EMI region */
+		if (WARN_ON(offset + length - 1 > MEC_EMI_RANGE_END))
+			return 0;
+
+		return cros_ec_lpc_io_bytes_mec(MEC_IO_WRITE, offset, length,
+						msg);
+	}
+
+	if (WARN_ON(offset + length > MEC_EMI_RANGE_START &&
+		    offset < MEC_EMI_RANGE_START))
+		return 0;
+
 	return lpc_write_bytes(offset, length, msg);
 }
+
+void cros_ec_lpc_reg_init(void)
+{
+	cros_ec_lpc_mec_init();
+}
+
+void cros_ec_lpc_reg_destroy(void)
+{
+	cros_ec_lpc_mec_destroy();
+}
+
+#else /* CONFIG_CROS_EC_LPC_MEC */
+
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	return lpc_read_bytes(offset, length, dest);
+}
+
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	return lpc_write_bytes(offset, length, msg);
+}
+
+void cros_ec_lpc_reg_init(void)
+{
+}
+
+void cros_ec_lpc_reg_destroy(void)
+{
+}
+
+#endif /* CONFIG_CROS_EC_LPC_MEC */
diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h
new file mode 100644
index 0000000000000..176496ddc66c2
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_mec.h
@@ -0,0 +1,90 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_MEC_H
+#define __LINUX_MFD_CROS_EC_MEC_H
+
+#include <linux/mfd/cros_ec_commands.h>
+
+enum cros_ec_lpc_mec_emi_access_mode {
+	/* 8-bit access */
+	ACCESS_TYPE_BYTE = 0x0,
+	/* 16-bit access */
+	ACCESS_TYPE_WORD = 0x1,
+	/* 32-bit access */
+	ACCESS_TYPE_LONG = 0x2,
+	/*
+	 * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the
+	 * EC data register to be incremented.
+	 */
+	ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3,
+};
+
+enum cros_ec_lpc_mec_io_type {
+	MEC_IO_READ,
+	MEC_IO_WRITE,
+};
+
+/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */
+#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0
+#define MEC_EMI_RANGE_END   (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE)
+
+/* EMI registers are relative to base */
+#define MEC_EMI_BASE 0x800
+#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0)
+#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1)
+#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2)
+#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3)
+#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4)
+#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5)
+#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6)
+#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7)
+
+/*
+ * cros_ec_lpc_mec_init
+ *
+ * Initialize MEC I/O.
+ */
+void cros_ec_lpc_mec_init(void);
+
+/*
+ * cros_ec_lpc_mec_destroy
+ *
+ * Cleanup MEC I/O.
+ */
+void cros_ec_lpc_mec_destroy(void);
+
+/**
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset:  Base read / write address
+ * @length:  Number of bytes to read / write
+ * @buf:     Destination / source buffer
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+			    unsigned int offset, unsigned int length, u8 *buf);
+
+#endif /* __LINUX_MFD_CROS_EC_MEC_H */
diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h
index 4089bd5c83138..5560bef63c2bd 100644
--- a/include/linux/mfd/cros_ec_lpc_reg.h
+++ b/include/linux/mfd/cros_ec_lpc_reg.h
@@ -44,4 +44,18 @@ u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
  */
 u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
 
+/**
+ * cros_ec_lpc_reg_init
+ *
+ * Initialize register I/O.
+ */
+void cros_ec_lpc_reg_init(void);
+
+/**
+ * cros_ec_lpc_reg_destroy
+ *
+ * Cleanup reg I/O.
+ */
+void cros_ec_lpc_reg_destroy(void);
+
 #endif /* __LINUX_MFD_CROS_EC_REG_H */
-- 
GitLab


From 12278dc7c572e87727715ecce9cda15b70efeca5 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0391/1958] platform/chrome: cros_ec_lpc: Add support for
 GOOG004 ACPI device

This patch removes platform_device_register() call and adds an ACPI
device id structure. The driver is now automatically probed for devices
with a GOOG0004 ACPI entry.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/Kconfig       |  2 +-
 drivers/platform/chrome/cros_ec_lpc.c | 23 +++++++++--------------
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 6d80fb5076b69..0ad6e290bbda3 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -49,7 +49,7 @@ config CROS_EC_CHARDEV
 
 config CROS_EC_LPC
         tristate "ChromeOS Embedded Controller (LPC)"
-        depends on MFD_CROS_EC && (X86 || COMPILE_TEST)
+        depends on MFD_CROS_EC && ACPI && (X86 || COMPILE_TEST)
         help
           If you say Y here, you get support for talking to the ChromeOS EC
           over an LPC bus. This uses a simple byte-level protocol with a
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index bc2dc6210d7bd..9070352168a64 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -21,6 +21,7 @@
  * expensive.
  */
 
+#include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/delay.h>
 #include <linux/io.h>
@@ -32,6 +33,7 @@
 #include <linux/printk.h>
 
 #define DRV_NAME "cros_ec_lpcs"
+#define ACPI_DRV_NAME "GOOG0004"
 
 static int ec_response_timed_out(void)
 {
@@ -288,6 +290,12 @@ static int cros_ec_lpc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct acpi_device_id cros_ec_lpc_acpi_device_ids[] = {
+	{ ACPI_DRV_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, cros_ec_lpc_acpi_device_ids);
+
 static struct dmi_system_id cros_ec_lpc_dmi_table[] __initdata = {
 	{
 		/*
@@ -328,15 +336,12 @@ MODULE_DEVICE_TABLE(dmi, cros_ec_lpc_dmi_table);
 static struct platform_driver cros_ec_lpc_driver = {
 	.driver = {
 		.name = DRV_NAME,
+		.acpi_match_table = cros_ec_lpc_acpi_device_ids,
 	},
 	.probe = cros_ec_lpc_probe,
 	.remove = cros_ec_lpc_remove,
 };
 
-static struct platform_device cros_ec_lpc_device = {
-	.name = DRV_NAME
-};
-
 static int __init cros_ec_lpc_init(void)
 {
 	int ret;
@@ -356,21 +361,11 @@ static int __init cros_ec_lpc_init(void)
 		return ret;
 	}
 
-	/* Register the device, and it'll get hooked up automatically */
-	ret = platform_device_register(&cros_ec_lpc_device);
-	if (ret) {
-		pr_err(DRV_NAME ": can't register device: %d\n", ret);
-		platform_driver_unregister(&cros_ec_lpc_driver);
-		cros_ec_lpc_reg_destroy();
-		return ret;
-	}
-
 	return 0;
 }
 
 static void __exit cros_ec_lpc_exit(void)
 {
-	platform_device_unregister(&cros_ec_lpc_device);
 	platform_driver_unregister(&cros_ec_lpc_driver);
 	cros_ec_lpc_reg_destroy();
 }
-- 
GitLab


From 450de8f43fa665189b8663ae54985152781fb6a4 Mon Sep 17 00:00:00 2001
From: Archana Patni <archana.patni@intel.com>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0392/1958] platform/chrome: cros_ec_lpc: Add power management
 ops

This patch adds suspend and resume pm ops to the LPC ChromeOS EC driver.
These LPC handlers call the croc_ec generic handlers.

Signed-off-by: Archana Patni <archana.patni@intel.com>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_lpc.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 9070352168a64..89afad7bb16e5 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -333,10 +333,31 @@ static struct dmi_system_id cros_ec_lpc_dmi_table[] __initdata = {
 };
 MODULE_DEVICE_TABLE(dmi, cros_ec_lpc_dmi_table);
 
+#ifdef CONFIG_PM_SLEEP
+static int cros_ec_lpc_suspend(struct device *dev)
+{
+	struct cros_ec_device *ec_dev = dev_get_drvdata(dev);
+
+	return cros_ec_suspend(ec_dev);
+}
+
+static int cros_ec_lpc_resume(struct device *dev)
+{
+	struct cros_ec_device *ec_dev = dev_get_drvdata(dev);
+
+	return cros_ec_resume(ec_dev);
+}
+#endif
+
+const struct dev_pm_ops cros_ec_lpc_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(cros_ec_lpc_suspend, cros_ec_lpc_resume)
+};
+
 static struct platform_driver cros_ec_lpc_driver = {
 	.driver = {
 		.name = DRV_NAME,
 		.acpi_match_table = cros_ec_lpc_acpi_device_ids,
+		.pm = &cros_ec_lpc_pm_ops,
 	},
 	.probe = cros_ec_lpc_probe,
 	.remove = cros_ec_lpc_remove,
-- 
GitLab


From a6df7798d03b29095723c5fe9174ee2b53135ed0 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0393/1958] platform/chrome: cros_ec_lpc: Add MKBP events
 support over ACPI

This patch installs a notify handler to process MKBP events for EC
firmware directing them over ACPI.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_lpc.c | 32 +++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 89afad7bb16e5..eeb187e558ce9 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -227,9 +227,20 @@ static int cros_ec_lpc_readmem(struct cros_ec_device *ec, unsigned int offset,
 	return cnt;
 }
 
+static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
+{
+	struct cros_ec_device *ec_dev = data;
+
+	if (ec_dev->mkbp_event_supported && cros_ec_get_next_event(ec_dev) > 0)
+		blocking_notifier_call_chain(&ec_dev->event_notifier, 0,
+					     ec_dev);
+}
+
 static int cros_ec_lpc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct acpi_device *adev;
+	acpi_status status;
 	struct cros_ec_device *ec_dev;
 	u8 buf[2];
 	int ret;
@@ -277,12 +288,33 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	/*
+	 * Connect a notify handler to process MKBP messages if we have a
+	 * companion ACPI device.
+	 */
+	adev = ACPI_COMPANION(dev);
+	if (adev) {
+		status = acpi_install_notify_handler(adev->handle,
+						     ACPI_ALL_NOTIFY,
+						     cros_ec_lpc_acpi_notify,
+						     ec_dev);
+		if (ACPI_FAILURE(status))
+			dev_warn(dev, "Failed to register notifier %08x\n",
+				 status);
+	}
+
 	return 0;
 }
 
 static int cros_ec_lpc_remove(struct platform_device *pdev)
 {
 	struct cros_ec_device *ec_dev;
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(&pdev->dev);
+	if (adev)
+		acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY,
+					   cros_ec_lpc_acpi_notify);
 
 	ec_dev = platform_get_drvdata(pdev);
 	cros_ec_remove(ec_dev);
-- 
GitLab


From be3ebebf4377fe924f0419f78fc82cf01a31e692 Mon Sep 17 00:00:00 2001
From: Eric Caruso <ejcaruso@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0394/1958] platform/chrome: cros_ec_lightbar - Add lightbar
 program feature to sysfs

Add a program feature so we can upload and run programs for lightbar
sequences. We should be able to use this to shift sequences out of the
EC and save space there.

  $ cat <suitable program bin> > /sys/devices/.../cros_ec/program
  $ echo program > /sys/devices/.../cros_ec/sequence

Signed-off-by: Eric Caruso <ejcaruso@chromium.org>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_lightbar.c | 69 +++++++++++++++++++++-
 include/linux/mfd/cros_ec_commands.h       | 12 +++-
 2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 8df3d447cacf0..26675059707e6 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -295,7 +295,8 @@ static ssize_t led_rgb_store(struct device *dev, struct device_attribute *attr,
 
 static char const *seqname[] = {
 	"ERROR", "S5", "S3", "S0", "S5S3", "S3S0",
-	"S0S3", "S3S5", "STOP", "RUN", "PULSE", "TEST", "KONAMI",
+	"S0S3", "S3S5", "STOP", "RUN", "KONAMI",
+	"TAP", "PROGRAM",
 };
 
 static ssize_t sequence_show(struct device *dev,
@@ -390,6 +391,69 @@ static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
 	return ret;
 }
 
+static ssize_t program_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int extra_bytes, max_size, ret;
+	struct ec_params_lightbar *param;
+	struct cros_ec_command *msg;
+	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
+					      class_dev);
+
+	/*
+	 * We might need to reject the program for size reasons. The EC
+	 * enforces a maximum program size, but we also don't want to try
+	 * and send a program that is too big for the protocol. In order
+	 * to ensure the latter, we also need to ensure we have extra bytes
+	 * to represent the rest of the packet.
+	 */
+	extra_bytes = sizeof(*param) - sizeof(param->set_program.data);
+	max_size = min(EC_LB_PROG_LEN, ec->ec_dev->max_request - extra_bytes);
+	if (count > max_size) {
+		dev_err(dev, "Program is %u bytes, too long to send (max: %u)",
+			(unsigned int)count, max_size);
+
+		return -EINVAL;
+	}
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = lb_throttle();
+	if (ret)
+		goto exit;
+
+	dev_info(dev, "Copying %zu byte program to EC", count);
+
+	param = (struct ec_params_lightbar *)msg->data;
+	param->cmd = LIGHTBAR_CMD_SET_PROGRAM;
+
+	param->set_program.size = count;
+	memcpy(param->set_program.data, buf, count);
+
+	/*
+	 * We need to set the message size manually or else it will use
+	 * EC_LB_PROG_LEN. This might be too long, and the program
+	 * is unlikely to use all of the space.
+	 */
+	msg->outsize = count + extra_bytes;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (ret < 0)
+		goto exit;
+	if (msg->result != EC_RES_SUCCESS) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = count;
+exit:
+	kfree(msg);
+
+	return ret;
+}
+
 /* Module initialization */
 
 static DEVICE_ATTR_RW(interval_msec);
@@ -397,12 +461,15 @@ static DEVICE_ATTR_RO(version);
 static DEVICE_ATTR_WO(brightness);
 static DEVICE_ATTR_WO(led_rgb);
 static DEVICE_ATTR_RW(sequence);
+static DEVICE_ATTR_WO(program);
+
 static struct attribute *__lb_cmds_attrs[] = {
 	&dev_attr_interval_msec.attr,
 	&dev_attr_version.attr,
 	&dev_attr_brightness.attr,
 	&dev_attr_led_rgb.attr,
 	&dev_attr_sequence.attr,
+	&dev_attr_program.attr,
 	NULL,
 };
 
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 1b19e424e1cf6..dbea5802e83b1 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -1162,6 +1162,13 @@ struct lightbar_params_v1 {
 	struct rgb_s color[8];			/* 0-3 are Google colors */
 } __packed;
 
+/* Lightbar program */
+#define EC_LB_PROG_LEN 192
+struct lightbar_program {
+	uint8_t size;
+	uint8_t data[EC_LB_PROG_LEN];
+};
+
 struct ec_params_lightbar {
 	uint8_t cmd;		      /* Command (see enum lightbar_command) */
 	union {
@@ -1188,6 +1195,7 @@ struct ec_params_lightbar {
 
 		struct lightbar_params_v0 set_params_v0;
 		struct lightbar_params_v1 set_params_v1;
+		struct lightbar_program set_program;
 	};
 } __packed;
 
@@ -1220,7 +1228,8 @@ struct ec_response_lightbar {
 		struct {
 			/* no return params */
 		} off, on, init, set_brightness, seq, reg, set_rgb,
-			demo, set_params_v0, set_params_v1;
+			demo, set_params_v0, set_params_v1,
+			set_program;
 	};
 } __packed;
 
@@ -1244,6 +1253,7 @@ enum lightbar_command {
 	LIGHTBAR_CMD_GET_DEMO = 15,
 	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
 	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
+	LIGHTBAR_CMD_SET_PROGRAM = 18,
 	LIGHTBAR_NUM_CMDS
 };
 
-- 
GitLab


From 405c84308c4335ee7cb58b9304b77b85e61f7129 Mon Sep 17 00:00:00 2001
From: Eric Caruso <ejcaruso@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0395/1958] platform/chrome: cros_ec_lightbar - Control of
 suspend/resume lightbar sequence

Don't let EC control suspend/resume sequence. If the EC controls the
lightbar and sets the sequence when it notices the chipset transitioning
between states, we can't make exceptions for cases where we don't want
to activate the lightbar. Instead, let's move the suspend/resume
notifications into the kernel so we can selectively play the sequences.

Signed-off-by: Eric Caruso <ejcaruso@chromium.org>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_dev.c      | 37 ++++++++++
 drivers/platform/chrome/cros_ec_dev.h      |  6 ++
 drivers/platform/chrome/cros_ec_lightbar.c | 85 +++++++++++++++++++++-
 include/linux/mfd/cros_ec_commands.h       | 11 ++-
 4 files changed, 134 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 20ce1c23fb5c0..7c26223192111 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -21,6 +21,7 @@
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
@@ -435,6 +436,10 @@ static int ec_device_probe(struct platform_device *pdev)
 	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
 		cros_ec_sensors_register(ec);
 
+	/* Take control of the lightbar from the EC. */
+	if (ec_has_lightbar(ec))
+		lb_manual_suspend_ctrl(ec, 1);
+
 	return 0;
 
 failed:
@@ -446,6 +451,10 @@ static int ec_device_remove(struct platform_device *pdev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
 
+	/* Let the EC take over the lightbar again. */
+	if (ec_has_lightbar(ec))
+		lb_manual_suspend_ctrl(ec, 0);
+
 	cros_ec_debugfs_remove(ec);
 
 	cdev_del(&ec->cdev);
@@ -459,9 +468,37 @@ static const struct platform_device_id cros_ec_id[] = {
 };
 MODULE_DEVICE_TABLE(platform, cros_ec_id);
 
+static int ec_device_suspend(struct device *dev)
+{
+	struct cros_ec_dev *ec = dev_get_drvdata(dev);
+
+	if (ec_has_lightbar(ec))
+		lb_suspend(ec);
+
+	return 0;
+}
+
+static int ec_device_resume(struct device *dev)
+{
+	struct cros_ec_dev *ec = dev_get_drvdata(dev);
+
+	if (ec_has_lightbar(ec))
+		lb_resume(ec);
+
+	return 0;
+}
+
+static const struct dev_pm_ops cros_ec_dev_pm_ops = {
+#ifdef CONFIG_PM_SLEEP
+	.suspend = ec_device_suspend,
+	.resume = ec_device_resume,
+#endif
+};
+
 static struct platform_driver cros_ec_dev_driver = {
 	.driver = {
 		.name = "cros-ec-ctl",
+		.pm = &cros_ec_dev_pm_ops,
 	},
 	.probe = ec_device_probe,
 	.remove = ec_device_remove,
diff --git a/drivers/platform/chrome/cros_ec_dev.h b/drivers/platform/chrome/cros_ec_dev.h
index bfd2c84c3571c..45e9453608c58 100644
--- a/drivers/platform/chrome/cros_ec_dev.h
+++ b/drivers/platform/chrome/cros_ec_dev.h
@@ -43,4 +43,10 @@ struct cros_ec_readmem {
 #define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
 #define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
 
+/* Lightbar utilities */
+extern bool ec_has_lightbar(struct cros_ec_dev *ec);
+extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable);
+extern int lb_suspend(struct cros_ec_dev *ec);
+extern int lb_resume(struct cros_ec_dev *ec);
+
 #endif /* _CROS_EC_DEV_H_ */
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 26675059707e6..4df379dc4bb99 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -341,6 +341,80 @@ static ssize_t sequence_show(struct device *dev,
 	return ret;
 }
 
+static int lb_send_empty_cmd(struct cros_ec_dev *ec, uint8_t cmd)
+{
+	struct ec_params_lightbar *param;
+	struct cros_ec_command *msg;
+	int ret;
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	param = (struct ec_params_lightbar *)msg->data;
+	param->cmd = cmd;
+
+	ret = lb_throttle();
+	if (ret)
+		goto error;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (ret < 0)
+		goto error;
+	if (msg->result != EC_RES_SUCCESS) {
+		ret = -EINVAL;
+		goto error;
+	}
+	ret = 0;
+error:
+	kfree(msg);
+
+	return ret;
+}
+
+int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
+{
+	struct ec_params_lightbar *param;
+	struct cros_ec_command *msg;
+	int ret;
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	param = (struct ec_params_lightbar *)msg->data;
+
+	param->cmd = LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL;
+	param->manual_suspend_ctrl.enable = enable;
+
+	ret = lb_throttle();
+	if (ret)
+		goto error;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (ret < 0)
+		goto error;
+	if (msg->result != EC_RES_SUCCESS) {
+		ret = -EINVAL;
+		goto error;
+	}
+	ret = 0;
+error:
+	kfree(msg);
+
+	return ret;
+}
+
+int lb_suspend(struct cros_ec_dev *ec)
+{
+	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND);
+}
+
+int lb_resume(struct cros_ec_dev *ec)
+{
+	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME);
+}
+
 static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
@@ -473,6 +547,11 @@ static struct attribute *__lb_cmds_attrs[] = {
 	NULL,
 };
 
+bool ec_has_lightbar(struct cros_ec_dev *ec)
+{
+	return !!get_lightbar_version(ec, NULL, NULL);
+}
+
 static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 						  struct attribute *a, int n)
 {
@@ -489,10 +568,10 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 		return 0;
 
 	/* Only instantiate this stuff if the EC has a lightbar */
-	if (get_lightbar_version(ec, NULL, NULL))
+	if (ec_has_lightbar(ec))
 		return a->mode;
-	else
-		return 0;
+
+	return 0;
 }
 
 struct attribute_group cros_ec_lightbar_attr_group = {
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index dbea5802e83b1..190c8f4afa026 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -1175,7 +1175,7 @@ struct ec_params_lightbar {
 		struct {
 			/* no args */
 		} dump, off, on, init, get_seq, get_params_v0, get_params_v1,
-			version, get_brightness, get_demo;
+			version, get_brightness, get_demo, suspend, resume;
 
 		struct {
 			uint8_t num;
@@ -1193,6 +1193,10 @@ struct ec_params_lightbar {
 			uint8_t led;
 		} get_rgb;
 
+		struct {
+			uint8_t enable;
+		} manual_suspend_ctrl;
+
 		struct lightbar_params_v0 set_params_v0;
 		struct lightbar_params_v1 set_params_v1;
 		struct lightbar_program set_program;
@@ -1229,7 +1233,7 @@ struct ec_response_lightbar {
 			/* no return params */
 		} off, on, init, set_brightness, seq, reg, set_rgb,
 			demo, set_params_v0, set_params_v1,
-			set_program;
+			set_program, manual_suspend_ctrl, suspend, resume;
 	};
 } __packed;
 
@@ -1254,6 +1258,9 @@ enum lightbar_command {
 	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
 	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
 	LIGHTBAR_CMD_SET_PROGRAM = 18,
+	LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19,
+	LIGHTBAR_CMD_SUSPEND = 20,
+	LIGHTBAR_CMD_RESUME = 21,
 	LIGHTBAR_NUM_CMDS
 };
 
-- 
GitLab


From abbb054d53266bfbd45ca9f2ba6522e3fd5b7f86 Mon Sep 17 00:00:00 2001
From: Eric Caruso <ejcaruso@chromium.org>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0396/1958] platform/chrome: cros_ec_lightbar - Add userspace
 lightbar control bit to EC

Some devices might want to turn off the lightbar if e.g. the
system turns the screen off due to idleness. This prevents the
kernel from going through its normal suspend/resume pathways.

Signed-off-by: Eric Caruso <ejcaruso@chromium.org>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_lightbar.c | 38 ++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 4df379dc4bb99..e570c1ef77281 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -38,6 +38,12 @@
 /* Rate-limit the lightbar interface to prevent DoS. */
 static unsigned long lb_interval_jiffies = 50 * HZ / 1000;
 
+/*
+ * Whether or not we have given userspace control of the lightbar.
+ * If this is true, we won't do anything during suspend/resume.
+ */
+static bool userspace_control;
+
 static ssize_t interval_msec_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
@@ -407,11 +413,17 @@ int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
 
 int lb_suspend(struct cros_ec_dev *ec)
 {
+	if (userspace_control)
+		return 0;
+
 	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND);
 }
 
 int lb_resume(struct cros_ec_dev *ec)
 {
+	if (userspace_control)
+		return 0;
+
 	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME);
 }
 
@@ -528,6 +540,30 @@ static ssize_t program_store(struct device *dev, struct device_attribute *attr,
 	return ret;
 }
 
+static ssize_t userspace_control_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%d\n", userspace_control);
+}
+
+static ssize_t userspace_control_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf,
+				       size_t count)
+{
+	bool enable;
+	int ret;
+
+	ret = strtobool(buf, &enable);
+	if (ret < 0)
+		return ret;
+
+	userspace_control = enable;
+
+	return count;
+}
+
 /* Module initialization */
 
 static DEVICE_ATTR_RW(interval_msec);
@@ -536,6 +572,7 @@ static DEVICE_ATTR_WO(brightness);
 static DEVICE_ATTR_WO(led_rgb);
 static DEVICE_ATTR_RW(sequence);
 static DEVICE_ATTR_WO(program);
+static DEVICE_ATTR_RW(userspace_control);
 
 static struct attribute *__lb_cmds_attrs[] = {
 	&dev_attr_interval_msec.attr,
@@ -544,6 +581,7 @@ static struct attribute *__lb_cmds_attrs[] = {
 	&dev_attr_led_rgb.attr,
 	&dev_attr_sequence.attr,
 	&dev_attr_program.attr,
+	&dev_attr_userspace_control.attr,
 	NULL,
 };
 
-- 
GitLab


From 995c0ec9a81f9e4e75f280f095534f4ed1a9ea9d Mon Sep 17 00:00:00 2001
From: Jeffery Yu <jefferyy@nvidia.com>
Date: Tue, 16 May 2017 17:46:48 +0200
Subject: [PATCH 0397/1958] platform/chrome: cros_ec_lightbar - Avoid I2C xfer
 to EC during suspend

A Mutex lock in cros_ec_cmd_xfer which may be held by frozen
Userspace thread during system suspending. So should not
call this routine in suspend thread.

Signed-off-by: Jeffery Yu <jefferyy@nvidia.com>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_dev.c      | 12 ++++--------
 drivers/platform/chrome/cros_ec_lightbar.c | 13 +++++++++----
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 7c26223192111..b9bf086f7569a 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -437,8 +437,7 @@ static int ec_device_probe(struct platform_device *pdev)
 		cros_ec_sensors_register(ec);
 
 	/* Take control of the lightbar from the EC. */
-	if (ec_has_lightbar(ec))
-		lb_manual_suspend_ctrl(ec, 1);
+	lb_manual_suspend_ctrl(ec, 1);
 
 	return 0;
 
@@ -452,8 +451,7 @@ static int ec_device_remove(struct platform_device *pdev)
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
 
 	/* Let the EC take over the lightbar again. */
-	if (ec_has_lightbar(ec))
-		lb_manual_suspend_ctrl(ec, 0);
+	lb_manual_suspend_ctrl(ec, 0);
 
 	cros_ec_debugfs_remove(ec);
 
@@ -472,8 +470,7 @@ static int ec_device_suspend(struct device *dev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(dev);
 
-	if (ec_has_lightbar(ec))
-		lb_suspend(ec);
+	lb_suspend(ec);
 
 	return 0;
 }
@@ -482,8 +479,7 @@ static int ec_device_resume(struct device *dev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(dev);
 
-	if (ec_has_lightbar(ec))
-		lb_resume(ec);
+	lb_resume(ec);
 
 	return 0;
 }
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index e570c1ef77281..fd2b047a27482 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -43,6 +43,7 @@ static unsigned long lb_interval_jiffies = 50 * HZ / 1000;
  * If this is true, we won't do anything during suspend/resume.
  */
 static bool userspace_control;
+static struct cros_ec_dev *ec_with_lightbar;
 
 static ssize_t interval_msec_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
@@ -384,6 +385,9 @@ int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
 	struct cros_ec_command *msg;
 	int ret;
 
+	if (ec != ec_with_lightbar)
+		return 0;
+
 	msg = alloc_lightbar_cmd_msg(ec);
 	if (!msg)
 		return -ENOMEM;
@@ -413,7 +417,7 @@ int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
 
 int lb_suspend(struct cros_ec_dev *ec)
 {
-	if (userspace_control)
+	if (userspace_control || ec != ec_with_lightbar)
 		return 0;
 
 	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND);
@@ -421,7 +425,7 @@ int lb_suspend(struct cros_ec_dev *ec)
 
 int lb_resume(struct cros_ec_dev *ec)
 {
-	if (userspace_control)
+	if (userspace_control || ec != ec_with_lightbar)
 		return 0;
 
 	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME);
@@ -606,9 +610,10 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 		return 0;
 
 	/* Only instantiate this stuff if the EC has a lightbar */
-	if (ec_has_lightbar(ec))
+	if (ec_has_lightbar(ec)) {
+		ec_with_lightbar = ec;
 		return a->mode;
-
+	}
 	return 0;
 }
 
-- 
GitLab


From 0ec7769a98b00866e37740328d65cba6594d178d Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Date: Wed, 31 May 2017 18:39:01 +0530
Subject: [PATCH 0398/1958] rtc: opal: Implement rtc_class_ops.alarm_irq_enable
 callback

Provide an implementation of the callback
rtc_class_ops.alarm_irq_enable for rtc-opal driver. This callback is
called when the wake alarm is disabled via the command:

'echo 0 > /sys/class/rtc/rtc0/wakealarm'

Without this the Timed-Power-On(TPO) config remains set even when its
disabled by the above command and FSP will still force machine
boot at previously configured alarm time.

The callback is implemented as function opal_tpo_alarm_irq_enable()
which calls opal_set_tpo_time() with alarm.enabled == 0. A branch is
added to opal_set_tpo_time() to handle this case by passing y_m_d ==
h_m_s_ms == 0 to opal as arguments for opal_tpo_write() call.

Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-opal.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c
index 2d84e2a28772c..e2a946c0e667e 100644
--- a/drivers/rtc/rtc-opal.c
+++ b/drivers/rtc/rtc-opal.c
@@ -167,7 +167,14 @@ static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm)
 	u32 y_m_d = 0;
 	int token, rc;
 
-	tm_to_opal(&alarm->time, &y_m_d, &h_m_s_ms);
+	/* if alarm is enabled */
+	if (alarm->enabled) {
+		tm_to_opal(&alarm->time, &y_m_d, &h_m_s_ms);
+		pr_debug("Alarm set to %x %llx\n", y_m_d, h_m_s_ms);
+
+	} else {
+		pr_debug("Alarm getting disabled\n");
+	}
 
 	token = opal_async_get_token_interruptible();
 	if (token < 0) {
@@ -200,6 +207,18 @@ static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm)
 	return rc;
 }
 
+int opal_tpo_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct rtc_wkalrm alarm = { .enabled = 0 };
+
+	/*
+	 * TPO is automatically enabled when opal_set_tpo_time() is called with
+	 * non-zero rtc-time. We only handle disable case which needs to be
+	 * explicitly told to opal.
+	 */
+	return enabled ? 0 : opal_set_tpo_time(dev, &alarm);
+}
+
 static struct rtc_class_ops opal_rtc_ops = {
 	.read_time	= opal_get_rtc_time,
 	.set_time	= opal_set_rtc_time,
@@ -215,6 +234,7 @@ static int opal_rtc_probe(struct platform_device *pdev)
 		device_set_wakeup_capable(&pdev->dev, true);
 		opal_rtc_ops.read_alarm	= opal_get_tpo_time;
 		opal_rtc_ops.set_alarm = opal_set_tpo_time;
+		opal_rtc_ops.alarm_irq_enable = opal_tpo_alarm_irq_enable;
 	}
 
 	rtc = devm_rtc_device_register(&pdev->dev, DRVNAME, &opal_rtc_ops,
-- 
GitLab


From fe6d94fe66b50c10c94911e525ad680a8f2dfc21 Mon Sep 17 00:00:00 2001
From: "Diaz de Grenu, Jose" <Jose.DiazdeGrenu@digi.com>
Date: Thu, 11 May 2017 17:38:39 +0200
Subject: [PATCH 0399/1958] rtc: mxc: remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This variable was never used. With GCC 6.2, we get the following warning:

drivers/rtc/rtc-mxc.c:44:18: warning: ‘PIE_BIT_DEF’ defined but not used [-Wunused-const-variable=]
 static const u32 PIE_BIT_DEF[MAX_PIE_NUM][2] = {

Signed-off-by: Diaz de Grenu, Jose <Jose.DiazdeGrenu@digi.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-mxc.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 77319122642ab..401f46d8f21b6 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -43,17 +43,6 @@
 
 #define MAX_PIE_NUM     9
 #define MAX_PIE_FREQ    512
-static const u32 PIE_BIT_DEF[MAX_PIE_NUM][2] = {
-	{ 2,		RTC_2HZ_BIT },
-	{ 4,		RTC_SAM0_BIT },
-	{ 8,		RTC_SAM1_BIT },
-	{ 16,		RTC_SAM2_BIT },
-	{ 32,		RTC_SAM3_BIT },
-	{ 64,		RTC_SAM4_BIT },
-	{ 128,		RTC_SAM5_BIT },
-	{ 256,		RTC_SAM6_BIT },
-	{ MAX_PIE_FREQ,	RTC_SAM7_BIT },
-};
 
 #define MXC_RTC_TIME	0
 #define MXC_RTC_ALARM	1
-- 
GitLab


From 39775431f82f890f4aaa08860a30883d081bffc7 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Sat, 24 Jun 2017 10:11:41 -0700
Subject: [PATCH 0400/1958] xfs: free uncommitted transactions during log
 recovery

Log recovery allocates in-core transaction and member item data
structures on-demand as it processes the on-disk log. Transactions
are allocated on first encounter on-disk and stored in a hash table
structure where they are easily accessible for subsequent lookups.
Transaction items are also allocated on demand and are attached to
the associated transactions.

When a commit record is encountered in the log, the transaction is
committed to the fs and the in-core structures are freed. If a
filesystem crashes or shuts down before all in-core log buffers are
flushed to the log, however, not all transactions may have commit
records in the log. As expected, the modifications in such an
incomplete transaction are not replayed to the fs. The in-core data
structures for the partial transaction are never freed, however,
resulting in a memory leak.

Update xlog_do_recovery_pass() to first correctly initialize the
hash table array so empty lists can be distinguished from populated
lists on function exit. Update xlog_recover_free_trans() to always
remove the transaction from the list prior to freeing the associated
memory. Finally, walk the hash table of transaction lists as the
last step before it goes out of scope and free any transactions that
may remain on the lists. This prevents a memory leak of partial
transactions in the log.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_recover.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e19b20c87f355..b6a40bd500992 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4152,7 +4152,7 @@ xlog_recover_commit_trans(
 
 	#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
 
-	hlist_del(&trans->r_list);
+	hlist_del_init(&trans->r_list);
 
 	error = xlog_recover_reorder_trans(log, trans, pass);
 	if (error)
@@ -4354,6 +4354,8 @@ xlog_recover_free_trans(
 	xlog_recover_item_t	*item, *n;
 	int			i;
 
+	hlist_del_init(&trans->r_list);
+
 	list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
 		/* Free the regions in the item. */
 		list_del(&item->ri_list);
@@ -5224,12 +5226,16 @@ xlog_do_recovery_pass(
 	int			error2 = 0;
 	int			bblks, split_bblks;
 	int			hblks, split_hblks, wrapped_hblks;
+	int			i;
 	struct hlist_head	rhash[XLOG_RHASH_SIZE];
 	LIST_HEAD		(buffer_list);
 
 	ASSERT(head_blk != tail_blk);
 	rhead_blk = 0;
 
+	for (i = 0; i < XLOG_RHASH_SIZE; i++)
+		INIT_HLIST_HEAD(&rhash[i]);
+
 	/*
 	 * Read the header of the tail block and get the iclog buffer size from
 	 * h_size.  Use this to tell how many sectors make up the log header.
@@ -5466,6 +5472,19 @@ xlog_do_recovery_pass(
 	if (error && first_bad)
 		*first_bad = rhead_blk;
 
+	/*
+	 * Transactions are freed at commit time but transactions without commit
+	 * records on disk are never committed. Free any that may be left in the
+	 * hash table.
+	 */
+	for (i = 0; i < XLOG_RHASH_SIZE; i++) {
+		struct hlist_node	*tmp;
+		struct xlog_recover	*trans;
+
+		hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list)
+			xlog_recover_free_trans(trans);
+	}
+
 	return error ? error : error2;
 }
 
-- 
GitLab


From 8768e7b3e3677fb51bf3ed5ceef174f40be4c287 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 16 Jun 2017 21:28:02 +0200
Subject: [PATCH 0401/1958] rtc: s3c: Jump to central exit point on getting src
 clock error

In other error paths in probe, centralized exit point was used so make
this consistent.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-s3c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index d44fb34df8fe8..c5aa7a35d07fe 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -510,8 +510,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 			else
 				dev_dbg(&pdev->dev,
 					"probe deferred due to missing rtc src clk\n");
-			clk_disable_unprepare(info->rtc_clk);
-			return ret;
+			goto err_src_clk;
 		}
 		clk_prepare_enable(info->rtc_src_clk);
 	}
@@ -575,6 +574,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
 	if (info->data->needs_src_clk)
 		clk_disable_unprepare(info->rtc_src_clk);
+err_src_clk:
 	clk_disable_unprepare(info->rtc_clk);
 
 	return ret;
-- 
GitLab


From fc1afe60535eeb433648ebf80e23868cc6dd4fca Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 16 Jun 2017 21:28:03 +0200
Subject: [PATCH 0402/1958] rtc: s3c: Minor white-space cleanups

Minor cleanups to make the code easier to read. No functional changes.
1. Remove one space before labels as this is nowadays mostly preferred.
2. Fix indentation of arguments in function calls.
3. Split structure member declaration.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-s3c.c | 47 +++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index c5aa7a35d07fe..2b503dab79578 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -49,7 +49,8 @@ struct s3c_rtc {
 	spinlock_t pie_lock;
 	spinlock_t alarm_clk_lock;
 
-	int ticnt_save, ticnt_en_save;
+	int ticnt_save;
+	int ticnt_en_save;
 	bool wake_en;
 };
 
@@ -169,7 +170,7 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 
 	s3c_rtc_enable_clk(info);
 
- retry_get_time:
+retry_get_time:
 	rtc_tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
 	rtc_tm->tm_hour = readb(info->base + S3C2410_RTCHOUR);
 	rtc_tm->tm_mday = readb(info->base + S3C2410_RTCDATE);
@@ -199,8 +200,8 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_year += 100;
 
 	dev_dbg(dev, "read time %04d.%02d.%02d %02d:%02d:%02d\n",
-		 1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
-		 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
+		1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
+		rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
 	rtc_tm->tm_mon -= 1;
 
@@ -213,8 +214,8 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 	int year = tm->tm_year - 100;
 
 	dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n",
-		 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
-		 tm->tm_hour, tm->tm_min, tm->tm_sec);
+		1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec);
 
 	/* we get around y2k by simply not supporting it */
 
@@ -259,9 +260,9 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	alrm->enabled = (alm_en & S3C2410_RTCALM_ALMEN) ? 1 : 0;
 
 	dev_dbg(dev, "read alarm %d, %04d.%02d.%02d %02d:%02d:%02d\n",
-		 alm_en,
-		 1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday,
-		 alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec);
+		alm_en,
+		1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday,
+		alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec);
 
 	/* decode the alarm enable field */
 	if (alm_en & S3C2410_RTCALM_SECEN)
@@ -295,9 +296,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	int year = tm->tm_year - 100;
 
 	dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
-		 alrm->enabled,
-		 1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
-		 tm->tm_hour, tm->tm_min, tm->tm_sec);
+		alrm->enabled,
+		1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec);
 
 	s3c_rtc_enable_clk(info);
 
@@ -378,8 +379,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 		dev_info(info->dev, "rtc disabled, re-enabling\n");
 
 		tmp = readw(info->base + S3C2410_RTCCON);
-		writew(tmp | S3C2410_RTCCON_RTCEN,
-			info->base + S3C2410_RTCCON);
+		writew(tmp | S3C2410_RTCCON_RTCEN, info->base + S3C2410_RTCCON);
 	}
 
 	if (con & S3C2410_RTCCON_CNTSEL) {
@@ -387,7 +387,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 
 		tmp = readw(info->base + S3C2410_RTCCON);
 		writew(tmp & ~S3C2410_RTCCON_CNTSEL,
-			info->base + S3C2410_RTCCON);
+		       info->base + S3C2410_RTCCON);
 	}
 
 	if (con & S3C2410_RTCCON_CLKRST) {
@@ -395,7 +395,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 
 		tmp = readw(info->base + S3C2410_RTCCON);
 		writew(tmp & ~S3C2410_RTCCON_CLKRST,
-			info->base + S3C2410_RTCCON);
+		       info->base + S3C2410_RTCCON);
 	}
 }
 
@@ -481,7 +481,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 	}
 
 	dev_dbg(&pdev->dev, "s3c2410_rtc: tick irq %d, alarm irq %d\n",
-		 info->irq_tick, info->irq_alarm);
+		info->irq_tick, info->irq_alarm);
 
 	/* get the memory region */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -520,7 +520,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 		info->data->enable(info);
 
 	dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n",
-		 readw(info->base + S3C2410_RTCCON));
+		readw(info->base + S3C2410_RTCCON));
 
 	device_init_wakeup(&pdev->dev, 1);
 
@@ -540,7 +540,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
 	/* register RTC and exit */
 	info->rtc = devm_rtc_device_register(&pdev->dev, "s3c", &s3c_rtcops,
-				  THIS_MODULE);
+					     THIS_MODULE);
 	if (IS_ERR(info->rtc)) {
 		dev_err(&pdev->dev, "cannot attach rtc\n");
 		ret = PTR_ERR(info->rtc);
@@ -548,14 +548,14 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 	}
 
 	ret = devm_request_irq(&pdev->dev, info->irq_alarm, s3c_rtc_alarmirq,
-			  0,  "s3c2410-rtc alarm", info);
+			       0, "s3c2410-rtc alarm", info);
 	if (ret) {
 		dev_err(&pdev->dev, "IRQ%d error %d\n", info->irq_alarm, ret);
 		goto err_nortc;
 	}
 
 	ret = devm_request_irq(&pdev->dev, info->irq_tick, s3c_rtc_tickirq,
-			  0,  "s3c2410-rtc tick", info);
+			       0, "s3c2410-rtc tick", info);
 	if (ret) {
 		dev_err(&pdev->dev, "IRQ%d error %d\n", info->irq_tick, ret);
 		goto err_nortc;
@@ -568,7 +568,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
 	return 0;
 
- err_nortc:
+err_nortc:
 	if (info->data->disable)
 		info->data->disable(info);
 
@@ -747,8 +747,7 @@ static void s3c6410_rtc_restore_tick_cnt(struct s3c_rtc *info)
 	writel(info->ticnt_save, info->base + S3C2410_TICNT);
 	if (info->ticnt_en_save) {
 		con = readw(info->base + S3C2410_RTCCON);
-		writew(con | info->ticnt_en_save,
-				info->base + S3C2410_RTCCON);
+		writew(con | info->ticnt_en_save, info->base + S3C2410_RTCCON);
 	}
 }
 
-- 
GitLab


From 21df6fed0db35cf4382734126bd206b6c750e17c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 16 Jun 2017 21:28:04 +0200
Subject: [PATCH 0403/1958] rtc: s3c: Drop unneeded cast to void pointer

There is no need for casting to void pointer for of_device_id data.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-s3c.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 2b503dab79578..bfc8660ff1e74 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -801,19 +801,19 @@ static struct s3c_rtc_data const s3c6410_rtc_data = {
 static const struct of_device_id s3c_rtc_dt_match[] = {
 	{
 		.compatible = "samsung,s3c2410-rtc",
-		.data = (void *)&s3c2410_rtc_data,
+		.data = &s3c2410_rtc_data,
 	}, {
 		.compatible = "samsung,s3c2416-rtc",
-		.data = (void *)&s3c2416_rtc_data,
+		.data = &s3c2416_rtc_data,
 	}, {
 		.compatible = "samsung,s3c2443-rtc",
-		.data = (void *)&s3c2443_rtc_data,
+		.data = &s3c2443_rtc_data,
 	}, {
 		.compatible = "samsung,s3c6410-rtc",
-		.data = (void *)&s3c6410_rtc_data,
+		.data = &s3c6410_rtc_data,
 	}, {
 		.compatible = "samsung,exynos3250-rtc",
-		.data = (void *)&s3c6410_rtc_data,
+		.data = &s3c6410_rtc_data,
 	},
 	{ /* sentinel */ },
 };
-- 
GitLab


From 6b72086d8594febb9e9d63cfb29d950da17e8f06 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 16 Jun 2017 21:28:05 +0200
Subject: [PATCH 0404/1958] rtc: s3c: Do not remove const from rodata memory

All instances of struct s3c_rtc_data are in fact static const thus
put in rodata so we should not drop the const while getting the pointer
to them.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-s3c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index bfc8660ff1e74..c666b95fb8d7f 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -41,7 +41,7 @@ struct s3c_rtc {
 	struct clk *rtc_src_clk;
 	bool clk_disabled;
 
-	struct s3c_rtc_data *data;
+	const struct s3c_rtc_data *data;
 
 	int irq_alarm;
 	int irq_tick;
@@ -437,12 +437,12 @@ static int s3c_rtc_remove(struct platform_device *pdev)
 
 static const struct of_device_id s3c_rtc_dt_match[];
 
-static struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device *pdev)
+static const struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
 
 	match = of_match_node(s3c_rtc_dt_match, pdev->dev.of_node);
-	return (struct s3c_rtc_data *)match->data;
+	return match->data;
 }
 
 static int s3c_rtc_probe(struct platform_device *pdev)
-- 
GitLab


From 9903f68af6ca9d9b25f5c50bfedd2e0cb899f848 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 16 Jun 2017 21:28:06 +0200
Subject: [PATCH 0405/1958] rtc: s3c: Handle clock prepare failures in probe

clk_prepare_enable() can fail so handle such case.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-s3c.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index c666b95fb8d7f..0cb2f27a30b41 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -498,7 +498,9 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 			dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n");
 		return ret;
 	}
-	clk_prepare_enable(info->rtc_clk);
+	ret = clk_prepare_enable(info->rtc_clk);
+	if (ret)
+		return ret;
 
 	if (info->data->needs_src_clk) {
 		info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src");
@@ -512,7 +514,9 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 					"probe deferred due to missing rtc src clk\n");
 			goto err_src_clk;
 		}
-		clk_prepare_enable(info->rtc_src_clk);
+		ret = clk_prepare_enable(info->rtc_src_clk);
+		if (ret)
+			goto err_src_clk;
 	}
 
 	/* check to see if everything is setup correctly */
-- 
GitLab


From 498bcf313985c76c4572d8ce885a6480728d28f9 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 16 Jun 2017 21:28:07 +0200
Subject: [PATCH 0406/1958] rtc: s3c: Handle clock enable failures

clk_enable() can fail so handle such case.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-s3c.c | 72 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 0cb2f27a30b41..a8992c227f611 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -68,18 +68,32 @@ struct s3c_rtc_data {
 	void (*disable) (struct s3c_rtc *info);
 };
 
-static void s3c_rtc_enable_clk(struct s3c_rtc *info)
+static int s3c_rtc_enable_clk(struct s3c_rtc *info)
 {
 	unsigned long irq_flags;
+	int ret = 0;
 
 	spin_lock_irqsave(&info->alarm_clk_lock, irq_flags);
+
 	if (info->clk_disabled) {
-		clk_enable(info->rtc_clk);
-		if (info->data->needs_src_clk)
-			clk_enable(info->rtc_src_clk);
+		ret = clk_enable(info->rtc_clk);
+		if (ret)
+			goto out;
+
+		if (info->data->needs_src_clk) {
+			ret = clk_enable(info->rtc_src_clk);
+			if (ret) {
+				clk_disable(info->rtc_clk);
+				goto out;
+			}
+		}
 		info->clk_disabled = false;
 	}
+
+out:
 	spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags);
+
+	return ret;
 }
 
 static void s3c_rtc_disable_clk(struct s3c_rtc *info)
@@ -122,10 +136,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	unsigned int tmp;
+	int ret;
 
 	dev_dbg(info->dev, "%s: aie=%d\n", __func__, enabled);
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	tmp = readb(info->base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
 
@@ -136,10 +153,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
 
 	s3c_rtc_disable_clk(info);
 
-	if (enabled)
-		s3c_rtc_enable_clk(info);
-	else
+	if (enabled) {
+		ret = s3c_rtc_enable_clk(info);
+		if (ret)
+			return ret;
+	} else {
 		s3c_rtc_disable_clk(info);
+	}
 
 	return 0;
 }
@@ -147,10 +167,14 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
 /* Set RTC frequency */
 static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq)
 {
+	int ret;
+
 	if (!is_power_of_2(freq))
 		return -EINVAL;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 	spin_lock_irq(&info->pie_lock);
 
 	if (info->data->set_freq)
@@ -167,8 +191,11 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	unsigned int have_retried = 0;
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 retry_get_time:
 	rtc_tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
@@ -212,6 +239,7 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	int year = tm->tm_year - 100;
+	int ret;
 
 	dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n",
 		1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
@@ -224,7 +252,9 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 		return -EINVAL;
 	}
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	writeb(bin2bcd(tm->tm_sec),  info->base + S3C2410_RTCSEC);
 	writeb(bin2bcd(tm->tm_min),  info->base + S3C2410_RTCMIN);
@@ -243,8 +273,11 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	struct rtc_time *alm_tm = &alrm->time;
 	unsigned int alm_en;
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	alm_tm->tm_sec  = readb(info->base + S3C2410_ALMSEC);
 	alm_tm->tm_min  = readb(info->base + S3C2410_ALMMIN);
@@ -293,6 +326,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	struct rtc_time *tm = &alrm->time;
 	unsigned int alrm_en;
+	int ret;
 	int year = tm->tm_year - 100;
 
 	dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
@@ -300,7 +334,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 		1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	alrm_en = readb(info->base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
 	writeb(0x00, info->base + S3C2410_RTCALM);
@@ -349,8 +385,11 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	if (info->data->enable_tick)
 		info->data->enable_tick(info, seq);
@@ -589,8 +628,11 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 static int s3c_rtc_suspend(struct device *dev)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	/* save TICNT for anyone using periodic interrupts */
 	if (info->data->save_tick_cnt)
-- 
GitLab


From ee0981be7704589635de23cc32556399c86653a8 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 18 Jun 2017 22:55:28 +0200
Subject: [PATCH 0407/1958] rtc: ds1307: Add support for Epson RX8130CE

Add support for yet another RTC chip, Epson RX8130CE. This time around,
the chip has slightly permutated registers and also the register starts
at 0x10 instead of 0x0 .

So far, we only support the RTC and NVRAM parts of the chip, Alarm and
Timer is not supported.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Nishanth Menon <nm@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 175 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 16ace2fb0c0a8..3cbb7636b405c 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -44,6 +44,7 @@ enum ds_type {
 	m41t00,
 	mcp794xx,
 	rx_8025,
+	rx_8130,
 	last_ds_type /* always last */
 	/* rs5c372 too?  different address... */
 };
@@ -169,6 +170,12 @@ static struct chip_desc chips[last_ds_type] = {
 	[ds_3231] = {
 		.alarm		= 1,
 	},
+	[rx_8130] = {
+		.alarm		= 1,
+		/* this is battery backed SRAM */
+		.nvram_offset	= 0x20,
+		.nvram_size	= 4,	/* 32bit (4 word x 8 bit) */
+	},
 	[mcp794xx] = {
 		.alarm		= 1,
 		/* this is battery backed SRAM */
@@ -192,6 +199,7 @@ static const struct i2c_device_id ds1307_id[] = {
 	{ "pt7c4338", ds_1307 },
 	{ "rx8025", rx_8025 },
 	{ "isl12057", ds_1337 },
+	{ "rx8130", rx_8130 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, ds1307_id);
@@ -586,6 +594,165 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
 
 /*----------------------------------------------------------------------*/
 
+/*
+ * Alarm support for rx8130 devices.
+ */
+
+#define RX8130_REG_ALARM_MIN		0x07
+#define RX8130_REG_ALARM_HOUR		0x08
+#define RX8130_REG_ALARM_WEEK_OR_DAY	0x09
+#define RX8130_REG_EXTENSION		0x0c
+#define RX8130_REG_EXTENSION_WADA	(1 << 3)
+#define RX8130_REG_FLAG			0x0d
+#define RX8130_REG_FLAG_AF		(1 << 3)
+#define RX8130_REG_CONTROL0		0x0e
+#define RX8130_REG_CONTROL0_AIE		(1 << 3)
+
+static irqreturn_t rx8130_irq(int irq, void *dev_id)
+{
+	struct ds1307           *ds1307 = dev_id;
+	struct mutex            *lock = &ds1307->rtc->ops_lock;
+	u8 ctl[3];
+	int ret;
+
+	mutex_lock(lock);
+
+	/* Read control registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		goto out;
+	if (!(ctl[1] & RX8130_REG_FLAG_AF))
+		goto out;
+	ctl[1] &= ~RX8130_REG_FLAG_AF;
+	ctl[2] &= ~RX8130_REG_CONTROL0_AIE;
+
+	ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		goto out;
+
+	rtc_update_irq(ds1307->rtc, 1, RTC_AF | RTC_IRQF);
+
+out:
+	mutex_unlock(lock);
+
+	return IRQ_HANDLED;
+}
+
+static int rx8130_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
+	u8 ald[3], ctl[3];
+	int ret;
+
+	if (!test_bit(HAS_ALARM, &ds1307->flags))
+		return -EINVAL;
+
+	/* Read alarm registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_ALARM_MIN, ald, 3);
+	if (ret < 0)
+		return ret;
+
+	/* Read control registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		return ret;
+
+	t->enabled = !!(ctl[2] & RX8130_REG_CONTROL0_AIE);
+	t->pending = !!(ctl[1] & RX8130_REG_FLAG_AF);
+
+	/* Report alarm 0 time assuming 24-hour and day-of-month modes. */
+	t->time.tm_sec = -1;
+	t->time.tm_min = bcd2bin(ald[0] & 0x7f);
+	t->time.tm_hour = bcd2bin(ald[1] & 0x7f);
+	t->time.tm_wday = -1;
+	t->time.tm_mday = bcd2bin(ald[2] & 0x7f);
+	t->time.tm_mon = -1;
+	t->time.tm_year = -1;
+	t->time.tm_yday = -1;
+	t->time.tm_isdst = -1;
+
+	dev_dbg(dev, "%s, sec=%d min=%d hour=%d wday=%d mday=%d mon=%d enabled=%d\n",
+		__func__, t->time.tm_sec, t->time.tm_min, t->time.tm_hour,
+		t->time.tm_wday, t->time.tm_mday, t->time.tm_mon, t->enabled);
+
+	return 0;
+}
+
+static int rx8130_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
+	u8 ald[3], ctl[3];
+	int ret;
+
+	if (!test_bit(HAS_ALARM, &ds1307->flags))
+		return -EINVAL;
+
+	dev_dbg(dev, "%s, sec=%d min=%d hour=%d wday=%d mday=%d mon=%d "
+		"enabled=%d pending=%d\n", __func__,
+		t->time.tm_sec, t->time.tm_min, t->time.tm_hour,
+		t->time.tm_wday, t->time.tm_mday, t->time.tm_mon,
+		t->enabled, t->pending);
+
+	/* Read control registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		return ret;
+
+	ctl[0] &= ~RX8130_REG_EXTENSION_WADA;
+	ctl[1] |= RX8130_REG_FLAG_AF;
+	ctl[2] &= ~RX8130_REG_CONTROL0_AIE;
+
+	ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		return ret;
+
+	/* Hardware alarm precision is 1 minute! */
+	ald[0] = bin2bcd(t->time.tm_min);
+	ald[1] = bin2bcd(t->time.tm_hour);
+	ald[2] = bin2bcd(t->time.tm_mday);
+
+	ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_ALARM_MIN, ald, 3);
+	if (ret < 0)
+		return ret;
+
+	if (!t->enabled)
+		return 0;
+
+	ctl[2] |= RX8130_REG_CONTROL0_AIE;
+
+	return regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+}
+
+static int rx8130_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
+	int ret, reg;
+
+	if (!test_bit(HAS_ALARM, &ds1307->flags))
+		return -EINVAL;
+
+	ret = regmap_read(ds1307->regmap, RX8130_REG_CONTROL0, &reg);
+	if (ret < 0)
+		return ret;
+
+	if (enabled)
+		reg |= RX8130_REG_CONTROL0_AIE;
+	else
+		reg &= ~RX8130_REG_CONTROL0_AIE;
+
+	return regmap_write(ds1307->regmap, RX8130_REG_CONTROL0, reg);
+}
+
+static const struct rtc_class_ops rx8130_rtc_ops = {
+	.read_time	= ds1307_get_time,
+	.set_time	= ds1307_set_time,
+	.read_alarm	= rx8130_read_alarm,
+	.set_alarm	= rx8130_set_alarm,
+	.alarm_irq_enable = rx8130_alarm_irq_enable,
+};
+
+/*----------------------------------------------------------------------*/
+
 /*
  * Alarm support for mcp794xx devices.
  */
@@ -1402,6 +1569,14 @@ static int ds1307_probe(struct i2c_client *client,
 				     DS1307_REG_HOUR << 4 | 0x08, hour);
 		}
 		break;
+	case rx_8130:
+		ds1307->offset = 0x10; /* Seconds starts at 0x10 */
+		rtc_ops = &rx8130_rtc_ops;
+		if (chip->alarm && ds1307->irq > 0) {
+			irq_handler = rx8130_irq;
+			want_irq = true;
+		}
+		break;
 	case ds_1388:
 		ds1307->offset = 1; /* Seconds starts at 1 */
 		break;
-- 
GitLab


From 188986c70e09f0f3cd88e6fe14c89e439474e3ec Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Fri, 23 Jun 2017 15:12:24 +0800
Subject: [PATCH 0408/1958] mtd: nand: mtk: fix incorrect register setting
 order about ecc irq

Currently, we trigger ECC HW before setting ecc irq. It is incorrect.
Because ECC starts working once the register ECC_CTL_REG is set as
ECC_OP_ENABLE. And this may lead an abnormal behavior of ecc irq.
So, should enable ecc irq at first, then trigger ECC.

Fixes: 1d6b1e464950 ("mtd: mediatek: driver for MTK Smart Device")
Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_ecc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index 4958121cb8275..a855a4e5cc355 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -276,8 +276,6 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 	if (ret)
 		return ret;
 
-	writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op));
-
 	init_completion(&ecc->done);
 	reg_val = ECC_IRQ_EN;
 	/*
@@ -289,6 +287,8 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 		reg_val |= ECC_PG_IRQ_SEL;
 	writew(reg_val, ecc->regs + ECC_IRQ_REG(op));
 
+	writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op));
+
 	return 0;
 }
 EXPORT_SYMBOL(mtk_ecc_enable);
-- 
GitLab


From 88404312556c10e4bcd1aeeb75b1b7e9e3226160 Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Fri, 23 Jun 2017 15:12:25 +0800
Subject: [PATCH 0409/1958] mtd: nand: mtk: disable ecc irq when writing page
 with hwecc

Currently, ecc encode irq is enabled when writing page with hwecc, but
we actually do not wait for this irq done. Because NFI and ECC work in
parallel, nfi irq and ecc irq almost come together.

Now, there are two steps to check whether page data are totally written.
First, wait for nfi irq INTR_AHB_DONE. This is to ensure all data
in RAM are received by NFI.
Second, polling the register NFI_ADDRCNTR till all data include ecc
parity data runtime generated by ECC are sent to NAND device.

So, it is redunant to enable ecc irq without waiting for it.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_ecc.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index a855a4e5cc355..00ce22e0eaf0e 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -276,16 +276,18 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 	if (ret)
 		return ret;
 
-	init_completion(&ecc->done);
-	reg_val = ECC_IRQ_EN;
-	/*
-	 * For ECC_NFI_MODE, if ecc->caps->pg_irq_sel is 1, then it
-	 * means this chip can only generate one ecc irq during page
-	 * read / write. If is 0, generate one ecc irq each ecc step.
-	 */
-	if ((ecc->caps->pg_irq_sel) && (config->mode == ECC_NFI_MODE))
-		reg_val |= ECC_PG_IRQ_SEL;
-	writew(reg_val, ecc->regs + ECC_IRQ_REG(op));
+	if (config->mode != ECC_NFI_MODE || op != ECC_ENCODE) {
+		init_completion(&ecc->done);
+		reg_val = ECC_IRQ_EN;
+		/*
+		 * For ECC_NFI_MODE, if ecc->caps->pg_irq_sel is 1, then it
+		 * means this chip can only generate one ecc irq during page
+		 * read / write. If is 0, generate one ecc irq each ecc step.
+		 */
+		if (ecc->caps->pg_irq_sel && config->mode == ECC_NFI_MODE)
+			reg_val |= ECC_PG_IRQ_SEL;
+		writew(reg_val, ecc->regs + ECC_IRQ_REG(op));
+	}
 
 	writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op));
 
-- 
GitLab


From 777a8d92dfca26e63b0fe7ea9889618291512f1d Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Fri, 23 Jun 2017 15:12:26 +0800
Subject: [PATCH 0410/1958] mtd: nand: mtk: remove unneeded mtk_nfc_hw_init
 from mtk_nfc_resume

chip->select_chip will do nfc runtime configuration. There is no need to
do mtk_nfc_hw_init before it.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_nand.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index 4d9ee278b9a07..d51c58e618125 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c
@@ -1490,8 +1490,6 @@ static int mtk_nfc_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	mtk_nfc_hw_init(nfc);
-
 	/* reset NAND chip if VCC was powered off */
 	list_for_each_entry(chip, &nfc->chips, node) {
 		nand = &chip->nand;
-- 
GitLab


From c4ec13543e322d4cc68eb6018f5a8a8858d0053a Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Fri, 23 Jun 2017 15:12:27 +0800
Subject: [PATCH 0411/1958] mtd: nand: mtk: remove unneeded mtk_ecc_hw_init
 from mtk_ecc_resume

There is no need to add mtk_ecc_hw_init during ecc resume, because there
always takes mtk_ecc_wait_idle in the function mtk_ecc_enable.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_ecc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index 00ce22e0eaf0e..f38e2bb1953e9 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -507,8 +507,6 @@ static int mtk_ecc_resume(struct device *dev)
 		return ret;
 	}
 
-	mtk_ecc_hw_init(ecc);
-
 	return 0;
 }
 
-- 
GitLab


From edfee3619c4955be38d414f25bd4f6c5c1b029ba Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Fri, 23 Jun 2017 15:12:28 +0800
Subject: [PATCH 0412/1958] mtd: nand: mtk: add ->setup_data_interface() hook

Currently, we use the fixed ACC timing 0x10804211. This is not the best
setting for each case. Actually, MTK NAND controller can adapt ACC timings
dynamically according to nfi clock frequence.
Implement the ->setup_data_interface() hook to optimize driver performance.

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_nand.c | 90 ++++++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 15 deletions(-)

diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index d51c58e618125..f7ae994643751 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c
@@ -100,11 +100,15 @@
 #define MTK_MAX_SECTOR		(16)
 #define MTK_NAND_MAX_NSELS	(2)
 #define MTK_NFC_MIN_SPARE	(16)
+#define ACCTIMING(tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt) \
+	((tpoecs) << 28 | (tprecs) << 22 | (tc2r) << 16 | \
+	(tw2r) << 12 | (twh) << 8 | (twst) << 4 | (trlt))
 
 struct mtk_nfc_caps {
 	const u8 *spare_size;
 	u8 num_spare_size;
 	u8 pageformat_spare_shift;
+	u8 nfi_clk_div;
 };
 
 struct mtk_nfc_bad_mark_ctl {
@@ -495,6 +499,74 @@ static void mtk_nfc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
 		mtk_nfc_write_byte(mtd, buf[i]);
 }
 
+static int mtk_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
+{
+	struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	const struct nand_sdr_timings *timings;
+	u32 rate, tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt;
+
+	timings = nand_get_sdr_timings(conf);
+	if (IS_ERR(timings))
+		return -ENOTSUPP;
+
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	rate = clk_get_rate(nfc->clk.nfi_clk);
+	/* There is a frequency divider in some IPs */
+	rate /= nfc->caps->nfi_clk_div;
+
+	/* turn clock rate into KHZ */
+	rate /= 1000;
+
+	tpoecs = max(timings->tALH_min, timings->tCLH_min) / 1000;
+	tpoecs = DIV_ROUND_UP(tpoecs * rate, 1000000);
+	tpoecs &= 0xf;
+
+	tprecs = max(timings->tCLS_min, timings->tALS_min) / 1000;
+	tprecs = DIV_ROUND_UP(tprecs * rate, 1000000);
+	tprecs &= 0x3f;
+
+	/* sdr interface has no tCR which means CE# low to RE# low */
+	tc2r = 0;
+
+	tw2r = timings->tWHR_min / 1000;
+	tw2r = DIV_ROUND_UP(tw2r * rate, 1000000);
+	tw2r = DIV_ROUND_UP(tw2r - 1, 2);
+	tw2r &= 0xf;
+
+	twh = max(timings->tREH_min, timings->tWH_min) / 1000;
+	twh = DIV_ROUND_UP(twh * rate, 1000000) - 1;
+	twh &= 0xf;
+
+	twst = timings->tWP_min / 1000;
+	twst = DIV_ROUND_UP(twst * rate, 1000000) - 1;
+	twst &= 0xf;
+
+	trlt = max(timings->tREA_max, timings->tRP_min) / 1000;
+	trlt = DIV_ROUND_UP(trlt * rate, 1000000) - 1;
+	trlt &= 0xf;
+
+	/*
+	 * ACCON: access timing control register
+	 * -------------------------------------
+	 * 31:28: tpoecs, minimum required time for CS post pulling down after
+	 *        accessing the device
+	 * 27:22: tprecs, minimum required time for CS pre pulling down before
+	 *        accessing the device
+	 * 21:16: tc2r, minimum required time from NCEB low to NREB low
+	 * 15:12: tw2r, minimum required time from NWEB high to NREB low.
+	 * 11:08: twh, write enable hold time
+	 * 07:04: twst, write wait states
+	 * 03:00: trlt, read wait states
+	 */
+	trlt = ACCTIMING(tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt);
+	nfi_writel(nfc, trlt, NFI_ACCCON);
+
+	return 0;
+}
+
 static int mtk_nfc_sector_encode(struct nand_chip *chip, u8 *data)
 {
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
@@ -951,21 +1023,6 @@ static int mtk_nfc_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip,
 
 static inline void mtk_nfc_hw_init(struct mtk_nfc *nfc)
 {
-	/*
-	 * ACCON: access timing control register
-	 * -------------------------------------
-	 * 31:28: minimum required time for CS post pulling down after accessing
-	 *	the device
-	 * 27:22: minimum required time for CS pre pulling down before accessing
-	 *	the device
-	 * 21:16: minimum required time from NCEB low to NREB low
-	 * 15:12: minimum required time from NWEB high to NREB low.
-	 * 11:08: write enable hold time
-	 * 07:04: write wait states
-	 * 03:00: read wait states
-	 */
-	nfi_writel(nfc, 0x10804211, NFI_ACCCON);
-
 	/*
 	 * CNRNB: nand ready/busy register
 	 * -------------------------------
@@ -1240,6 +1297,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 	nand->read_byte = mtk_nfc_read_byte;
 	nand->read_buf = mtk_nfc_read_buf;
 	nand->cmd_ctrl = mtk_nfc_cmd_ctrl;
+	nand->setup_data_interface = mtk_nfc_setup_data_interface;
 
 	/* set default mode in case dt entry is missing */
 	nand->ecc.mode = NAND_ECC_HW;
@@ -1330,12 +1388,14 @@ static const struct mtk_nfc_caps mtk_nfc_caps_mt2701 = {
 	.spare_size = spare_size_mt2701,
 	.num_spare_size = 16,
 	.pageformat_spare_shift = 4,
+	.nfi_clk_div = 1,
 };
 
 static const struct mtk_nfc_caps mtk_nfc_caps_mt2712 = {
 	.spare_size = spare_size_mt2712,
 	.num_spare_size = 19,
 	.pageformat_spare_shift = 16,
+	.nfi_clk_div = 2,
 };
 
 static const struct of_device_id mtk_nfc_id_table[] = {
-- 
GitLab


From dbd2667a4fb9ce4f547982b07cd69dda127c47ea Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Thu, 22 Jun 2017 17:15:08 -0700
Subject: [PATCH 0413/1958] sparc64: Fix gup_huge_pmd

The function assumes that each PMD points to head of a
huge page. This is not correct as a PMD can point to
start of any 8M region with a, say 256M, hugepage. The
fix ensures that it points to the correct head of any PMD
huge page.

Cc: Julian Calaby <julian.calaby@gmail.com>
Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/gup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index cd0e32bbcb1de..f80cfc64c55ba 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -78,8 +78,8 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		return 0;
 
 	refs = 0;
-	head = pmd_page(pmd);
-	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	head = compound_head(page);
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
-- 
GitLab


From 354b6382847d1d95b576d0c851df7a5743b8a2c2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 8 Jun 2017 18:10:29 +0200
Subject: [PATCH 0414/1958] m68k: defconfig: Cleanup from old Kconfig options

Remove old, dead Kconfig option INET_LRO. It is gone since
commit 7bbf3cae65b6 ("ipv4: Remove inet_lro library").

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Greg Ungerer <gerg@linux-m68k.org>
---
 arch/m68k/configs/m5208evb_defconfig | 1 -
 arch/m68k/configs/m5249evb_defconfig | 1 -
 arch/m68k/configs/m5272c3_defconfig  | 1 -
 arch/m68k/configs/m5275evb_defconfig | 1 -
 arch/m68k/configs/m5307c3_defconfig  | 1 -
 arch/m68k/configs/m5407c3_defconfig  | 1 -
 6 files changed, 6 deletions(-)

diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig
index 4c7b7938d53af..a3102ff7e5eda 100644
--- a/arch/m68k/configs/m5208evb_defconfig
+++ b/arch/m68k/configs/m5208evb_defconfig
@@ -26,7 +26,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig
index a782f368650fe..f7bb9ed3efa84 100644
--- a/arch/m68k/configs/m5249evb_defconfig
+++ b/arch/m68k/configs/m5249evb_defconfig
@@ -27,7 +27,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig
index 6f5fb92f5cbf2..1e679f6a400f3 100644
--- a/arch/m68k/configs/m5272c3_defconfig
+++ b/arch/m68k/configs/m5272c3_defconfig
@@ -27,7 +27,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig
index b5d7cd1ce8562..d2987b40423e1 100644
--- a/arch/m68k/configs/m5275evb_defconfig
+++ b/arch/m68k/configs/m5275evb_defconfig
@@ -27,7 +27,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig
index 1b4c09461c409..97a78c99eeee0 100644
--- a/arch/m68k/configs/m5307c3_defconfig
+++ b/arch/m68k/configs/m5307c3_defconfig
@@ -27,7 +27,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig
index 275ad543d4bcb..766a97f39a3a2 100644
--- a/arch/m68k/configs/m5407c3_defconfig
+++ b/arch/m68k/configs/m5407c3_defconfig
@@ -28,7 +28,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
-- 
GitLab


From 62d02fd1f807bf5a259a242c483c9fb98a242630 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Mon, 26 Jun 2017 15:20:49 +0800
Subject: [PATCH 0415/1958] drm/i915/gvt: Fix possible recursive locking issue

vfio_unpin_pages will hold a read semaphore however it is already hold
in the same thread by vfio ioctl. It will cause below warning:

[ 5102.127454] ============================================
[ 5102.133379] WARNING: possible recursive locking detected
[ 5102.139304] 4.12.0-rc4+ #3 Not tainted
[ 5102.143483] --------------------------------------------
[ 5102.149407] qemu-system-x86/1620 is trying to acquire lock:
[ 5102.155624]  (&container->group_lock){++++++}, at: [<ffffffff817768c6>] vfio_unpin_pages+0x96/0xf0
[ 5102.165626]
but task is already holding lock:
[ 5102.172134]  (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
[ 5102.182522]
other info that might help us debug this:
[ 5102.189806]  Possible unsafe locking scenario:

[ 5102.196411]        CPU0
[ 5102.199136]        ----
[ 5102.201861]   lock(&container->group_lock);
[ 5102.206527]   lock(&container->group_lock);
[ 5102.211191]
*** DEADLOCK ***

[ 5102.217796]  May be due to missing lock nesting notation

[ 5102.225370] 3 locks held by qemu-system-x86/1620:
[ 5102.230618]  #0:  (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
[ 5102.241482]  #1:  (&(&iommu->notifier)->rwsem){++++..}, at: [<ffffffff810de775>] __blocking_notifier_call_chain+0x35/0x70
[ 5102.253713]  #2:  (&vgpu->vdev.cache_lock){+.+...}, at: [<ffffffff8157b007>] intel_vgpu_iommu_notifier+0x77/0x120
[ 5102.265163]
stack backtrace:
[ 5102.270022] CPU: 5 PID: 1620 Comm: qemu-system-x86 Not tainted 4.12.0-rc4+ #3
[ 5102.277991] Hardware name: Intel Corporation S1200RP/S1200RP, BIOS S1200RP.86B.03.01.APER.061220151418 06/12/2015
[ 5102.289445] Call Trace:
[ 5102.292175]  dump_stack+0x85/0xc7
[ 5102.295871]  validate_chain.isra.21+0x9da/0xaf0
[ 5102.300925]  __lock_acquire+0x405/0x820
[ 5102.305202]  lock_acquire+0xc7/0x220
[ 5102.309191]  ? vfio_unpin_pages+0x96/0xf0
[ 5102.313666]  down_read+0x2b/0x50
[ 5102.317259]  ? vfio_unpin_pages+0x96/0xf0
[ 5102.321732]  vfio_unpin_pages+0x96/0xf0
[ 5102.326024]  intel_vgpu_iommu_notifier+0xe5/0x120
[ 5102.331283]  notifier_call_chain+0x4a/0x70
[ 5102.335851]  __blocking_notifier_call_chain+0x4d/0x70
[ 5102.341490]  blocking_notifier_call_chain+0x16/0x20
[ 5102.346935]  vfio_iommu_type1_ioctl+0x87b/0x920
[ 5102.351994]  vfio_fops_unl_ioctl+0x81/0x280
[ 5102.356660]  ? __fget+0xf0/0x210
[ 5102.360261]  do_vfs_ioctl+0x93/0x6a0
[ 5102.364247]  ? __fget+0x111/0x210
[ 5102.367942]  SyS_ioctl+0x41/0x70
[ 5102.371542]  entry_SYSCALL_64_fastpath+0x1f/0xbe

put the vfio_unpin_pages in a workqueue can fix this.

v2:
- use for style instead of do{}while(1). (Zhenyu)
v3:
- rename gvt_cache_mark to gvt_cache_mark_remove. (Zhenyu)

Fixes: 659643f7d814 ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT")
Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h   |  3 ++
 drivers/gpu/drm/i915/gvt/kvmgt.c | 55 ++++++++++++++++++++++++++------
 2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 930732e5c7806..8b5876d636249 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -183,6 +183,9 @@ struct intel_vgpu {
 		struct kvm *kvm;
 		struct work_struct release_work;
 		atomic_t released;
+		struct work_struct unpin_work;
+		spinlock_t unpin_lock; /* To protect unpin_list */
+		struct list_head unpin_list;
 	} vdev;
 #endif
 };
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 1ae0b4083ce10..56a8d6a7d9c67 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -78,6 +78,7 @@ struct gvt_dma {
 	struct rb_node node;
 	gfn_t gfn;
 	unsigned long iova;
+	struct list_head list;
 };
 
 static inline bool handle_valid(unsigned long handle)
@@ -166,6 +167,7 @@ static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 
 	new->gfn = gfn;
 	new->iova = iova;
+	INIT_LIST_HEAD(&new->list);
 
 	mutex_lock(&vgpu->vdev.cache_lock);
 	while (*link) {
@@ -197,26 +199,52 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
 	kfree(entry);
 }
 
-static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
+static void intel_vgpu_unpin_work(struct work_struct *work)
 {
+	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
+					       vdev.unpin_work);
 	struct device *dev = mdev_dev(vgpu->vdev.mdev);
 	struct gvt_dma *this;
-	unsigned long g1;
-	int rc;
+	unsigned long gfn;
+
+	for (;;) {
+		spin_lock(&vgpu->vdev.unpin_lock);
+		if (list_empty(&vgpu->vdev.unpin_list)) {
+			spin_unlock(&vgpu->vdev.unpin_lock);
+			break;
+		}
+		this = list_first_entry(&vgpu->vdev.unpin_list,
+					struct gvt_dma, list);
+		list_del(&this->list);
+		spin_unlock(&vgpu->vdev.unpin_lock);
+
+		gfn = this->gfn;
+		vfio_unpin_pages(dev, &gfn, 1);
+		kfree(this);
+	}
+}
+
+static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
+{
+	struct gvt_dma *this;
 
 	mutex_lock(&vgpu->vdev.cache_lock);
 	this  = __gvt_cache_find(vgpu, gfn);
 	if (!this) {
 		mutex_unlock(&vgpu->vdev.cache_lock);
-		return;
+		return false;
 	}
-
-	g1 = gfn;
 	gvt_dma_unmap_iova(vgpu, this->iova);
-	rc = vfio_unpin_pages(dev, &g1, 1);
-	WARN_ON(rc != 1);
-	__gvt_cache_remove_entry(vgpu, this);
+	/* remove this from rb tree */
+	rb_erase(&this->node, &vgpu->vdev.cache);
 	mutex_unlock(&vgpu->vdev.cache_lock);
+
+	/* put this to the unpin_list */
+	spin_lock(&vgpu->vdev.unpin_lock);
+	list_move_tail(&this->list, &vgpu->vdev.unpin_list);
+	spin_unlock(&vgpu->vdev.unpin_lock);
+
+	return true;
 }
 
 static void gvt_cache_init(struct intel_vgpu *vgpu)
@@ -453,6 +481,9 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 	}
 
 	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
+	INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
+	spin_lock_init(&vgpu->vdev.unpin_lock);
+	INIT_LIST_HEAD(&vgpu->vdev.unpin_list);
 
 	vgpu->vdev.mdev = mdev;
 	mdev_set_drvdata(mdev, vgpu);
@@ -482,6 +513,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 	struct intel_vgpu *vgpu = container_of(nb,
 					struct intel_vgpu,
 					vdev.iommu_notifier);
+	bool sched_unmap = false;
 
 	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 		struct vfio_iommu_type1_dma_unmap *unmap = data;
@@ -491,7 +523,10 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 		end_gfn = gfn + unmap->size / PAGE_SIZE;
 
 		while (gfn < end_gfn)
-			gvt_cache_remove(vgpu, gfn++);
+			sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);
+
+		if (sched_unmap)
+			schedule_work(&vgpu->vdev.unpin_work);
 	}
 
 	return NOTIFY_OK;
-- 
GitLab


From f16bd3dda2c8bf6699e808cd9cc540cfab10e60e Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Mon, 26 Jun 2017 15:20:50 +0800
Subject: [PATCH 0416/1958] drm/i915/gvt: Fix inconsistent locks holding
 sequence

There are two kinds of locking sequence.

One is in the thread which is started by vfio ioctl to do
the iommu unmapping. The locking sequence is:
	down_read(&group_lock) ----> mutex_lock(&cached_lock)

The other is in the vfio release thread which will unpin all
the cached pages. The lock sequence is:
	mutex_lock(&cached_lock) ---> down_read(&group_lock)

And, the cache_lock is used to protect the rb tree of the cache
node and doing vfio unpin doesn't require this lock. Move the
vfio unpin out of the cache_lock protected region.

v2:
- use for style instead of do{}while(1). (Zhenyu)

Fixes: f30437c5e7bf ("drm/i915/gvt: add KVMGT support")
Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 56a8d6a7d9c67..75a6e1d8af0d3 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -260,16 +260,20 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu)
 	struct device *dev = mdev_dev(vgpu->vdev.mdev);
 	unsigned long gfn;
 
-	mutex_lock(&vgpu->vdev.cache_lock);
-	while ((node = rb_first(&vgpu->vdev.cache))) {
+	for (;;) {
+		mutex_lock(&vgpu->vdev.cache_lock);
+		node = rb_first(&vgpu->vdev.cache);
+		if (!node) {
+			mutex_unlock(&vgpu->vdev.cache_lock);
+			break;
+		}
 		dma = rb_entry(node, struct gvt_dma, node);
 		gvt_dma_unmap_iova(vgpu, dma->iova);
 		gfn = dma->gfn;
-
-		vfio_unpin_pages(dev, &gfn, 1);
 		__gvt_cache_remove_entry(vgpu, dma);
+		mutex_unlock(&vgpu->vdev.cache_lock);
+		vfio_unpin_pages(dev, &gfn, 1);
 	}
-	mutex_unlock(&vgpu->vdev.cache_lock);
 }
 
 static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
-- 
GitLab


From 38cb266ad1a24e037220dc563ab46364595b17b6 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 23 Jun 2017 14:00:13 -0600
Subject: [PATCH 0417/1958] DRM: Fix an incorrectly formatted table

The "supported input formats" table in dw_hdmi.h was incorrectly formatted,
using "+" signs where "|" needs to be.  That, in turn, causes the PDF build
to fail.

Fixes: def23aa7e982 ("drm: bridge: dw-hdmi: Switch to V4L bus format and encodings")
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170623140013.0703107a@lwn.net
---
 include/drm/bridge/dw_hdmi.h | 70 ++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h
index 4c8d4c81a0e8f..182f83283e247 100644
--- a/include/drm/bridge/dw_hdmi.h
+++ b/include/drm/bridge/dw_hdmi.h
@@ -22,56 +22,56 @@ struct dw_hdmi;
  * 48bit bus.
  *
  * +----------------------+----------------------------------+------------------------------+
- * + Format Name          + Format Code                      + Encodings                    +
+ * | Format Name          | Format Code                      | Encodings                    |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 8bit       + ``MEDIA_BUS_FMT_RGB888_1X24``    + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 8bit       | ``MEDIA_BUS_FMT_RGB888_1X24``    | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 10bits     + ``MEDIA_BUS_FMT_RGB101010_1X30`` + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 10bits     | ``MEDIA_BUS_FMT_RGB101010_1X30`` | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 12bits     + ``MEDIA_BUS_FMT_RGB121212_1X36`` + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 12bits     | ``MEDIA_BUS_FMT_RGB121212_1X36`` | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 16bits     + ``MEDIA_BUS_FMT_RGB161616_1X48`` + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 16bits     | ``MEDIA_BUS_FMT_RGB161616_1X48`` | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 8bit     + ``MEDIA_BUS_FMT_YUV8_1X24``      + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 8bit     | ``MEDIA_BUS_FMT_YUV8_1X24``      | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 10bits   + ``MEDIA_BUS_FMT_YUV10_1X30``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 10bits   | ``MEDIA_BUS_FMT_YUV10_1X30``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 12bits   + ``MEDIA_BUS_FMT_YUV12_1X36``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 12bits   | ``MEDIA_BUS_FMT_YUV12_1X36``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 16bits   + ``MEDIA_BUS_FMT_YUV16_1X48``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 16bits   | ``MEDIA_BUS_FMT_YUV16_1X48``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:2 8bit     + ``MEDIA_BUS_FMT_UYVY8_1X16``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:2 8bit     | ``MEDIA_BUS_FMT_UYVY8_1X16``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:2 10bits   + ``MEDIA_BUS_FMT_UYVY10_1X20``    + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:2 10bits   | ``MEDIA_BUS_FMT_UYVY10_1X20``    | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:2 12bits   + ``MEDIA_BUS_FMT_UYVY12_1X24``    + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:2 12bits   | ``MEDIA_BUS_FMT_UYVY12_1X24``    | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 8bit     + ``MEDIA_BUS_FMT_UYYVYY8_0_5X24`` + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 8bit     | ``MEDIA_BUS_FMT_UYYVYY8_0_5X24`` | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 10bits   + ``MEDIA_BUS_FMT_UYYVYY10_0_5X30``+ ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 10bits   | ``MEDIA_BUS_FMT_UYYVYY10_0_5X30``| ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 12bits   + ``MEDIA_BUS_FMT_UYYVYY12_0_5X36``+ ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 12bits   | ``MEDIA_BUS_FMT_UYYVYY12_0_5X36``| ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 16bits   + ``MEDIA_BUS_FMT_UYYVYY16_0_5X48``+ ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 16bits   | ``MEDIA_BUS_FMT_UYYVYY16_0_5X48``| ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
  */
 
-- 
GitLab


From 1ba5c08b58a0c21fca222f1bf2fde184aa26103f Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Tue, 6 Jun 2017 14:17:39 +0200
Subject: [PATCH 0418/1958] kernel/module.c: suppress warning about unused
 nowarn variable

This patch fix the following warning:
kernel/module.c: In function 'add_usage_links':
kernel/module.c:1653:6: warning: variable 'nowarn' set but not used [-Wunused-but-set-variable]

[jeyu: folded in first patch since it only swapped the function order
so that del_usage_links can be called from add_usage_links]
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 kernel/module.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 3803449ca2191..f546d574f4361 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1666,31 +1666,36 @@ static inline void remove_notes_attrs(struct module *mod)
 }
 #endif /* CONFIG_KALLSYMS */
 
-static void add_usage_links(struct module *mod)
+static void del_usage_links(struct module *mod)
 {
 #ifdef CONFIG_MODULE_UNLOAD
 	struct module_use *use;
-	int nowarn;
 
 	mutex_lock(&module_mutex);
-	list_for_each_entry(use, &mod->target_list, target_list) {
-		nowarn = sysfs_create_link(use->target->holders_dir,
-					   &mod->mkobj.kobj, mod->name);
-	}
+	list_for_each_entry(use, &mod->target_list, target_list)
+		sysfs_remove_link(use->target->holders_dir, mod->name);
 	mutex_unlock(&module_mutex);
 #endif
 }
 
-static void del_usage_links(struct module *mod)
+static int add_usage_links(struct module *mod)
 {
+	int ret = 0;
 #ifdef CONFIG_MODULE_UNLOAD
 	struct module_use *use;
 
 	mutex_lock(&module_mutex);
-	list_for_each_entry(use, &mod->target_list, target_list)
-		sysfs_remove_link(use->target->holders_dir, mod->name);
+	list_for_each_entry(use, &mod->target_list, target_list) {
+		ret = sysfs_create_link(use->target->holders_dir,
+					&mod->mkobj.kobj, mod->name);
+		if (ret)
+			break;
+	}
 	mutex_unlock(&module_mutex);
+	if (ret)
+		del_usage_links(mod);
 #endif
+	return ret;
 }
 
 static int module_add_modinfo_attrs(struct module *mod)
@@ -1801,13 +1806,18 @@ static int mod_sysfs_setup(struct module *mod,
 	if (err)
 		goto out_unreg_param;
 
-	add_usage_links(mod);
+	err = add_usage_links(mod);
+	if (err)
+		goto out_unreg_modinfo_attrs;
+
 	add_sect_attrs(mod, info);
 	add_notes_attrs(mod, info);
 
 	kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
 	return 0;
 
+out_unreg_modinfo_attrs:
+	module_remove_modinfo_attrs(mod);
 out_unreg_param:
 	module_param_sysfs_remove(mod);
 out_unreg_holders:
-- 
GitLab


From af18ba48420e39833ccaedfe98b98fdef040c6eb Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Date: Thu, 22 Jun 2017 13:20:36 +0200
Subject: [PATCH 0419/1958] mtd: spi-nor: Add support for mx66u51235f

This chip supports stateless 4-byte opcodes, dual and quad read and
uniform 4K-byte erase.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/spi-nor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 040ab5d7ac052..2659bbad5b9ac 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1012,6 +1012,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) },
 	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
 	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "mx66u51235f", INFO(0xc2253a, 0, 64 * 1024, 1024, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) },
 	{ "mx66l1g45g",  INFO(0xc2201b, 0, 64 * 1024, 2048, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
 
-- 
GitLab


From d8b494a32889eae6eebb71bb4de3e14025026fe5 Mon Sep 17 00:00:00 2001
From: Harry Chou <harryyc.chou@gmail.com>
Date: Fri, 23 Jun 2017 03:58:11 +0800
Subject: [PATCH 0420/1958] mtd: spi-nor: Add support for Spansion S25FL064L

It's an 8 MiB flash with 4 KiB erase sectors.

Signed-off-by: Harry Chou <HarryYC.Chou@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/spi-nor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 2659bbad5b9ac..1413828ff1fbc 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1064,6 +1064,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "s25fl164k",  INFO(0x014017,      0,  64 * 1024, 128, SECT_4K) },
 	{ "s25fl204k",  INFO(0x014013,      0,  64 * 1024,   8, SECT_4K | SPI_NOR_DUAL_READ) },
 	{ "s25fl208k",  INFO(0x014014,      0,  64 * 1024,  16, SECT_4K | SPI_NOR_DUAL_READ) },
+	{ "s25fl064l",  INFO(0x016017,      0,  64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) },
 
 	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
 	{ "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
-- 
GitLab


From 3fde00a014ed1b0716a1d73fdebd96188e2ff6fe Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 26 Jun 2017 14:15:02 -0700
Subject: [PATCH 0421/1958] dt-bindings: Document the Broadcom STB wake-up
 timer node

Document the binding for the Broadcom STB SoCs wake-up timer node
allowing the system to generate alarms and exit low power states.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 .../bindings/rtc/brcm,brcmstb-waketimer.txt   | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt

diff --git a/Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt b/Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt
new file mode 100644
index 0000000000000..1d990bcc0baf5
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt
@@ -0,0 +1,22 @@
+Broadcom STB wake-up Timer
+
+The Broadcom STB wake-up timer provides a 27Mhz resolution timer, with the
+ability to wake up the system from low-power suspend/standby modes.
+
+Required properties:
+- compatible     : should contain "brcm,brcmstb-waketimer"
+- reg            : the register start and length for the WKTMR block
+- interrupts     : The TIMER interrupt
+- interrupt-parent: The phandle to the Always-On (AON) Power Management (PM) L2
+                    interrupt controller node
+- clocks	 : The phandle to the UPG fixed clock (27Mhz domain)
+
+Example:
+
+waketimer@f0411580 {
+	compatible = "brcm,brcmstb-waketimer";
+	reg = <0xf0411580 0x14>;
+	interrupts = <0x3>;
+	interrupt-parent = <&aon_pm_l2_intc>;
+	clocks = <&upg_fixed>;
+};
-- 
GitLab


From 295a0d0b55269fce2290a7aebfcc8c2525866c33 Mon Sep 17 00:00:00 2001
From: Xiong Zhang <xiong.y.zhang@intel.com>
Date: Tue, 20 Jun 2017 11:37:22 +0800
Subject: [PATCH 0422/1958] drm/i915/gvt: Set initial PORT_CLK_SEL vreg for BDW

On BDW, when host physical screen and guest virtual screen aren't on
the same DDI port, guest i915 driver prints the following error and
stop running.
[    6.775873] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000068
[    6.775928] IP: intel_ddi_clock_get+0x81/0x430 [i915]
[    6.776206] Call Trace:
[    6.776233]  ? vgpu_read32+0x4f/0x100 [i915]
[    6.776264]  intel_ddi_get_config+0x11c/0x230 [i915]
[    6.776298]  intel_modeset_setup_hw_state+0x313/0xd40 [i915]
[    6.776334]  intel_modeset_init+0xe49/0x18d0 [i915]
[    6.776368]  ? vgpu_write32+0x53/0x100 [i915]
[    6.776731]  ? intel_i2c_reset+0x42/0x50 [i915]
[    6.777085]  ? intel_setup_gmbus+0x32a/0x350 [i915]
[    6.777427]  i915_driver_load+0xabc/0x14d0 [i915]
[    6.777768]  i915_pci_probe+0x4f/0x70 [i915]

The null pointer is guest intel_crtc_state->shared_dpll which is
setted in haswell_get_ddi_pll(). When guest and host screen are
on different DDI port, host driver won't set PORT_CLK_SET(guest_port),
so haswell_get_ddi_pll() will return null and don't set
pipe_config->shared_dpll, once the following program refernce this
structure, it will print the above error.

This patch set the initial val of guest PORT_CLK_SEL(guest_port) to
LCPLL_810. And guest i915 driver will reset this value according to
guest screen mode.

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/display.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index e0261fcc5b504..818e94907c3b3 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -197,6 +197,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 			(PORT_B << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
@@ -211,6 +217,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 			(PORT_C << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
@@ -225,6 +237,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 			(PORT_D << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
-- 
GitLab


From 75e64ff2c2f5ce1ae5b47b2f372fe5b9dc99f5a9 Mon Sep 17 00:00:00 2001
From: Xiong Zhang <xiong.y.zhang@intel.com>
Date: Wed, 28 Jun 2017 02:03:16 +0800
Subject: [PATCH 0423/1958] drm/i915/gvt: Don't read ADPA_CRT_HOTPLUG_MONITOR
 from host

When host connects a crt screen, linux guest will detect two
screens: crt and dp. This is wrong as linux guest has only
one dp.

In order to avoid guest get host crt screen, we should set
ADPA_CRT_HOTPLUG_MONITOR to none. But MMIO_RO(PCH_ADPA) prevent
from that. So MMIO_DH should be used instead of MMIO_RO.

v2: Clear its staus to none at initialize, so guest don't
    get host crt.(Zhangyu)
v3: SKL doesn't have this register, limit it to pre_skl.(xiong)

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/display.c  | 4 ++++
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 818e94907c3b3..2deb05f618fb1 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -262,6 +262,10 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED;
 	}
+
+	/* Clear host CRT status, so guest couldn't detect this host CRT. */
+	if (IS_BROADWELL(dev_priv))
+		vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
 }
 
 static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 0ffd696545927..437a35f41b62a 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1905,7 +1905,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_F(_PCH_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
 		dp_aux_ch_ctl_mmio_write);
 
-	MMIO_RO(PCH_ADPA, D_ALL, 0, ADPA_CRT_HOTPLUG_MONITOR_MASK, NULL, pch_adpa_mmio_write);
+	MMIO_DH(PCH_ADPA, D_PRE_SKL, NULL, pch_adpa_mmio_write);
 
 	MMIO_DH(_PCH_TRANSACONF, D_ALL, NULL, transconf_mmio_write);
 	MMIO_DH(_PCH_TRANSBCONF, D_ALL, NULL, transconf_mmio_write);
-- 
GitLab


From 170748db2deb2dedcb405ae091dbc74a478842e5 Mon Sep 17 00:00:00 2001
From: Bibby Hsieh <bibby.hsieh@mediatek.com>
Date: Tue, 24 Jan 2017 12:40:58 +0800
Subject: [PATCH 0424/1958] drm/mediatek: Support UYVY and YUYV format for
 overlay

MT8173 overlay can support UYVY and YUYV format,
we add the format in DRM driver.

Signed-off-by: Bibby Hsieh <bibby.hsieh@mediatek.com>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_disp_ovl.c  | 7 +++++++
 drivers/gpu/drm/mediatek/mtk_drm_plane.c | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index a14d7d64d7b11..35bc5babdbf70 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -42,9 +42,12 @@
 #define	OVL_RDMA_MEM_GMC	0x40402020
 
 #define OVL_CON_BYTE_SWAP	BIT(24)
+#define OVL_CON_MTX_YUV_TO_RGB	(6 << 16)
 #define OVL_CON_CLRFMT_RGB	(1 << 12)
 #define OVL_CON_CLRFMT_RGBA8888	(2 << 12)
 #define OVL_CON_CLRFMT_ARGB8888	(3 << 12)
+#define OVL_CON_CLRFMT_UYVY	(4 << 12)
+#define OVL_CON_CLRFMT_YUYV	(5 << 12)
 #define OVL_CON_CLRFMT_RGB565(ovl)	((ovl)->data->fmt_rgb565_is_0 ? \
 					0 : OVL_CON_CLRFMT_RGB)
 #define OVL_CON_CLRFMT_RGB888(ovl)	((ovl)->data->fmt_rgb565_is_0 ? \
@@ -176,6 +179,10 @@ static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt)
 	case DRM_FORMAT_XBGR8888:
 	case DRM_FORMAT_ABGR8888:
 		return OVL_CON_CLRFMT_RGBA8888 | OVL_CON_BYTE_SWAP;
+	case DRM_FORMAT_UYVY:
+		return OVL_CON_CLRFMT_UYVY | OVL_CON_MTX_YUV_TO_RGB;
+	case DRM_FORMAT_YUYV:
+		return OVL_CON_CLRFMT_YUYV | OVL_CON_MTX_YUV_TO_RGB;
 	}
 }
 
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index e405e89ed5e5d..1a59b9ab4aa81 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -28,6 +28,8 @@ static const u32 formats[] = {
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_ARGB8888,
 	DRM_FORMAT_RGB565,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
 };
 
 static void mtk_plane_reset(struct drm_plane *plane)
-- 
GitLab


From 446b8c542f8551b368dab1ff3539beb3aedb5928 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 17 Mar 2017 18:00:23 +0100
Subject: [PATCH 0425/1958] drm/mediatek: use platform_register_drivers

Use platform_register_drivers instead of open coding the iteration over
component platform drivers in the mtk_drm_drv and mtk_hdmi modules.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_drm_drv.c | 27 ++++----------------------
 drivers/gpu/drm/mediatek/mtk_hdmi.c    | 27 ++++----------------------
 2 files changed, 8 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index f6c8ec4c7dbcb..3754c3ffdcf24 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -576,33 +576,14 @@ static struct platform_driver * const mtk_drm_drivers[] = {
 
 static int __init mtk_drm_init(void)
 {
-	int ret;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mtk_drm_drivers); i++) {
-		ret = platform_driver_register(mtk_drm_drivers[i]);
-		if (ret < 0) {
-			pr_err("Failed to register %s driver: %d\n",
-			       mtk_drm_drivers[i]->driver.name, ret);
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	while (--i >= 0)
-		platform_driver_unregister(mtk_drm_drivers[i]);
-
-	return ret;
+	return platform_register_drivers(mtk_drm_drivers,
+					 ARRAY_SIZE(mtk_drm_drivers));
 }
 
 static void __exit mtk_drm_exit(void)
 {
-	int i;
-
-	for (i = ARRAY_SIZE(mtk_drm_drivers) - 1; i >= 0; i--)
-		platform_driver_unregister(mtk_drm_drivers[i]);
+	platform_unregister_drivers(mtk_drm_drivers,
+				    ARRAY_SIZE(mtk_drm_drivers));
 }
 
 module_init(mtk_drm_init);
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 0a4ffd7241468..71eb4fbbfc85f 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1778,33 +1778,14 @@ static struct platform_driver * const mtk_hdmi_drivers[] = {
 
 static int __init mtk_hdmitx_init(void)
 {
-	int ret;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mtk_hdmi_drivers); i++) {
-		ret = platform_driver_register(mtk_hdmi_drivers[i]);
-		if (ret < 0) {
-			pr_err("Failed to register %s driver: %d\n",
-			       mtk_hdmi_drivers[i]->driver.name, ret);
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	while (--i >= 0)
-		platform_driver_unregister(mtk_hdmi_drivers[i]);
-
-	return ret;
+	return platform_register_drivers(mtk_hdmi_drivers,
+					 ARRAY_SIZE(mtk_hdmi_drivers));
 }
 
 static void __exit mtk_hdmitx_exit(void)
 {
-	int i;
-
-	for (i = ARRAY_SIZE(mtk_hdmi_drivers) - 1; i >= 0; i--)
-		platform_driver_unregister(mtk_hdmi_drivers[i]);
+	platform_unregister_drivers(mtk_hdmi_drivers,
+				    ARRAY_SIZE(mtk_hdmi_drivers));
 }
 
 module_init(mtk_hdmitx_init);
-- 
GitLab


From afd89636f1846ec2c340de894dac103cd336fcee Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 11 Apr 2017 14:44:19 +0100
Subject: [PATCH 0426/1958] drm/mediatek: re-phrase DRM_INFO error message

The current message contains a spelling mistake and is not easily
parsable. Re-phrase it to be more understandable.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_dsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index b5cc6e12334cf..97253c8f813b0 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -930,7 +930,7 @@ static u32 mtk_dsi_recv_cnt(u8 type, u8 *read_data)
 		DRM_INFO("type is 0x02, try again\n");
 		break;
 	default:
-		DRM_INFO("type(0x%x) cannot be non-recognite\n", type);
+		DRM_INFO("type(0x%x) not recognized\n", type);
 		break;
 	}
 
-- 
GitLab


From 4a3bfb5c24306aa129c0de32e5d1ffb553a60a7c Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Fri, 9 Jun 2017 21:27:12 +0200
Subject: [PATCH 0427/1958] drm/mediatek: check for memory allocation failure

If 'devm_kmalloc_array' returns NULL, we should return -ENOMEM as already
done a few lines above instead of deferencing a NULL pointer a few lines
below.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 6582e1f56d37c..cb32c9369f3a6 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -559,6 +559,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 	mtk_crtc->ddp_comp = devm_kmalloc_array(dev, mtk_crtc->ddp_comp_nr,
 						sizeof(*mtk_crtc->ddp_comp),
 						GFP_KERNEL);
+	if (!mtk_crtc->ddp_comp)
+		return -ENOMEM;
 
 	mtk_crtc->mutex = mtk_disp_mutex_get(priv->mutex_dev, pipe);
 	if (IS_ERR(mtk_crtc->mutex)) {
-- 
GitLab


From 5ac5895a84d92bd2b3db490f8ee242ccb4d77e96 Mon Sep 17 00:00:00 2001
From: "yt.shen@mediatek.com" <yt.shen@mediatek.com>
Date: Fri, 16 Jun 2017 22:02:16 +0800
Subject: [PATCH 0428/1958] drm/mediatek: separate color module to fixup error
 memory reallocation

Previous patch (c5f228ef6c drm/mediatek: add *driver_data for different
hardware settings) calls devm_kfree() and then devm_kzalloc() to
reallocate color module data structure.  But this reallocation cannnot
guarantee the new address is unchanged, but the caller will use the
old address, which is wrong.

Fix it by separate color module from general components, this patch
separate color module to independent files, like mtk_disp_ovl.c and
mtk_disp_rdma.c do

Fixes: c5f228ef6ccd ("drm/mediatek: add *driver_data for different hardware settings")
Signed-off-by: YT Shen <yt.shen@mediatek.com>
Tested-by: Bibby Hsieh <bibby.hsieh@mediatek.com>
Signed-off-by: CK Hu <ck.hu@mediatek>
---
 drivers/gpu/drm/mediatek/Makefile           |   3 +-
 drivers/gpu/drm/mediatek/mtk_disp_color.c   | 176 ++++++++++++++++++++
 drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c |  80 +--------
 drivers/gpu/drm/mediatek/mtk_drm_drv.c      |   6 +-
 drivers/gpu/drm/mediatek/mtk_drm_drv.h      |   1 +
 5 files changed, 185 insertions(+), 81 deletions(-)
 create mode 100644 drivers/gpu/drm/mediatek/mtk_disp_color.c

diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile
index bf2e5be1ab30d..e37b55a23a659 100644
--- a/drivers/gpu/drm/mediatek/Makefile
+++ b/drivers/gpu/drm/mediatek/Makefile
@@ -1,4 +1,5 @@
-mediatek-drm-y := mtk_disp_ovl.o \
+mediatek-drm-y := mtk_disp_color.o \
+		  mtk_disp_ovl.o \
 		  mtk_disp_rdma.o \
 		  mtk_drm_crtc.o \
 		  mtk_drm_ddp.o \
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_color.c b/drivers/gpu/drm/mediatek/mtk_disp_color.c
new file mode 100644
index 0000000000000..ef79a6d556469
--- /dev/null
+++ b/drivers/gpu/drm/mediatek/mtk_disp_color.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <drm/drmP.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#include "mtk_drm_crtc.h"
+#include "mtk_drm_ddp_comp.h"
+
+#define DISP_COLOR_CFG_MAIN			0x0400
+#define DISP_COLOR_START_MT2701			0x0f00
+#define DISP_COLOR_START_MT8173			0x0c00
+#define DISP_COLOR_START(comp)			((comp)->data->color_offset)
+#define DISP_COLOR_WIDTH(comp)			(DISP_COLOR_START(comp) + 0x50)
+#define DISP_COLOR_HEIGHT(comp)			(DISP_COLOR_START(comp) + 0x54)
+
+#define COLOR_BYPASS_ALL			BIT(7)
+#define COLOR_SEQ_SEL				BIT(13)
+
+struct mtk_disp_color_data {
+	unsigned int color_offset;
+};
+
+/**
+ * struct mtk_disp_color - DISP_COLOR driver structure
+ * @ddp_comp - structure containing type enum and hardware resources
+ * @crtc - associated crtc to report irq events to
+ */
+struct mtk_disp_color {
+	struct mtk_ddp_comp			ddp_comp;
+	struct drm_crtc				*crtc;
+	const struct mtk_disp_color_data	*data;
+};
+
+static inline struct mtk_disp_color *comp_to_color(struct mtk_ddp_comp *comp)
+{
+	return container_of(comp, struct mtk_disp_color, ddp_comp);
+}
+
+static void mtk_color_config(struct mtk_ddp_comp *comp, unsigned int w,
+			     unsigned int h, unsigned int vrefresh,
+			     unsigned int bpc)
+{
+	struct mtk_disp_color *color = comp_to_color(comp);
+
+	writel(w, comp->regs + DISP_COLOR_WIDTH(color));
+	writel(h, comp->regs + DISP_COLOR_HEIGHT(color));
+}
+
+static void mtk_color_start(struct mtk_ddp_comp *comp)
+{
+	struct mtk_disp_color *color = comp_to_color(comp);
+
+	writel(COLOR_BYPASS_ALL | COLOR_SEQ_SEL,
+	       comp->regs + DISP_COLOR_CFG_MAIN);
+	writel(0x1, comp->regs + DISP_COLOR_START(color));
+}
+
+static const struct mtk_ddp_comp_funcs mtk_disp_color_funcs = {
+	.config = mtk_color_config,
+	.start = mtk_color_start,
+};
+
+static int mtk_disp_color_bind(struct device *dev, struct device *master,
+			       void *data)
+{
+	struct mtk_disp_color *priv = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	int ret;
+
+	ret = mtk_ddp_comp_register(drm_dev, &priv->ddp_comp);
+	if (ret < 0) {
+		dev_err(dev, "Failed to register component %s: %d\n",
+			dev->of_node->full_name, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mtk_disp_color_unbind(struct device *dev, struct device *master,
+				  void *data)
+{
+	struct mtk_disp_color *priv = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	mtk_ddp_comp_unregister(drm_dev, &priv->ddp_comp);
+}
+
+static const struct component_ops mtk_disp_color_component_ops = {
+	.bind	= mtk_disp_color_bind,
+	.unbind = mtk_disp_color_unbind,
+};
+
+static int mtk_disp_color_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_disp_color *priv;
+	int comp_id;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	comp_id = mtk_ddp_comp_get_id(dev->of_node, MTK_DISP_COLOR);
+	if (comp_id < 0) {
+		dev_err(dev, "Failed to identify by alias: %d\n", comp_id);
+		return comp_id;
+	}
+
+	ret = mtk_ddp_comp_init(dev, dev->of_node, &priv->ddp_comp, comp_id,
+				&mtk_disp_color_funcs);
+	if (ret) {
+		dev_err(dev, "Failed to initialize component: %d\n", ret);
+		return ret;
+	}
+
+	priv->data = of_device_get_match_data(dev);
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = component_add(dev, &mtk_disp_color_component_ops);
+	if (ret)
+		dev_err(dev, "Failed to add component: %d\n", ret);
+
+	return ret;
+}
+
+static int mtk_disp_color_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &mtk_disp_color_component_ops);
+
+	return 0;
+}
+
+static const struct mtk_disp_color_data mt2701_color_driver_data = {
+	.color_offset = DISP_COLOR_START_MT2701,
+};
+
+static const struct mtk_disp_color_data mt8173_color_driver_data = {
+	.color_offset = DISP_COLOR_START_MT8173,
+};
+
+static const struct of_device_id mtk_disp_color_driver_dt_match[] = {
+	{ .compatible = "mediatek,mt2701-disp-color",
+	  .data = &mt2701_color_driver_data},
+	{ .compatible = "mediatek,mt8173-disp-color",
+	  .data = &mt8173_color_driver_data},
+	{},
+};
+MODULE_DEVICE_TABLE(of, mtk_disp_color_driver_dt_match);
+
+struct platform_driver mtk_disp_color_driver = {
+	.probe		= mtk_disp_color_probe,
+	.remove		= mtk_disp_color_remove,
+	.driver		= {
+		.name	= "mediatek-disp-color",
+		.owner	= THIS_MODULE,
+		.of_match_table = mtk_disp_color_driver_dt_match,
+	},
+};
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 8b52416b6e41c..07d7ea2268eff 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -38,13 +38,6 @@
 
 #define DISP_REG_UFO_START			0x0000
 
-#define DISP_COLOR_CFG_MAIN			0x0400
-#define DISP_COLOR_START_MT2701			0x0f00
-#define DISP_COLOR_START_MT8173			0x0c00
-#define DISP_COLOR_START(comp)			((comp)->data->color_offset)
-#define DISP_COLOR_WIDTH(comp)			(DISP_COLOR_START(comp) + 0x50)
-#define DISP_COLOR_HEIGHT(comp)			(DISP_COLOR_START(comp) + 0x54)
-
 #define DISP_AAL_EN				0x0000
 #define DISP_AAL_SIZE				0x0030
 
@@ -55,9 +48,6 @@
 
 #define LUT_10BIT_MASK				0x03ff
 
-#define COLOR_BYPASS_ALL			BIT(7)
-#define COLOR_SEQ_SEL				BIT(13)
-
 #define OD_RELAYMODE				BIT(0)
 
 #define UFO_BYPASS				BIT(2)
@@ -82,20 +72,6 @@
 #define DITHER_ADD_LSHIFT_G(x)			(((x) & 0x7) << 4)
 #define DITHER_ADD_RSHIFT_G(x)			(((x) & 0x7) << 0)
 
-struct mtk_disp_color_data {
-	unsigned int color_offset;
-};
-
-struct mtk_disp_color {
-	struct mtk_ddp_comp			ddp_comp;
-	const struct mtk_disp_color_data	*data;
-};
-
-static inline struct mtk_disp_color *comp_to_color(struct mtk_ddp_comp *comp)
-{
-	return container_of(comp, struct mtk_disp_color, ddp_comp);
-}
-
 void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc,
 		    unsigned int CFG)
 {
@@ -119,25 +95,6 @@ void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc,
 	}
 }
 
-static void mtk_color_config(struct mtk_ddp_comp *comp, unsigned int w,
-			     unsigned int h, unsigned int vrefresh,
-			     unsigned int bpc)
-{
-	struct mtk_disp_color *color = comp_to_color(comp);
-
-	writel(w, comp->regs + DISP_COLOR_WIDTH(color));
-	writel(h, comp->regs + DISP_COLOR_HEIGHT(color));
-}
-
-static void mtk_color_start(struct mtk_ddp_comp *comp)
-{
-	struct mtk_disp_color *color = comp_to_color(comp);
-
-	writel(COLOR_BYPASS_ALL | COLOR_SEQ_SEL,
-	       comp->regs + DISP_COLOR_CFG_MAIN);
-	writel(0x1, comp->regs + DISP_COLOR_START(color));
-}
-
 static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w,
 			  unsigned int h, unsigned int vrefresh,
 			  unsigned int bpc)
@@ -229,11 +186,6 @@ static const struct mtk_ddp_comp_funcs ddp_gamma = {
 	.stop = mtk_gamma_stop,
 };
 
-static const struct mtk_ddp_comp_funcs ddp_color = {
-	.config = mtk_color_config,
-	.start = mtk_color_start,
-};
-
 static const struct mtk_ddp_comp_funcs ddp_od = {
 	.config = mtk_od_config,
 	.start = mtk_od_start,
@@ -268,8 +220,8 @@ struct mtk_ddp_comp_match {
 static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = {
 	[DDP_COMPONENT_AAL]	= { MTK_DISP_AAL,	0, &ddp_aal },
 	[DDP_COMPONENT_BLS]	= { MTK_DISP_BLS,	0, NULL },
-	[DDP_COMPONENT_COLOR0]	= { MTK_DISP_COLOR,	0, &ddp_color },
-	[DDP_COMPONENT_COLOR1]	= { MTK_DISP_COLOR,	1, &ddp_color },
+	[DDP_COMPONENT_COLOR0]	= { MTK_DISP_COLOR,	0, NULL },
+	[DDP_COMPONENT_COLOR1]	= { MTK_DISP_COLOR,	1, NULL },
 	[DDP_COMPONENT_DPI0]	= { MTK_DPI,		0, NULL },
 	[DDP_COMPONENT_DSI0]	= { MTK_DSI,		0, NULL },
 	[DDP_COMPONENT_DSI1]	= { MTK_DSI,		1, NULL },
@@ -286,22 +238,6 @@ static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = {
 	[DDP_COMPONENT_WDMA1]	= { MTK_DISP_WDMA,	1, NULL },
 };
 
-static const struct mtk_disp_color_data mt2701_color_driver_data = {
-	.color_offset = DISP_COLOR_START_MT2701,
-};
-
-static const struct mtk_disp_color_data mt8173_color_driver_data = {
-	.color_offset = DISP_COLOR_START_MT8173,
-};
-
-static const struct of_device_id mtk_disp_color_driver_dt_match[] = {
-	{ .compatible = "mediatek,mt2701-disp-color",
-	  .data = &mt2701_color_driver_data},
-	{ .compatible = "mediatek,mt8173-disp-color",
-	  .data = &mt8173_color_driver_data},
-	{},
-};
-
 int mtk_ddp_comp_get_id(struct device_node *node,
 			enum mtk_ddp_comp_type comp_type)
 {
@@ -324,23 +260,11 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node,
 	enum mtk_ddp_comp_type type;
 	struct device_node *larb_node;
 	struct platform_device *larb_pdev;
-	const struct of_device_id *match;
-	struct mtk_disp_color *color;
 
 	if (comp_id < 0 || comp_id >= DDP_COMPONENT_ID_MAX)
 		return -EINVAL;
 
 	type = mtk_ddp_matches[comp_id].type;
-	if (type == MTK_DISP_COLOR) {
-		devm_kfree(dev, comp);
-		color = devm_kzalloc(dev, sizeof(*color), GFP_KERNEL);
-		if (!color)
-			return -ENOMEM;
-
-		match = of_match_node(mtk_disp_color_driver_dt_match, node);
-		color->data = match->data;
-		comp = &color->ddp_comp;
-	}
 
 	comp->id = comp_id;
 	comp->funcs = funcs ?: mtk_ddp_matches[comp_id].funcs;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 3754c3ffdcf24..41d2cffe953e5 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -439,11 +439,12 @@ static int mtk_drm_probe(struct platform_device *pdev)
 		private->comp_node[comp_id] = of_node_get(node);
 
 		/*
-		 * Currently only the OVL, RDMA, DSI, and DPI blocks have
+		 * Currently only the COLOR, OVL, RDMA, DSI, and DPI blocks have
 		 * separate component platform drivers and initialize their own
 		 * DDP component structure. The others are initialized here.
 		 */
-		if (comp_type == MTK_DISP_OVL ||
+		if (comp_type == MTK_DISP_COLOR ||
+		    comp_type == MTK_DISP_OVL ||
 		    comp_type == MTK_DISP_RDMA ||
 		    comp_type == MTK_DSI ||
 		    comp_type == MTK_DPI) {
@@ -566,6 +567,7 @@ static struct platform_driver mtk_drm_platform_driver = {
 
 static struct platform_driver * const mtk_drm_drivers[] = {
 	&mtk_ddp_driver,
+	&mtk_disp_color_driver,
 	&mtk_disp_ovl_driver,
 	&mtk_disp_rdma_driver,
 	&mtk_dpi_driver,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
index aef8747d810bd..c3378c452c0a0 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
@@ -59,6 +59,7 @@ struct mtk_drm_private {
 };
 
 extern struct platform_driver mtk_ddp_driver;
+extern struct platform_driver mtk_disp_color_driver;
 extern struct platform_driver mtk_disp_ovl_driver;
 extern struct platform_driver mtk_disp_rdma_driver;
 extern struct platform_driver mtk_dpi_driver;
-- 
GitLab


From 12597988319c8943531c7b2183580231b08b7471 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Mon, 15 May 2017 10:46:35 +0100
Subject: [PATCH 0429/1958] MIPS: Sort MIPS Kconfig Alphabetically.

Sort the entries in config MIPS alphabetically so as to make entries
easier to find.

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16068/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 98 +++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2828ecde133d9..0b15978c0f88a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1,75 +1,75 @@
 config MIPS
 	bool
 	default y
-	select ARCH_SUPPORTS_UPROBES
+	select ARCH_BINFMT_ELF_STATE
+	select ARCH_CLOCKSOURCE_DATA
+	select ARCH_DISCARD_MEMBLOCK
+	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
-	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
+	select ARCH_SUPPORTS_UPROBES
 	select ARCH_USE_BUILTIN_BSWAP
-	select HAVE_CONTEXT_TRACKING
-	select HAVE_GENERIC_DMA_COHERENT
-	select HAVE_IDE
-	select HAVE_IRQ_EXIT_ON_IRQ_STACK
-	select HAVE_OPROFILE
-	select HAVE_PERF_EVENTS
-	select PERF_USE_VMALLOC
+	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
+	select ARCH_WANT_IPC_PARSE_VERSION
+	select BUILDTIME_EXTABLE_SORT
+	select CLONE_BACKWARDS
+	select CPU_PM if CPU_IDLE
+	select GENERIC_ATOMIC64 if !64BIT
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_CMOS_UPDATE
+	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_IRQ_PROBE
+	select GENERIC_IRQ_SHOW
+	select GENERIC_PCI_IOMAP
+	select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_TIME_VSYSCALL
+	select HANDLE_DOMAIN_IRQ
+	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT
 	select HAVE_CBPF_JIT if !CPU_MICROMIPS
-	select HAVE_FUNCTION_TRACER
+	select HAVE_CC_STACKPROTECTOR
+	select HAVE_CONTEXT_TRACKING
+	select HAVE_COPY_THREAD_TLS
+	select HAVE_C_RECORDMCOUNT
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_DMA_API_DEBUG
+	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_EXIT_THREAD
 	select HAVE_FTRACE_MCOUNT_RECORD
-	select HAVE_C_RECORDMCOUNT
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
+	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_IDE
+	select HAVE_IRQ_EXIT_ON_IRQ_STACK
+	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
-	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_SYSCALL_TRACEPOINTS
-	select ARCH_HAS_ELF_RANDOMIZE
-	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT
-	select RTC_LIB if !MACH_LOONGSON64
-	select GENERIC_ATOMIC64 if !64BIT
-	select HAVE_DMA_CONTIGUOUS
-	select HAVE_DMA_API_DEBUG
-	select GENERIC_IRQ_PROBE
-	select GENERIC_IRQ_SHOW
-	select GENERIC_PCI_IOMAP
-	select HAVE_ARCH_JUMP_LABEL
-	select ARCH_WANT_IPC_PARSE_VERSION
-	select IRQ_FORCED_THREADING
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
-	select ARCH_DISCARD_MEMBLOCK
-	select GENERIC_SMP_IDLE_THREAD
-	select BUILDTIME_EXTABLE_SORT
-	select GENERIC_CPU_AUTOPROBE
-	select GENERIC_CLOCKEVENTS
-	select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
-	select GENERIC_CMOS_UPDATE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
-	select VIRT_TO_BUS
-	select MODULES_USE_ELF_REL if MODULES
+	select HAVE_OPROFILE
+	select HAVE_PERF_EVENTS
+	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_VIRT_CPU_ACCOUNTING_GEN
+	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA if MODULES && 64BIT
-	select CLONE_BACKWARDS
-	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_CC_STACKPROTECTOR
-	select CPU_PM if CPU_IDLE
-	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
-	select ARCH_BINFMT_ELF_STATE
+	select MODULES_USE_ELF_REL if MODULES
+	select PERF_USE_VMALLOC
+	select RTC_LIB if !MACH_LOONGSON64
 	select SYSCTL_EXCEPTION_TRACE
-	select HAVE_VIRT_CPU_ACCOUNTING_GEN
-	select HAVE_IRQ_TIME_ACCOUNTING
-	select GENERIC_TIME_VSYSCALL
-	select ARCH_CLOCKSOURCE_DATA
-	select HANDLE_DOMAIN_IRQ
-	select HAVE_EXIT_THREAD
-	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_COPY_THREAD_TLS
+	select VIRT_TO_BUS
 
 menu "Machine selection"
 
-- 
GitLab


From 59baa24d8710add3689124fb2a232bac2decae9a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 8 Jun 2017 18:10:13 +0200
Subject: [PATCH 0430/1958] MIPS: defconfig: Cleanup from old Kconfig options

Remove old, dead Kconfig options (in order appearing in this commit):
 - EXPERIMENTAL is gone since v3.9;
 - INET_LRO: commit 7bbf3cae65b6 ("ipv4: Remove inet_lro library");
 - MTD_CONCAT: commit f53fdebcc3e1 ("mtd: drop MTD_CONCAT from Kconfig
   entirely");
 - MTD_CHAR: commit 660685d9d1b4 ("mtd: merge mtdchar module with
   mtdcore");
 - NETDEV_1000 and NETDEV_10000: commit f860b0522f65 ("drivers/net:
   Kconfig and Makefile cleanup"); NET_ETHERNET should be replaced with
   just ETHERNET but that is separate change;
 - MISC_DEVICES: commit 7c5763b8453a ("drivers: misc: Remove
   MISC_DEVICES config option");
 - HID_SUPPORT: commit 1f41a6a99476 ("HID: Fix the generic Kconfig
   options");
 - BT_L2CAP and BT_SCO: commit f1e91e1640d8 ("Bluetooth: Always compile
   SCO and L2CAP in Bluetooth Core");
 - DEBUG_ERRORS: commit b025a3f836d1 ("ARM: 6876/1: Kconfig.debug:
   Remove unused CONFIG_DEBUG_ERRORS");
 - USB_DEVICE_CLASS: commit 007bab91324e ("USB: remove
   CONFIG_USB_DEVICE_CLASS");
 - RCU_CPU_STALL_DETECTOR: commit a00e0d714fbd ("rcu: Remove conditional
   compilation for RCU CPU stall warnings");
 - IP_NF_QUEUE: commit 3dd6664fac7e ("netfilter: remove unused "config
   IP_NF_QUEUE"");
 - IP_NF_TARGET_ULOG: commit d4da843e6fad ("netfilter: kill remnants of
   ulog targets");
 - IP6_NF_QUEUE: commit d16cf20e2f2f ("netfilter: remove ip_queue
   support");
 - IP6_NF_TARGET_LOG: commit 6939c33a757b ("netfilter: merge ipt_LOG and
   ip6_LOG into xt_LOG");
 - USB_LED: commit a335aaf3125c ("usb: misc: remove outdated USB LED
   driver");
 - MMC_UNSAFE_RESUME: commit 2501c9179dff ("mmc: core: Use
   MMC_UNSAFE_RESUME as default behavior");
 - AUTOFS_FS: commit 561c5cf9236a ("staging: Remove autofs3");
 - VIDEO_OUTPUT_CONTROL: commit f167a64e9d67 ("video / output: Drop
   display output class support");
 - USB_LIBUSUAL: commit f61870ee6f8c ("usb: remove libusual");
 - CRYPTO_ZLIB: 110492183c4b ("crypto: compress - remove unused pcomp
   interface");
 - BLK_DEV_UB: commit 68a5059ecf82 ("block: remove the deprecated ub
   driver");

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: bcm-kernel-feedback-list@broadcom.com
Cc: linux-mips@linux-mips.org
Cc: linux-arm-kernel@lists.infradead.org
Patchwork: https://patchwork.linux-mips.org/patch/16342/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/configs/ar7_defconfig             |  6 ------
 arch/mips/configs/ath79_defconfig           |  2 --
 arch/mips/configs/bcm63xx_defconfig         |  7 -------
 arch/mips/configs/bigsur_defconfig          |  4 ----
 arch/mips/configs/bmips_be_defconfig        |  1 -
 arch/mips/configs/capcella_defconfig        |  4 ----
 arch/mips/configs/cavium_octeon_defconfig   |  1 -
 arch/mips/configs/ci20_defconfig            |  1 -
 arch/mips/configs/cobalt_defconfig          |  8 --------
 arch/mips/configs/decstation_defconfig      |  1 -
 arch/mips/configs/e55_defconfig             |  2 --
 arch/mips/configs/fuloong2e_defconfig       | 11 -----------
 arch/mips/configs/gpr_defconfig             |  8 --------
 arch/mips/configs/ip22_defconfig            | 11 -----------
 arch/mips/configs/ip27_defconfig            |  5 -----
 arch/mips/configs/ip28_defconfig            |  5 -----
 arch/mips/configs/ip32_defconfig            |  8 --------
 arch/mips/configs/jazz_defconfig            |  6 ------
 arch/mips/configs/jmr3927_defconfig         |  7 -------
 arch/mips/configs/lasat_defconfig           |  5 -----
 arch/mips/configs/lemote2f_defconfig        | 12 ------------
 arch/mips/configs/loongson3_defconfig       |  3 ---
 arch/mips/configs/malta_kvm_defconfig       |  1 -
 arch/mips/configs/malta_kvm_guest_defconfig |  1 -
 arch/mips/configs/malta_qemu_32r6_defconfig |  1 -
 arch/mips/configs/maltaaprp_defconfig       |  2 --
 arch/mips/configs/maltasmvp_defconfig       |  1 -
 arch/mips/configs/maltasmvp_eva_defconfig   |  2 --
 arch/mips/configs/maltaup_defconfig         |  2 --
 arch/mips/configs/markeins_defconfig        |  4 ----
 arch/mips/configs/mips_paravirt_defconfig   |  1 -
 arch/mips/configs/mpc30x_defconfig          |  5 -----
 arch/mips/configs/msp71xx_defconfig         |  2 --
 arch/mips/configs/mtx1_defconfig            | 11 -----------
 arch/mips/configs/nlm_xlp_defconfig         |  5 -----
 arch/mips/configs/nlm_xlr_defconfig         |  9 ---------
 arch/mips/configs/pnx8335_stb225_defconfig  |  7 -------
 arch/mips/configs/qi_lb60_defconfig         |  3 ---
 arch/mips/configs/rb532_defconfig           |  6 ------
 arch/mips/configs/rbtx49xx_defconfig        |  7 -------
 arch/mips/configs/rm200_defconfig           |  8 --------
 arch/mips/configs/rt305x_defconfig          |  2 --
 arch/mips/configs/sb1250_swarm_defconfig    |  1 -
 arch/mips/configs/tb0219_defconfig          |  6 ------
 arch/mips/configs/tb0226_defconfig          |  7 -------
 arch/mips/configs/tb0287_defconfig          |  5 -----
 arch/mips/configs/workpad_defconfig         |  4 ----
 47 files changed, 221 deletions(-)

diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig
index 320772caf0546..92fca3c42eac3 100644
--- a/arch/mips/configs/ar7_defconfig
+++ b/arch/mips/configs/ar7_defconfig
@@ -3,7 +3,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_KERNEL_LZMA=y
 CONFIG_SYSVIPC=y
@@ -41,7 +40,6 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
@@ -86,7 +84,6 @@ CONFIG_MAC80211_RC_DEFAULT_PID=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -99,8 +96,6 @@ CONFIG_FIXED_PHY=y
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 CONFIG_CPMAC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_PPP=m
 CONFIG_PPP_MULTILINK=y
 CONFIG_PPP_FILTER=y
@@ -142,7 +137,6 @@ CONFIG_BSD_DISKLABEL=y
 # CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="rootfstype=squashfs,jffs2"
 CONFIG_CRYPTO=y
diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig
index 134879c1310a0..25ed914933e5c 100644
--- a/arch/mips/configs/ath79_defconfig
+++ b/arch/mips/configs/ath79_defconfig
@@ -7,7 +7,6 @@ CONFIG_ATH79_MACH_PB44=y
 CONFIG_ATH79_MACH_UBNT_XM=y
 CONFIG_HZ_100=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -35,7 +34,6 @@ CONFIG_IP_ADVANCED_ROUTER=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
diff --git a/arch/mips/configs/bcm63xx_defconfig b/arch/mips/configs/bcm63xx_defconfig
index 5599a9f1e3c67..131b350f014f1 100644
--- a/arch/mips/configs/bcm63xx_defconfig
+++ b/arch/mips/configs/bcm63xx_defconfig
@@ -5,7 +5,6 @@ CONFIG_BCM63XX_CPU_6348=y
 CONFIG_BCM63XX_CPU_6358=y
 CONFIG_NO_HZ=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_TINY_RCU=y
@@ -33,7 +32,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_CFG80211=y
@@ -50,13 +48,10 @@ CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
 # CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
 CONFIG_BCM63XX_PHY=y
 CONFIG_NET_ETHERNET=y
 CONFIG_BCM63XX_ENET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_B43=y
 # CONFIG_B43_PHY_LP is not set
 # CONFIG_INPUT is not set
@@ -70,7 +65,6 @@ CONFIG_SERIAL_BCM63XX_CONSOLE=y
 # CONFIG_HWMON is not set
 # CONFIG_VGA_ARB is not set
 CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
 CONFIG_USB_OHCI_HCD=y
@@ -84,7 +78,6 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_PROC_KCORE=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=ttyS0,115200"
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index d20b09d77b534..a55009edbb29a 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -4,7 +4,6 @@ CONFIG_SMP=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HZ_1000=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_BSD_PROCESS_ACCT=y
@@ -60,7 +59,6 @@ CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
 CONFIG_INET_XFRM_MODE_TRANSPORT=m
 CONFIG_INET_XFRM_MODE_TUNNEL=m
-# CONFIG_INET_LRO is not set
 CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
@@ -182,7 +180,6 @@ CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
@@ -284,7 +281,6 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRC_T10DIF=m
 CONFIG_CRC7=m
diff --git a/arch/mips/configs/bmips_be_defconfig b/arch/mips/configs/bmips_be_defconfig
index acf7785c4cdb7..a7072a14d3968 100644
--- a/arch/mips/configs/bmips_be_defconfig
+++ b/arch/mips/configs/bmips_be_defconfig
@@ -23,7 +23,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_CFG80211=y
 CONFIG_NL80211_TESTMODE=y
diff --git a/arch/mips/configs/capcella_defconfig b/arch/mips/configs/capcella_defconfig
index 2924ba34a01bf..bd80b5c852dde 100644
--- a/arch/mips/configs/capcella_defconfig
+++ b/arch/mips/configs/capcella_defconfig
@@ -1,6 +1,5 @@
 CONFIG_MACH_VR41XX=y
 CONFIG_ZAO_CAPCELLA=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -46,8 +45,6 @@ CONFIG_SMSC_PHY=m
 CONFIG_NET_ETHERNET=y
 CONFIG_NET_PCI=y
 CONFIG_8139TOO=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -59,7 +56,6 @@ CONFIG_SERIAL_VR41XX_CONSOLE=y
 CONFIG_GPIO_VR41XX=y
 # CONFIG_HWMON is not set
 # CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_VR41XX=y
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index d4fda41f00ba6..e5b18f1a31a07 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -42,7 +42,6 @@ CONFIG_IP_MROUTE=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
 CONFIG_IPV6=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig
index 43e0ba24470cd..b42cfa7865f98 100644
--- a/arch/mips/configs/ci20_defconfig
+++ b/arch/mips/configs/ci20_defconfig
@@ -41,7 +41,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig
index 23b66934e18d1..a9066f3006654 100644
--- a/arch/mips/configs/cobalt_defconfig
+++ b/arch/mips/configs/cobalt_defconfig
@@ -1,5 +1,4 @@
 CONFIG_MIPS_COBALT=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_RELAY=y
@@ -15,17 +14,14 @@ CONFIG_XFRM_USER=y
 CONFIG_NET_KEY=y
 CONFIG_NET_KEY_MIGRATE=y
 CONFIG_INET=y
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLKDEVS=y
 CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_BLK_DEV_LOOP=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_RAID_ATTRS=y
 CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOWLEVEL is not set
@@ -36,8 +32,6 @@ CONFIG_NET_ETHERNET=y
 CONFIG_NET_TULIP=y
 CONFIG_DE2104X=y
 CONFIG_TULIP=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
@@ -56,7 +50,6 @@ CONFIG_FB_COBALT=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_HID=m
 CONFIG_USB=m
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_EHCI_HCD=m
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
 CONFIG_USB_OHCI_HCD=m
@@ -84,6 +77,5 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFSD=y
 CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRC16=y
 CONFIG_LIBCRC32C=y
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 2b6cb41d57159..e149f78901f84 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -1,6 +1,5 @@
 CONFIG_MACH_DECSTATION=y
 CONFIG_CPU_R3000=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
diff --git a/arch/mips/configs/e55_defconfig b/arch/mips/configs/e55_defconfig
index e94d266c4b979..c3ac0209457cb 100644
--- a/arch/mips/configs/e55_defconfig
+++ b/arch/mips/configs/e55_defconfig
@@ -1,6 +1,5 @@
 CONFIG_MACH_VR41XX=y
 CONFIG_CASIO_E55=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -28,7 +27,6 @@ CONFIG_SERIAL_VR41XX_CONSOLE=y
 CONFIG_GPIO_VR41XX=y
 # CONFIG_HWMON is not set
 # CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_VR41XX=y
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 87435897fd50c..499f51498ecbf 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -3,7 +3,6 @@ CONFIG_64BIT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOCALVERSION="-fuloong2e"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
@@ -47,7 +46,6 @@ CONFIG_NET_IPGRE=m
 CONFIG_NET_IPGRE_BROADCAST=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
@@ -79,7 +77,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_TIME=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_ADDRTYPE=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -88,7 +85,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
@@ -101,7 +97,6 @@ CONFIG_NET_9P=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_FW_LOADER=m
 CONFIG_MTD=m
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=m
 CONFIG_MTD_CFI=m
 CONFIG_MTD_JEDECPROBE=m
@@ -163,7 +158,6 @@ CONFIG_I2C=m
 CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_VIAPRO=m
 # CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
 CONFIG_FB_RADEON=y
 # CONFIG_FB_RADEON_I2C is not set
@@ -184,7 +178,6 @@ CONFIG_USB_KBD=y
 CONFIG_USB_MOUSE=y
 CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_OTG_WHITELIST=y
 CONFIG_USB_WUSB_CBAF=m
 CONFIG_USB_C67X00_HCD=m
@@ -201,7 +194,6 @@ CONFIG_USB_TMC=m
 CONFIG_USB_STORAGE=y
 CONFIG_USB_STORAGE_ONETOUCH=y
 CONFIG_USB_STORAGE_CYPRESS_ATACB=y
-CONFIG_USB_LIBUSUAL=y
 CONFIG_USB_SEVSEG=m
 CONFIG_USB_ISIGHTFW=m
 CONFIG_UIO=m
@@ -215,7 +207,6 @@ CONFIG_EXT4_FS=m
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_REISERFS_FS=m
-CONFIG_AUTOFS_FS=y
 CONFIG_AUTOFS4_FS=y
 CONFIG_FUSE_FS=y
 CONFIG_ISO9660_FS=m
@@ -256,8 +247,6 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_UTF8=y
 # CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRYPTO_FIPS=y
 CONFIG_CRYPTO_AUTHENC=m
 CONFIG_CRYPTO_CCM=m
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index e24feb0633aa3..b1911816337c3 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -2,7 +2,6 @@ CONFIG_MIPS_ALCHEMY=y
 CONFIG_MIPS_GPR=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -59,7 +58,6 @@ CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_ADDRTYPE=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -68,7 +66,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
@@ -166,7 +163,6 @@ CONFIG_YAM=m
 CONFIG_CFG80211=y
 CONFIG_MAC80211=y
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -200,8 +196,6 @@ CONFIG_SMSC_PHY=m
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 CONFIG_MIPS_AU1X00_ENET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_ATH_COMMON=y
 CONFIG_ATH_DEBUG=y
 CONFIG_ATH5K=y
@@ -286,14 +280,12 @@ CONFIG_USB_HIDDEV=y
 CONFIG_USB_KBD=m
 CONFIG_USB_MOUSE=m
 CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=m
-CONFIG_USB_LIBUSUAL=y
 CONFIG_USB_SERIAL=y
 CONFIG_USB_EZUSB=y
 CONFIG_USB_SERIAL_GENERIC=y
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index ec8e9684296d0..83e8fe2064aaa 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -4,7 +4,6 @@ CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HZ_1000=y
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -46,7 +45,6 @@ CONFIG_INET_IPCOMP=m
 CONFIG_INET_XFRM_MODE_TRANSPORT=m
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 CONFIG_INET_XFRM_MODE_BEET=m
-# CONFIG_INET_LRO is not set
 CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
@@ -139,7 +137,6 @@ CONFIG_IP_VS_SED=m
 CONFIG_IP_VS_NQ=m
 CONFIG_IP_VS_FTP=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_ADDRTYPE=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -148,7 +145,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
@@ -163,7 +159,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -174,7 +169,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
 CONFIG_IP6_NF_MATCH_MH=m
 CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
@@ -215,7 +209,6 @@ CONFIG_RFKILL=m
 CONFIG_CONNECTOR=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
-# CONFIG_MISC_DEVICES is not set
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -245,8 +238,6 @@ CONFIG_MDIO_BITBANG=m
 CONFIG_NET_ETHERNET=y
 CONFIG_SMC91X=m
 CONFIG_SGISEEQ=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_HOSTAP=m
 CONFIG_INPUT_MOUSEDEV=m
 CONFIG_MOUSE_PS2=m
@@ -286,7 +277,6 @@ CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
@@ -355,7 +345,6 @@ CONFIG_NLS_KOI8_U=m
 CONFIG_NLS_UTF8=m
 CONFIG_DLM=m
 CONFIG_DEBUG_MEMORY_INIT=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_KEYS=y
 CONFIG_CRYPTO_FIPS=y
 CONFIG_CRYPTO_NULL=m
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index e582069b44fdf..a0d593248668f 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -5,7 +5,6 @@ CONFIG_SMP=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HZ_1000=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_IKCONFIG=y
@@ -104,7 +103,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
-# CONFIG_MISC_DEVICES is not set
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
@@ -325,7 +323,6 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
-CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_FSCACHE=m
@@ -342,7 +339,6 @@ CONFIG_NFS_V3=y
 CONFIG_RPCSEC_GSS_KRB5=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_DLM=m
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_KEYS=y
 CONFIG_SECURITYFS=y
 CONFIG_CRYPTO_FIPS=y
@@ -378,7 +374,6 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_DEV_HIFN_795X=m
 CONFIG_CRC_T10DIF=m
diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig
index 4dbf6269b3f90..d0a4c2cfacf8b 100644
--- a/arch/mips/configs/ip28_defconfig
+++ b/arch/mips/configs/ip28_defconfig
@@ -1,7 +1,6 @@
 CONFIG_SGI_IP28=y
 CONFIG_ARC_CONSOLE=y
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -35,10 +34,8 @@ CONFIG_IP_PNP_BOOTP=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 CONFIG_TCP_MD5SIG=y
 # CONFIG_IPV6 is not set
-# CONFIG_MISC_DEVICES is not set
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
@@ -48,8 +45,6 @@ CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 CONFIG_NET_ETHERNET=y
 CONFIG_SGISEEQ=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_SYNAPTICS is not set
 CONFIG_VT_HW_CONSOLE_BINDING=y
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index f9af98f63cff4..1e26e58b9dc36 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -1,6 +1,5 @@
 CONFIG_SGI_IP32=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_BSD_PROCESS_ACCT=y
@@ -38,7 +37,6 @@ CONFIG_NET_IPGRE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_MD5SIG=y
 CONFIG_INET6_AH=m
@@ -76,8 +74,6 @@ CONFIG_NET_TULIP=y
 CONFIG_DE2104X=m
 CONFIG_TULIP=m
 CONFIG_TULIP_MMIO=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_INPUT_EVDEV=m
 # CONFIG_SERIO_I8042 is not set
 CONFIG_SERIO_MACEPS2=y
@@ -87,7 +83,6 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_GBE=y
@@ -117,7 +112,6 @@ CONFIG_EXT3_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
@@ -178,8 +172,6 @@ CONFIG_NLS_KOI8_R=m
 CONFIG_NLS_KOI8_U=m
 CONFIG_NLS_UTF8=m
 CONFIG_MAGIC_SYSRQ=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_KEYS=y
 CONFIG_CRYPTO_NULL=y
 CONFIG_CRYPTO_CBC=y
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig
index 3019fce63cd36..9ad1c94376c8d 100644
--- a/arch/mips/configs/jazz_defconfig
+++ b/arch/mips/configs/jazz_defconfig
@@ -1,7 +1,6 @@
 CONFIG_MACH_JAZZ=y
 CONFIG_OLIVETTI_M700=y
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_BSD_PROCESS_ACCT=y
@@ -85,7 +84,6 @@ CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_ADDRTYPE=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -94,7 +92,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
@@ -109,7 +106,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -120,7 +116,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
 CONFIG_IP6_NF_MATCH_MH=m
 CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
@@ -276,7 +271,6 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_QUOTA=y
-CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig
index 9bc08f2751206..af12281a5c33f 100644
--- a/arch/mips/configs/jmr3927_defconfig
+++ b/arch/mips/configs/jmr3927_defconfig
@@ -18,23 +18,18 @@ CONFIG_IP_PNP_BOOTP=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
 CONFIG_NET_ETHERNET=y
 CONFIG_NET_PCI=y
 CONFIG_TC35815=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
@@ -58,5 +53,3 @@ CONFIG_PROC_KCORE=y
 # CONFIG_MISC_FILESYSTEMS is not set
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/mips/configs/lasat_defconfig b/arch/mips/configs/lasat_defconfig
index e620a2c3eba47..947a35c7c46cf 100644
--- a/arch/mips/configs/lasat_defconfig
+++ b/arch/mips/configs/lasat_defconfig
@@ -5,7 +5,6 @@ CONFIG_DS1603=y
 CONFIG_LASAT_SYSCTL=y
 CONFIG_HZ_1000=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_EXPERT=y
@@ -31,7 +30,6 @@ CONFIG_INET=y
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
@@ -44,8 +42,6 @@ CONFIG_NETDEVICES=y
 CONFIG_NET_ETHERNET=y
 CONFIG_NET_PCI=y
 CONFIG_PCNET32=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -56,7 +52,6 @@ CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_SERIAL_8250_PCI is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 8df80c6383f26..1ec8ed8d05d11 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -7,7 +7,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_PREEMPT=y
 CONFIG_KEXEC=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_BSD_PROCESS_ACCT=y
@@ -83,8 +82,6 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_EMATCH=y
 CONFIG_NET_CLS_ACT=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=y
-CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
@@ -142,7 +139,6 @@ CONFIG_8139TOO=y
 # CONFIG_8139TOO_PIO is not set
 CONFIG_R8169=y
 CONFIG_R8169_VLAN=y
-# CONFIG_NETDEV_10000 is not set
 CONFIG_USB_USBNET=m
 CONFIG_USB_NET_CDC_EEM=m
 CONFIG_NETCONSOLE=m
@@ -205,7 +201,6 @@ CONFIG_USB_ZR364XX=m
 CONFIG_USB_STKWEBCAM=m
 CONFIG_USB_S2255=m
 # CONFIG_RADIO_ADAPTERS is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_MODE_HELPERS=y
@@ -290,7 +285,6 @@ CONFIG_HID_WACOM=m
 CONFIG_HID_ZEROPLUS=m
 CONFIG_ZEROPLUS_FF=y
 CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_DYNAMIC_MINORS=y
 CONFIG_USB_OTG_WHITELIST=y
 CONFIG_USB_MON=y
@@ -313,10 +307,8 @@ CONFIG_USB_STORAGE_SDDR09=m
 CONFIG_USB_STORAGE_SDDR55=m
 CONFIG_USB_STORAGE_JUMPSHOT=m
 CONFIG_USB_STORAGE_ALAUDA=m
-CONFIG_USB_LIBUSUAL=y
 CONFIG_USB_SERIAL=m
 CONFIG_USB_SERIAL_GENERIC=y
-CONFIG_USB_LED=m
 CONFIG_USB_GADGET=m
 CONFIG_USB_GADGET_M66592=y
 CONFIG_MMC=m
@@ -341,7 +333,6 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_BTRFS_FS=m
 CONFIG_QUOTA=y
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
@@ -407,8 +398,6 @@ CONFIG_PRINTK_TIME=y
 CONFIG_FRAME_WARN=1024
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_KEYS=y
 CONFIG_CRYPTO_FIPS=y
 CONFIG_CRYPTO_NULL=m
@@ -446,6 +435,5 @@ CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_DEFLATE=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRC_T10DIF=y
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 7f95c4b3ab2c5..324dfee23dfb2 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -95,7 +95,6 @@ CONFIG_IP_NF_MATCH_ECN=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
@@ -252,7 +251,6 @@ CONFIG_MEDIA_USB_SUPPORT=y
 CONFIG_USB_VIDEO_CLASS=m
 CONFIG_DRM=y
 CONFIG_DRM_RADEON=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
 CONFIG_FB_RADEON=y
 CONFIG_LCD_CLASS_DEVICE=y
 CONFIG_LCD_PLATFORM=m
@@ -335,7 +333,6 @@ CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_DEBUG_PREEMPT is not set
-# CONFIG_RCU_CPU_STALL_VERBOSE is not set
 # CONFIG_FTRACE is not set
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index e233f878afef7..80ecd94ed1263 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -133,7 +133,6 @@ CONFIG_IP_NF_MATCH_ECN=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig
index fbe085c328abf..35ad1f8d1a79c 100644
--- a/arch/mips/configs/malta_kvm_guest_defconfig
+++ b/arch/mips/configs/malta_kvm_guest_defconfig
@@ -132,7 +132,6 @@ CONFIG_IP_NF_MATCH_ECN=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
index cbf37dd0c4908..77145ecaa23bd 100644
--- a/arch/mips/configs/malta_qemu_32r6_defconfig
+++ b/arch/mips/configs/malta_qemu_32r6_defconfig
@@ -42,7 +42,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig
index 35f6ba260df8f..cc2687cfdc135 100644
--- a/arch/mips/configs/maltaaprp_defconfig
+++ b/arch/mips/configs/maltaaprp_defconfig
@@ -43,7 +43,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
@@ -135,7 +134,6 @@ CONFIG_HW_RANDOM=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_MATROX=y
diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index 900f14543eeb9..55b68b981b057 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig
@@ -46,7 +46,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
index 8e2738b5e180a..5ca590cf16358 100644
--- a/arch/mips/configs/maltasmvp_eva_defconfig
+++ b/arch/mips/configs/maltasmvp_eva_defconfig
@@ -47,7 +47,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
@@ -140,7 +139,6 @@ CONFIG_HW_RANDOM=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_MATROX=y
diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig
index 6dc4e309a6918..7ea7c0ba26664 100644
--- a/arch/mips/configs/maltaup_defconfig
+++ b/arch/mips/configs/maltaup_defconfig
@@ -42,7 +42,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
@@ -134,7 +133,6 @@ CONFIG_HW_RANDOM=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_MATROX=y
diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig
index 0f08e4623ee41..43ce6576ab1c2 100644
--- a/arch/mips/configs/markeins_defconfig
+++ b/arch/mips/configs/markeins_defconfig
@@ -1,7 +1,6 @@
 CONFIG_NEC_MARKEINS=y
 CONFIG_HZ_1000=y
 CONFIG_PREEMPT=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_BSD_PROCESS_ACCT=y
@@ -92,7 +91,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
@@ -117,7 +115,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
 CONFIG_IP6_NF_MATCH_MH=m
 CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
@@ -125,7 +122,6 @@ CONFIG_IP6_NF_RAW=m
 CONFIG_FW_LOADER=m
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/mips/configs/mips_paravirt_defconfig b/arch/mips/configs/mips_paravirt_defconfig
index 84cfcb4bf2ea7..accf0db1dc6f2 100644
--- a/arch/mips/configs/mips_paravirt_defconfig
+++ b/arch/mips/configs/mips_paravirt_defconfig
@@ -39,7 +39,6 @@ CONFIG_IP_MROUTE=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
 CONFIG_IPV6=y
 # CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
diff --git a/arch/mips/configs/mpc30x_defconfig b/arch/mips/configs/mpc30x_defconfig
index a2c045fab6c58..3486b034f7264 100644
--- a/arch/mips/configs/mpc30x_defconfig
+++ b/arch/mips/configs/mpc30x_defconfig
@@ -1,6 +1,5 @@
 CONFIG_MACH_VR41XX=y
 CONFIG_VICTOR_MPC30X=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_RELAY=y
@@ -31,8 +30,6 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_PATA_LEGACY=y
 CONFIG_NETDEVICES=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_USB_PEGASUS=m
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
@@ -45,13 +42,11 @@ CONFIG_SERIAL_VR41XX_CONSOLE=y
 CONFIG_GPIO_VR41XX=y
 # CONFIG_HWMON is not set
 # CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
 CONFIG_USB=m
 CONFIG_USB_OHCI_HCD=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_VR41XX=y
 CONFIG_EXT2_FS=y
-CONFIG_AUTOFS_FS=y
 CONFIG_AUTOFS4_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_CONFIGFS_FS=m
diff --git a/arch/mips/configs/msp71xx_defconfig b/arch/mips/configs/msp71xx_defconfig
index 201edfb2637df..3c8c16b10732d 100644
--- a/arch/mips/configs/msp71xx_defconfig
+++ b/arch/mips/configs/msp71xx_defconfig
@@ -2,7 +2,6 @@ CONFIG_PMC_MSP=y
 CONFIG_PMC_MSP7120_GW=y
 CONFIG_CPU_MIPS32_R2=y
 CONFIG_PREEMPT=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_LOCALVERSION="-pmc"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
@@ -38,7 +37,6 @@ CONFIG_BRIDGE=y
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index f3f60056bc274..4011f1869e726 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -1,7 +1,6 @@
 CONFIG_MIPS_ALCHEMY=y
 CONFIG_MIPS_MTX1=y
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -81,7 +80,6 @@ CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_ADDRTYPE=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -90,7 +88,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
@@ -98,7 +95,6 @@ CONFIG_IP_NF_RAW=m
 CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP6_NF_QUEUE=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -108,7 +104,6 @@ CONFIG_IP6_NF_MATCH_HL=m
 CONFIG_IP6_NF_MATCH_IPV6HEADER=m
 CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
@@ -225,8 +220,6 @@ CONFIG_TOSHIBA_FIR=m
 CONFIG_VLSI_FIR=m
 CONFIG_MCS_FIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=y
-CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
@@ -246,7 +239,6 @@ CONFIG_BT_HCIBTUART=m
 CONFIG_BT_HCIVHCI=m
 CONFIG_CONNECTOR=m
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
@@ -257,7 +249,6 @@ CONFIG_MTD_PHYSMAP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
-# CONFIG_MISC_DEVICES is not set
 CONFIG_SCSI=m
 CONFIG_BLK_DEV_SD=m
 CONFIG_CHR_DEV_SG=m
@@ -596,7 +587,6 @@ CONFIG_USB_STORAGE_SDDR55=m
 CONFIG_USB_STORAGE_JUMPSHOT=m
 CONFIG_USB_STORAGE_ALAUDA=m
 CONFIG_USB_STORAGE_KARMA=m
-CONFIG_USB_LIBUSUAL=y
 CONFIG_USB_MDC800=m
 CONFIG_USB_MICROTEK=m
 CONFIG_USB_SERIAL=m
@@ -640,7 +630,6 @@ CONFIG_USB_ADUTUX=m
 CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
 CONFIG_USB_CYPRESS_CY7C63=m
 CONFIG_USB_CYTHERM=m
 CONFIG_USB_IDMOUSE=m
diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig
index 07d01827a9731..5720ce23e9aae 100644
--- a/arch/mips/configs/nlm_xlp_defconfig
+++ b/arch/mips/configs/nlm_xlp_defconfig
@@ -6,7 +6,6 @@ CONFIG_KSM=y
 CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
 CONFIG_SMP=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -183,14 +182,12 @@ CONFIG_IP_VS_SED=m
 CONFIG_IP_VS_NQ=m
 CONFIG_IP_VS_FTP=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
@@ -317,7 +314,6 @@ CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -607,7 +603,6 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRC_CCITT=m
 CONFIG_CRC7=m
diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
index f59969acb7243..fea56c535d923 100644
--- a/arch/mips/configs/nlm_xlr_defconfig
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -7,7 +7,6 @@ CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_KEXEC=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_CROSS_COMPILE=""
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
@@ -163,7 +162,6 @@ CONFIG_IP_VS_SED=m
 CONFIG_IP_VS_NQ=m
 CONFIG_IP_VS_FTP=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -171,7 +169,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
@@ -186,7 +183,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -197,7 +193,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
 CONFIG_IP6_NF_MATCH_MH=m
 CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
@@ -308,7 +303,6 @@ CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
 CONFIG_CDROM_PKTCDVD=y
-CONFIG_MISC_DEVICES=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -369,7 +363,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1374=y
 # CONFIG_HWMON is not set
 # CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_UIO=y
 CONFIG_UIO_PDRV=m
@@ -522,7 +515,6 @@ CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_KGDB=y
@@ -568,7 +560,6 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRC_CCITT=m
 CONFIG_CRC7=m
diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig
index c887066ecc2a7..81b5eb89446c7 100644
--- a/arch/mips/configs/pnx8335_stb225_defconfig
+++ b/arch/mips/configs/pnx8335_stb225_defconfig
@@ -5,7 +5,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HZ_128=y
 CONFIG_PREEMPT_VOLUNTARY=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
@@ -27,12 +26,10 @@ CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_INET_AH=y
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -41,15 +38,12 @@ CONFIG_MTD_CFI_GEOMETRY=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_BLK_DEV_LOOP=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_ATA=y
 CONFIG_NETDEVICES=y
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_EVDEV=m
 CONFIG_INPUT_EVBUG=m
@@ -94,4 +88,3 @@ CONFIG_NLS_ASCII=m
 CONFIG_NLS_ISO8859_1=m
 CONFIG_NLS_ISO8859_15=m
 CONFIG_NLS_UTF8=m
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig
index d7bb8cce1068c..3f13335174057 100644
--- a/arch/mips/configs/qi_lb60_defconfig
+++ b/arch/mips/configs/qi_lb60_defconfig
@@ -34,7 +34,6 @@ CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
@@ -109,7 +108,6 @@ CONFIG_USB_GADGET_DEBUG=y
 CONFIG_USB_ETH=y
 # CONFIG_USB_ETH_RNDIS is not set
 CONFIG_MMC=y
-CONFIG_MMC_UNSAFE_RESUME=y
 # CONFIG_MMC_BLOCK_BOUNCE is not set
 CONFIG_MMC_JZ4740=y
 CONFIG_RTC_CLASS=y
@@ -183,7 +181,6 @@ CONFIG_PANIC_ON_OOPS=y
 # CONFIG_FTRACE is not set
 CONFIG_KGDB=y
 CONFIG_RUNTIME_DEBUG=y
-CONFIG_CRYPTO_ZLIB=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_FONTS=y
 CONFIG_FONT_SUN8x16=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 5d9d708e12e51..6fa56c6e53f50 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -3,7 +3,6 @@ CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HZ_100=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_BSD_PROCESS_ACCT=y
@@ -39,7 +38,6 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_CUBIC=m
@@ -114,7 +112,6 @@ CONFIG_NET_CLS_IND=y
 CONFIG_HAMRADIO=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_BLOCK2MTD=y
 CONFIG_MTD_NAND=y
@@ -129,8 +126,6 @@ CONFIG_NET_ETHERNET=y
 CONFIG_KORINA=y
 CONFIG_NET_PCI=y
 CONFIG_VIA_RHINE=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_ATMEL=m
 CONFIG_PPP=m
 CONFIG_PPP_MULTILINK=y
@@ -183,7 +178,6 @@ CONFIG_BSD_DISKLABEL=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_CRYPTO=y
 CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ZLIB=y
 # CONFIG_CRYPTO_HW is not set
 CONFIG_CRC16=m
 CONFIG_LIBCRC32C=m
diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig
index 43d55e5abacbe..fb195e29e4499 100644
--- a/arch/mips/configs/rbtx49xx_defconfig
+++ b/arch/mips/configs/rbtx49xx_defconfig
@@ -31,12 +31,10 @@ CONFIG_IP_PNP=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=m
 CONFIG_MTD_BLOCK_RO=m
 CONFIG_MTD_CFI=y
@@ -50,7 +48,6 @@ CONFIG_MTD_NAND_TXX9NDFMC=m
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
-# CONFIG_MISC_DEVICES is not set
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDE_TX4938=y
 CONFIG_BLK_DEV_IDE_TX4939=y
@@ -60,8 +57,6 @@ CONFIG_SMC91X=y
 CONFIG_NE2000=y
 CONFIG_NET_PCI=y
 CONFIG_TC35815=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_WLAN is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
@@ -108,5 +103,3 @@ CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index c2b4e3f33a738..99679e5140426 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -3,7 +3,6 @@ CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_ARC_CONSOLE=y
 CONFIG_HZ_1000=y
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_BSD_PROCESS_ACCT=y
@@ -94,7 +93,6 @@ CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_ADDRTYPE=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -103,7 +101,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
@@ -118,7 +115,6 @@ CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -129,7 +125,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
 CONFIG_IP6_NF_MATCH_MH=m
 CONFIG_IP6_NF_MATCH_RT=m
 CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
 CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
@@ -214,7 +209,6 @@ CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_SX8=m
-CONFIG_BLK_DEV_UB=m
 CONFIG_BLK_DEV_RAM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
@@ -353,7 +347,6 @@ CONFIG_USB_SERIAL_OMNINET=m
 CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
 CONFIG_USB_CYTHERM=m
 CONFIG_USB_SISUSBVGA=m
 CONFIG_USB_LD=m
@@ -366,7 +359,6 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_QUOTA=y
-CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig
index d14ae2fa7d13e..c695b7b1c4ae8 100644
--- a/arch/mips/configs/rt305x_defconfig
+++ b/arch/mips/configs/rt305x_defconfig
@@ -5,7 +5,6 @@ CONFIG_CPU_MIPS32_R2=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HZ_100=y
 # CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -44,7 +43,6 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
index 7fca09fedb591..c724bdd6a7e61 100644
--- a/arch/mips/configs/sb1250_swarm_defconfig
+++ b/arch/mips/configs/sb1250_swarm_defconfig
@@ -4,7 +4,6 @@ CONFIG_64BIT=y
 CONFIG_SMP=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HZ_1000=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=15
 CONFIG_CGROUPS=y
diff --git a/arch/mips/configs/tb0219_defconfig b/arch/mips/configs/tb0219_defconfig
index 11f51505d5628..4041597e3170b 100644
--- a/arch/mips/configs/tb0219_defconfig
+++ b/arch/mips/configs/tb0219_defconfig
@@ -1,6 +1,5 @@
 CONFIG_MACH_VR41XX=y
 CONFIG_TANBAC_TB0219=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_SYSFS_DEPRECATED_V2=y
@@ -31,7 +30,6 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETWORK_SECMARK=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -40,7 +38,6 @@ CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_XIP=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
 CONFIG_PHYLIB=m
 CONFIG_MARVELL_PHY=m
@@ -57,7 +54,6 @@ CONFIG_VIA_RHINE=y
 CONFIG_VIA_RHINE_MMIO=y
 CONFIG_R8169=y
 CONFIG_VIA_VELOCITY=y
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -70,7 +66,6 @@ CONFIG_SERIAL_VR41XX_CONSOLE=y
 CONFIG_GPIO_TB0219=y
 # CONFIG_HWMON is not set
 # CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
 CONFIG_USB=m
 CONFIG_USB_MON=m
 CONFIG_USB_EHCI_HCD=m
@@ -91,6 +86,5 @@ CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
 CONFIG_NFSD_V3=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="cca=3 mem=64M console=ttyVR0,115200 ip=any root=/dev/nfs"
diff --git a/arch/mips/configs/tb0226_defconfig b/arch/mips/configs/tb0226_defconfig
index 9327b3af32cd2..565f0441c50d2 100644
--- a/arch/mips/configs/tb0226_defconfig
+++ b/arch/mips/configs/tb0226_defconfig
@@ -1,6 +1,5 @@
 CONFIG_MACH_VR41XX=y
 CONFIG_TANBAC_TB0226=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_SYSFS_DEPRECATED_V2=y
@@ -29,7 +28,6 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETWORK_SECMARK=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -37,7 +35,6 @@ CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_XIP=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_MULTI_LUN=y
@@ -49,8 +46,6 @@ CONFIG_NETDEVICES=y
 CONFIG_NET_ETHERNET=y
 CONFIG_NET_PCI=y
 CONFIG_E100=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_USB_CATC=m
 CONFIG_USB_KAWETH=m
 CONFIG_USB_PEGASUS=m
@@ -66,7 +61,6 @@ CONFIG_SERIAL_VR41XX_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
 CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -87,7 +81,6 @@ CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
 CONFIG_NFSD_V3=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="cca=3 mem=32M console=ttyVR0,115200"
 CONFIG_CRC32=m
diff --git a/arch/mips/configs/tb0287_defconfig b/arch/mips/configs/tb0287_defconfig
index a967289b79706..a702be602fb97 100644
--- a/arch/mips/configs/tb0287_defconfig
+++ b/arch/mips/configs/tb0287_defconfig
@@ -1,5 +1,4 @@
 CONFIG_MACH_VR41XX=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_SYSFS_DEPRECATED_V2=y
@@ -31,7 +30,6 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_BIC=y
 CONFIG_TCP_CONG_CUBIC=m
@@ -43,7 +41,6 @@ CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_XIP=y
-# CONFIG_MISC_DEVICES is not set
 CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SCAN_ASYNC=y
 # CONFIG_SCSI_LOWLEVEL is not set
@@ -64,7 +61,6 @@ CONFIG_VIA_RHINE=y
 CONFIG_VIA_RHINE_MMIO=y
 CONFIG_R8169=y
 CONFIG_VIA_VELOCITY=y
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -76,7 +72,6 @@ CONFIG_SERIAL_VR41XX_CONSOLE=y
 CONFIG_GPIO_VR41XX=y
 # CONFIG_HWMON is not set
 CONFIG_MFD_SM501=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
 CONFIG_FB_SM501=y
 # CONFIG_VGA_CONSOLE is not set
diff --git a/arch/mips/configs/workpad_defconfig b/arch/mips/configs/workpad_defconfig
index ee4b2be43c44e..a84eac409c9c3 100644
--- a/arch/mips/configs/workpad_defconfig
+++ b/arch/mips/configs/workpad_defconfig
@@ -1,6 +1,5 @@
 CONFIG_MACH_VR41XX=y
 CONFIG_IBM_WORKPAD=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -28,13 +27,10 @@ CONFIG_IP_MULTICAST=y
 # CONFIG_IPV6 is not set
 CONFIG_NETWORK_SECMARK=y
 CONFIG_BLK_DEV_RAM=m
-# CONFIG_MISC_DEVICES is not set
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECS=m
 CONFIG_IDE_GENERIC=y
 CONFIG_NETDEVICES=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 CONFIG_NET_PCMCIA=y
 CONFIG_PCMCIA_3C589=m
 CONFIG_PCMCIA_3C574=m
-- 
GitLab


From a27b8f31cb7929bfb8dc6ca3e6b7a0a39609d7f3 Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@collabora.com>
Date: Tue, 14 Feb 2017 20:58:00 +0100
Subject: [PATCH 0431/1958] cros_ec: Don't return error when checking command
 version

With this patch, cros_ec_query_all() does not return an error if it
fails to check for MKBP events support. Instead, the EC device structure
indicates that it does not support MKBP events (mkbp_event_supported
field) and cros_ec_query_all() returns 0.

Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_proto.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 7428c2b965bba..8f57500d2b788 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -371,6 +371,8 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev)
 	else
 		ec_dev->mkbp_event_supported = 1;
 
+	ret = 0;
+
 exit:
 	kfree(proto_msg);
 	return ret;
-- 
GitLab


From d4da97e59e1004aa1a15dd75469def20cd84ab99 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Tue, 14 Feb 2017 20:58:01 +0100
Subject: [PATCH 0432/1958] cros_ec: Fix deadlock when EC is not responsive at
 probe

When the EC is not responsive at probe, we try to get basic information
(protocol to use) later on through cros_xfer_cmd() call.
This patch makes sure there is no deadlock when re-probing the EC by
replacing call to cros_xfer_cmd() with send_command() in the function
cros_ec_get_host_command_version_mask(). Also, this patch adds the
function header indicating it must be called protected.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_proto.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 8f57500d2b788..d6942a67a182f 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -235,6 +235,22 @@ static int cros_ec_host_command_proto_query_v2(struct cros_ec_device *ec_dev)
 	return ret;
 }
 
+/*
+ * cros_ec_get_host_command_version_mask
+ *
+ * Get the version mask of a given command.
+ *
+ * @ec_dev: EC device to call
+ * @msg: message structure to use
+ * @cmd: command to get the version of.
+ * @mask: result when function returns 0.
+ *
+ * @return 0 on success, error code otherwise
+ *
+ * LOCKING:
+ * the caller has ec_dev->lock mutex or the caller knows there is
+ * no other command in progress.
+ */
 static int cros_ec_get_host_command_version_mask(struct cros_ec_device *ec_dev,
 	u16 cmd, u32 *mask)
 {
@@ -256,7 +272,7 @@ static int cros_ec_get_host_command_version_mask(struct cros_ec_device *ec_dev,
 	pver = (struct ec_params_get_cmd_versions *)msg->data;
 	pver->cmd = cmd;
 
-	ret = cros_ec_cmd_xfer(ec_dev, msg);
+	ret = send_command(ec_dev, msg);
 	if (ret > 0) {
 		rver = (struct ec_response_get_cmd_versions *)msg->data;
 		*mask = rver->version_mask;
-- 
GitLab


From 29d99b966d60029a11d08b9b004cd84b21ce0d67 Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Tue, 14 Feb 2017 20:58:02 +0100
Subject: [PATCH 0433/1958] cros_ec: Don't signal wake event for non-wake host
 events

The subset of wake-enabled host events is defined by the EC, but the EC
may still send non-wake host events if we're in the process of
suspending. Get the mask of wake-enabled host events from the EC and
filter out non-wake events to prevent spurious aborted suspend
attempts.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Acked-for-MFD-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/mfd/cros_ec.c                   | 13 ++++-
 drivers/platform/chrome/cros_ec_lpc.c   |  3 +-
 drivers/platform/chrome/cros_ec_proto.c | 76 +++++++++++++++++++++++--
 include/linux/mfd/cros_ec.h             |  5 +-
 4 files changed, 87 insertions(+), 10 deletions(-)

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index d4a407e466b54..fc2569f3e7c9c 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -54,12 +54,19 @@ static const struct mfd_cell ec_pd_cell = {
 static irqreturn_t ec_irq_thread(int irq, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
+	bool wake_event = true;
 	int ret;
 
-	if (device_may_wakeup(ec_dev->dev))
+	ret = cros_ec_get_next_event(ec_dev, &wake_event);
+
+	/*
+	 * Signal only if wake host events or any interrupt if
+	 * cros_ec_get_next_event() returned an error (default value for
+	 * wake_event is true)
+	 */
+	if (wake_event && device_may_wakeup(ec_dev->dev))
 		pm_wakeup_event(ec_dev->dev, 0);
 
-	ret = cros_ec_get_next_event(ec_dev);
 	if (ret > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier,
 					     0, ec_dev);
@@ -221,7 +228,7 @@ EXPORT_SYMBOL(cros_ec_suspend);
 
 static void cros_ec_drain_events(struct cros_ec_device *ec_dev)
 {
-	while (cros_ec_get_next_event(ec_dev) > 0)
+	while (cros_ec_get_next_event(ec_dev, NULL) > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier,
 					     1, ec_dev);
 }
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index eeb187e558ce9..2b6436d1b6a47 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -231,7 +231,8 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
 
-	if (ec_dev->mkbp_event_supported && cros_ec_get_next_event(ec_dev) > 0)
+	if (ec_dev->mkbp_event_supported &&
+	    cros_ec_get_next_event(ec_dev, NULL) > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier, 0,
 					     ec_dev);
 }
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index d6942a67a182f..8dfa7fcb12488 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -150,6 +150,40 @@ int cros_ec_check_result(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_check_result);
 
+/*
+ * cros_ec_get_host_event_wake_mask
+ *
+ * Get the mask of host events that cause wake from suspend.
+ *
+ * @ec_dev: EC device to call
+ * @msg: message structure to use
+ * @mask: result when function returns >=0.
+ *
+ * LOCKING:
+ * the caller has ec_dev->lock mutex, or the caller knows there is
+ * no other command in progress.
+ */
+static int cros_ec_get_host_event_wake_mask(struct cros_ec_device *ec_dev,
+					    struct cros_ec_command *msg,
+					    uint32_t *mask)
+{
+	struct ec_response_host_event_mask *r;
+	int ret;
+
+	msg->command = EC_CMD_HOST_EVENT_GET_WAKE_MASK;
+	msg->version = 0;
+	msg->outsize = 0;
+	msg->insize = sizeof(*r);
+
+	ret = send_command(ec_dev, msg);
+	if (ret > 0) {
+		r = (struct ec_response_host_event_mask *)msg->data;
+		*mask = r->mask;
+	}
+
+	return ret;
+}
+
 static int cros_ec_host_command_proto_query(struct cros_ec_device *ec_dev,
 					    int devidx,
 					    struct cros_ec_command *msg)
@@ -387,6 +421,15 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev)
 	else
 		ec_dev->mkbp_event_supported = 1;
 
+	/*
+	 * Get host event wake mask, assume all events are wake events
+	 * if unavailable.
+	 */
+	ret = cros_ec_get_host_event_wake_mask(ec_dev, proto_msg,
+					       &ec_dev->host_event_wake_mask);
+	if (ret < 0)
+		ec_dev->host_event_wake_mask = U32_MAX;
+
 	ret = 0;
 
 exit:
@@ -504,12 +547,35 @@ static int get_keyboard_state_event(struct cros_ec_device *ec_dev)
 	return ec_dev->event_size;
 }
 
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev)
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event)
 {
-	if (ec_dev->mkbp_event_supported)
-		return get_next_event(ec_dev);
-	else
-		return get_keyboard_state_event(ec_dev);
+	u32 host_event;
+	int ret;
+
+	if (!ec_dev->mkbp_event_supported) {
+		ret = get_keyboard_state_event(ec_dev);
+		if (ret < 0)
+			return ret;
+
+		if (wake_event)
+			*wake_event = true;
+
+		return ret;
+	}
+
+	ret = get_next_event(ec_dev);
+	if (ret < 0)
+		return ret;
+
+	if (wake_event) {
+		host_event = cros_ec_get_host_event(ec_dev);
+
+		/* Consider non-host_event as wake event */
+		*wake_event = !host_event ||
+			      !!(host_event & ec_dev->host_event_wake_mask);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL(cros_ec_get_next_event);
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 3b16c9009749c..4e887ba22635e 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -149,6 +149,7 @@ struct cros_ec_device {
 
 	struct ec_response_get_next_event event_data;
 	int event_size;
+	u32 host_event_wake_mask;
 };
 
 /**
@@ -299,10 +300,12 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev);
  * cros_ec_get_next_event -  Fetch next event from the ChromeOS EC
  *
  * @ec_dev: Device to fetch event from
+ * @wake_event: Pointer to a bool set to true upon return if the event might be
+ *              treated as a wake event. Ignored if null.
  *
  * Returns: 0 on success, Linux error number on failure
  */
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev);
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
 
 /**
  * cros_ec_get_host_event - Return a mask of event set by the EC.
-- 
GitLab


From 5d6a312e8f99f0a0bf793256c203fa17c97a15e1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Jun 2017 17:36:36 +0200
Subject: [PATCH 0434/1958] platform/chrome: cros_ec_lightbar - hide unused PM
 functions

The only reference to the new functions is inside of an #ifdef,
which now causes a harmless warning when CONFIG_PM_SLEEP is not set:

chrome/cros_ec_dev.c:478:12: error: 'ec_device_resume' defined but not used [-Werror=unused-function]
chrome/cros_ec_dev.c:469:12: error: 'ec_device_suspend' defined but not used [-Werror=unused-function]

This marks the two functions as __maybe_unused so they can get
silently dropped by the compiler.

Fixes: 405c84308c43 ("platform/chrome: cros_ec_lightbar - Control of suspend/resume lightbar sequence")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index b9bf086f7569a..cf6c4f0846b84 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -466,7 +466,7 @@ static const struct platform_device_id cros_ec_id[] = {
 };
 MODULE_DEVICE_TABLE(platform, cros_ec_id);
 
-static int ec_device_suspend(struct device *dev)
+static __maybe_unused int ec_device_suspend(struct device *dev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(dev);
 
@@ -475,7 +475,7 @@ static int ec_device_suspend(struct device *dev)
 	return 0;
 }
 
-static int ec_device_resume(struct device *dev)
+static __maybe_unused int ec_device_resume(struct device *dev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(dev);
 
-- 
GitLab


From 93437353daeff31bd5b11810daa4d2d509d1a64e Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 26 May 2017 14:12:25 -0700
Subject: [PATCH 0435/1958] module: use list_for_each_entry_rcu() on
 find_module_all()

The module list has been using RCU in a lot of other calls
for a while now, we just overlooked changing this one over to
use RCU.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 kernel/module.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/module.c b/kernel/module.c
index f546d574f4361..afc6ede7bcdf3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -603,7 +603,7 @@ static struct module *find_module_all(const char *name, size_t len,
 
 	module_assert_mutex_or_preempt();
 
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
 			continue;
 		if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
-- 
GitLab


From 165d1cc0074b2f938586274776d029b9bce914c4 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 23 Jun 2017 12:19:12 -0700
Subject: [PATCH 0436/1958] kmod: reduce atomic operations on kmod_concurrent
 and simplify

When checking if we want to allow a kmod thread to kick off we increment,
then read to see if we should enable a thread. If we were over the allowed
limit limit we decrement. Splitting the increment far apart from decrement
means there could be a time where two increments happen potentially
giving a false failure on a thread which should have been allowed.

CPU1			CPU2
atomic_inc()
			atomic_inc()
atomic_read()
			atomic_read()
atomic_dec()
			atomic_dec()

In this case a read on CPU1 gets the atomic_inc()'s and we could negate
it from getting a kmod thread. We could try to prevent this with a lock
or preemption but that is overkill. We can fix by reducing the number of
atomic operations. We do this by inverting the logic of of the enabler,
instead of incrementing kmod_concurrent as we get new kmod users, define the
variable kmod_concurrent_max as the max number of currently allowed kmod
users and as we get new kmod users just decrement it if its still positive.
This combines the dec and read in one atomic operation.

In this case we no longer get the same false failure:

CPU1			CPU2
atomic_dec_if_positive()
			atomic_dec_if_positive()
atomic_inc()
			atomic_inc()

The number of threads is computed at init, and since the current computation
of kmod_concurrent includes the thread count we can avoid setting
kmod_concurrent_max later in boot through an init call by simply sticking to
50 as the kmod_concurrent_max. The assumption here is a system with modules
must at least have ~16 MiB of RAM.

Suggested-by: Petr Mladek <pmladek@suse.com>
Suggested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 kernel/kmod.c | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/kernel/kmod.c b/kernel/kmod.c
index 563f97e2be361..ff68198fe83bc 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -45,8 +45,6 @@
 
 #include <trace/events/module.h>
 
-extern int max_threads;
-
 #define CAP_BSET	(void *)1
 #define CAP_PI		(void *)2
 
@@ -56,6 +54,20 @@ static DEFINE_SPINLOCK(umh_sysctl_lock);
 static DECLARE_RWSEM(umhelper_sem);
 
 #ifdef CONFIG_MODULES
+/*
+ * Assuming:
+ *
+ * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
+ *		       (u64) THREAD_SIZE * 8UL);
+ *
+ * If you need less than 50 threads would mean we're dealing with systems
+ * smaller than 3200 pages. This assuems you are capable of having ~13M memory,
+ * and this would only be an be an upper limit, after which the OOM killer
+ * would take effect. Systems like these are very unlikely if modules are
+ * enabled.
+ */
+#define MAX_KMOD_CONCURRENT 50
+static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
 
 /*
 	modprobe_path is set via /proc/sys.
@@ -127,10 +139,7 @@ int __request_module(bool wait, const char *fmt, ...)
 {
 	va_list args;
 	char module_name[MODULE_NAME_LEN];
-	unsigned int max_modprobes;
 	int ret;
-	static atomic_t kmod_concurrent = ATOMIC_INIT(0);
-#define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
 	/*
@@ -154,21 +163,7 @@ int __request_module(bool wait, const char *fmt, ...)
 	if (ret)
 		return ret;
 
-	/* If modprobe needs a service that is in a module, we get a recursive
-	 * loop.  Limit the number of running kmod threads to max_threads/2 or
-	 * MAX_KMOD_CONCURRENT, whichever is the smaller.  A cleaner method
-	 * would be to run the parents of this process, counting how many times
-	 * kmod was invoked.  That would mean accessing the internals of the
-	 * process tables to get the command line, proc_pid_cmdline is static
-	 * and it is not worth changing the proc code just to handle this case. 
-	 * KAO.
-	 *
-	 * "trace the ppid" is simple, but will fail if someone's
-	 * parent exits.  I think this is as good as it gets. --RR
-	 */
-	max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
-	atomic_inc(&kmod_concurrent);
-	if (atomic_read(&kmod_concurrent) > max_modprobes) {
+	if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
 		/* We may be blaming an innocent here, but unlikely */
 		if (kmod_loop_msg < 5) {
 			printk(KERN_ERR
@@ -176,7 +171,6 @@ int __request_module(bool wait, const char *fmt, ...)
 			       module_name);
 			kmod_loop_msg++;
 		}
-		atomic_dec(&kmod_concurrent);
 		return -ENOMEM;
 	}
 
@@ -184,10 +178,12 @@ int __request_module(bool wait, const char *fmt, ...)
 
 	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
 
-	atomic_dec(&kmod_concurrent);
+	atomic_inc(&kmod_concurrent_max);
+
 	return ret;
 }
 EXPORT_SYMBOL(__request_module);
+
 #endif /* CONFIG_MODULES */
 
 static void call_usermodehelper_freeinfo(struct subprocess_info *info)
-- 
GitLab


From 315e9c767d9a4668ad68cde1b57a14f916c90668 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Jun 2017 17:34:19 +0200
Subject: [PATCH 0437/1958] mtd: spi-nor: cqspi: remove duplicate const

The variable was already marked 'const' before the previous
patch, but the qualifier was in an unusual place, and now the
extra 'const' causes a harmless warning:

drivers/mtd/spi-nor/cadence-quadspi.c:1286:34: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier]

This removes the other 'const' instead.

Fixes: f993c123b461 ("mtd: spi-nor: cqspi: make of_device_ids const")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
---
 drivers/mtd/spi-nor/cadence-quadspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index d315c326e72f0..53c7d8e0327aa 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -1283,7 +1283,7 @@ static const struct dev_pm_ops cqspi__dev_pm_ops = {
 #define CQSPI_DEV_PM_OPS	NULL
 #endif
 
-static const struct of_device_id const cqspi_dt_ids[] = {
+static const struct of_device_id cqspi_dt_ids[] = {
 	{.compatible = "cdns,qspi-nor",},
 	{ /* end of table */ }
 };
-- 
GitLab


From e00ad7ef68e4882a18ac78d82348be09e8d0263b Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 26 Jun 2017 12:44:30 +1200
Subject: [PATCH 0438/1958] dt-bindings: add bindings for i2c-pca-platform

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 .../bindings/i2c/i2c-pca-platform.txt         | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/i2c-pca-platform.txt

diff --git a/Documentation/devicetree/bindings/i2c/i2c-pca-platform.txt b/Documentation/devicetree/bindings/i2c/i2c-pca-platform.txt
new file mode 100644
index 0000000000000..f1f3876bb8e87
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-pca-platform.txt
@@ -0,0 +1,29 @@
+* NXP PCA PCA9564/PCA9665 I2C controller
+
+The PCA9564/PCA9665 serves as an interface between most standard
+parallel-bus microcontrollers/microprocessors and the serial I2C-bus
+and allows the parallel bus system to communicate bi-directionally
+with the I2C-bus.
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device
+ - compatible : one of "nxp,pca9564" or "nxp,pca9665"
+
+Optional properties
+ - interrupts : the interrupt number
+ - interrupt-parent : the phandle for the interrupt controller.
+   If an interrupt is not specified polling will be used.
+ - reset-gpios : gpio specifier for gpio connected to RESET_N pin. As the line
+   is active low, it should be marked GPIO_ACTIVE_LOW.
+ - clock-frequency : I2C bus frequency.
+
+Example:
+	i2c0: i2c@80000 {
+		compatible = "nxp,pca9564";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x80000 0x4>;
+		reset-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
+		clock-frequency = <100000>;
+	};
-- 
GitLab


From 4cc7229daa46d1867e66ba09929d936377ae4837 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 26 Jun 2017 12:44:31 +1200
Subject: [PATCH 0439/1958] i2c: pca-platform: switch to struct gpio_desc

Make use of struct gpio_desc which allows us to specify the active state
of the reset pin.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-pca-platform.c | 43 ++++++++++++---------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 3bd2e7d06e4bb..9f995b8ed5877 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -22,6 +22,7 @@
 #include <linux/i2c-algo-pca.h>
 #include <linux/i2c-pca-platform.h>
 #include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/io.h>
 
 #include <asm/irq.h>
@@ -29,7 +30,7 @@
 struct i2c_pca_pf_data {
 	void __iomem			*reg_base;
 	int				irq;	/* if 0, use polling */
-	int				gpio;
+	struct gpio_desc		*gpio;
 	wait_queue_head_t		wait;
 	struct i2c_adapter		adap;
 	struct i2c_algo_pca_data	algo_data;
@@ -112,9 +113,9 @@ static void i2c_pca_pf_resetchip(void *pd)
 {
 	struct i2c_pca_pf_data *i2c = pd;
 
-	gpio_set_value(i2c->gpio, 0);
+	gpiod_set_value(i2c->gpio, 1);
 	ndelay(100);
-	gpio_set_value(i2c->gpio, 1);
+	gpiod_set_value(i2c->gpio, 0);
 }
 
 static irqreturn_t i2c_pca_pf_handler(int this_irq, void *dev_id)
@@ -181,11 +182,24 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	if (platform_data) {
 		i2c->adap.timeout = platform_data->timeout;
 		i2c->algo_data.i2c_clock = platform_data->i2c_clock_speed;
-		i2c->gpio = platform_data->gpio;
+		if (gpio_is_valid(platform_data->gpio)) {
+			ret = devm_gpio_request_one(&pdev->dev,
+						    platform_data->gpio,
+						    GPIOF_ACTIVE_LOW,
+						    i2c->adap.name);
+			if (ret == 0) {
+				i2c->gpio = gpio_to_desc(platform_data->gpio);
+				gpiod_direction_output(i2c->gpio, 0);
+				i2c->algo_data.reset_chip = i2c_pca_pf_resetchip;
+			} else {
+				dev_warn(&pdev->dev, "Registering gpio failed!\n");
+				i2c->gpio = NULL;
+			}
+		}
 	} else {
 		i2c->adap.timeout = HZ;
 		i2c->algo_data.i2c_clock = 59000;
-		i2c->gpio = -1;
+		i2c->gpio = NULL;
 	}
 
 	i2c->algo_data.data = i2c;
@@ -208,19 +222,6 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 		break;
 	}
 
-	/* Use gpio_is_valid() when in mainline */
-	if (i2c->gpio > -1) {
-		ret = gpio_request(i2c->gpio, i2c->adap.name);
-		if (ret == 0) {
-			gpio_direction_output(i2c->gpio, 1);
-			i2c->algo_data.reset_chip = i2c_pca_pf_resetchip;
-		} else {
-			printk(KERN_WARNING "%s: Registering gpio failed!\n",
-				i2c->adap.name);
-			i2c->gpio = ret;
-		}
-	}
-
 	if (irq) {
 		ret = request_irq(irq, i2c_pca_pf_handler,
 			IRQF_TRIGGER_FALLING, pdev->name, i2c);
@@ -243,9 +244,6 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	if (irq)
 		free_irq(irq, i2c);
 e_reqirq:
-	if (i2c->gpio > -1)
-		gpio_free(i2c->gpio);
-
 	iounmap(i2c->reg_base);
 e_remap:
 	kfree(i2c);
@@ -265,9 +263,6 @@ static int i2c_pca_pf_remove(struct platform_device *pdev)
 	if (i2c->irq)
 		free_irq(i2c->irq, i2c);
 
-	if (i2c->gpio > -1)
-		gpio_free(i2c->gpio);
-
 	iounmap(i2c->reg_base);
 	release_mem_region(i2c->io_base, i2c->io_size);
 	kfree(i2c);
-- 
GitLab


From 0e8ce93bdceb6d36a3c1db2227d3f51168fb796c Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 26 Jun 2017 12:44:32 +1200
Subject: [PATCH 0440/1958] i2c: pca-platform: add devicetree awareness

Allow devices that use this driver to be registered via a
devicetree.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
[wsa: fixed leakage when registering GPIO failed]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-pca-platform.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 9f995b8ed5877..31d7f76ddd94f 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -24,6 +24,8 @@
 #include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <asm/irq.h>
 
@@ -137,12 +139,15 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct i2c_pca9564_pf_platform_data *platform_data =
 				dev_get_platdata(&pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
 	int ret = 0;
 	int irq;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
 	/* If irq is 0, we do polling. */
+	if (irq < 0)
+		irq = 0;
 
 	if (res == NULL) {
 		ret = -ENODEV;
@@ -178,6 +183,7 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 		 (unsigned long) res->start);
 	i2c->adap.algo_data = &i2c->algo_data;
 	i2c->adap.dev.parent = &pdev->dev;
+	i2c->adap.dev.of_node = np;
 
 	if (platform_data) {
 		i2c->adap.timeout = platform_data->timeout;
@@ -196,6 +202,15 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 				i2c->gpio = NULL;
 			}
 		}
+	} else if (np) {
+		i2c->adap.timeout = HZ;
+		i2c->gpio = devm_gpiod_get_optional(&pdev->dev, "reset-gpios", GPIOD_OUT_LOW);
+		if (IS_ERR(i2c->gpio)) {
+			ret = PTR_ERR(i2c->gpio);
+			goto e_reqirq;
+		}
+		of_property_read_u32_index(np, "clock-frequency", 0,
+					   &i2c->algo_data.i2c_clock);
 	} else {
 		i2c->adap.timeout = HZ;
 		i2c->algo_data.i2c_clock = 59000;
@@ -270,11 +285,21 @@ static int i2c_pca_pf_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_pca_of_match_table[] = {
+	{ .compatible = "nxp,pca9564" },
+	{ .compatible = "nxp,pca9665" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, i2c_pca_of_match_table);
+#endif
+
 static struct platform_driver i2c_pca_pf_driver = {
 	.probe = i2c_pca_pf_probe,
 	.remove = i2c_pca_pf_remove,
 	.driver = {
 		.name = "i2c-pca-platform",
+		.of_match_table = of_match_ptr(i2c_pca_of_match_table),
 	},
 };
 
-- 
GitLab


From 9447332ffacceeb49467ae5314887a30f6c0aaa9 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@microchip.com>
Date: Mon, 26 Jun 2017 15:09:59 +0200
Subject: [PATCH 0441/1958] mtd: st_spi_fsm: remove SPINOR_OP_RDSR2 and use
 SPINOR_OP_RDCR instead

The 35h instruction op code has two aliases/macro definitions:
- SPINOR_OP_RDCR from include/linux/mtd/spi-nor.h
- SPINOR_OP_RDSR2 from drivers/mtd/devices/serial_flash_cmds.h

Actually, some manufacturers name the associated internal register Status
Register 2 whereas other manufacturers name it Configuration Register
hence the two different macros for the very same instruction op code.

Since the spi-nor.h file is the reference file for all SPI NOR instruction
op codes, this patch removes the definition of the SPINOR_OP_RDSR2 macro.

Also the SPINOR_OP_RDSR2 macro will be associated to another instruction
op code in a further patch so we need to avoid a conflict defining this
macro twice. Indeed the JESD216 rev B specification, defining the SFDP
tables, also refers to the 3Eh and 3Fh instruction op codes to write/read
the Status Register 2 on some SPI NOR flash memories, the 35h op code
still being used to read the Configuration Register/Status Register 2 on
other memories.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@microchip.com>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
---
 drivers/mtd/devices/serial_flash_cmds.h | 1 -
 drivers/mtd/devices/st_spi_fsm.c        | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/devices/serial_flash_cmds.h b/drivers/mtd/devices/serial_flash_cmds.h
index 8b81e15105dd4..eba125c9f23f4 100644
--- a/drivers/mtd/devices/serial_flash_cmds.h
+++ b/drivers/mtd/devices/serial_flash_cmds.h
@@ -13,7 +13,6 @@
 #define _MTD_SERIAL_FLASH_CMDS_H
 
 /* Generic Flash Commands/OPCODEs */
-#define SPINOR_OP_RDSR2		0x35
 #define SPINOR_OP_WRVCR		0x81
 #define SPINOR_OP_RDVCR		0x85
 
diff --git a/drivers/mtd/devices/st_spi_fsm.c b/drivers/mtd/devices/st_spi_fsm.c
index 804313a33f2be..21afd94cd904a 100644
--- a/drivers/mtd/devices/st_spi_fsm.c
+++ b/drivers/mtd/devices/st_spi_fsm.c
@@ -1445,7 +1445,7 @@ static int stfsm_s25fl_config(struct stfsm *fsm)
 	}
 
 	/* Check status of 'QE' bit, update if required. */
-	stfsm_read_status(fsm, SPINOR_OP_RDSR2, &cr1, 1);
+	stfsm_read_status(fsm, SPINOR_OP_RDCR, &cr1, 1);
 	data_pads = ((fsm->stfsm_seq_read.seq_cfg >> 16) & 0x3) + 1;
 	if (data_pads == 4) {
 		if (!(cr1 & STFSM_S25FL_CONFIG_QE)) {
@@ -1490,7 +1490,7 @@ static int stfsm_w25q_config(struct stfsm *fsm)
 		return ret;
 
 	/* Check status of 'QE' bit, update if required. */
-	stfsm_read_status(fsm, SPINOR_OP_RDSR2, &sr2, 1);
+	stfsm_read_status(fsm, SPINOR_OP_RDCR, &sr2, 1);
 	data_pads = ((fsm->stfsm_seq_read.seq_cfg >> 16) & 0x3) + 1;
 	if (data_pads == 4) {
 		if (!(sr2 & W25Q_STATUS_QE)) {
-- 
GitLab


From fa70ca7c287ee63b790fbb3de624a0c80a6e2be2 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 26 Jun 2017 12:44:33 +1200
Subject: [PATCH 0442/1958] i2c: pca-platform: use device managed allocations

Switch to using the devm_ APIs and remove the now unnecessary error
handling and most of the device removal code.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
[wsa: adapted error handling I added in previous patch]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-pca-platform.c | 59 ++++++---------------------
 1 file changed, 12 insertions(+), 47 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 31d7f76ddd94f..7db481cbf4025 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -143,35 +143,23 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	int ret = 0;
 	int irq;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
 	/* If irq is 0, we do polling. */
 	if (irq < 0)
 		irq = 0;
 
-	if (res == NULL) {
-		ret = -ENODEV;
-		goto e_print;
-	}
+	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return -ENOMEM;
 
-	if (!request_mem_region(res->start, resource_size(res), res->name)) {
-		ret = -ENOMEM;
-		goto e_print;
-	}
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	i2c->reg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(i2c->reg_base))
+		return PTR_ERR(i2c->reg_base);
 
-	i2c = kzalloc(sizeof(struct i2c_pca_pf_data), GFP_KERNEL);
-	if (!i2c) {
-		ret = -ENOMEM;
-		goto e_alloc;
-	}
 
 	init_waitqueue_head(&i2c->wait);
 
-	i2c->reg_base = ioremap(res->start, resource_size(res));
-	if (!i2c->reg_base) {
-		ret = -ENOMEM;
-		goto e_remap;
-	}
 	i2c->io_base = res->start;
 	i2c->io_size = resource_size(res);
 	i2c->irq = irq;
@@ -205,10 +193,8 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	} else if (np) {
 		i2c->adap.timeout = HZ;
 		i2c->gpio = devm_gpiod_get_optional(&pdev->dev, "reset-gpios", GPIOD_OUT_LOW);
-		if (IS_ERR(i2c->gpio)) {
-			ret = PTR_ERR(i2c->gpio);
-			goto e_reqirq;
-		}
+		if (IS_ERR(i2c->gpio))
+			return PTR_ERR(i2c->gpio);
 		of_property_read_u32_index(np, "clock-frequency", 0,
 					   &i2c->algo_data.i2c_clock);
 	} else {
@@ -238,15 +224,14 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	}
 
 	if (irq) {
-		ret = request_irq(irq, i2c_pca_pf_handler,
+		ret = devm_request_irq(&pdev->dev, irq, i2c_pca_pf_handler,
 			IRQF_TRIGGER_FALLING, pdev->name, i2c);
 		if (ret)
-			goto e_reqirq;
+			return ret;
 	}
 
 	if (i2c_pca_add_numbered_bus(&i2c->adap) < 0) {
-		ret = -ENODEV;
-		goto e_adapt;
+		return -ENODEV;
 	}
 
 	platform_set_drvdata(pdev, i2c);
@@ -254,19 +239,6 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	printk(KERN_INFO "%s registered.\n", i2c->adap.name);
 
 	return 0;
-
-e_adapt:
-	if (irq)
-		free_irq(irq, i2c);
-e_reqirq:
-	iounmap(i2c->reg_base);
-e_remap:
-	kfree(i2c);
-e_alloc:
-	release_mem_region(res->start, resource_size(res));
-e_print:
-	printk(KERN_ERR "Registering PCA9564/PCA9665 FAILED! (%d)\n", ret);
-	return ret;
 }
 
 static int i2c_pca_pf_remove(struct platform_device *pdev)
@@ -275,13 +247,6 @@ static int i2c_pca_pf_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&i2c->adap);
 
-	if (i2c->irq)
-		free_irq(i2c->irq, i2c);
-
-	iounmap(i2c->reg_base);
-	release_mem_region(i2c->io_base, i2c->io_size);
-	kfree(i2c);
-
 	return 0;
 }
 
-- 
GitLab


From df40f247750cc6427216b3be2c0eebfd51a5c155 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 26 Jun 2017 12:44:34 +1200
Subject: [PATCH 0443/1958] i2c: pca-platform: use dev_warn/dev_info instead of
 printk

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-pca-platform.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 7db481cbf4025..395eca0cbb1fc 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -107,8 +107,8 @@ static int i2c_pca_pf_waitforcompletion(void *pd)
 static void i2c_pca_pf_dummyreset(void *pd)
 {
 	struct i2c_pca_pf_data *i2c = pd;
-	printk(KERN_WARNING "%s: No reset-pin found. Chip may get stuck!\n",
-		i2c->adap.name);
+
+	dev_warn(&i2c->adap.dev, "No reset-pin found. Chip may get stuck!\n");
 }
 
 static void i2c_pca_pf_resetchip(void *pd)
@@ -236,7 +236,7 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, i2c);
 
-	printk(KERN_INFO "%s registered.\n", i2c->adap.name);
+	dev_info(&pdev->dev, "registered.\n");
 
 	return 0;
 }
-- 
GitLab


From c3518a4ed8753a88b0dee614a64ffadd5f67386b Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Fri, 23 Jun 2017 21:05:30 +0200
Subject: [PATCH 0444/1958] i2c: zx2967: always use the same device when
 printing errors

Let's always use the platform device for dev_* and not sometimes the
adapter device as well. Also fix this checkpatch check:

CHECK: Macro argument 'i2c' may be better as '(i2c)' to avoid precedence issues

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-zx2967.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-zx2967.c b/drivers/i2c/busses/i2c-zx2967.c
index 3e381edc0f223..48281c1b30c6d 100644
--- a/drivers/i2c/busses/i2c-zx2967.c
+++ b/drivers/i2c/busses/i2c-zx2967.c
@@ -53,7 +53,7 @@
 
 #define I2C_TIMEOUT			msecs_to_jiffies(1000)
 
-#define DEV(i2c)			(&i2c->adap.dev)
+#define DEV(i2c)			((i2c)->adap.dev.parent)
 
 struct zx2967_i2c {
 	struct i2c_adapter	adap;
-- 
GitLab


From 86766a3e15ac73dcf9542eebc60d3c34578d463a Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Sat, 24 Jun 2017 22:25:26 +0800
Subject: [PATCH 0445/1958] i2c: zx2967: drop COMPILE_TEST dependency

0-DAY kernel test reports the following build issue on IA64 architecture
with allmodconfig.

   drivers/i2c/busses/i2c-zx2967.c: In function 'zx2967_i2c_writesb':
>> drivers/i2c/busses/i2c-zx2967.c:87:2: error: implicit declaration of function 'writesb' [-Werror=implicit-function-declaration]
     writesb(i2c->reg_base + reg, data, len);
     ^~~~~~~
   drivers/i2c/busses/i2c-zx2967.c: In function 'zx2967_i2c_readsb':
>> drivers/i2c/busses/i2c-zx2967.c:93:2: error: implicit declaration of function 'readsb' [-Werror=implicit-function-declaration]
     readsb(i2c->reg_base + reg, data, len);
     ^~~~~~
   cc1: some warnings being treated as errors

It's caused by that writesb/readsb are unavailable on IA64 architecture.
Let's drop COMPILE_TEST dependency to avoid the build issue.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 5134b3cde14e2..c01cf3508d1c2 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1272,7 +1272,7 @@ config I2C_OPAL
 
 config I2C_ZX2967
 	tristate "ZTE ZX2967 I2C support"
-	depends on ARCH_ZX || COMPILE_TEST
+	depends on ARCH_ZX
 	default y
 	help
 	  Selecting this option will add ZX2967 I2C driver.
-- 
GitLab


From 9f3e065c54b05b03bd39dbbcc5a44f2f1807994d Mon Sep 17 00:00:00 2001
From: Luis Oliveira <Luis.Oliveira@synopsys.com>
Date: Thu, 22 Jun 2017 11:17:32 +0100
Subject: [PATCH 0446/1958] i2c: designware: add SLAVE mode functions

- Changes in Kconfig to enable I2C_DESIGNWARE_SLAVE support
- Slave functions added to core library file
- Slave abort sources added to common source file
- New driver: i2c-designware-slave added
- Changes in the Makefile to compile the I2C_DESIGNWARE_SLAVE module
  when supported by the architecture.

All the SLAVE flow is added but it is not enabled via platform
driver.

Signed-off-by: Luis Oliveira <lolivei@synopsys.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
[wsa: made a function static and one-lined a message]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig                 |  14 +-
 drivers/i2c/busses/Makefile                |   3 +
 drivers/i2c/busses/i2c-designware-common.c |   6 +
 drivers/i2c/busses/i2c-designware-core.h   |   2 +
 drivers/i2c/busses/i2c-designware-slave.c  | 394 +++++++++++++++++++++
 5 files changed, 418 insertions(+), 1 deletion(-)
 create mode 100644 drivers/i2c/busses/i2c-designware-slave.c

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index c01cf3508d1c2..c98c8e51c4482 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -483,14 +483,26 @@ config I2C_DESIGNWARE_CORE
 config I2C_DESIGNWARE_PLATFORM
 	tristate "Synopsys DesignWare Platform"
 	select I2C_DESIGNWARE_CORE
+	select I2C_DESIGNWARE_SLAVE
 	depends on (ACPI && COMMON_CLK) || !ACPI
 	help
 	  If you say yes to this option, support will be included for the
-	  Synopsys DesignWare I2C adapter. Only master mode is supported.
+	  Synopsys DesignWare I2C adapter.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-designware-platform.
 
+config I2C_DESIGNWARE_SLAVE
+	bool "Synopsys DesignWare Slave"
+	select I2C_SLAVE
+	depends on I2C_DESIGNWARE_PLATFORM
+	help
+	  If you say yes to this option, support will be included for the
+	  Synopsys DesignWare I2C slave adapter.
+
+	  This is not a standalone module, this module compiles together with
+	  i2c-designware-core.
+
 config I2C_DESIGNWARE_PCI
 	tristate "Synopsys DesignWare PCI"
 	depends on PCI
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index a846511340484..1b2fc815a4d83 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -42,6 +42,9 @@ obj-$(CONFIG_I2C_CPM)		+= i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)	+= i2c-davinci.o
 obj-$(CONFIG_I2C_DESIGNWARE_CORE)	+= i2c-designware-core.o
 i2c-designware-core-objs := i2c-designware-common.o i2c-designware-master.o
+ifeq ($(CONFIG_I2C_DESIGNWARE_SLAVE),y)
+i2c-designware-core-objs += i2c-designware-slave.o
+endif
 obj-$(CONFIG_I2C_DESIGNWARE_PLATFORM)	+= i2c-designware-platform.o
 i2c-designware-platform-objs := i2c-designware-platdrv.o
 i2c-designware-platform-$(CONFIG_I2C_DESIGNWARE_BAYTRAIL) += i2c-designware-baytrail.o
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
index 0d0ccb73f0e66..d1a69372432fb 100644
--- a/drivers/i2c/busses/i2c-designware-common.c
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -56,6 +56,12 @@ static char *abort_sources[] = {
 		"trying to use disabled adapter",
 	[ARB_LOST] =
 		"lost arbitration",
+	[ABRT_SLAVE_FLUSH_TXFIFO] =
+		"read command so flush old data in the TX FIFO",
+	[ABRT_SLAVE_ARBLOST] =
+		"slave lost the bus while transmitting data to a remote master",
+	[ABRT_SLAVE_RD_INTX] =
+		"incorrect slave-transmitter mode configuration",
 };
 
 u32 dw_readl(struct dw_i2c_dev *dev, int offset)
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 76807eafda537..64a39a9834103 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -305,9 +305,11 @@ u32 i2c_dw_func(struct i2c_adapter *adap);
 void i2c_dw_disable(struct dw_i2c_dev *dev);
 void i2c_dw_disable_int(struct dw_i2c_dev *dev);
 int i2c_dw_init(struct dw_i2c_dev *dev);
+int i2c_dw_init_slave(struct dw_i2c_dev *dev);
 
 extern u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev);
 extern int i2c_dw_probe(struct dw_i2c_dev *dev);
+extern int i2c_dw_probe_slave(struct dw_i2c_dev *dev);
 
 #if IS_ENABLED(CONFIG_I2C_DESIGNWARE_BAYTRAIL)
 extern int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev);
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
new file mode 100644
index 0000000000000..4012c74e9785a
--- /dev/null
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -0,0 +1,394 @@
+/*
+ * Synopsys DesignWare I2C adapter driver (slave only).
+ *
+ * Based on the Synopsys DesignWare I2C adapter driver (master).
+ *
+ * Copyright (C) 2016 Synopsys Inc.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * ----------------------------------------------------------------------------
+ *
+ */
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+
+#include "i2c-designware-core.h"
+
+static void i2c_dw_configure_fifo_slave(struct dw_i2c_dev *dev)
+{
+	/* Configure Tx/Rx FIFO threshold levels. */
+	dw_writel(dev, 0, DW_IC_TX_TL);
+	dw_writel(dev, 0, DW_IC_RX_TL);
+
+	/* Configure the I2C slave. */
+	dw_writel(dev, dev->slave_cfg, DW_IC_CON);
+	dw_writel(dev, DW_IC_INTR_SLAVE_MASK, DW_IC_INTR_MASK);
+}
+
+/**
+ * i2c_dw_init_slave() - Initialize the designware i2c slave hardware
+ * @dev: device private data
+ *
+ * This function configures and enables the I2C in slave mode.
+ * This function is called during I2C init function, and in case of timeout at
+ * run time.
+ */
+int i2c_dw_init_slave(struct dw_i2c_dev *dev)
+{
+	u32 sda_falling_time, scl_falling_time;
+	u32 reg, comp_param1;
+	u32 hcnt, lcnt;
+	int ret;
+
+	ret = i2c_dw_acquire_lock(dev);
+	if (ret)
+		return ret;
+
+	reg = dw_readl(dev, DW_IC_COMP_TYPE);
+	if (reg == ___constant_swab32(DW_IC_COMP_TYPE_VALUE)) {
+		/* Configure register endianness access. */
+		dev->flags |= ACCESS_SWAP;
+	} else if (reg == (DW_IC_COMP_TYPE_VALUE & 0x0000ffff)) {
+		/* Configure register access mode 16bit. */
+		dev->flags |= ACCESS_16BIT;
+	} else if (reg != DW_IC_COMP_TYPE_VALUE) {
+		dev_err(dev->dev,
+			"Unknown Synopsys component type: 0x%08x\n", reg);
+		i2c_dw_release_lock(dev);
+		return -ENODEV;
+	}
+
+	comp_param1 = dw_readl(dev, DW_IC_COMP_PARAM_1);
+
+	/* Disable the adapter. */
+	__i2c_dw_enable_and_wait(dev, false);
+
+	/* Set standard and fast speed deviders for high/low periods. */
+	sda_falling_time = dev->sda_falling_time ?: 300; /* ns */
+	scl_falling_time = dev->scl_falling_time ?: 300; /* ns */
+
+	/* Set SCL timing parameters for standard-mode. */
+	if (dev->ss_hcnt && dev->ss_lcnt) {
+		hcnt = dev->ss_hcnt;
+		lcnt = dev->ss_lcnt;
+	} else {
+		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
+				       4000,	/* tHD;STA = tHIGH = 4.0 us */
+				       sda_falling_time,
+				       0,	/* 0: DW default, 1: Ideal */
+				       0);	/* No offset */
+		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
+				       4700,	/* tLOW = 4.7 us */
+				       scl_falling_time,
+				       0);	/* No offset */
+	}
+	dw_writel(dev, hcnt, DW_IC_SS_SCL_HCNT);
+	dw_writel(dev, lcnt, DW_IC_SS_SCL_LCNT);
+	dev_dbg(dev->dev, "Standard-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
+
+	/* Set SCL timing parameters for fast-mode or fast-mode plus. */
+	if ((dev->clk_freq == 1000000) && dev->fp_hcnt && dev->fp_lcnt) {
+		hcnt = dev->fp_hcnt;
+		lcnt = dev->fp_lcnt;
+	} else if (dev->fs_hcnt && dev->fs_lcnt) {
+		hcnt = dev->fs_hcnt;
+		lcnt = dev->fs_lcnt;
+	} else {
+		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
+				       600,	/* tHD;STA = tHIGH = 0.6 us */
+				       sda_falling_time,
+				       0,	/* 0: DW default, 1: Ideal */
+				       0);	/* No offset */
+		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
+				       1300,	/* tLOW = 1.3 us */
+				       scl_falling_time,
+				       0);	/* No offset */
+	}
+	dw_writel(dev, hcnt, DW_IC_FS_SCL_HCNT);
+	dw_writel(dev, lcnt, DW_IC_FS_SCL_LCNT);
+	dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
+
+	if ((dev->slave_cfg & DW_IC_CON_SPEED_MASK) ==
+		DW_IC_CON_SPEED_HIGH) {
+		if ((comp_param1 & DW_IC_COMP_PARAM_1_SPEED_MODE_MASK)
+			!= DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH) {
+			dev_err(dev->dev, "High Speed not supported!\n");
+			dev->slave_cfg &= ~DW_IC_CON_SPEED_MASK;
+			dev->slave_cfg |= DW_IC_CON_SPEED_FAST;
+		} else if (dev->hs_hcnt && dev->hs_lcnt) {
+			hcnt = dev->hs_hcnt;
+			lcnt = dev->hs_lcnt;
+			dw_writel(dev, hcnt, DW_IC_HS_SCL_HCNT);
+			dw_writel(dev, lcnt, DW_IC_HS_SCL_LCNT);
+			dev_dbg(dev->dev, "HighSpeed-mode HCNT:LCNT = %d:%d\n",
+				hcnt, lcnt);
+		}
+	}
+
+	/* Configure SDA Hold Time if required. */
+	reg = dw_readl(dev, DW_IC_COMP_VERSION);
+	if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
+		if (!dev->sda_hold_time) {
+			/* Keep previous hold time setting if no one set it. */
+			dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
+		}
+		/*
+		 * Workaround for avoiding TX arbitration lost in case I2C
+		 * slave pulls SDA down "too quickly" after falling egde of
+		 * SCL by enabling non-zero SDA RX hold. Specification says it
+		 * extends incoming SDA low to high transition while SCL is
+		 * high but it apprears to help also above issue.
+		 */
+		if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK))
+			dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT;
+		dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
+	} else {
+		dev_warn(dev->dev,
+			 "Hardware too old to adjust SDA hold time.\n");
+	}
+
+	i2c_dw_configure_fifo_slave(dev);
+	i2c_dw_release_lock(dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(i2c_dw_init_slave);
+
+static int i2c_dw_reg_slave(struct i2c_client *slave)
+{
+	struct dw_i2c_dev *dev = i2c_get_adapdata(slave->adapter);
+
+	if (dev->slave)
+		return -EBUSY;
+	if (slave->flags & I2C_CLIENT_TEN)
+		return -EAFNOSUPPORT;
+	/*
+	 * Set slave address in the IC_SAR register,
+	 * the address to which the DW_apb_i2c responds.
+	 */
+	__i2c_dw_enable(dev, false);
+	dw_writel(dev, slave->addr, DW_IC_SAR);
+	dev->slave = slave;
+
+	__i2c_dw_enable(dev, true);
+
+	dev->cmd_err = 0;
+	dev->msg_write_idx = 0;
+	dev->msg_read_idx = 0;
+	dev->msg_err = 0;
+	dev->status = STATUS_IDLE;
+	dev->abort_source = 0;
+	dev->rx_outstanding = 0;
+
+	return 0;
+}
+
+static int i2c_dw_unreg_slave(struct i2c_client *slave)
+{
+	struct dw_i2c_dev *dev = i2c_get_adapdata(slave->adapter);
+
+	dev->disable_int(dev);
+	dev->disable(dev);
+	dev->slave = NULL;
+
+	return 0;
+}
+
+static u32 i2c_dw_read_clear_intrbits_slave(struct dw_i2c_dev *dev)
+{
+	u32 stat;
+
+	/*
+	 * The IC_INTR_STAT register just indicates "enabled" interrupts.
+	 * Ths unmasked raw version of interrupt status bits are available
+	 * in the IC_RAW_INTR_STAT register.
+	 *
+	 * That is,
+	 *   stat = dw_readl(IC_INTR_STAT);
+	 * equals to,
+	 *   stat = dw_readl(IC_RAW_INTR_STAT) & dw_readl(IC_INTR_MASK);
+	 *
+	 * The raw version might be useful for debugging purposes.
+	 */
+	stat = dw_readl(dev, DW_IC_INTR_STAT);
+
+	/*
+	 * Do not use the IC_CLR_INTR register to clear interrupts, or
+	 * you'll miss some interrupts, triggered during the period from
+	 * dw_readl(IC_INTR_STAT) to dw_readl(IC_CLR_INTR).
+	 *
+	 * Instead, use the separately-prepared IC_CLR_* registers.
+	 */
+	if (stat & DW_IC_INTR_TX_ABRT)
+		dw_readl(dev, DW_IC_CLR_TX_ABRT);
+	if (stat & DW_IC_INTR_RX_UNDER)
+		dw_readl(dev, DW_IC_CLR_RX_UNDER);
+	if (stat & DW_IC_INTR_RX_OVER)
+		dw_readl(dev, DW_IC_CLR_RX_OVER);
+	if (stat & DW_IC_INTR_TX_OVER)
+		dw_readl(dev, DW_IC_CLR_TX_OVER);
+	if (stat & DW_IC_INTR_RX_DONE)
+		dw_readl(dev, DW_IC_CLR_RX_DONE);
+	if (stat & DW_IC_INTR_ACTIVITY)
+		dw_readl(dev, DW_IC_CLR_ACTIVITY);
+	if (stat & DW_IC_INTR_STOP_DET)
+		dw_readl(dev, DW_IC_CLR_STOP_DET);
+	if (stat & DW_IC_INTR_START_DET)
+		dw_readl(dev, DW_IC_CLR_START_DET);
+	if (stat & DW_IC_INTR_GEN_CALL)
+		dw_readl(dev, DW_IC_CLR_GEN_CALL);
+
+	return stat;
+}
+
+/*
+ * Interrupt service routine. This gets called whenever an I2C slave interrupt
+ * occurs.
+ */
+
+static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
+{
+	u32 raw_stat, stat, enabled;
+	u8 val, slave_activity;
+
+	stat = dw_readl(dev, DW_IC_INTR_STAT);
+	enabled = dw_readl(dev, DW_IC_ENABLE);
+	raw_stat = dw_readl(dev, DW_IC_RAW_INTR_STAT);
+	slave_activity = ((dw_readl(dev, DW_IC_STATUS) &
+		DW_IC_STATUS_SLAVE_ACTIVITY) >> 6);
+
+	if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY))
+		return 0;
+
+	dev_dbg(dev->dev,
+		"%#x STAUTS SLAVE_ACTTVITY=%#x : RAW_INTR_STAT=%#x : INTR_STAT=%#x\n",
+		enabled, slave_activity, raw_stat, stat);
+
+	if ((stat & DW_IC_INTR_RX_FULL) && (stat & DW_IC_INTR_STOP_DET))
+		i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, &val);
+
+	if (stat & DW_IC_INTR_RD_REQ) {
+		if (slave_activity) {
+			if (stat & DW_IC_INTR_RX_FULL) {
+				val = dw_readl(dev, DW_IC_DATA_CMD);
+
+				if (!i2c_slave_event(dev->slave,
+						     I2C_SLAVE_WRITE_RECEIVED,
+						     &val)) {
+					dev_vdbg(dev->dev, "Byte %X acked!",
+						 val);
+				}
+				dw_readl(dev, DW_IC_CLR_RD_REQ);
+				stat = i2c_dw_read_clear_intrbits_slave(dev);
+			} else {
+				dw_readl(dev, DW_IC_CLR_RD_REQ);
+				dw_readl(dev, DW_IC_CLR_RX_UNDER);
+				stat = i2c_dw_read_clear_intrbits_slave(dev);
+			}
+			if (!i2c_slave_event(dev->slave,
+					     I2C_SLAVE_READ_REQUESTED,
+					     &val))
+				dw_writel(dev, val, DW_IC_DATA_CMD);
+		}
+	}
+
+	if (stat & DW_IC_INTR_RX_DONE) {
+		if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_PROCESSED,
+				     &val))
+			dw_readl(dev, DW_IC_CLR_RX_DONE);
+
+		i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val);
+		stat = i2c_dw_read_clear_intrbits_slave(dev);
+		return 1;
+	}
+
+	if (stat & DW_IC_INTR_RX_FULL) {
+		val = dw_readl(dev, DW_IC_DATA_CMD);
+		if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED,
+				     &val))
+			dev_vdbg(dev->dev, "Byte %X acked!", val);
+	} else {
+		i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val);
+		stat = i2c_dw_read_clear_intrbits_slave(dev);
+	}
+
+	return 1;
+}
+
+static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id)
+{
+	struct dw_i2c_dev *dev = dev_id;
+	int ret;
+
+	i2c_dw_read_clear_intrbits_slave(dev);
+	ret = i2c_dw_irq_handler_slave(dev);
+	if (ret > 0)
+		complete(&dev->cmd_complete);
+
+	return IRQ_RETVAL(ret);
+}
+
+static struct i2c_algorithm i2c_dw_algo = {
+	.functionality = i2c_dw_func,
+	.reg_slave = i2c_dw_reg_slave,
+	.unreg_slave = i2c_dw_unreg_slave,
+};
+
+int i2c_dw_probe_slave(struct dw_i2c_dev *dev)
+{
+	struct i2c_adapter *adap = &dev->adapter;
+	int ret;
+
+	init_completion(&dev->cmd_complete);
+
+	dev->init = i2c_dw_init_slave;
+	dev->disable = i2c_dw_disable;
+	dev->disable_int = i2c_dw_disable_int;
+
+	ret = dev->init(dev);
+	if (ret)
+		return ret;
+
+	snprintf(adap->name, sizeof(adap->name),
+		 "Synopsys DesignWare I2C Slave adapter");
+	adap->retries = 3;
+	adap->algo = &i2c_dw_algo;
+	adap->dev.parent = dev->dev;
+	i2c_set_adapdata(adap, dev);
+
+	ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr_slave,
+			       IRQF_SHARED, dev_name(dev->dev), dev);
+	if (ret) {
+		dev_err(dev->dev, "failure requesting irq %i: %d\n",
+			dev->irq, ret);
+		return ret;
+	}
+
+	ret = i2c_add_numbered_adapter(adap);
+	if (ret)
+		dev_err(dev->dev, "failure adding adapter: %d\n", ret);
+	pm_runtime_put_noidle(dev->dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_dw_probe_slave);
+
+MODULE_AUTHOR("Luis Oliveira <lolivei@synopsys.com>");
+MODULE_DESCRIPTION("Synopsys DesignWare I2C bus slave adapter");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 5b6d721b266acaef411520e28066e4624c6619e7 Mon Sep 17 00:00:00 2001
From: Luis Oliveira <Luis.Oliveira@synopsys.com>
Date: Thu, 22 Jun 2017 11:17:33 +0100
Subject: [PATCH 0447/1958] i2c: designware: enable SLAVE in platform module

- Slave mode selected in platform module if the support is detected in
  the DT.

Signed-off-by: Luis Oliveira <lolivei@synopsys.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-core.h    |  2 +
 drivers/i2c/busses/i2c-designware-platdrv.c | 41 ++++++++++++++++++---
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 64a39a9834103..7bfbcc62f6c51 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -228,6 +228,7 @@
  * @disable: function to disable the controller
  * @disable_int: function to disable all interrupts
  * @init: function to initialize the I2C hardware
+ * @mode: operation mode - DW_IC_MASTER or DW_IC_SLAVE
  *
  * HCNT and LCNT parameters can be used if the platform knows more accurate
  * values than the one computed based only on the input clock frequency.
@@ -282,6 +283,7 @@ struct dw_i2c_dev {
 	void			(*disable)(struct dw_i2c_dev *dev);
 	void			(*disable_int)(struct dw_i2c_dev *dev);
 	int			(*init)(struct dw_i2c_dev *dev);
+	int			mode;
 };
 
 #define ACCESS_SWAP		0x00000001
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 1f38c807be5f5..2ea6d0d25a01a 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -1,5 +1,5 @@
 /*
- * Synopsys DesignWare I2C adapter driver (master only).
+ * Synopsys DesignWare I2C adapter driver.
  *
  * Based on the TI DAVINCI I2C adapter driver.
  *
@@ -174,9 +174,13 @@ static inline int dw_i2c_acpi_configure(struct platform_device *pdev)
 
 static void i2c_dw_configure_master(struct dw_i2c_dev *dev)
 {
+	dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
+
 	dev->master_cfg = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
 			  DW_IC_CON_RESTART_EN;
 
+	dev->mode = DW_IC_MASTER;
+
 	switch (dev->clk_freq) {
 	case 100000:
 		dev->master_cfg |= DW_IC_CON_SPEED_STD;
@@ -189,6 +193,28 @@ static void i2c_dw_configure_master(struct dw_i2c_dev *dev)
 	}
 }
 
+static void i2c_dw_configure_slave(struct dw_i2c_dev *dev)
+{
+	dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY;
+
+	dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL |
+			 DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED |
+			 DW_IC_CON_SPEED_FAST;
+
+	dev->mode = DW_IC_SLAVE;
+
+	switch (dev->clk_freq) {
+	case 100000:
+		dev->slave_cfg |= DW_IC_CON_SPEED_STD;
+		break;
+	case 3400000:
+		dev->slave_cfg |= DW_IC_CON_SPEED_HIGH;
+		break;
+	default:
+		dev->slave_cfg |= DW_IC_CON_SPEED_FAST;
+	}
+}
+
 static int i2c_dw_plat_prepare_clk(struct dw_i2c_dev *i_dev, bool prepare)
 {
 	if (IS_ERR(i_dev->clk))
@@ -302,9 +328,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	if (ret)
 		goto exit_reset;
 
-	dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
-
-	i2c_dw_configure_master(dev);
+	if (i2c_detect_slave_mode(&pdev->dev))
+		i2c_dw_configure_slave(dev);
+	else
+		i2c_dw_configure_master(dev);
 
 	dev->clk = devm_clk_get(&pdev->dev, NULL);
 	if (!i2c_dw_plat_prepare_clk(dev, true)) {
@@ -333,7 +360,11 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 		pm_runtime_enable(&pdev->dev);
 	}
 
-	ret = i2c_dw_probe(dev);
+	if (dev->mode == DW_IC_SLAVE)
+		ret = i2c_dw_probe_slave(dev);
+	else
+		ret = i2c_dw_probe(dev);
+
 	if (ret)
 		goto exit_probe;
 
-- 
GitLab


From 31965ef34802f49903bba06dd7c3b96a2e2ed4e4 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 20 Jun 2017 17:54:46 -0700
Subject: [PATCH 0448/1958] xfs: make errortag a per-mountpoint structure

Remove the xfs_etest structure in favor of a per-mountpoint structure.
This will give us the flexibility to set as many error injection points
as we want, and later enable us to set up sysfs knobs to set the trigger
frequency as we wish.  This comes at a cost of higher memory use, but
unti we hit 1024 injection points (we're at 29) or a lot of mounts this
shouldn't be a huge issue.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/xfs_error.c | 154 +++++++++++++++++++++++----------------------
 fs/xfs/xfs_error.h |  25 ++++----
 fs/xfs/xfs_ioctl.c |   4 +-
 fs/xfs/xfs_mount.c |  10 ++-
 fs/xfs/xfs_mount.h |   7 +++
 5 files changed, 111 insertions(+), 89 deletions(-)

diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed7ee4e8af738..52f75bc1abacb 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -25,100 +25,106 @@
 
 #ifdef DEBUG
 
-int	xfs_etest[XFS_NUM_INJECT_ERROR];
-int64_t	xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
-char *	xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
-int	xfs_error_test_active;
+static unsigned int xfs_errortag_random_default[] = {
+	XFS_RANDOM_DEFAULT,
+	XFS_RANDOM_IFLUSH_1,
+	XFS_RANDOM_IFLUSH_2,
+	XFS_RANDOM_IFLUSH_3,
+	XFS_RANDOM_IFLUSH_4,
+	XFS_RANDOM_IFLUSH_5,
+	XFS_RANDOM_IFLUSH_6,
+	XFS_RANDOM_DA_READ_BUF,
+	XFS_RANDOM_BTREE_CHECK_LBLOCK,
+	XFS_RANDOM_BTREE_CHECK_SBLOCK,
+	XFS_RANDOM_ALLOC_READ_AGF,
+	XFS_RANDOM_IALLOC_READ_AGI,
+	XFS_RANDOM_ITOBP_INOTOBP,
+	XFS_RANDOM_IUNLINK,
+	XFS_RANDOM_IUNLINK_REMOVE,
+	XFS_RANDOM_DIR_INO_VALIDATE,
+	XFS_RANDOM_BULKSTAT_READ_CHUNK,
+	XFS_RANDOM_IODONE_IOERR,
+	XFS_RANDOM_STRATREAD_IOERR,
+	XFS_RANDOM_STRATCMPL_IOERR,
+	XFS_RANDOM_DIOWRITE_IOERR,
+	XFS_RANDOM_BMAPIFORMAT,
+	XFS_RANDOM_FREE_EXTENT,
+	XFS_RANDOM_RMAP_FINISH_ONE,
+	XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE,
+	XFS_RANDOM_REFCOUNT_FINISH_ONE,
+	XFS_RANDOM_BMAP_FINISH_ONE,
+	XFS_RANDOM_AG_RESV_CRITICAL,
+};
 
 int
-xfs_error_test(int error_tag, int *fsidp, char *expression,
-	       int line, char *file, unsigned long randfactor)
+xfs_errortag_init(
+	struct xfs_mount	*mp)
 {
-	int i;
-	int64_t fsid;
+	mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
+			KM_SLEEP | KM_MAYFAIL);
+	if (!mp->m_errortag)
+		return -ENOMEM;
+	return 0;
+}
 
-	if (prandom_u32() % randfactor)
-		return 0;
+void
+xfs_errortag_del(
+	struct xfs_mount	*mp)
+{
+	kmem_free(mp->m_errortag);
+}
 
-	memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
+bool
+xfs_errortag_test(
+	struct xfs_mount	*mp,
+	const char		*expression,
+	const char		*file,
+	int			line,
+	unsigned int		error_tag)
+{
+	unsigned int		randfactor;
 
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
-		if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
-			xfs_warn(NULL,
-	"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
-				expression, file, line, xfs_etest_fsname[i]);
-			return 1;
-		}
-	}
+	ASSERT(error_tag < XFS_ERRTAG_MAX);
+	randfactor = mp->m_errortag[error_tag];
+	if (!randfactor || prandom_u32() % randfactor)
+		return false;
 
-	return 0;
+	xfs_warn_ratelimited(mp,
+"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
+			expression, file, line, mp->m_fsname);
+	return true;
 }
 
 int
-xfs_errortag_add(unsigned int error_tag, xfs_mount_t *mp)
+xfs_errortag_set(
+	struct xfs_mount	*mp,
+	unsigned int		error_tag,
+	unsigned int		tag_value)
 {
-	int i;
-	int len;
-	int64_t fsid;
-
 	if (error_tag >= XFS_ERRTAG_MAX)
 		return -EINVAL;
 
-	memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
-
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
-		if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
-			xfs_warn(mp, "error tag #%d on", error_tag);
-			return 0;
-		}
-	}
-
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
-		if (xfs_etest[i] == 0) {
-			xfs_warn(mp, "Turned on XFS error tag #%d",
-				error_tag);
-			xfs_etest[i] = error_tag;
-			xfs_etest_fsid[i] = fsid;
-			len = strlen(mp->m_fsname);
-			xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
-			strcpy(xfs_etest_fsname[i], mp->m_fsname);
-			xfs_error_test_active++;
-			return 0;
-		}
-	}
-
-	xfs_warn(mp, "error tag overflow, too many turned on");
-
-	return 1;
+	mp->m_errortag[error_tag] = tag_value;
+	return 0;
 }
 
 int
-xfs_errortag_clearall(xfs_mount_t *mp, int loud)
+xfs_errortag_add(
+	struct xfs_mount	*mp,
+	unsigned int		error_tag)
 {
-	int64_t fsid;
-	int cleared = 0;
-	int i;
-
-	memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
-
-
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
-		if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) &&
-		     xfs_etest[i] != 0) {
-			cleared = 1;
-			xfs_warn(mp, "Clearing XFS error tag #%d",
-				xfs_etest[i]);
-			xfs_etest[i] = 0;
-			xfs_etest_fsid[i] = 0LL;
-			kmem_free(xfs_etest_fsname[i]);
-			xfs_etest_fsname[i] = NULL;
-			xfs_error_test_active--;
-		}
-	}
+	if (error_tag >= XFS_ERRTAG_MAX)
+		return -EINVAL;
 
-	if (loud || cleared)
-		xfs_warn(mp, "Cleared all XFS error tags for filesystem");
+	return xfs_errortag_set(mp, error_tag,
+			xfs_errortag_random_default[error_tag]);
+}
 
+int
+xfs_errortag_clearall(
+	struct xfs_mount	*mp)
+{
+	memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX);
 	return 0;
 }
 #endif /* DEBUG */
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 05f8666733a09..b4316d39e1ca5 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -131,21 +131,24 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_RANDOM_AG_RESV_CRITICAL			4
 
 #ifdef DEBUG
-extern int xfs_error_test_active;
-extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
-
-#define	XFS_NUM_INJECT_ERROR				10
+extern int xfs_errortag_init(struct xfs_mount *mp);
+extern void xfs_errortag_del(struct xfs_mount *mp);
+extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
+		const char *file, int line, unsigned int error_tag);
 #define XFS_TEST_ERROR(expr, mp, tag, rf)		\
-	((expr) || (xfs_error_test_active && \
-	 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
-			(rf))))
+	((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
 
-extern int xfs_errortag_add(unsigned int error_tag, struct xfs_mount *mp);
-extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
+extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
+		unsigned int tag_value);
+extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
+extern int xfs_errortag_clearall(struct xfs_mount *mp);
 #else
+#define xfs_errortag_init(mp)			(0)
+#define xfs_errortag_del(mp)
 #define XFS_TEST_ERROR(expr, mp, tag, rf)	(expr)
-#define xfs_errortag_add(tag, mp)		(ENOSYS)
-#define xfs_errortag_clearall(mp, loud)		(ENOSYS)
+#define xfs_errortag_set(mp, tag, val)		(ENOSYS)
+#define xfs_errortag_add(mp, tag)		(ENOSYS)
+#define xfs_errortag_clearall(mp)		(ENOSYS)
 #endif /* DEBUG */
 
 /*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8ffe4eac0b483..9c0c7a920304e 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -2037,14 +2037,14 @@ xfs_file_ioctl(
 		if (copy_from_user(&in, arg, sizeof(in)))
 			return -EFAULT;
 
-		return xfs_errortag_add(in.errtag, mp);
+		return xfs_errortag_add(mp, in.errtag);
 	}
 
 	case XFS_IOC_ERROR_CLEARALL:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		return xfs_errortag_clearall(mp, 1);
+		return xfs_errortag_clearall(mp);
 
 	case XFS_IOC_FREE_EOFBLOCKS: {
 		struct xfs_fs_eofblocks eofb;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index cc6789d352320..1a98c35e1ccf5 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -720,10 +720,13 @@ xfs_mountfs(
 	if (error)
 		goto out_del_stats;
 
+	error = xfs_errortag_init(mp);
+	if (error)
+		goto out_remove_error_sysfs;
 
 	error = xfs_uuid_mount(mp);
 	if (error)
-		goto out_remove_error_sysfs;
+		goto out_remove_errortag;
 
 	/*
 	 * Set the minimum read and write sizes
@@ -1042,6 +1045,8 @@ xfs_mountfs(
 	xfs_da_unmount(mp);
  out_remove_uuid:
 	xfs_uuid_unmount(mp);
+ out_remove_errortag:
+	xfs_errortag_del(mp);
  out_remove_error_sysfs:
 	xfs_error_sysfs_del(mp);
  out_del_stats:
@@ -1145,10 +1150,11 @@ xfs_unmountfs(
 	xfs_uuid_unmount(mp);
 
 #if defined(DEBUG)
-	xfs_errortag_clearall(mp, 0);
+	xfs_errortag_clearall(mp);
 #endif
 	xfs_free_perag(mp);
 
+	xfs_errortag_del(mp);
 	xfs_error_sysfs_del(mp);
 	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 	xfs_sysfs_del(&mp->m_kobj);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 305d95394e2d1..e002ac52a4e69 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -198,6 +198,13 @@ typedef struct xfs_mount {
 
 	bool			m_fail_unmount;
 #ifdef DEBUG
+	/*
+	 * Frequency with which errors are injected.  Replaces xfs_etest; the
+	 * value stored in here is the inverse of the frequency with which the
+	 * error triggers.  1 = always, 2 = half the time, etc.
+	 */
+	unsigned int		*m_errortag;
+
 	/*
 	 * DEBUG mode instrumentation to test and/or trigger delayed allocation
 	 * block killing in the event of failed writes. When enabled, all
-- 
GitLab


From c684010115221978b17968dbddc8e31a09da85e7 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 20 Jun 2017 17:54:47 -0700
Subject: [PATCH 0449/1958] xfs: expose errortag knobs via sysfs

Creates a /sys/fs/xfs/$dev/errortag/ directory to control the errortag
values directly.  This enables us to control the randomness values,
rather than having to accept the defaults.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/xfs_error.c | 156 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_error.h |   1 +
 fs/xfs/xfs_mount.h |   1 +
 3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 52f75bc1abacb..e2278af6aed1e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -22,6 +22,7 @@
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_error.h"
+#include "xfs_sysfs.h"
 
 #ifdef DEBUG
 
@@ -56,6 +57,145 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_AG_RESV_CRITICAL,
 };
 
+struct xfs_errortag_attr {
+	struct attribute	attr;
+	unsigned int		tag;
+};
+
+static inline struct xfs_errortag_attr *
+to_attr(struct attribute *attr)
+{
+	return container_of(attr, struct xfs_errortag_attr, attr);
+}
+
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+	struct xfs_kobj *kobj = to_kobj(kobject);
+
+	return container_of(kobj, struct xfs_mount, m_errortag_kobj);
+}
+
+STATIC ssize_t
+xfs_errortag_attr_store(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	const char		*buf,
+	size_t			count)
+{
+	struct xfs_mount	*mp = to_mp(kobject);
+	struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+	int			ret;
+	unsigned int		val;
+
+	if (strcmp(buf, "default") == 0) {
+		val = xfs_errortag_random_default[xfs_attr->tag];
+	} else {
+		ret = kstrtouint(buf, 0, &val);
+		if (ret)
+			return ret;
+	}
+
+	ret = xfs_errortag_set(mp, xfs_attr->tag, val);
+	if (ret)
+		return ret;
+	return count;
+}
+
+STATIC ssize_t
+xfs_errortag_attr_show(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	char			*buf)
+{
+	struct xfs_mount	*mp = to_mp(kobject);
+	struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			xfs_errortag_get(mp, xfs_attr->tag));
+}
+
+static const struct sysfs_ops xfs_errortag_sysfs_ops = {
+	.show = xfs_errortag_attr_show,
+	.store = xfs_errortag_attr_store,
+};
+
+#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \
+static struct xfs_errortag_attr xfs_errortag_attr_##_name = {		\
+	.attr = {.name = __stringify(_name),				\
+		 .mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) },	\
+	.tag	= (_tag),						\
+}
+
+#define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr
+
+XFS_ERRORTAG_ATTR_RW(noerror,		XFS_ERRTAG_NOERROR);
+XFS_ERRORTAG_ATTR_RW(iflush1,		XFS_ERRTAG_IFLUSH_1);
+XFS_ERRORTAG_ATTR_RW(iflush2,		XFS_ERRTAG_IFLUSH_2);
+XFS_ERRORTAG_ATTR_RW(iflush3,		XFS_ERRTAG_IFLUSH_3);
+XFS_ERRORTAG_ATTR_RW(iflush4,		XFS_ERRTAG_IFLUSH_4);
+XFS_ERRORTAG_ATTR_RW(iflush5,		XFS_ERRTAG_IFLUSH_5);
+XFS_ERRORTAG_ATTR_RW(iflush6,		XFS_ERRTAG_IFLUSH_6);
+XFS_ERRORTAG_ATTR_RW(dareadbuf,		XFS_ERRTAG_DA_READ_BUF);
+XFS_ERRORTAG_ATTR_RW(btree_chk_lblk,	XFS_ERRTAG_BTREE_CHECK_LBLOCK);
+XFS_ERRORTAG_ATTR_RW(btree_chk_sblk,	XFS_ERRTAG_BTREE_CHECK_SBLOCK);
+XFS_ERRORTAG_ATTR_RW(readagf,		XFS_ERRTAG_ALLOC_READ_AGF);
+XFS_ERRORTAG_ATTR_RW(readagi,		XFS_ERRTAG_IALLOC_READ_AGI);
+XFS_ERRORTAG_ATTR_RW(itobp,		XFS_ERRTAG_ITOBP_INOTOBP);
+XFS_ERRORTAG_ATTR_RW(iunlink,		XFS_ERRTAG_IUNLINK);
+XFS_ERRORTAG_ATTR_RW(iunlinkrm,		XFS_ERRTAG_IUNLINK_REMOVE);
+XFS_ERRORTAG_ATTR_RW(dirinovalid,	XFS_ERRTAG_DIR_INO_VALIDATE);
+XFS_ERRORTAG_ATTR_RW(bulkstat,		XFS_ERRTAG_BULKSTAT_READ_CHUNK);
+XFS_ERRORTAG_ATTR_RW(logiodone,		XFS_ERRTAG_IODONE_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratread,		XFS_ERRTAG_STRATREAD_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratcmpl,		XFS_ERRTAG_STRATCMPL_IOERR);
+XFS_ERRORTAG_ATTR_RW(diowrite,		XFS_ERRTAG_DIOWRITE_IOERR);
+XFS_ERRORTAG_ATTR_RW(bmapifmt,		XFS_ERRTAG_BMAPIFORMAT);
+XFS_ERRORTAG_ATTR_RW(free_extent,	XFS_ERRTAG_FREE_EXTENT);
+XFS_ERRORTAG_ATTR_RW(rmap_finish_one,	XFS_ERRTAG_RMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(refcount_continue_update,	XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE);
+XFS_ERRORTAG_ATTR_RW(refcount_finish_one,	XFS_ERRTAG_REFCOUNT_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(bmap_finish_one,	XFS_ERRTAG_BMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(ag_resv_critical,	XFS_ERRTAG_AG_RESV_CRITICAL);
+
+static struct attribute *xfs_errortag_attrs[] = {
+	XFS_ERRORTAG_ATTR_LIST(noerror),
+	XFS_ERRORTAG_ATTR_LIST(iflush1),
+	XFS_ERRORTAG_ATTR_LIST(iflush2),
+	XFS_ERRORTAG_ATTR_LIST(iflush3),
+	XFS_ERRORTAG_ATTR_LIST(iflush4),
+	XFS_ERRORTAG_ATTR_LIST(iflush5),
+	XFS_ERRORTAG_ATTR_LIST(iflush6),
+	XFS_ERRORTAG_ATTR_LIST(dareadbuf),
+	XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk),
+	XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk),
+	XFS_ERRORTAG_ATTR_LIST(readagf),
+	XFS_ERRORTAG_ATTR_LIST(readagi),
+	XFS_ERRORTAG_ATTR_LIST(itobp),
+	XFS_ERRORTAG_ATTR_LIST(iunlink),
+	XFS_ERRORTAG_ATTR_LIST(iunlinkrm),
+	XFS_ERRORTAG_ATTR_LIST(dirinovalid),
+	XFS_ERRORTAG_ATTR_LIST(bulkstat),
+	XFS_ERRORTAG_ATTR_LIST(logiodone),
+	XFS_ERRORTAG_ATTR_LIST(stratread),
+	XFS_ERRORTAG_ATTR_LIST(stratcmpl),
+	XFS_ERRORTAG_ATTR_LIST(diowrite),
+	XFS_ERRORTAG_ATTR_LIST(bmapifmt),
+	XFS_ERRORTAG_ATTR_LIST(free_extent),
+	XFS_ERRORTAG_ATTR_LIST(rmap_finish_one),
+	XFS_ERRORTAG_ATTR_LIST(refcount_continue_update),
+	XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
+	XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
+	XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
+	NULL,
+};
+
+struct kobj_type xfs_errortag_ktype = {
+	.release = xfs_sysfs_release,
+	.sysfs_ops = &xfs_errortag_sysfs_ops,
+	.default_attrs = xfs_errortag_attrs,
+};
+
 int
 xfs_errortag_init(
 	struct xfs_mount	*mp)
@@ -64,13 +204,16 @@ xfs_errortag_init(
 			KM_SLEEP | KM_MAYFAIL);
 	if (!mp->m_errortag)
 		return -ENOMEM;
-	return 0;
+
+	return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+			       &mp->m_kobj, "errortag");
 }
 
 void
 xfs_errortag_del(
 	struct xfs_mount	*mp)
 {
+	xfs_sysfs_del(&mp->m_errortag_kobj);
 	kmem_free(mp->m_errortag);
 }
 
@@ -95,6 +238,17 @@ xfs_errortag_test(
 	return true;
 }
 
+int
+xfs_errortag_get(
+	struct xfs_mount	*mp,
+	unsigned int		error_tag)
+{
+	if (error_tag >= XFS_ERRTAG_MAX)
+		return -EINVAL;
+
+	return mp->m_errortag[error_tag];
+}
+
 int
 xfs_errortag_set(
 	struct xfs_mount	*mp,
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index b4316d39e1ca5..8915bdeb61280 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -138,6 +138,7 @@ extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
 #define XFS_TEST_ERROR(expr, mp, tag, rf)		\
 	((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
 
+extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
 extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
 		unsigned int tag_value);
 extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e002ac52a4e69..931e9fc21a1cb 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -204,6 +204,7 @@ typedef struct xfs_mount {
 	 * error triggers.  1 = always, 2 = half the time, etc.
 	 */
 	unsigned int		*m_errortag;
+	struct xfs_kobj		m_errortag_kobj;
 
 	/*
 	 * DEBUG mode instrumentation to test and/or trigger delayed allocation
-- 
GitLab


From 9e24cfd044853e0e46e7149b91b7bb09effb0a79 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 20 Jun 2017 17:54:47 -0700
Subject: [PATCH 0450/1958] xfs: remove unneeded parameter from XFS_TEST_ERROR

Since we moved the injected error frequency controls to the mountpoint,
we can get rid of the last argument to XFS_TEST_ERROR.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/libxfs/xfs_ag_resv.c   |  3 +--
 fs/xfs/libxfs/xfs_alloc.c     |  6 ++----
 fs/xfs/libxfs/xfs_bmap.c      | 13 ++++++-------
 fs/xfs/libxfs/xfs_btree.c     |  6 ++----
 fs/xfs/libxfs/xfs_dir2.c      |  3 +--
 fs/xfs/libxfs/xfs_ialloc.c    |  3 +--
 fs/xfs/libxfs/xfs_inode_buf.c |  3 +--
 fs/xfs/libxfs/xfs_refcount.c  |  6 ++----
 fs/xfs/libxfs/xfs_rmap.c      |  3 +--
 fs/xfs/xfs_error.h            |  4 ++--
 fs/xfs/xfs_inode.c            | 11 +++++------
 fs/xfs/xfs_iomap.c            |  2 +-
 fs/xfs/xfs_log.c              |  3 +--
 13 files changed, 26 insertions(+), 40 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 33db69be4832c..b008ff3250eba 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -111,8 +111,7 @@ xfs_ag_resv_critical(
 
 	/* Critically low if less than 10% or max btree height remains. */
 	return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
-			pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL,
-			XFS_RANDOM_AG_RESV_CRITICAL);
+			pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index fefa8daa1c36e..744dcaec34cca 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2454,8 +2454,7 @@ xfs_agf_read_verify(
 	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
 		xfs_buf_ioerror(bp, -EFSBADCRC);
 	else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
-				XFS_ERRTAG_ALLOC_READ_AGF,
-				XFS_RANDOM_ALLOC_READ_AGF))
+				XFS_ERRTAG_ALLOC_READ_AGF))
 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
 
 	if (bp->b_error)
@@ -2842,8 +2841,7 @@ xfs_free_extent(
 	ASSERT(type != XFS_AG_RESV_AGFL);
 
 	if (XFS_TEST_ERROR(false, mp,
-			XFS_ERRTAG_FREE_EXTENT,
-			XFS_RANDOM_FREE_EXTENT))
+			XFS_ERRTAG_FREE_EXTENT))
 		return -EIO;
 
 	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 19480ed231a48..0a9880777c9c2 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3992,7 +3992,7 @@ xfs_bmapi_read(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
 	}
@@ -4473,7 +4473,7 @@ xfs_bmapi_write(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
 	}
@@ -4694,7 +4694,7 @@ xfs_bmapi_remap(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
 	}
@@ -6098,7 +6098,7 @@ xfs_bmap_shift_extents(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmap_shift_extents",
 				 XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
@@ -6250,7 +6250,7 @@ xfs_bmap_split_extent_at(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
 				 XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
@@ -6508,8 +6508,7 @@ xfs_bmap_finish_one(
 		return -EFSCORRUPTED;
 
 	if (XFS_TEST_ERROR(false, tp->t_mountp,
-			XFS_ERRTAG_BMAP_FINISH_ONE,
-			XFS_RANDOM_BMAP_FINISH_ONE))
+			XFS_ERRTAG_BMAP_FINISH_ONE))
 		return -EIO;
 
 	switch (type) {
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2f8075aa8725f..4da85fff69ad0 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -101,8 +101,7 @@ xfs_btree_check_lblock(
 			be64_to_cpu(block->bb_u.l.bb_rightsib)));
 
 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
-			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
-			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
+			XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
@@ -153,8 +152,7 @@ xfs_btree_check_sblock(
 		block->bb_u.s.bb_rightsib;
 
 	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
-			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
-			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
+			XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 2f389d366e933..ccf9783fd3f08 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -218,8 +218,7 @@ xfs_dir_ino_validate(
 		agblkno != 0 &&
 		ioff < (1 << mp->m_sb.sb_inopblog) &&
 		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
-	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
-			XFS_RANDOM_DIR_INO_VALIDATE))) {
+	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
 		xfs_warn(mp, "Invalid inode number 0x%Lx",
 				(unsigned long long) ino);
 		XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index c514fe98bbabc..ffd5a15d1bb6d 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2552,8 +2552,7 @@ xfs_agi_read_verify(
 	    !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
 		xfs_buf_ioerror(bp, -EFSBADCRC);
 	else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
-				XFS_ERRTAG_IALLOC_READ_AGI,
-				XFS_RANDOM_IALLOC_READ_AGI))
+				XFS_ERRTAG_IALLOC_READ_AGI))
 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
 
 	if (bp->b_error)
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 0c970cf7ab63f..378f8fbc91a77 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -105,8 +105,7 @@ xfs_inode_buf_verify(
 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
 			xfs_dinode_good_version(mp, dip->di_version);
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-						XFS_ERRTAG_ITOBP_INOTOBP,
-						XFS_RANDOM_ITOBP_INOTOBP))) {
+						XFS_ERRTAG_ITOBP_INOTOBP))) {
 			if (readahead) {
 				bp->b_flags &= ~XBF_DONE;
 				xfs_buf_ioerror(bp, -EIO);
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index e17016163542d..900ea231f9a3d 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -805,8 +805,7 @@ xfs_refcount_still_have_space(
 	 */
 	if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
 	    XFS_TEST_ERROR(false, cur->bc_mp,
-			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE,
-			XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE))
+			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
 		return false;
 
 	if (cur->bc_private.a.priv.refc.nr_ops == 0)
@@ -1068,8 +1067,7 @@ xfs_refcount_finish_one(
 			blockcount);
 
 	if (XFS_TEST_ERROR(false, mp,
-			XFS_ERRTAG_REFCOUNT_FINISH_ONE,
-			XFS_RANDOM_REFCOUNT_FINISH_ONE))
+			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
 		return -EIO;
 
 	/*
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index eda275beebe0b..55c88a732690e 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2087,8 +2087,7 @@ xfs_rmap_finish_one(
 			startoff, blockcount, state);
 
 	if (XFS_TEST_ERROR(false, mp,
-			XFS_ERRTAG_RMAP_FINISH_ONE,
-			XFS_RANDOM_RMAP_FINISH_ONE))
+			XFS_ERRTAG_RMAP_FINISH_ONE))
 		return -EIO;
 
 	/*
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 8915bdeb61280..ae8935b90a93b 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -135,7 +135,7 @@ extern int xfs_errortag_init(struct xfs_mount *mp);
 extern void xfs_errortag_del(struct xfs_mount *mp);
 extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
 		const char *file, int line, unsigned int error_tag);
-#define XFS_TEST_ERROR(expr, mp, tag, rf)		\
+#define XFS_TEST_ERROR(expr, mp, tag)		\
 	((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
 
 extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
@@ -146,7 +146,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp);
 #else
 #define xfs_errortag_init(mp)			(0)
 #define xfs_errortag_del(mp)
-#define XFS_TEST_ERROR(expr, mp, tag, rf)	(expr)
+#define XFS_TEST_ERROR(expr, mp, tag)		(expr)
 #define xfs_errortag_set(mp, tag, val)		(ENOSYS)
 #define xfs_errortag_add(mp, tag)		(ENOSYS)
 #define xfs_errortag_clearall(mp)		(ENOSYS)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ffbfe7d8bc6d9..67bd5631166e2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3489,7 +3489,7 @@ xfs_iflush_int(
 	dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
-			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
+			       mp, XFS_ERRTAG_IFLUSH_1)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
@@ -3499,7 +3499,7 @@ xfs_iflush_int(
 		if (XFS_TEST_ERROR(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
-		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
+		    mp, XFS_ERRTAG_IFLUSH_3)) {
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad regular inode %Lu, ptr 0x%p",
 				__func__, ip->i_ino, ip);
@@ -3510,7 +3510,7 @@ xfs_iflush_int(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
-		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
+		    mp, XFS_ERRTAG_IFLUSH_4)) {
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad directory inode %Lu, ptr 0x%p",
 				__func__, ip->i_ino, ip);
@@ -3518,8 +3518,7 @@ xfs_iflush_int(
 		}
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
-				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
-				XFS_RANDOM_IFLUSH_5)) {
+				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: detected corrupt incore inode %Lu, "
 			"total extents = %d, nblocks = %Ld, ptr 0x%p",
@@ -3529,7 +3528,7 @@ xfs_iflush_int(
 		goto corrupt_out;
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
-				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
+				mp, XFS_ERRTAG_IFLUSH_6)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf7304cf..304b79d681e41 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -543,7 +543,7 @@ xfs_file_iomap_begin_delay(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
 		error = -EFSCORRUPTED;
 		goto out_unlock;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 98b39cbae42dc..2d1112ee1f860 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1189,8 +1189,7 @@ xlog_iodone(xfs_buf_t *bp)
 	 * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
 	 * CRC errors into log recovery.
 	 */
-	if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
-			   XFS_RANDOM_IODONE_IOERR) ||
+	if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
 	    iclog->ic_state & XLOG_STATE_IOABORT) {
 		if (iclog->ic_state & XLOG_STATE_IOABORT)
 			iclog->ic_state &= ~XLOG_STATE_IOABORT;
-- 
GitLab


From f8c47250ba46eb221d1ac537266ac65bcf2866d5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 20 Jun 2017 17:54:48 -0700
Subject: [PATCH 0451/1958] xfs: convert drop_writes to use the errortag
 mechanism

We now have enhanced error injection that can control the frequency
with which errors happen, so convert drop_writes to use this.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/xfs_error.c |  3 +++
 fs/xfs/xfs_error.h | 12 +++++++++++-
 fs/xfs/xfs_iomap.c |  2 +-
 fs/xfs/xfs_mount.h | 24 ------------------------
 fs/xfs/xfs_sysfs.c | 42 ------------------------------------------
 5 files changed, 15 insertions(+), 68 deletions(-)

diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index e2278af6aed1e..a2f23d2bab168 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -55,6 +55,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_REFCOUNT_FINISH_ONE,
 	XFS_RANDOM_BMAP_FINISH_ONE,
 	XFS_RANDOM_AG_RESV_CRITICAL,
+	XFS_RANDOM_DROP_WRITES,
 };
 
 struct xfs_errortag_attr {
@@ -157,6 +158,7 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update,	XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA
 XFS_ERRORTAG_ATTR_RW(refcount_finish_one,	XFS_ERRTAG_REFCOUNT_FINISH_ONE);
 XFS_ERRORTAG_ATTR_RW(bmap_finish_one,	XFS_ERRTAG_BMAP_FINISH_ONE);
 XFS_ERRORTAG_ATTR_RW(ag_resv_critical,	XFS_ERRTAG_AG_RESV_CRITICAL);
+XFS_ERRORTAG_ATTR_RW(drop_writes,	XFS_ERRTAG_DROP_WRITES);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -187,6 +189,7 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
 	XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
 	XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
+	XFS_ERRORTAG_ATTR_LIST(drop_writes),
 	NULL,
 };
 
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index ae8935b90a93b..e0e4cf776fac8 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -96,7 +96,16 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_ERRTAG_REFCOUNT_FINISH_ONE			25
 #define XFS_ERRTAG_BMAP_FINISH_ONE			26
 #define XFS_ERRTAG_AG_RESV_CRITICAL			27
-#define XFS_ERRTAG_MAX					28
+/*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes. When enabled, all
+ * buffered writes are silenty dropped and handled as if they failed.
+ * All delalloc blocks in the range of the write (including pre-existing
+ * delalloc blocks!) are tossed as part of the write failure error
+ * handling sequence.
+ */
+#define XFS_ERRTAG_DROP_WRITES				28
+#define XFS_ERRTAG_MAX					29
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -129,6 +138,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_RANDOM_REFCOUNT_FINISH_ONE			1
 #define XFS_RANDOM_BMAP_FINISH_ONE			1
 #define XFS_RANDOM_AG_RESV_CRITICAL			4
+#define XFS_RANDOM_DROP_WRITES				1
 
 #ifdef DEBUG
 extern int xfs_errortag_init(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 304b79d681e41..86f1a9fa46d25 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1097,7 +1097,7 @@ xfs_file_iomap_end_delalloc(
 	 * Behave as if the write failed if drop writes is enabled. Set the NEW
 	 * flag to force delalloc cleanup.
 	 */
-	if (xfs_mp_drop_writes(mp)) {
+	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
 		iomap->flags |= IOMAP_F_NEW;
 		written = 0;
 	}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 931e9fc21a1cb..e0792d036be20 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -205,16 +205,6 @@ typedef struct xfs_mount {
 	 */
 	unsigned int		*m_errortag;
 	struct xfs_kobj		m_errortag_kobj;
-
-	/*
-	 * DEBUG mode instrumentation to test and/or trigger delayed allocation
-	 * block killing in the event of failed writes. When enabled, all
-	 * buffered writes are silenty dropped and handled as if they failed.
-	 * All delalloc blocks in the range of the write (including pre-existing
-	 * delalloc blocks!) are tossed as part of the write failure error
-	 * handling sequence.
-	 */
-	bool			m_drop_writes;
 #endif
 } xfs_mount_t;
 
@@ -333,20 +323,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-#ifdef DEBUG
-static inline bool
-xfs_mp_drop_writes(struct xfs_mount *mp)
-{
-	return mp->m_drop_writes;
-}
-#else
-static inline bool
-xfs_mp_drop_writes(struct xfs_mount *mp)
-{
-	return 0;
-}
-#endif
-
 /* per-AG block reservation data structures*/
 enum xfs_ag_resv_type {
 	XFS_AG_RESV_NONE = 0,
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index ec6e0e2f95d66..56610a9735931 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -90,49 +90,7 @@ to_mp(struct kobject *kobject)
 	return container_of(kobj, struct xfs_mount, m_kobj);
 }
 
-#ifdef DEBUG
-
-STATIC ssize_t
-drop_writes_store(
-	struct kobject		*kobject,
-	const char		*buf,
-	size_t			count)
-{
-	struct xfs_mount	*mp = to_mp(kobject);
-	int			ret;
-	int			val;
-
-	ret = kstrtoint(buf, 0, &val);
-	if (ret)
-		return ret;
-
-	if (val == 1)
-		mp->m_drop_writes = true;
-	else if (val == 0)
-		mp->m_drop_writes = false;
-	else
-		return -EINVAL;
-
-	return count;
-}
-
-STATIC ssize_t
-drop_writes_show(
-	struct kobject		*kobject,
-	char			*buf)
-{
-	struct xfs_mount	*mp = to_mp(kobject);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_drop_writes ? 1 : 0);
-}
-XFS_SYSFS_ATTR_RW(drop_writes);
-
-#endif /* DEBUG */
-
 static struct attribute *xfs_mp_attrs[] = {
-#ifdef DEBUG
-	ATTR_LIST(drop_writes),
-#endif
 	NULL,
 };
 
-- 
GitLab


From 3e88a0078ba8ef61816c85d33131827b4a307852 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 27 Jun 2017 09:52:32 -0700
Subject: [PATCH 0452/1958] xfs: replace log_badcrc_factor knob with error
 injection tag

Now that error injection tags support dynamic frequency adjustment,
replace the debug mode sysfs knob that controls log record CRC error
injection with an error injection tag.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_error.c |  3 +++
 fs/xfs/xfs_error.h |  4 +++-
 fs/xfs/xfs_log.c   |  5 +----
 fs/xfs/xfs_sysfs.c | 36 ------------------------------------
 4 files changed, 7 insertions(+), 41 deletions(-)

diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index a2f23d2bab168..26c32bc5cd34f 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -56,6 +56,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_BMAP_FINISH_ONE,
 	XFS_RANDOM_AG_RESV_CRITICAL,
 	XFS_RANDOM_DROP_WRITES,
+	XFS_RANDOM_LOG_BAD_CRC,
 };
 
 struct xfs_errortag_attr {
@@ -159,6 +160,7 @@ XFS_ERRORTAG_ATTR_RW(refcount_finish_one,	XFS_ERRTAG_REFCOUNT_FINISH_ONE);
 XFS_ERRORTAG_ATTR_RW(bmap_finish_one,	XFS_ERRTAG_BMAP_FINISH_ONE);
 XFS_ERRORTAG_ATTR_RW(ag_resv_critical,	XFS_ERRTAG_AG_RESV_CRITICAL);
 XFS_ERRORTAG_ATTR_RW(drop_writes,	XFS_ERRTAG_DROP_WRITES);
+XFS_ERRORTAG_ATTR_RW(log_bad_crc,	XFS_ERRTAG_LOG_BAD_CRC);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -190,6 +192,7 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
 	XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
 	XFS_ERRORTAG_ATTR_LIST(drop_writes),
+	XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
 	NULL,
 };
 
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index e0e4cf776fac8..7577be5f09bc0 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -105,7 +105,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
  * handling sequence.
  */
 #define XFS_ERRTAG_DROP_WRITES				28
-#define XFS_ERRTAG_MAX					29
+#define XFS_ERRTAG_LOG_BAD_CRC				29
+#define XFS_ERRTAG_MAX					30
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -139,6 +140,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_RANDOM_BMAP_FINISH_ONE			1
 #define XFS_RANDOM_AG_RESV_CRITICAL			4
 #define XFS_RANDOM_DROP_WRITES				1
+#define XFS_RANDOM_LOG_BAD_CRC				1
 
 #ifdef DEBUG
 extern int xfs_errortag_init(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2d1112ee1f860..31f11be42f01b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1841,7 +1841,6 @@ xlog_sync(
 	/* calculcate the checksum */
 	iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
 					    iclog->ic_datap, size);
-#ifdef DEBUG
 	/*
 	 * Intentionally corrupt the log record CRC based on the error injection
 	 * frequency, if defined. This facilitates testing log recovery in the
@@ -1849,15 +1848,13 @@ xlog_sync(
 	 * write on I/O completion and shutdown the fs. The subsequent mount
 	 * detects the bad CRC and attempts to recover.
 	 */
-	if (log->l_badcrc_factor &&
-	    (prandom_u32() % log->l_badcrc_factor == 0)) {
+	if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
 		iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
 		iclog->ic_state |= XLOG_STATE_IOABORT;
 		xfs_warn(log->l_mp,
 	"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
 			 be64_to_cpu(iclog->ic_header.h_lsn));
 	}
-#endif
 
 	bp->b_io_length = BTOBB(count);
 	bp->b_fspriv = iclog;
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 56610a9735931..8b2ccc234f36c 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -305,47 +305,11 @@ write_grant_head_show(
 }
 XFS_SYSFS_ATTR_RO(write_grant_head);
 
-#ifdef DEBUG
-STATIC ssize_t
-log_badcrc_factor_store(
-	struct kobject	*kobject,
-	const char	*buf,
-	size_t		count)
-{
-	struct xlog	*log = to_xlog(kobject);
-	int		ret;
-	uint32_t	val;
-
-	ret = kstrtouint(buf, 0, &val);
-	if (ret)
-		return ret;
-
-	log->l_badcrc_factor = val;
-
-	return count;
-}
-
-STATIC ssize_t
-log_badcrc_factor_show(
-	struct kobject	*kobject,
-	char		*buf)
-{
-	struct xlog	*log = to_xlog(kobject);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
-}
-
-XFS_SYSFS_ATTR_RW(log_badcrc_factor);
-#endif	/* DEBUG */
-
 static struct attribute *xfs_log_attrs[] = {
 	ATTR_LIST(log_head_lsn),
 	ATTR_LIST(log_tail_lsn),
 	ATTR_LIST(reserve_grant_head),
 	ATTR_LIST(write_grant_head),
-#ifdef DEBUG
-	ATTR_LIST(log_badcrc_factor),
-#endif
 	NULL,
 };
 
-- 
GitLab


From cf2cb7845d6e101cb17bd62f8aa08cd514fc8988 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 20 Jun 2017 14:36:19 -0700
Subject: [PATCH 0453/1958] xfs: free cowblocks and retry on buffered write
 ENOSPC

XFS runs an eofblocks reclaim scan before returning an ENOSPC error to
userspace for buffered writes. This facilitates aggressive speculative
preallocation without causing user visible side effects such as
premature ENOSPC.

Run a cowblocks scan in the same situation to reclaim lingering COW fork
preallocation throughout the filesystem.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 36c129303fcfd..8f9a491b25514 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -660,6 +660,7 @@ xfs_file_buffered_aio_write(
 		xfs_iunlock(ip, iolock);
 		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
 		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+		xfs_icache_free_cowblocks(ip->i_mount, &eofb);
 		goto write_retry;
 	}
 
-- 
GitLab


From 8ba358756aa08414fa9e65a1a41d28304ed6fd7f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 26 Jun 2017 08:48:18 -0700
Subject: [PATCH 0454/1958] xfs: Don't clear SGID when inheriting ACLs

When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit
set, DIR1 is expected to have SGID bit set (and owning group equal to
the owning group of 'DIR0'). However when 'DIR0' also has some default
ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on
'DIR1' to get cleared if user is not member of the owning group.

Fix the problem by calling __xfs_set_acl() instead of xfs_set_acl() when
setting up inode in xfs_generic_create(). That prevents SGID bit
clearing and mode is properly set by posix_acl_create() anyway. We also
reorder arguments of __xfs_set_acl() to match the ordering of
xfs_set_acl() to make things consistent.

Fixes: 073931017b49d9458aa351605b43a7e34598caef
CC: stable@vger.kernel.org
CC: Darrick J. Wong <darrick.wong@oracle.com>
CC: linux-xfs@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_acl.c  | 6 +++---
 fs/xfs/xfs_acl.h  | 1 +
 fs/xfs/xfs_iops.c | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b468e041f207a..7034e17535dee 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -170,8 +170,8 @@ xfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-STATIC int
-__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+int
+__xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	struct xfs_inode *ip = XFS_I(inode);
 	unsigned char *ea_name;
@@ -268,5 +268,5 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	}
 
  set_acl:
-	return __xfs_set_acl(inode, type, acl);
+	return __xfs_set_acl(inode, acl, type);
 }
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 286fa89217f5c..04327318ef676 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -24,6 +24,7 @@ struct posix_acl;
 #ifdef CONFIG_XFS_POSIX_ACL
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
 extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 #else
 static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
 {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ebfc13350f9ae..077e2b2ac7734 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -190,12 +190,12 @@ xfs_generic_create(
 
 #ifdef CONFIG_XFS_POSIX_ACL
 	if (default_acl) {
-		error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+		error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
 		if (error)
 			goto out_cleanup_inode;
 	}
 	if (acl) {
-		error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+		error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
 		if (error)
 			goto out_cleanup_inode;
 	}
-- 
GitLab


From 244e3dea58818e9520bf3dbaf4404a60b105bfb1 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Mon, 26 Jun 2017 08:54:16 -0700
Subject: [PATCH 0455/1958] xfs: fix semicolon.cocci warnings

fs/xfs/xfs_log.c:2092:38-39: Unneeded semicolon


 Remove unneeded semicolon.

Generated by: scripts/coccinelle/misc/semicolon.cocci

Fixes: d4ca1d550d05 ("xfs: dump transaction usage details on log reservation overrun")
CC: Brian Foster <bfoster@redhat.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 31f11be42f01b..0053bcf2b10a1 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2085,7 +2085,7 @@ xlog_print_trans(
 			xfs_warn(mp, "    type	= 0x%x", vec->i_type);
 			xfs_warn(mp, "    len	= %d", vec->i_len);
 			xfs_warn(mp, "    first %d bytes of iovec[%d]:", dumplen, i);
-			xfs_hex_dump(vec->i_addr, dumplen);;
+			xfs_hex_dump(vec->i_addr, dumplen);
 
 			vec++;
 		}
-- 
GitLab


From 50e0bdbe9f48f98bb02eac7030d682f4716884ae Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 27 Jun 2017 10:19:45 -0700
Subject: [PATCH 0456/1958] xfs: grab dquots without taking the ilock

Add a new dqget flag that grabs the dquot without taking the ilock.
This will be used by the scrubber (which will have already grabbed
the ilock) to perform basic sanity checking of the quota data.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_quota_defs.h |  2 ++
 fs/xfs/xfs_dquot.c             | 14 ++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index d69c772271cb0..2834574cb6e7d 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -136,6 +136,8 @@ typedef uint16_t	xfs_qwarncnt_t;
  */
 #define XFS_QMOPT_INHERIT	0x1000000
 
+#define XFS_QMOPT_NOLOCK	0x2000000 /* don't ilock during dqget */
+
 /*
  * flags to xfs_trans_mod_dquot.
  */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index e57c6cce91aab..79668142afc1f 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -472,18 +472,23 @@ xfs_qm_dqtobp(
 	struct xfs_mount	*mp = dqp->q_mount;
 	xfs_dqid_t		id = be32_to_cpu(dqp->q_core.d_id);
 	struct xfs_trans	*tp = (tpp ? *tpp : NULL);
-	uint			lock_mode;
+	uint			lock_mode = 0;
 
 	quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
 	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
-	lock_mode = xfs_ilock_data_map_shared(quotip);
+	ASSERT(!(flags & XFS_QMOPT_NOLOCK) ||
+		xfs_isilocked(quotip, XFS_ILOCK_SHARED) ||
+		xfs_isilocked(quotip, XFS_ILOCK_EXCL));
+	if (!(flags & XFS_QMOPT_NOLOCK))
+		lock_mode = xfs_ilock_data_map_shared(quotip);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
 		/*
 		 * Return if this type of quotas is turned off while we
 		 * didn't have the quota inode lock.
 		 */
-		xfs_iunlock(quotip, lock_mode);
+		if (lock_mode)
+			xfs_iunlock(quotip, lock_mode);
 		return -ESRCH;
 	}
 
@@ -493,7 +498,8 @@ xfs_qm_dqtobp(
 	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
 			       XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
 
-	xfs_iunlock(quotip, lock_mode);
+	if (lock_mode)
+		xfs_iunlock(quotip, lock_mode);
 	if (error)
 		return error;
 
-- 
GitLab


From 351b0940d473146923711bc943fc881354a4c1f3 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 11:37:44 -0700
Subject: [PATCH 0457/1958] MIPS: module: Ensure we always clean up
 r_mips_hi16_list

If we hit an error whilst processing a reloc then we would return early
from apply_relocate & potentially not free entries in r_mips_hi16_list,
thereby leaking memory. Fix this by ensuring that we always run the code
to free r_mipps_hi16_list when errors occur.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: 861667dc82f5 ("MIPS: Fix race condition in module relocation code.")
Fixes: 04211a574641 ("MIPS: Bail on unsupported module relocs")
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15831/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/module.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 94627a3a6a0d9..ddcfb59593b66 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -251,7 +251,7 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 	u32 *location;
 	unsigned int i, type;
 	Elf_Addr v;
-	int res;
+	int err = 0;
 
 	pr_debug("Applying relocate section %u to %u\n", relsec,
 	       sechdrs[relsec].sh_info);
@@ -270,7 +270,8 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 				continue;
 			pr_warn("%s: Unknown symbol %s\n",
 				me->name, strtab + sym->st_name);
-			return -ENOENT;
+			err = -ENOENT;
+			goto out;
 		}
 
 		type = ELF_MIPS_R_TYPE(rel[i]);
@@ -283,29 +284,32 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 		if (!handler) {
 			pr_err("%s: Unknown relocation type %u\n",
 			       me->name, type);
-			return -EINVAL;
+			err = -EINVAL;
+			goto out;
 		}
 
 		v = sym->st_value;
-		res = handler(me, location, v);
-		if (res)
-			return res;
+		err = handler(me, location, v);
+		if (err)
+			goto out;
 	}
 
+out:
 	/*
-	 * Normally the hi16 list should be deallocated at this point.	A
+	 * Normally the hi16 list should be deallocated at this point. A
 	 * malformed binary however could contain a series of R_MIPS_HI16
-	 * relocations not followed by a R_MIPS_LO16 relocation.  In that
-	 * case, free up the list and return an error.
+	 * relocations not followed by a R_MIPS_LO16 relocation, or if we hit
+	 * an error processing a reloc we might have gotten here before
+	 * reaching the R_MIPS_LO16. In either case, free up the list and
+	 * return an error.
 	 */
 	if (me->arch.r_mips_hi16_list) {
 		free_relocation_chain(me->arch.r_mips_hi16_list);
 		me->arch.r_mips_hi16_list = NULL;
-
-		return -ENOEXEC;
+		err = err ?: -ENOEXEC;
 	}
 
-	return 0;
+	return err;
 }
 
 /* Given an address, look for it in the module exception tables. */
-- 
GitLab


From 430d0b88943afffd0da6d98799bf0afb008fd13f Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 30 Mar 2017 11:37:45 -0700
Subject: [PATCH 0458/1958] MIPS: module: Unify rel & rela reloc handling

The module load code has previously had entirely separate
implementations for rel & rela style relocs, which unnecessarily
duplicates a whole lot of code. Unify the implementations of both types
of reloc, sharing the bulk of the code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15832/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/module.h |   8 +-
 arch/mips/kernel/Makefile      |   1 -
 arch/mips/kernel/module-rela.c | 202 ---------------------------------
 arch/mips/kernel/module.c      | 195 +++++++++++++++++++++++--------
 4 files changed, 154 insertions(+), 252 deletions(-)
 delete mode 100644 arch/mips/kernel/module-rela.c

diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index 702c273e67a95..e51add184717f 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -47,8 +47,8 @@ typedef struct {
 #define Elf_Mips_Rel	Elf32_Rel
 #define Elf_Mips_Rela	Elf32_Rela
 
-#define ELF_MIPS_R_SYM(rel) ELF32_R_SYM(rel.r_info)
-#define ELF_MIPS_R_TYPE(rel) ELF32_R_TYPE(rel.r_info)
+#define ELF_MIPS_R_SYM(rel) ELF32_R_SYM((rel).r_info)
+#define ELF_MIPS_R_TYPE(rel) ELF32_R_TYPE((rel).r_info)
 
 #endif
 
@@ -65,8 +65,8 @@ typedef struct {
 #define Elf_Mips_Rel	Elf64_Mips_Rel
 #define Elf_Mips_Rela	Elf64_Mips_Rela
 
-#define ELF_MIPS_R_SYM(rel) (rel.r_sym)
-#define ELF_MIPS_R_TYPE(rel) (rel.r_type)
+#define ELF_MIPS_R_SYM(rel) ((rel).r_sym)
+#define ELF_MIPS_R_TYPE(rel) ((rel).r_type)
 
 #endif
 
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 9a0e37b92ce07..f0edd7e8a0b78 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_SYNC_R4K)		+= sync-r4k.o
 obj-$(CONFIG_DEBUG_FS)		+= segment.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_MODULES_USE_ELF_RELA) += module-rela.o
 
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
diff --git a/arch/mips/kernel/module-rela.c b/arch/mips/kernel/module-rela.c
deleted file mode 100644
index 7811688344565..0000000000000
--- a/arch/mips/kernel/module-rela.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- *  Copyright (C) 2001 Rusty Russell.
- *  Copyright (C) 2003, 2004 Ralf Baechle (ralf@linux-mips.org)
- *  Copyright (C) 2005 Thiemo Seufer
- *  Copyright (C) 2015 Imagination Technologies Ltd.
- */
-
-#include <linux/elf.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/moduleloader.h>
-
-extern int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v);
-
-static int apply_r_mips_32_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	*location = v;
-
-	return 0;
-}
-
-static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	if (v % 4) {
-		pr_err("module %s: dangerous R_MIPS_26 RELA relocation\n",
-		       me->name);
-		return -ENOEXEC;
-	}
-
-	if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
-		pr_err("module %s: relocation overflow\n", me->name);
-		return -ENOEXEC;
-	}
-
-	*location = (*location & ~0x03ffffff) | ((v >> 2) & 0x03ffffff);
-
-	return 0;
-}
-
-static int apply_r_mips_hi16_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	*location = (*location & 0xffff0000) |
-		    ((((long long) v + 0x8000LL) >> 16) & 0xffff);
-
-	return 0;
-}
-
-static int apply_r_mips_lo16_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	*location = (*location & 0xffff0000) | (v & 0xffff);
-
-	return 0;
-}
-
-static int apply_r_mips_pc_rela(struct module *me, u32 *location, Elf_Addr v,
-				unsigned bits)
-{
-	unsigned long mask = GENMASK(bits - 1, 0);
-	unsigned long se_bits;
-	long offset;
-
-	if (v % 4) {
-		pr_err("module %s: dangerous R_MIPS_PC%u RELA relocation\n",
-		       me->name, bits);
-		return -ENOEXEC;
-	}
-
-	offset = ((long)v - (long)location) >> 2;
-
-	/* check the sign bit onwards are identical - ie. we didn't overflow */
-	se_bits = (offset & BIT(bits - 1)) ? ~0ul : 0;
-	if ((offset & ~mask) != (se_bits & ~mask)) {
-		pr_err("module %s: relocation overflow\n", me->name);
-		return -ENOEXEC;
-	}
-
-	*location = (*location & ~mask) | (offset & mask);
-
-	return 0;
-}
-
-static int apply_r_mips_pc16_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	return apply_r_mips_pc_rela(me, location, v, 16);
-}
-
-static int apply_r_mips_pc21_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	return apply_r_mips_pc_rela(me, location, v, 21);
-}
-
-static int apply_r_mips_pc26_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	return apply_r_mips_pc_rela(me, location, v, 26);
-}
-
-static int apply_r_mips_64_rela(struct module *me, u32 *location, Elf_Addr v)
-{
-	*(Elf_Addr *)location = v;
-
-	return 0;
-}
-
-static int apply_r_mips_higher_rela(struct module *me, u32 *location,
-				    Elf_Addr v)
-{
-	*location = (*location & 0xffff0000) |
-		    ((((long long) v + 0x80008000LL) >> 32) & 0xffff);
-
-	return 0;
-}
-
-static int apply_r_mips_highest_rela(struct module *me, u32 *location,
-				     Elf_Addr v)
-{
-	*location = (*location & 0xffff0000) |
-		    ((((long long) v + 0x800080008000LL) >> 48) & 0xffff);
-
-	return 0;
-}
-
-static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
-				Elf_Addr v) = {
-	[R_MIPS_NONE]		= apply_r_mips_none,
-	[R_MIPS_32]		= apply_r_mips_32_rela,
-	[R_MIPS_26]		= apply_r_mips_26_rela,
-	[R_MIPS_HI16]		= apply_r_mips_hi16_rela,
-	[R_MIPS_LO16]		= apply_r_mips_lo16_rela,
-	[R_MIPS_PC16]		= apply_r_mips_pc16_rela,
-	[R_MIPS_64]		= apply_r_mips_64_rela,
-	[R_MIPS_HIGHER]		= apply_r_mips_higher_rela,
-	[R_MIPS_HIGHEST]	= apply_r_mips_highest_rela,
-	[R_MIPS_PC21_S2]	= apply_r_mips_pc21_rela,
-	[R_MIPS_PC26_S2]	= apply_r_mips_pc26_rela,
-};
-
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
-		       unsigned int symindex, unsigned int relsec,
-		       struct module *me)
-{
-	Elf_Mips_Rela *rel = (void *) sechdrs[relsec].sh_addr;
-	int (*handler)(struct module *me, u32 *location, Elf_Addr v);
-	Elf_Sym *sym;
-	u32 *location;
-	unsigned int i, type;
-	Elf_Addr v;
-	int res;
-
-	pr_debug("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
-		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
-		/* This is the symbol it is referring to */
-		sym = (Elf_Sym *)sechdrs[symindex].sh_addr
-			+ ELF_MIPS_R_SYM(rel[i]);
-		if (sym->st_value >= -MAX_ERRNO) {
-			/* Ignore unresolved weak symbol */
-			if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
-				continue;
-			pr_warn("%s: Unknown symbol %s\n",
-			       me->name, strtab + sym->st_name);
-			return -ENOENT;
-		}
-
-		type = ELF_MIPS_R_TYPE(rel[i]);
-
-		if (type < ARRAY_SIZE(reloc_handlers_rela))
-			handler = reloc_handlers_rela[type];
-		else
-			handler = NULL;
-
-		if (!handler) {
-			pr_err("%s: Unknown relocation type %u\n",
-			       me->name, type);
-			return -EINVAL;
-		}
-
-		v = sym->st_value + rel[i].r_addend;
-		res = handler(me, location, v);
-		if (res)
-			return res;
-	}
-
-	return 0;
-}
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index ddcfb59593b66..b250eb0c4fc12 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -53,22 +53,25 @@ void *module_alloc(unsigned long size)
 }
 #endif
 
-int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_none(struct module *me, u32 *location,
+			     u32 base, Elf_Addr v, bool rela)
 {
 	return 0;
 }
 
-static int apply_r_mips_32_rel(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_32(struct module *me, u32 *location,
+			   u32 base, Elf_Addr v, bool rela)
 {
-	*location += v;
+	*location = base + v;
 
 	return 0;
 }
 
-static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_26(struct module *me, u32 *location,
+			   u32 base, Elf_Addr v, bool rela)
 {
 	if (v % 4) {
-		pr_err("module %s: dangerous R_MIPS_26 REL relocation\n",
+		pr_err("module %s: dangerous R_MIPS_26 relocation\n",
 		       me->name);
 		return -ENOEXEC;
 	}
@@ -80,15 +83,22 @@ static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
 	}
 
 	*location = (*location & ~0x03ffffff) |
-		    ((*location + (v >> 2)) & 0x03ffffff);
+		    ((base + (v >> 2)) & 0x03ffffff);
 
 	return 0;
 }
 
-static int apply_r_mips_hi16_rel(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_hi16(struct module *me, u32 *location,
+			     u32 base, Elf_Addr v, bool rela)
 {
 	struct mips_hi16 *n;
 
+	if (rela) {
+		*location = (*location & 0xffff0000) |
+			    ((((long long) v + 0x8000LL) >> 16) & 0xffff);
+		return 0;
+	}
+
 	/*
 	 * We cannot relocate this one now because we don't know the value of
 	 * the carry we need to add.  Save the information, and let LO16 do the
@@ -117,12 +127,18 @@ static void free_relocation_chain(struct mips_hi16 *l)
 	}
 }
 
-static int apply_r_mips_lo16_rel(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_lo16(struct module *me, u32 *location,
+			     u32 base, Elf_Addr v, bool rela)
 {
-	unsigned long insnlo = *location;
+	unsigned long insnlo = base;
 	struct mips_hi16 *l;
 	Elf_Addr val, vallo;
 
+	if (rela) {
+		*location = (*location & 0xffff0000) | (v & 0xffff);
+		return 0;
+	}
+
 	/* Sign extend the addend we extract from the lo insn.	*/
 	vallo = ((insnlo & 0xffff) ^ 0x8000) - 0x8000;
 
@@ -178,26 +194,26 @@ static int apply_r_mips_lo16_rel(struct module *me, u32 *location, Elf_Addr v)
 	free_relocation_chain(l);
 	me->arch.r_mips_hi16_list = NULL;
 
-	pr_err("module %s: dangerous R_MIPS_LO16 REL relocation\n", me->name);
+	pr_err("module %s: dangerous R_MIPS_LO16 relocation\n", me->name);
 
 	return -ENOEXEC;
 }
 
-static int apply_r_mips_pc_rel(struct module *me, u32 *location, Elf_Addr v,
-			       unsigned bits)
+static int apply_r_mips_pc(struct module *me, u32 *location, u32 base,
+			   Elf_Addr v, unsigned int bits)
 {
 	unsigned long mask = GENMASK(bits - 1, 0);
 	unsigned long se_bits;
 	long offset;
 
 	if (v % 4) {
-		pr_err("module %s: dangerous R_MIPS_PC%u REL relocation\n",
+		pr_err("module %s: dangerous R_MIPS_PC%u relocation\n",
 		       me->name, bits);
 		return -ENOEXEC;
 	}
 
-	/* retrieve & sign extend implicit addend */
-	offset = *location & mask;
+	/* retrieve & sign extend implicit addend if any */
+	offset = base & mask;
 	offset |= (offset & BIT(bits - 1)) ? ~mask : 0;
 
 	offset += ((long)v - (long)location) >> 2;
@@ -214,56 +230,121 @@ static int apply_r_mips_pc_rel(struct module *me, u32 *location, Elf_Addr v,
 	return 0;
 }
 
-static int apply_r_mips_pc16_rel(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_pc16(struct module *me, u32 *location,
+			     u32 base, Elf_Addr v, bool rela)
 {
-	return apply_r_mips_pc_rel(me, location, v, 16);
+	return apply_r_mips_pc(me, location, base, v, 16);
 }
 
-static int apply_r_mips_pc21_rel(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_pc21(struct module *me, u32 *location,
+			     u32 base, Elf_Addr v, bool rela)
 {
-	return apply_r_mips_pc_rel(me, location, v, 21);
+	return apply_r_mips_pc(me, location, base, v, 21);
 }
 
-static int apply_r_mips_pc26_rel(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_mips_pc26(struct module *me, u32 *location,
+			     u32 base, Elf_Addr v, bool rela)
 {
-	return apply_r_mips_pc_rel(me, location, v, 26);
+	return apply_r_mips_pc(me, location, base, v, 26);
 }
 
-static int (*reloc_handlers_rel[]) (struct module *me, u32 *location,
-				Elf_Addr v) = {
+static int apply_r_mips_64(struct module *me, u32 *location,
+			   u32 base, Elf_Addr v, bool rela)
+{
+	if (WARN_ON(!rela))
+		return -EINVAL;
+
+	*(Elf_Addr *)location = v;
+
+	return 0;
+}
+
+static int apply_r_mips_higher(struct module *me, u32 *location,
+			       u32 base, Elf_Addr v, bool rela)
+{
+	if (WARN_ON(!rela))
+		return -EINVAL;
+
+	*location = (*location & 0xffff0000) |
+		    ((((long long)v + 0x80008000LL) >> 32) & 0xffff);
+
+	return 0;
+}
+
+static int apply_r_mips_highest(struct module *me, u32 *location,
+				u32 base, Elf_Addr v, bool rela)
+{
+	if (WARN_ON(!rela))
+		return -EINVAL;
+
+	*location = (*location & 0xffff0000) |
+		    ((((long long)v + 0x800080008000LL) >> 48) & 0xffff);
+
+	return 0;
+}
+
+/**
+ * reloc_handler() - Apply a particular relocation to a module
+ * @me: the module to apply the reloc to
+ * @location: the address at which the reloc is to be applied
+ * @base: the existing value at location for REL-style; 0 for RELA-style
+ * @v: the value of the reloc, with addend for RELA-style
+ *
+ * Each implemented reloc_handler function applies a particular type of
+ * relocation to the module @me. Relocs that may be found in either REL or RELA
+ * variants can be handled by making use of the @base & @v parameters which are
+ * set to values which abstract the difference away from the particular reloc
+ * implementations.
+ *
+ * Return: 0 upon success, else -ERRNO
+ */
+typedef int (*reloc_handler)(struct module *me, u32 *location,
+			     u32 base, Elf_Addr v, bool rela);
+
+/* The handlers for known reloc types */
+static reloc_handler reloc_handlers[] = {
 	[R_MIPS_NONE]		= apply_r_mips_none,
-	[R_MIPS_32]		= apply_r_mips_32_rel,
-	[R_MIPS_26]		= apply_r_mips_26_rel,
-	[R_MIPS_HI16]		= apply_r_mips_hi16_rel,
-	[R_MIPS_LO16]		= apply_r_mips_lo16_rel,
-	[R_MIPS_PC16]		= apply_r_mips_pc16_rel,
-	[R_MIPS_PC21_S2]	= apply_r_mips_pc21_rel,
-	[R_MIPS_PC26_S2]	= apply_r_mips_pc26_rel,
+	[R_MIPS_32]		= apply_r_mips_32,
+	[R_MIPS_26]		= apply_r_mips_26,
+	[R_MIPS_HI16]		= apply_r_mips_hi16,
+	[R_MIPS_LO16]		= apply_r_mips_lo16,
+	[R_MIPS_PC16]		= apply_r_mips_pc16,
+	[R_MIPS_64]		= apply_r_mips_64,
+	[R_MIPS_HIGHER]		= apply_r_mips_higher,
+	[R_MIPS_HIGHEST]	= apply_r_mips_highest,
+	[R_MIPS_PC21_S2]	= apply_r_mips_pc21,
+	[R_MIPS_PC26_S2]	= apply_r_mips_pc26,
 };
 
-int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
-		   unsigned int symindex, unsigned int relsec,
-		   struct module *me)
+static int __apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
+			    unsigned int symindex, unsigned int relsec,
+			    struct module *me, bool rela)
 {
-	Elf_Mips_Rel *rel = (void *) sechdrs[relsec].sh_addr;
-	int (*handler)(struct module *me, u32 *location, Elf_Addr v);
+	union {
+		Elf_Mips_Rel *rel;
+		Elf_Mips_Rela *rela;
+	} r;
+	reloc_handler handler;
 	Elf_Sym *sym;
-	u32 *location;
+	u32 *location, base;
 	unsigned int i, type;
 	Elf_Addr v;
 	int err = 0;
+	size_t reloc_sz;
 
 	pr_debug("Applying relocate section %u to %u\n", relsec,
 	       sechdrs[relsec].sh_info);
 
+	r.rel = (void *)sechdrs[relsec].sh_addr;
+	reloc_sz = rela ? sizeof(*r.rela) : sizeof(*r.rel);
 	me->arch.r_mips_hi16_list = NULL;
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+	for (i = 0; i < sechdrs[relsec].sh_size / reloc_sz; i++) {
 		/* This is where to make the change */
 		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
+			+ r.rel->r_offset;
 		/* This is the symbol it is referring to */
 		sym = (Elf_Sym *)sechdrs[symindex].sh_addr
-			+ ELF_MIPS_R_SYM(rel[i]);
+			+ ELF_MIPS_R_SYM(*r.rel);
 		if (sym->st_value >= -MAX_ERRNO) {
 			/* Ignore unresolved weak symbol */
 			if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
@@ -274,10 +355,9 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 			goto out;
 		}
 
-		type = ELF_MIPS_R_TYPE(rel[i]);
-
-		if (type < ARRAY_SIZE(reloc_handlers_rel))
-			handler = reloc_handlers_rel[type];
+		type = ELF_MIPS_R_TYPE(*r.rel);
+		if (type < ARRAY_SIZE(reloc_handlers))
+			handler = reloc_handlers[type];
 		else
 			handler = NULL;
 
@@ -288,8 +368,17 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 			goto out;
 		}
 
-		v = sym->st_value;
-		err = handler(me, location, v);
+		if (rela) {
+			v = sym->st_value + r.rela->r_addend;
+			base = 0;
+			r.rela = &r.rela[1];
+		} else {
+			v = sym->st_value;
+			base = *location;
+			r.rel = &r.rel[1];
+		}
+
+		err = handler(me, location, base, v, rela);
 		if (err)
 			goto out;
 	}
@@ -312,6 +401,22 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 	return err;
 }
 
+int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
+		   unsigned int symindex, unsigned int relsec,
+		   struct module *me)
+{
+	return __apply_relocate(sechdrs, strtab, symindex, relsec, me, false);
+}
+
+#ifdef CONFIG_MODULES_USE_ELF_RELA
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+		       unsigned int symindex, unsigned int relsec,
+		       struct module *me)
+{
+	return __apply_relocate(sechdrs, strtab, symindex, relsec, me, true);
+}
+#endif /* CONFIG_MODULES_USE_ELF_RELA */
+
 /* Given an address, look for it in the module exception tables. */
 const struct exception_table_entry *search_module_dbetables(unsigned long addr)
 {
-- 
GitLab


From ce807d5f67ed309a6f357b88cc93185d89e921d3 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 13 Jun 2017 15:28:43 -0700
Subject: [PATCH 0459/1958] MIPS: Optimize uasm insn lookup.

Instead of doing a linear search through the insn_table for each
instruction, use the opcode as direct index into the table.  This will
give constant time lookup performance as the number of supported
opcodes increases.  Make the tables const as they are only ever read.
For uasm-mips.c sort the table alphabetically, and remove duplicate
entries, uasm-micromips.c was already sorted and duplicate free.
There is a small savings in object size as struct insn loses a field:

$ size arch/mips/mm/uasm-mips.o arch/mips/mm/uasm-mips.o.save
   text	   data	    bss	    dec	    hex	filename
  10040	      0	      0	  10040	   2738	arch/mips/mm/uasm-mips.o
   9240	   1120	      0	  10360	   2878	arch/mips/mm/uasm-mips.o.save

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16365/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/uasm-micromips.c | 188 ++++++++++++++---------------
 arch/mips/mm/uasm-mips.c      | 217 +++++++++++++++++-----------------
 arch/mips/mm/uasm.c           |   3 +-
 3 files changed, 199 insertions(+), 209 deletions(-)

diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index 277cf52d80e18..c28ff53c8da08 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -40,93 +40,92 @@
 
 #include "uasm.c"
 
-static struct insn insn_table_MM[] = {
-	{ insn_addu, M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD },
-	{ insn_addiu, M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
-	{ insn_and, M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD },
-	{ insn_andi, M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM },
-	{ insn_beq, M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
-	{ insn_beql, 0, 0 },
-	{ insn_bgez, M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM },
-	{ insn_bgezl, 0, 0 },
-	{ insn_bltz, M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM },
-	{ insn_bltzl, 0, 0 },
-	{ insn_bne, M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM },
-	{ insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM },
-	{ insn_cfc1, M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS },
-	{ insn_cfcmsa, M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE },
-	{ insn_ctc1, M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS },
-	{ insn_ctcmsa, M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE },
-	{ insn_daddu, 0, 0 },
-	{ insn_daddiu, 0, 0 },
-	{ insn_di, M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS },
-	{ insn_divu, M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS },
-	{ insn_dmfc0, 0, 0 },
-	{ insn_dmtc0, 0, 0 },
-	{ insn_dsll, 0, 0 },
-	{ insn_dsll32, 0, 0 },
-	{ insn_dsra, 0, 0 },
-	{ insn_dsrl, 0, 0 },
-	{ insn_dsrl32, 0, 0 },
-	{ insn_drotr, 0, 0 },
-	{ insn_drotr32, 0, 0 },
-	{ insn_dsubu, 0, 0 },
-	{ insn_eret, M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0 },
-	{ insn_ins, M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE },
-	{ insn_ext, M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE },
-	{ insn_j, M(mm_j32_op, 0, 0, 0, 0, 0), JIMM },
-	{ insn_jal, M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM },
-	{ insn_jalr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS },
-	{ insn_jr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS },
-	{ insn_lb, M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
-	{ insn_ld, 0, 0 },
-	{ insn_lh, M(mm_lh32_op, 0, 0, 0, 0, 0), RS | RS | SIMM },
-	{ insn_ll, M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM },
-	{ insn_lld, 0, 0 },
-	{ insn_lui, M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM },
-	{ insn_lw, M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
-	{ insn_mfc0, M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD },
-	{ insn_mfhi, M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS },
-	{ insn_mflo, M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS },
-	{ insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD },
-	{ insn_mthi, M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS },
-	{ insn_mtlo, M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS },
-	{ insn_mul, M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD },
-	{ insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD },
-	{ insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM },
-	{ insn_pref, M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM },
-	{ insn_rfe, 0, 0 },
-	{ insn_sc, M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM },
-	{ insn_scd, 0, 0 },
-	{ insn_sd, 0, 0 },
-	{ insn_sll, M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD },
-	{ insn_sllv, M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD },
-	{ insn_slt, M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD },
-	{ insn_sltiu, M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
-	{ insn_sltu, M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD },
-	{ insn_sra, M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD },
-	{ insn_srl, M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD },
-	{ insn_srlv, M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD },
-	{ insn_rotr, M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD },
-	{ insn_subu, M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD },
-	{ insn_sw, M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM },
-	{ insn_sync, M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS },
-	{ insn_tlbp, M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0 },
-	{ insn_tlbr, M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0 },
-	{ insn_tlbwi, M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0 },
-	{ insn_tlbwr, M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0 },
-	{ insn_wait, M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM },
-	{ insn_wsbh, M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS },
-	{ insn_xor, M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD },
-	{ insn_xori, M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM },
-	{ insn_dins, 0, 0 },
-	{ insn_dinsm, 0, 0 },
-	{ insn_syscall, M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM},
-	{ insn_bbit0, 0, 0 },
-	{ insn_bbit1, 0, 0 },
-	{ insn_lwx, 0, 0 },
-	{ insn_ldx, 0, 0 },
-	{ insn_invalid, 0, 0 }
+static const struct insn const insn_table_MM[insn_invalid] = {
+	[insn_addu]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD},
+	[insn_addiu]	= {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_and]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD},
+	[insn_andi]	= {M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM},
+	[insn_beq]	= {M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_beql]	= {0, 0},
+	[insn_bgez]	= {M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM},
+	[insn_bgezl]	= {0, 0},
+	[insn_bltz]	= {M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM},
+	[insn_bltzl]	= {0, 0},
+	[insn_bne]	= {M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM},
+	[insn_cache]	= {M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM},
+	[insn_cfc1]	= {M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS},
+	[insn_cfcmsa]	= {M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE},
+	[insn_ctc1]	= {M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS},
+	[insn_ctcmsa]	= {M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE},
+	[insn_daddu]	= {0, 0},
+	[insn_daddiu]	= {0, 0},
+	[insn_di]	= {M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS},
+	[insn_divu]	= {M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS},
+	[insn_dmfc0]	= {0, 0},
+	[insn_dmtc0]	= {0, 0},
+	[insn_dsll]	= {0, 0},
+	[insn_dsll32]	= {0, 0},
+	[insn_dsra]	= {0, 0},
+	[insn_dsrl]	= {0, 0},
+	[insn_dsrl32]	= {0, 0},
+	[insn_drotr]	= {0, 0},
+	[insn_drotr32]	= {0, 0},
+	[insn_dsubu]	= {0, 0},
+	[insn_eret]	= {M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0},
+	[insn_ins]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE},
+	[insn_ext]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE},
+	[insn_j]	= {M(mm_j32_op, 0, 0, 0, 0, 0), JIMM},
+	[insn_jal]	= {M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM},
+	[insn_jalr]	= {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS},
+	[insn_jr]	= {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS},
+	[insn_lb]	= {M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_ld]	= {0, 0},
+	[insn_lh]	= {M(mm_lh32_op, 0, 0, 0, 0, 0), RS | RS | SIMM},
+	[insn_ll]	= {M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM},
+	[insn_lld]	= {0, 0},
+	[insn_lui]	= {M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM},
+	[insn_lw]	= {M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_mfc0]	= {M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD},
+	[insn_mfhi]	= {M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS},
+	[insn_mflo]	= {M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS},
+	[insn_mtc0]	= {M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD},
+	[insn_mthi]	= {M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS},
+	[insn_mtlo]	= {M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS},
+	[insn_mul]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD},
+	[insn_or]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD},
+	[insn_ori]	= {M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM},
+	[insn_pref]	= {M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM},
+	[insn_rfe]	= {0, 0},
+	[insn_sc]	= {M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM},
+	[insn_scd]	= {0, 0},
+	[insn_sd]	= {0, 0},
+	[insn_sll]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD},
+	[insn_sllv]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD},
+	[insn_slt]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD},
+	[insn_sltiu]	= {M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_sltu]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD},
+	[insn_sra]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD},
+	[insn_srl]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD},
+	[insn_srlv]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD},
+	[insn_rotr]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD},
+	[insn_subu]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD},
+	[insn_sw]	= {M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
+	[insn_sync]	= {M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS},
+	[insn_tlbp]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0},
+	[insn_tlbr]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0},
+	[insn_tlbwi]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0},
+	[insn_tlbwr]	= {M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0},
+	[insn_wait]	= {M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM},
+	[insn_wsbh]	= {M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS},
+	[insn_xor]	= {M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD},
+	[insn_xori]	= {M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM},
+	[insn_dins]	= {0, 0},
+	[insn_dinsm]	= {0, 0},
+	[insn_syscall]	= {M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM},
+	[insn_bbit0]	= {0, 0},
+	[insn_bbit1]	= {0, 0},
+	[insn_lwx]	= {0, 0},
+	[insn_ldx]	= {0, 0},
 };
 
 #undef M
@@ -156,20 +155,17 @@ static inline u32 build_jimm(u32 arg)
  */
 static void build_insn(u32 **buf, enum opcode opc, ...)
 {
-	struct insn *ip = NULL;
-	unsigned int i;
+	const struct insn *ip;
 	va_list ap;
 	u32 op;
 
-	for (i = 0; insn_table_MM[i].opcode != insn_invalid; i++)
-		if (insn_table_MM[i].opcode == opc) {
-			ip = &insn_table_MM[i];
-			break;
-		}
-
-	if (!ip || (opc == insn_daddiu && r4k_daddiu_bug()))
+	if (opc < 0 || opc >= insn_invalid ||
+	    (opc == insn_daddiu && r4k_daddiu_bug()) ||
+	    (insn_table_MM[opc].match == 0 && insn_table_MM[opc].fields == 0))
 		panic("Unsupported Micro-assembler instruction %d", opc);
 
+	ip = &insn_table_MM[opc];
+
 	op = ip->match;
 	va_start(ap, opc);
 	if (ip->fields & RS) {
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 2277499fe6aec..f3937e315ba84 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -48,126 +48,124 @@
 
 #include "uasm.c"
 
-static struct insn insn_table[] = {
-	{ insn_addiu, M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
-	{ insn_addu, M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD },
-	{ insn_andi, M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM },
-	{ insn_and, M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD },
-	{ insn_bbit0, M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
-	{ insn_bbit1, M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
-	{ insn_beql, M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
-	{ insn_beq, M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
-	{ insn_bgezl, M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM },
-	{ insn_bgez, M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM },
-	{ insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM },
-	{ insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM },
-	{ insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
+static const struct insn const insn_table[insn_invalid] = {
+	[insn_addiu]	= {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_addu]	= {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD},
+	[insn_and]	= {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD},
+	[insn_andi]	= {M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM},
+	[insn_bbit0]	= {M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_bbit1]	= {M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_beq]	= {M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_beql]	= {M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_bgez]	= {M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM},
+	[insn_bgezl]	= {M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM},
+	[insn_bltz]	= {M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM},
+	[insn_bltzl]	= {M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM},
+	[insn_bne]	= {M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
 #ifndef CONFIG_CPU_MIPSR6
-	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	[insn_cache]	= {M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 #else
-	{ insn_cache,  M6(spec3_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
+	[insn_cache]	= {M6(spec3_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9},
 #endif
-	{ insn_cfc1, M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD },
-	{ insn_cfcmsa, M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE },
-	{ insn_ctc1, M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD },
-	{ insn_ctcmsa, M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE },
-	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
-	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
-	{ insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE },
-	{ insn_di, M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT },
-	{ insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE },
-	{ insn_divu, M(spec_op, 0, 0, 0, 0, divu_op), RS | RT },
-	{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
-	{ insn_dmtc0, M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
-	{ insn_drotr32, M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE },
-	{ insn_drotr, M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE },
-	{ insn_dsll32, M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE },
-	{ insn_dsll, M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE },
-	{ insn_dsra, M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE },
-	{ insn_dsrl32, M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE },
-	{ insn_dsrl, M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE },
-	{ insn_dsubu, M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD },
-	{ insn_eret,  M(cop0_op, cop_op, 0, 0, 0, eret_op),  0 },
-	{ insn_ext, M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE },
-	{ insn_ins, M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE },
-	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM },
-	{ insn_jal,  M(jal_op, 0, 0, 0, 0, 0),	JIMM },
-	{ insn_jalr,  M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD },
-	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM },
+	[insn_cfc1]	= {M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD},
+	[insn_cfcmsa]	= {M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE},
+	[insn_ctc1]	= {M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD},
+	[insn_ctcmsa]	= {M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE},
+	[insn_daddiu]	= {M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_daddu]	= {M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD},
+	[insn_di]	= {M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT},
+	[insn_dins]	= {M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE},
+	[insn_dinsm]	= {M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE},
+	[insn_divu]	= {M(spec_op, 0, 0, 0, 0, divu_op), RS | RT},
+	[insn_dmfc0]	= {M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
+	[insn_dmtc0]	= {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
+	[insn_drotr]	= {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
+	[insn_drotr32]	= {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
+	[insn_dsll]	= {M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE},
+	[insn_dsll32]	= {M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE},
+	[insn_dsra]	= {M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE},
+	[insn_dsrl]	= {M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE},
+	[insn_dsrl32]	= {M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE},
+	[insn_dsubu]	= {M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD},
+	[insn_eret]	= {M(cop0_op, cop_op, 0, 0, 0, eret_op),  0},
+	[insn_ext]	= {M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE},
+	[insn_ins]	= {M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE},
+	[insn_j]	= {M(j_op, 0, 0, 0, 0, 0),  JIMM},
+	[insn_jal]	= {M(jal_op, 0, 0, 0, 0, 0),	JIMM},
+	[insn_jalr]	= {M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD},
 #ifndef CONFIG_CPU_MIPSR6
-	{ insn_jr,  M(spec_op, 0, 0, 0, 0, jr_op),  RS },
+	[insn_jr]	= {M(spec_op, 0, 0, 0, 0, jr_op),  RS},
 #else
-	{ insn_jr,  M(spec_op, 0, 0, 0, 0, jalr_op),  RS },
+	[insn_jr]	= {M(spec_op, 0, 0, 0, 0, jalr_op),  RS},
 #endif
-	{ insn_lb, M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
-	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
-	{ insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD },
-	{ insn_lh,  M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
-	{ insn_lhu,  M(lhu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	[insn_lb]	= {M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_ld]	= {M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lddir]	= {M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD},
+	[insn_ldpte]	= {M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD},
+	[insn_ldx]	= {M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD},
+	[insn_lh]	= {M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lhu]	= {M(lhu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 #ifndef CONFIG_CPU_MIPSR6
-	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
-	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	[insn_ll]	= {M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lld]	= {M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM},
 #else
-	{ insn_lld,  M6(spec3_op, 0, 0, 0, lld6_op),  RS | RT | SIMM9 },
-	{ insn_ll,  M6(spec3_op, 0, 0, 0, ll6_op),  RS | RT | SIMM9 },
+	[insn_ll]	= {M6(spec3_op, 0, 0, 0, ll6_op),  RS | RT | SIMM9},
+	[insn_lld]	= {M6(spec3_op, 0, 0, 0, lld6_op),  RS | RT | SIMM9},
 #endif
-	{ insn_lui,  M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM },
-	{ insn_lw,  M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
-	{ insn_lwx, M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD },
-	{ insn_mfc0,  M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET},
-	{ insn_mfhc0,  M(cop0_op, mfhc0_op, 0, 0, 0, 0),  RT | RD | SET},
-	{ insn_mfhi,  M(spec_op, 0, 0, 0, 0, mfhi_op), RD },
-	{ insn_mflo,  M(spec_op, 0, 0, 0, 0, mflo_op), RD },
-	{ insn_mtc0,  M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
-	{ insn_mthc0,  M(cop0_op, mthc0_op, 0, 0, 0, 0),  RT | RD | SET},
-	{ insn_mthi,  M(spec_op, 0, 0, 0, 0, mthi_op), RS },
-	{ insn_mtlo,  M(spec_op, 0, 0, 0, 0, mtlo_op), RS },
+	[insn_lui]	= {M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM},
+	[insn_lw]	= {M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lwx]	= {M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD},
+	[insn_mfc0]	= {M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mfhc0]	= {M(cop0_op, mfhc0_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mfhi]	= {M(spec_op, 0, 0, 0, 0, mfhi_op), RD},
+	[insn_mflo]	= {M(spec_op, 0, 0, 0, 0, mflo_op), RD},
+	[insn_mtc0]	= {M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mthc0]	= {M(cop0_op, mthc0_op, 0, 0, 0, 0),  RT | RD | SET},
+	[insn_mthi]	= {M(spec_op, 0, 0, 0, 0, mthi_op), RS},
+	[insn_mtlo]	= {M(spec_op, 0, 0, 0, 0, mtlo_op), RS},
 #ifndef CONFIG_CPU_MIPSR6
-	{ insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
+	[insn_mul]	= {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
 #else
-	{ insn_mul, M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD},
+	[insn_mul]	= {M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD},
 #endif
-	{ insn_ori,  M(ori_op, 0, 0, 0, 0, 0),	RS | RT | UIMM },
-	{ insn_or,  M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD },
+	[insn_or]	= {M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD},
+	[insn_ori]	= {M(ori_op, 0, 0, 0, 0, 0),	RS | RT | UIMM},
 #ifndef CONFIG_CPU_MIPSR6
-	{ insn_pref,  M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	[insn_pref]	= {M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 #else
-	{ insn_pref,  M6(spec3_op, 0, 0, 0, pref6_op),  RS | RT | SIMM9 },
+	[insn_pref]	= {M6(spec3_op, 0, 0, 0, pref6_op),  RS | RT | SIMM9},
 #endif
-	{ insn_rfe,  M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0 },
-	{ insn_rotr,  M(spec_op, 1, 0, 0, 0, srl_op),  RT | RD | RE },
+	[insn_rfe]	= {M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0},
+	[insn_rotr]	= {M(spec_op, 1, 0, 0, 0, srl_op),  RT | RD | RE},
 #ifndef CONFIG_CPU_MIPSR6
-	{ insn_scd,  M(scd_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
-	{ insn_sc,  M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+	[insn_sc]	= {M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_scd]	= {M(scd_op, 0, 0, 0, 0, 0),	RS | RT | SIMM},
 #else
-	{ insn_scd,  M6(spec3_op, 0, 0, 0, scd6_op),  RS | RT | SIMM9 },
-	{ insn_sc,  M6(spec3_op, 0, 0, 0, sc6_op),  RS | RT | SIMM9 },
+	[insn_sc]	= {M6(spec3_op, 0, 0, 0, sc6_op),  RS | RT | SIMM9},
+	[insn_scd]	= {M6(spec3_op, 0, 0, 0, scd6_op),  RS | RT | SIMM9},
 #endif
-	{ insn_sd,  M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
-	{ insn_sll,  M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE },
-	{ insn_sllv,  M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD },
-	{ insn_slt,  M(spec_op, 0, 0, 0, 0, slt_op),  RS | RT | RD },
-	{ insn_sltiu, M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
-	{ insn_sltu, M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD },
-	{ insn_sra,  M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE },
-	{ insn_srl,  M(spec_op, 0, 0, 0, 0, srl_op),  RT | RD | RE },
-	{ insn_srlv,  M(spec_op, 0, 0, 0, 0, srlv_op),  RS | RT | RD },
-	{ insn_subu,  M(spec_op, 0, 0, 0, 0, subu_op),	RS | RT | RD },
-	{ insn_sw,  M(sw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
-	{ insn_sync, M(spec_op, 0, 0, 0, 0, sync_op), RE },
-	{ insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM},
-	{ insn_tlbp,  M(cop0_op, cop_op, 0, 0, 0, tlbp_op),  0 },
-	{ insn_tlbr,  M(cop0_op, cop_op, 0, 0, 0, tlbr_op),  0 },
-	{ insn_tlbwi,  M(cop0_op, cop_op, 0, 0, 0, tlbwi_op),  0 },
-	{ insn_tlbwr,  M(cop0_op, cop_op, 0, 0, 0, tlbwr_op),  0 },
-	{ insn_wait, M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM },
-	{ insn_wsbh, M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD },
-	{ insn_xori,  M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM },
-	{ insn_xor,  M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD },
-	{ insn_yield, M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD },
-	{ insn_ldpte, M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD },
-	{ insn_lddir, M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD },
-	{ insn_invalid, 0, 0 }
+	[insn_sd]	= {M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_sll]	= {M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE},
+	[insn_sllv]	= {M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD},
+	[insn_slt]	= {M(spec_op, 0, 0, 0, 0, slt_op),  RS | RT | RD},
+	[insn_sltiu]	= {M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_sltu]	= {M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD},
+	[insn_sra]	= {M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE},
+	[insn_srl]	= {M(spec_op, 0, 0, 0, 0, srl_op),  RT | RD | RE},
+	[insn_srlv]	= {M(spec_op, 0, 0, 0, 0, srlv_op),  RS | RT | RD},
+	[insn_subu]	= {M(spec_op, 0, 0, 0, 0, subu_op),	RS | RT | RD},
+	[insn_sw]	= {M(sw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_sync]	= {M(spec_op, 0, 0, 0, 0, sync_op), RE},
+	[insn_syscall]	= {M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM},
+	[insn_tlbp]	= {M(cop0_op, cop_op, 0, 0, 0, tlbp_op),  0},
+	[insn_tlbr]	= {M(cop0_op, cop_op, 0, 0, 0, tlbr_op),  0},
+	[insn_tlbwi]	= {M(cop0_op, cop_op, 0, 0, 0, tlbwi_op),  0},
+	[insn_tlbwr]	= {M(cop0_op, cop_op, 0, 0, 0, tlbwr_op),  0},
+	[insn_wait]	= {M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM},
+	[insn_wsbh]	= {M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD},
+	[insn_xor]	= {M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD},
+	[insn_xori]	= {M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM},
+	[insn_yield]	= {M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD},
 };
 
 #undef M
@@ -196,20 +194,17 @@ static inline u32 build_jimm(u32 arg)
  */
 static void build_insn(u32 **buf, enum opcode opc, ...)
 {
-	struct insn *ip = NULL;
-	unsigned int i;
+	const struct insn *ip;
 	va_list ap;
 	u32 op;
 
-	for (i = 0; insn_table[i].opcode != insn_invalid; i++)
-		if (insn_table[i].opcode == opc) {
-			ip = &insn_table[i];
-			break;
-		}
-
-	if (!ip || (opc == insn_daddiu && r4k_daddiu_bug()))
+	if (opc < 0 || opc >= insn_invalid ||
+	    (opc == insn_daddiu && r4k_daddiu_bug()) ||
+	    (insn_table[opc].match == 0 && insn_table[opc].fields == 0))
 		panic("Unsupported Micro-assembler instruction %d", opc);
 
+	ip = &insn_table[opc];
+
 	op = ip->match;
 	va_start(ap, opc);
 	if (ip->fields & RS)
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 730363b59baca..f23ed85a0d6cd 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -46,7 +46,6 @@ enum fields {
 #define SIMM9_MASK	0x1ff
 
 enum opcode {
-	insn_invalid,
 	insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1,
 	insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
 	insn_bne, insn_cache, insn_cfc1, insn_cfcmsa, insn_ctc1, insn_ctcmsa,
@@ -62,10 +61,10 @@ enum opcode {
 	insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp,
 	insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor,
 	insn_xori, insn_yield, insn_lddir, insn_ldpte, insn_lhu,
+	insn_invalid /* insn_invalid must be last */
 };
 
 struct insn {
-	enum opcode opcode;
 	u32 match;
 	enum fields fields;
 };
-- 
GitLab


From 1f22d599c9849898ef9e3229434b87438278cccc Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 13 Jun 2017 15:28:44 -0700
Subject: [PATCH 0460/1958] MIPS: Correctly define DBSHFL type instruction
 opcodes.

DSHD was incorrectly classified as being BSHFL, and DSHD was missing
altogether.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16366/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/uapi/asm/inst.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index b5e46ae872d3b..e5f538584b8e1 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -276,11 +276,18 @@ enum lx_func {
  */
 enum bshfl_func {
 	wsbh_op = 0x2,
-	dshd_op = 0x5,
 	seb_op  = 0x10,
 	seh_op  = 0x18,
 };
 
+/*
+ * DBSHFL opcodes
+ */
+enum dbshfl_func {
+	dsbh_op = 0x2,
+	dshd_op = 0x5,
+};
+
 /*
  * MSA minor opcodes.
  */
-- 
GitLab


From dc190129f1c16e025a42a9c3717de7ed47fc6e2f Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 13 Jun 2017 15:28:45 -0700
Subject: [PATCH 0461/1958] MIPS: Add some instructions to uasm.

Follow on patches for eBPF JIT require these additional instructions:

   insn_bgtz, insn_blez, insn_break, insn_ddivu, insn_dmultu,
   insn_dsbh, insn_dshd, insn_dsllv, insn_dsra32, insn_dsrav,
   insn_dsrlv, insn_lbu, insn_movn, insn_movz, insn_multu, insn_nor,
   insn_sb, insn_sh, insn_slti, insn_dinsu, insn_lwu

... so, add them.

Sort the insn_* enumeration values alphabetically.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16367/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/uasm.h | 30 +++++++++++++++++++
 arch/mips/mm/uasm-mips.c     | 21 +++++++++++++
 arch/mips/mm/uasm.c          | 58 ++++++++++++++++++++++++++++--------
 3 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index 3748f4d120a5a..59dae37f6b8dd 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -72,9 +72,12 @@ Ip_u1u2s3(_beq);
 Ip_u1u2s3(_beql);
 Ip_u1s2(_bgez);
 Ip_u1s2(_bgezl);
+Ip_u1s2(_bgtz);
+Ip_u1s2(_blez);
 Ip_u1s2(_bltz);
 Ip_u1s2(_bltzl);
 Ip_u1u2s3(_bne);
+Ip_u1(_break);
 Ip_u2s3u1(_cache);
 Ip_u1u2(_cfc1);
 Ip_u2u1(_cfcmsa);
@@ -82,19 +85,28 @@ Ip_u1u2(_ctc1);
 Ip_u2u1(_ctcmsa);
 Ip_u2u1s3(_daddiu);
 Ip_u3u1u2(_daddu);
+Ip_u1u2(_ddivu);
 Ip_u1(_di);
 Ip_u2u1msbu3(_dins);
 Ip_u2u1msbu3(_dinsm);
+Ip_u2u1msbu3(_dinsu);
 Ip_u1u2(_divu);
 Ip_u1u2u3(_dmfc0);
 Ip_u1u2u3(_dmtc0);
+Ip_u1u2(_dmultu);
 Ip_u2u1u3(_drotr);
 Ip_u2u1u3(_drotr32);
+Ip_u2u1(_dsbh);
+Ip_u2u1(_dshd);
 Ip_u2u1u3(_dsll);
 Ip_u2u1u3(_dsll32);
+Ip_u3u2u1(_dsllv);
 Ip_u2u1u3(_dsra);
+Ip_u2u1u3(_dsra32);
+Ip_u3u2u1(_dsrav);
 Ip_u2u1u3(_dsrl);
 Ip_u2u1u3(_dsrl32);
+Ip_u3u2u1(_dsrlv);
 Ip_u3u1u2(_dsubu);
 Ip_0(_eret);
 Ip_u2u1msbu3(_ext);
@@ -104,6 +116,7 @@ Ip_u1(_jal);
 Ip_u2u1(_jalr);
 Ip_u1(_jr);
 Ip_u2s3u1(_lb);
+Ip_u2s3u1(_lbu);
 Ip_u2s3u1(_ld);
 Ip_u3u1u2(_ldx);
 Ip_u2s3u1(_lh);
@@ -112,27 +125,35 @@ Ip_u2s3u1(_ll);
 Ip_u2s3u1(_lld);
 Ip_u1s2(_lui);
 Ip_u2s3u1(_lw);
+Ip_u2s3u1(_lwu);
 Ip_u3u1u2(_lwx);
 Ip_u1u2u3(_mfc0);
 Ip_u1u2u3(_mfhc0);
 Ip_u1(_mfhi);
 Ip_u1(_mflo);
+Ip_u3u1u2(_movn);
+Ip_u3u1u2(_movz);
 Ip_u1u2u3(_mtc0);
 Ip_u1u2u3(_mthc0);
 Ip_u1(_mthi);
 Ip_u1(_mtlo);
 Ip_u3u1u2(_mul);
+Ip_u1u2(_multu);
+Ip_u3u1u2(_nor);
 Ip_u3u1u2(_or);
 Ip_u2u1u3(_ori);
 Ip_u2s3u1(_pref);
 Ip_0(_rfe);
 Ip_u2u1u3(_rotr);
+Ip_u2s3u1(_sb);
 Ip_u2s3u1(_sc);
 Ip_u2s3u1(_scd);
 Ip_u2s3u1(_sd);
+Ip_u2s3u1(_sh);
 Ip_u2u1u3(_sll);
 Ip_u3u2u1(_sllv);
 Ip_s3s1s2(_slt);
+Ip_u2u1s3(_slti);
 Ip_u2u1s3(_sltiu);
 Ip_u3u1u2(_sltu);
 Ip_u2u1u3(_sra);
@@ -248,6 +269,15 @@ static inline void uasm_i_dsrl_safe(u32 **p, unsigned int a1,
 		uasm_i_dsrl32(p, a1, a2, a3 - 32);
 }
 
+static inline void uasm_i_dsra_safe(u32 **p, unsigned int a1,
+				    unsigned int a2, unsigned int a3)
+{
+	if (a3 < 32)
+		uasm_i_dsra(p, a1, a2, a3);
+	else
+		uasm_i_dsra32(p, a1, a2, a3 - 32);
+}
+
 /* Handle relocations. */
 struct uasm_reloc {
 	u32 *addr;
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index f3937e315ba84..3f74f6c1f065f 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -59,9 +59,12 @@ static const struct insn const insn_table[insn_invalid] = {
 	[insn_beql]	= {M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
 	[insn_bgez]	= {M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM},
 	[insn_bgezl]	= {M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM},
+	[insn_bgtz]	= {M(bgtz_op, 0, 0, 0, 0, 0), RS | BIMM},
+	[insn_blez]	= {M(blez_op, 0, 0, 0, 0, 0), RS | BIMM},
 	[insn_bltz]	= {M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM},
 	[insn_bltzl]	= {M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM},
 	[insn_bne]	= {M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM},
+	[insn_break]	= {M(spec_op, 0, 0, 0, 0, break_op), SCIMM},
 #ifndef CONFIG_CPU_MIPSR6
 	[insn_cache]	= {M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 #else
@@ -73,19 +76,28 @@ static const struct insn const insn_table[insn_invalid] = {
 	[insn_ctcmsa]	= {M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE},
 	[insn_daddiu]	= {M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
 	[insn_daddu]	= {M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD},
+	[insn_ddivu]	= {M(spec_op, 0, 0, 0, 0, ddivu_op), RS | RT},
 	[insn_di]	= {M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT},
 	[insn_dins]	= {M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE},
 	[insn_dinsm]	= {M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE},
+	[insn_dinsu]	= {M(spec3_op, 0, 0, 0, 0, dinsu_op), RS | RT | RD | RE},
 	[insn_divu]	= {M(spec_op, 0, 0, 0, 0, divu_op), RS | RT},
 	[insn_dmfc0]	= {M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
 	[insn_dmtc0]	= {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
+	[insn_dmultu]	= {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT},
 	[insn_drotr]	= {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
 	[insn_drotr32]	= {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
+	[insn_dsbh]	= {M(spec3_op, 0, 0, 0, dsbh_op, dbshfl_op), RT | RD},
+	[insn_dshd]	= {M(spec3_op, 0, 0, 0, dshd_op, dbshfl_op), RT | RD},
 	[insn_dsll]	= {M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE},
 	[insn_dsll32]	= {M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE},
+	[insn_dsllv]	= {M(spec_op, 0, 0, 0, 0, dsllv_op),  RS | RT | RD},
 	[insn_dsra]	= {M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE},
+	[insn_dsra32]	= {M(spec_op, 0, 0, 0, 0, dsra32_op), RT | RD | RE},
+	[insn_dsrav]	= {M(spec_op, 0, 0, 0, 0, dsrav_op),  RS | RT | RD},
 	[insn_dsrl]	= {M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE},
 	[insn_dsrl32]	= {M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE},
+	[insn_dsrlv]	= {M(spec_op, 0, 0, 0, 0, dsrlv_op),  RS | RT | RD},
 	[insn_dsubu]	= {M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD},
 	[insn_eret]	= {M(cop0_op, cop_op, 0, 0, 0, eret_op),  0},
 	[insn_ext]	= {M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE},
@@ -99,6 +111,7 @@ static const struct insn const insn_table[insn_invalid] = {
 	[insn_jr]	= {M(spec_op, 0, 0, 0, 0, jalr_op),  RS},
 #endif
 	[insn_lb]	= {M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+	[insn_lbu]	= {M(lbu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
 	[insn_ld]	= {M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 	[insn_lddir]	= {M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD},
 	[insn_ldpte]	= {M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD},
@@ -114,11 +127,14 @@ static const struct insn const insn_table[insn_invalid] = {
 #endif
 	[insn_lui]	= {M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM},
 	[insn_lw]	= {M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_lwu]	= {M(lwu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 	[insn_lwx]	= {M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD},
 	[insn_mfc0]	= {M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET},
 	[insn_mfhc0]	= {M(cop0_op, mfhc0_op, 0, 0, 0, 0),  RT | RD | SET},
 	[insn_mfhi]	= {M(spec_op, 0, 0, 0, 0, mfhi_op), RD},
 	[insn_mflo]	= {M(spec_op, 0, 0, 0, 0, mflo_op), RD},
+	[insn_movn]	= {M(spec_op, 0, 0, 0, 0, movn_op), RS | RT | RD},
+	[insn_movz]	= {M(spec_op, 0, 0, 0, 0, movz_op), RS | RT | RD},
 	[insn_mtc0]	= {M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
 	[insn_mthc0]	= {M(cop0_op, mthc0_op, 0, 0, 0, 0),  RT | RD | SET},
 	[insn_mthi]	= {M(spec_op, 0, 0, 0, 0, mthi_op), RS},
@@ -128,6 +144,8 @@ static const struct insn const insn_table[insn_invalid] = {
 #else
 	[insn_mul]	= {M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD},
 #endif
+	[insn_multu]	= {M(spec_op, 0, 0, 0, 0, multu_op), RS | RT},
+	[insn_nor]	= {M(spec_op, 0, 0, 0, 0, nor_op),  RS | RT | RD},
 	[insn_or]	= {M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD},
 	[insn_ori]	= {M(ori_op, 0, 0, 0, 0, 0),	RS | RT | UIMM},
 #ifndef CONFIG_CPU_MIPSR6
@@ -137,6 +155,7 @@ static const struct insn const insn_table[insn_invalid] = {
 #endif
 	[insn_rfe]	= {M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0},
 	[insn_rotr]	= {M(spec_op, 1, 0, 0, 0, srl_op),  RT | RD | RE},
+	[insn_sb]	= {M(sb_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 #ifndef CONFIG_CPU_MIPSR6
 	[insn_sc]	= {M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 	[insn_scd]	= {M(scd_op, 0, 0, 0, 0, 0),	RS | RT | SIMM},
@@ -145,9 +164,11 @@ static const struct insn const insn_table[insn_invalid] = {
 	[insn_scd]	= {M6(spec3_op, 0, 0, 0, scd6_op),  RS | RT | SIMM9},
 #endif
 	[insn_sd]	= {M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
+	[insn_sh]	= {M(sh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
 	[insn_sll]	= {M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE},
 	[insn_sllv]	= {M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD},
 	[insn_slt]	= {M(spec_op, 0, 0, 0, 0, slt_op),  RS | RT | RD},
+	[insn_slti]	= {M(slti_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
 	[insn_sltiu]	= {M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
 	[insn_sltu]	= {M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD},
 	[insn_sra]	= {M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE},
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index f23ed85a0d6cd..57570c0649b46 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -47,20 +47,24 @@ enum fields {
 
 enum opcode {
 	insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1,
-	insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
-	insn_bne, insn_cache, insn_cfc1, insn_cfcmsa, insn_ctc1, insn_ctcmsa,
-	insn_daddiu, insn_daddu, insn_di, insn_dins, insn_dinsm, insn_divu,
-	insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
-	insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret,
-	insn_ext, insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb,
-	insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw,
-	insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, insn_mflo, insn_mtc0,
-	insn_mthc0, insn_mthi, insn_mtlo, insn_mul, insn_or, insn_ori,
-	insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, insn_sd, insn_sll,
-	insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, insn_srl,
+	insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bgtz, insn_blez,
+	insn_bltz, insn_bltzl, insn_bne, insn_break, insn_cache, insn_cfc1,
+	insn_cfcmsa, insn_ctc1, insn_ctcmsa, insn_daddiu, insn_daddu, insn_ddivu,
+	insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu, insn_dmfc0,
+	insn_dmtc0, insn_dmultu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd,
+	insn_dsll, insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav,
+	insn_dsrl, insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext,
+	insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu,
+	insn_ld, insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu,
+	insn_ll, insn_lld, insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0,
+	insn_mfhc0, insn_mfhi, insn_mflo, insn_movn, insn_movz, insn_mtc0,
+	insn_mthc0, insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_nor,
+	insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb,
+	insn_sc, insn_scd, insn_sd, insn_sh, insn_sll, insn_sllv,
+	insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, insn_srl,
 	insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp,
 	insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor,
-	insn_xori, insn_yield, insn_lddir, insn_ldpte, insn_lhu,
+	insn_xori, insn_yield,
 	insn_invalid /* insn_invalid must be last */
 };
 
@@ -214,6 +218,13 @@ Ip_u2u1msbu3(op)					\
 }							\
 UASM_EXPORT_SYMBOL(uasm_i##op);
 
+#define I_u2u1msb32msb3(op)				\
+Ip_u2u1msbu3(op)					\
+{							\
+	build_insn(buf, insn##op, b, a, c+d-33, c-32);	\
+}							\
+UASM_EXPORT_SYMBOL(uasm_i##op);
+
 #define I_u2u1msbdu3(op)				\
 Ip_u2u1msbu3(op)					\
 {							\
@@ -264,25 +275,36 @@ I_u1u2s3(_beq)
 I_u1u2s3(_beql)
 I_u1s2(_bgez)
 I_u1s2(_bgezl)
+I_u1s2(_bgtz)
+I_u1s2(_blez)
 I_u1s2(_bltz)
 I_u1s2(_bltzl)
 I_u1u2s3(_bne)
+I_u1(_break)
 I_u2s3u1(_cache)
 I_u1u2(_cfc1)
 I_u2u1(_cfcmsa)
 I_u1u2(_ctc1)
 I_u2u1(_ctcmsa)
+I_u1u2(_ddivu)
 I_u1u2u3(_dmfc0)
 I_u1u2u3(_dmtc0)
+I_u1u2(_dmultu)
 I_u2u1s3(_daddiu)
 I_u3u1u2(_daddu)
 I_u1(_di);
 I_u1u2(_divu)
+I_u2u1(_dsbh);
+I_u2u1(_dshd);
 I_u2u1u3(_dsll)
 I_u2u1u3(_dsll32)
+I_u3u2u1(_dsllv)
 I_u2u1u3(_dsra)
+I_u2u1u3(_dsra32)
+I_u3u2u1(_dsrav)
 I_u2u1u3(_dsrl)
 I_u2u1u3(_dsrl32)
+I_u3u2u1(_dsrlv)
 I_u2u1u3(_drotr)
 I_u2u1u3(_drotr32)
 I_u3u1u2(_dsubu)
@@ -294,6 +316,7 @@ I_u1(_jal)
 I_u2u1(_jalr)
 I_u1(_jr)
 I_u2s3u1(_lb)
+I_u2s3u1(_lbu)
 I_u2s3u1(_ld)
 I_u2s3u1(_lh)
 I_u2s3u1(_lhu)
@@ -301,8 +324,11 @@ I_u2s3u1(_ll)
 I_u2s3u1(_lld)
 I_u1s2(_lui)
 I_u2s3u1(_lw)
+I_u2s3u1(_lwu)
 I_u1u2u3(_mfc0)
 I_u1u2u3(_mfhc0)
+I_u3u1u2(_movn)
+I_u3u1u2(_movz)
 I_u1(_mfhi)
 I_u1(_mflo)
 I_u1u2u3(_mtc0)
@@ -310,15 +336,20 @@ I_u1u2u3(_mthc0)
 I_u1(_mthi)
 I_u1(_mtlo)
 I_u3u1u2(_mul)
-I_u2u1u3(_ori)
+I_u1u2(_multu)
+I_u3u1u2(_nor)
 I_u3u1u2(_or)
+I_u2u1u3(_ori)
 I_0(_rfe)
+I_u2s3u1(_sb)
 I_u2s3u1(_sc)
 I_u2s3u1(_scd)
 I_u2s3u1(_sd)
+I_u2s3u1(_sh)
 I_u2u1u3(_sll)
 I_u3u2u1(_sllv)
 I_s3s1s2(_slt)
+I_u2u1s3(_slti)
 I_u2u1s3(_sltiu)
 I_u3u1u2(_sltu)
 I_u2u1u3(_sra)
@@ -339,6 +370,7 @@ I_u2u1u3(_xori)
 I_u2u1(_yield)
 I_u2u1msbu3(_dins);
 I_u2u1msb32u3(_dinsm);
+I_u2u1msb32msb3(_dinsu);
 I_u1(_syscall);
 I_u1u2s3(_bbit0);
 I_u1u2s3(_bbit1);
-- 
GitLab


From f381bf6d82f032b7410185b35d000ea370ac706b Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 13 Jun 2017 15:28:46 -0700
Subject: [PATCH 0462/1958] MIPS: Add support for eBPF JIT.

Since the eBPF machine has 64-bit registers, we only support this in
64-bit kernels.  As of the writing of this commit log test-bpf is showing:

  test_bpf: Summary: 316 PASSED, 0 FAILED, [308/308 JIT'ed]

All current test cases are successfully compiled.

Many examples in samples/bpf are usable, specifically tracex5 which
uses tail calls works.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16369/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig      | 12 +++++++++++-
 arch/mips/net/Makefile |  3 ++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 0b15978c0f88a..9891a1285f25e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -33,7 +33,8 @@ config MIPS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT
-	select HAVE_CBPF_JIT if !CPU_MICROMIPS
+	select HAVE_CBPF_JIT if (!64BIT && !CPU_MICROMIPS)
+	select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS)
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_COPY_THREAD_TLS
@@ -1178,6 +1179,15 @@ config SYS_SUPPORTS_RELOCATABLE
 	 The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF
 	 to allow access to command line and entropy sources.
 
+config MIPS_CBPF_JIT
+	def_bool y
+	depends on BPF_JIT && HAVE_CBPF_JIT
+
+config MIPS_EBPF_JIT
+	def_bool y
+	depends on BPF_JIT && HAVE_EBPF_JIT
+
+
 #
 # Endianness selection.  Sufficiently obscure so many users don't know what to
 # answer,so we try hard to limit the available choices.  Also the use of a
diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile
index 8c2771401f545..47d678416715e 100644
--- a/arch/mips/net/Makefile
+++ b/arch/mips/net/Makefile
@@ -1,3 +1,4 @@
 # MIPS networking code
 
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_asm.o
+obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
+obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
-- 
GitLab


From 669c4092225f0ed5df12ebee654581b558a5e3ed Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 13 Jun 2017 15:28:47 -0700
Subject: [PATCH 0463/1958] MIPS: Give __secure_computing() access to syscall
 arguments.

KProbes of __seccomp_filter() are not very useful without access to
the syscall arguments.

Do what x86 does, and populate a struct seccomp_data to be passed to
__secure_computing().  This allows samples/bpf/tracex5 to extract a
sensible trace.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16368/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/ptrace.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 6931fe722a0b5..ba3b1f771256f 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -868,8 +868,26 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
 	    tracehook_report_syscall_entry(regs))
 		return -1;
 
-	if (secure_computing(NULL) == -1)
-		return -1;
+#ifdef CONFIG_SECCOMP
+	if (unlikely(test_thread_flag(TIF_SECCOMP))) {
+		int ret, i;
+		struct seccomp_data sd;
+
+		sd.nr = syscall;
+		sd.arch = syscall_get_arch();
+		for (i = 0; i < 6; i++) {
+			unsigned long v, r;
+
+			r = mips_get_syscall_arg(&v, current, regs, i);
+			sd.args[i] = r ? 0 : v;
+		}
+		sd.instruction_pointer = KSTK_EIP(current);
+
+		ret = __secure_computing(&sd);
+		if (ret == -1)
+			return ret;
+	}
+#endif
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[2]);
-- 
GitLab


From 498e9ade6593887397844eb8ac9d811252f7239e Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 13 Jun 2017 10:01:08 -0700
Subject: [PATCH 0464/1958] MIPS: Perform post-DMA cache flushes on systems
 with MAARs

Recent CPUs from Imagination Technologies such as the I6400 or P6600 are
able to speculatively fetch data from memory into caches. This means
that if used in a system with non-coherent DMA they require that caches
be invalidated after a device performs DMA, and before the CPU reads the
DMA'd data, in order to ensure that stale values weren't speculatively
prefetched.

Such CPUs also introduced Memory Accessibility Attribute Registers
(MAARs) in order to control the regions in which they are allowed to
speculate. Thus we can use the presence of MAARs as a good indication
that the CPU requires the above cache maintenance. Use the presence of
MAARs to determine the result of cpu_needs_post_dma_flush() in the
default case, in order to handle these recent CPUs correctly.

Note that the return type of cpu_needs_post_dma_flush() is changed to
bool, such that it's clearer what's happening when cpu_has_maar is cast
to bool for the return value. If this patch were backported to a
pre-v4.7 kernel then MIPS_CPU_MAAR was 1ull<<34, so when cast to an int
we would incorrectly return 0. It so happens that MIPS_CPU_MAAR is
currently 1ull<<30, so when truncated to an int gives a non-zero value
anyway, but even so the implicit conversion from long long int to bool
makes it clearer to understand what will happen than the implicit
conversion from long long int to int would. The bool return type also
fits this usage better semantically, so seems like an all-round win.

Thanks to Ed for spotting the issue for pre-v4.7 kernels & suggesting
the return type change.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Tested-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Cc: Ed Blake <ed.blake@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16363/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/dma-default.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index fe8df14b61698..e08598c70b3e7 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -68,12 +68,25 @@ static inline struct page *dma_addr_to_page(struct device *dev,
  * systems and only the R10000 and R12000 are used in such systems, the
  * SGI IP28 Indigo² rsp. SGI IP32 aka O2.
  */
-static inline int cpu_needs_post_dma_flush(struct device *dev)
+static inline bool cpu_needs_post_dma_flush(struct device *dev)
 {
-	return !plat_device_is_coherent(dev) &&
-	       (boot_cpu_type() == CPU_R10000 ||
-		boot_cpu_type() == CPU_R12000 ||
-		boot_cpu_type() == CPU_BMIPS5000);
+	if (plat_device_is_coherent(dev))
+		return false;
+
+	switch (boot_cpu_type()) {
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_BMIPS5000:
+		return true;
+
+	default:
+		/*
+		 * Presence of MAARs suggests that the CPU supports
+		 * speculatively prefetching data, and therefore requires
+		 * the post-DMA flush/invalidate.
+		 */
+		return cpu_has_maar;
+	}
 }
 
 static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
-- 
GitLab


From 859aeb1b0dd1b9c6ff3d78f6cb913a73af9da247 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:39:04 -0700
Subject: [PATCH 0465/1958] MIPS: Probe the I6500 CPU

Introduce the I6500 PRID & probe it just the same way as I6400. The MIPS
I6500 is the latest in Imagination Technologies' I-Class range of CPUs,
with a focus on scalability & heterogeneity. It introduces the notion of
multiple clusters to the MIPS Coherent Processing System, allowing for a
far higher total number of cores & threads in a system when compared
with its predecessors. Clusters don't need to be identical, and may
contain differing numbers of cores & IOCUs, or cores with differing
properties.

This patch alone adds the basic support for booting Linux on an I6500
CPU without support for any of its new functionality, for which support
will be introduced in further patches.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16190/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu-type.h | 1 +
 arch/mips/include/asm/cpu.h      | 3 ++-
 arch/mips/kernel/cpu-probe.c     | 5 +++++
 arch/mips/mm/c-r4k.c             | 2 ++
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index bdd6dc18e65c6..175fe565f4e1e 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -84,6 +84,7 @@ static inline int __pure __get_cpu_type(const int cpu_type)
 
 #ifdef CONFIG_SYS_HAS_CPU_MIPS64_R6
 	case CPU_I6400:
+	case CPU_I6500:
 	case CPU_P6600:
 #endif
 
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 98f59307e6a35..3069359b01204 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -124,6 +124,7 @@
 #define PRID_IMP_P5600		0xa800
 #define PRID_IMP_I6400		0xa900
 #define PRID_IMP_M6250		0xab00
+#define PRID_IMP_I6500		0xb000
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE
@@ -322,7 +323,7 @@ enum cpu_type_enum {
 	 */
 	CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
 	CPU_LOONGSON3, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS,
-	CPU_CAVIUM_OCTEON2, CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP,
+	CPU_CAVIUM_OCTEON2, CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP, CPU_I6500,
 
 	CPU_QEMU_GENERIC,
 
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 1aba27786bd5a..353ade2c130a3 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -564,6 +564,7 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
 		back_to_back_c0_hazard();
 		break;
 	case CPU_I6400:
+	case CPU_I6500:
 		/* There's no way to disable the FTLB */
 		if (!(flags & FTLB_EN))
 			return 1;
@@ -1635,6 +1636,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_I6400;
 		__cpu_name[cpu] = "MIPS I6400";
 		break;
+	case PRID_IMP_I6500:
+		c->cputype = CPU_I6500;
+		__cpu_name[cpu] = "MIPS I6500";
+		break;
 	case PRID_IMP_M5150:
 		c->cputype = CPU_M5150;
 		__cpu_name[cpu] = "MIPS M5150";
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 3fe99cb271a9c..81d6a15c93d08 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1453,6 +1453,7 @@ static void probe_pcache(void)
 	case CPU_20KC:
 	case CPU_25KF:
 	case CPU_I6400:
+	case CPU_I6500:
 	case CPU_SB1:
 	case CPU_SB1A:
 	case CPU_XLR:
@@ -1512,6 +1513,7 @@ static void probe_pcache(void)
 
 	case CPU_ALCHEMY:
 	case CPU_I6400:
+	case CPU_I6500:
 		c->icache.flags |= MIPS_CACHE_IC_F_DC;
 		break;
 
-- 
GitLab


From 736add2412a7740d9d8b56fb83440e94b01bc1b4 Mon Sep 17 00:00:00 2001
From: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Date: Tue, 13 Jun 2017 11:23:39 +0200
Subject: [PATCH 0466/1958] MIPS: perf: add I6500 handling

Add a definition of the perf registers for the new I6500 core.

Since I6500 has the same event definitions as I6400, re-use the existing
i6400 map structures by renaming them to a slightly more generic
'i6x00_***_map'.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16362/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/perf_event_mipsxx.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index f3e301f95aef7..9e6c74bf66c48 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -814,7 +814,7 @@ static const struct mips_perf_event mipsxxcore_event_map2
 	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x27, CNTR_ODD, T },
 };
 
-static const struct mips_perf_event i6400_event_map[PERF_COUNT_HW_MAX] = {
+static const struct mips_perf_event i6x00_event_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_CPU_CYCLES]          = { 0x00, CNTR_EVEN | CNTR_ODD },
 	[PERF_COUNT_HW_INSTRUCTIONS]        = { 0x01, CNTR_EVEN | CNTR_ODD },
 	/* These only count dcache, not icache */
@@ -1014,7 +1014,7 @@ static const struct mips_perf_event mipsxxcore_cache_map2
 },
 };
 
-static const struct mips_perf_event i6400_cache_map
+static const struct mips_perf_event i6x00_cache_map
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -1610,6 +1610,7 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 #endif
 		break;
 	case CPU_I6400:
+	case CPU_I6500:
 		/* 8-bit event numbers */
 		base_id = config & 0xff;
 		raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
@@ -1770,8 +1771,13 @@ init_hw_perf_events(void)
 		break;
 	case CPU_I6400:
 		mipspmu.name = "mips/I6400";
-		mipspmu.general_event_map = &i6400_event_map;
-		mipspmu.cache_event_map = &i6400_cache_map;
+		mipspmu.general_event_map = &i6x00_event_map;
+		mipspmu.cache_event_map = &i6x00_cache_map;
+		break;
+	case CPU_I6500:
+		mipspmu.name = "mips/I6500";
+		mipspmu.general_event_map = &i6x00_event_map;
+		mipspmu.cache_event_map = &i6x00_cache_map;
 		break;
 	case CPU_1004K:
 		mipspmu.name = "mips/1004K";
-- 
GitLab


From 2ec420b26f7b6ff332393f0bb5a7d245f7ad87f0 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 31 May 2017 16:19:47 +0100
Subject: [PATCH 0467/1958] MIPS: Fix mips_atomic_set() retry condition

The inline asm retry check in the MIPS_ATOMIC_SET operation of the
sysmips system call has been backwards since commit f1e39a4a616c ("MIPS:
Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler")
merged in v2.6.32, resulting in the non R10000_LLSC_WAR case retrying
until the operation was inatomic, before returning the new value that
was probably just written multiple times instead of the old value.

Invert the branch condition to fix that particular issue.

Fixes: f1e39a4a616c ("MIPS: Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16148/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dfa7f5796c7c..95e1b300ac0d0 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -134,7 +134,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
 		"1:	ll	%[old], (%[addr])			\n"
 		"	move	%[tmp], %[new]				\n"
 		"2:	sc	%[tmp], (%[addr])			\n"
-		"	bnez	%[tmp], 4f				\n"
+		"	beqz	%[tmp], 4f				\n"
 		"3:							\n"
 		"	.insn						\n"
 		"	.subsection 2					\n"
-- 
GitLab


From 49955d84cd9ccdca5a16a495e448e1a06fad9e49 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 31 May 2017 16:19:48 +0100
Subject: [PATCH 0468/1958] MIPS: Save static registers before sysmips

The MIPS sysmips system call handler may return directly from the
MIPS_ATOMIC_SET case (mips_atomic_set()) to syscall_exit. This path
restores the static (callee saved) registers, however they won't have
been saved on entry to the system call.

Use the save_static_function() macro to create a __sys_sysmips wrapper
function which saves the static registers before calling sys_sysmips, so
that the correct static register state is restored by syscall_exit.

Fixes: f1e39a4a616c ("MIPS: Rewrite sysmips(MIPS_ATOMIC_SET, ...) in C with inline assembler")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16149/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/scall32-o32.S | 2 +-
 arch/mips/kernel/scall64-64.S  | 2 +-
 arch/mips/kernel/scall64-n32.S | 2 +-
 arch/mips/kernel/scall64-o32.S | 2 +-
 arch/mips/kernel/syscall.c     | 6 ++++++
 5 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 80ed68b2c95e4..27c2f90eeb21f 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -371,7 +371,7 @@ EXPORT(sys_call_table)
 	PTR	sys_writev
 	PTR	sys_cacheflush
 	PTR	sys_cachectl
-	PTR	sys_sysmips
+	PTR	__sys_sysmips
 	PTR	sys_ni_syscall			/* 4150 */
 	PTR	sys_getsid
 	PTR	sys_fdatasync
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 49765b44aa9b3..65d5aeeb9bdb5 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -311,7 +311,7 @@ EXPORT(sys_call_table)
 	PTR	sys_sched_getaffinity
 	PTR	sys_cacheflush
 	PTR	sys_cachectl
-	PTR	sys_sysmips
+	PTR	__sys_sysmips
 	PTR	sys_io_setup			/* 5200 */
 	PTR	sys_io_destroy
 	PTR	sys_io_getevents
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 90bad2d1b2d3e..cbf190ef9e8a5 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -302,7 +302,7 @@ EXPORT(sysn32_call_table)
 	PTR	compat_sys_sched_getaffinity
 	PTR	sys_cacheflush
 	PTR	sys_cachectl
-	PTR	sys_sysmips
+	PTR	__sys_sysmips
 	PTR	compat_sys_io_setup			/* 6200 */
 	PTR	sys_io_destroy
 	PTR	compat_sys_io_getevents
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 2dd70bd104e1a..c30bc520885f9 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -371,7 +371,7 @@ EXPORT(sys32_call_table)
 	PTR	compat_sys_writev
 	PTR	sys_cacheflush
 	PTR	sys_cachectl
-	PTR	sys_sysmips
+	PTR	__sys_sysmips
 	PTR	sys_ni_syscall			/* 4150 */
 	PTR	sys_getsid
 	PTR	sys_fdatasync
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 95e1b300ac0d0..3e5d82e744ac1 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -192,6 +192,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
 	unreachable();
 }
 
+/*
+ * mips_atomic_set() normally returns directly via syscall_exit potentially
+ * clobbering static registers, so be sure to preserve them.
+ */
+save_static_function(sys_sysmips);
+
 SYSCALL_DEFINE3(sysmips, long, cmd, long, arg1, long, arg2)
 {
 	switch (cmd) {
-- 
GitLab


From 4915e1b043d6286928207b1f6968197b50407294 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 31 May 2017 16:19:49 +0100
Subject: [PATCH 0469/1958] MIPS: Fix mips_atomic_set() with EVA

EVA linked loads (LLE) and conditional stores (SCE) should be used on
EVA kernels for the MIPS_ATOMIC_SET operation of the sysmips system
call, or else the atomic set will apply to the kernel view of the
virtual address space (potentially unmapped on EVA kernels) rather than
the user view (TLB mapped).

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 3.15.x-
Patchwork: https://patchwork.linux-mips.org/patch/16151/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/syscall.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 3e5d82e744ac1..863f958c126c9 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -29,6 +29,7 @@
 #include <linux/sched/task_stack.h>
 
 #include <asm/asm.h>
+#include <asm/asm-eva.h>
 #include <asm/branch.h>
 #include <asm/cachectl.h>
 #include <asm/cacheflush.h>
@@ -131,9 +132,11 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
 		__asm__ __volatile__ (
 		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"	li	%[err], 0				\n"
-		"1:	ll	%[old], (%[addr])			\n"
+		"1:							\n"
+		user_ll("%[old]", "(%[addr])")
 		"	move	%[tmp], %[new]				\n"
-		"2:	sc	%[tmp], (%[addr])			\n"
+		"2:							\n"
+		user_sc("%[tmp]", "(%[addr])")
 		"	beqz	%[tmp], 4f				\n"
 		"3:							\n"
 		"	.insn						\n"
-- 
GitLab


From 203e090ade7357101e40a3d3aad3af77653da8df Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 31 May 2017 16:19:50 +0100
Subject: [PATCH 0470/1958] MIPS: Branch straight to ll in mips_atomic_set()

Adjust the atomic loop in the MIPS_ATOMIC_SET operation of the sysmips
system call to branch straight back to the linked load rather than
jumping via a different subsection (whose purpose remains a mystery to
me).

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16150/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/syscall.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 863f958c126c9..58c6f634b5506 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -137,13 +137,9 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
 		"	move	%[tmp], %[new]				\n"
 		"2:							\n"
 		user_sc("%[tmp]", "(%[addr])")
-		"	beqz	%[tmp], 4f				\n"
+		"	beqz	%[tmp], 1b				\n"
 		"3:							\n"
 		"	.insn						\n"
-		"	.subsection 2					\n"
-		"4:	b	1b					\n"
-		"	.previous					\n"
-		"							\n"
 		"	.section .fixup,\"ax\"				\n"
 		"5:	li	%[err], %[efault]			\n"
 		"	j	3b					\n"
-- 
GitLab


From efe4a1acdc2c70da9025cf405112667e5580a572 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:58 -0700
Subject: [PATCH 0471/1958] MIPS: SEAD-3: Remove GIC timer from DT

The SEAD-3 board doesn't & never has configured the GIC frequency.
Remove the timer node from the DT in order to avoid attempting to probe
the GIC clocksource/clockevent driver which will produce error messages
such as these during boot:

[    0.000000] GIC frequency not specified.
[    0.000000] Failed to initialize '/interrupt-controller@1b1c0000/timer': -22
[    0.000000] clocksource_probe: no matching clocksources found

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16188/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/boot/dts/mti/sead3.dts | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/mips/boot/dts/mti/sead3.dts b/arch/mips/boot/dts/mti/sead3.dts
index b112879a5d9d3..1bf58f841bbb7 100644
--- a/arch/mips/boot/dts/mti/sead3.dts
+++ b/arch/mips/boot/dts/mti/sead3.dts
@@ -54,11 +54,6 @@ gic: interrupt-controller@1b1c0000 {
 		 * controller & should be probed first.
 		 */
 		interrupt-parent = <&cpu_intc>;
-
-		timer {
-			compatible = "mti,gic-timer";
-			interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>;
-		};
 	};
 
 	ehci@1b200000 {
-- 
GitLab


From 571b7e69f7f775c531ffaf73ae476b1e46150f41 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:51 -0700
Subject: [PATCH 0472/1958] MIPS: generic/yamon-dt: Pull YAMON DT shim code out
 of SEAD-3 board

In preparation for supporting other YAMON-using boards (Malta) & sharing
code to translate information from YAMON into device tree properties,
pull the code doing so for the kernel command line, system memory &
serial configuration out of the SEAD-3 board code.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16181/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/generic/Kconfig        |   8 ++
 arch/mips/generic/Makefile       |   1 +
 arch/mips/generic/board-sead3.c  | 178 +----------------------------
 arch/mips/generic/yamon-dt.c     | 190 +++++++++++++++++++++++++++++++
 arch/mips/include/asm/yamon-dt.h |  48 ++++++++
 5 files changed, 251 insertions(+), 174 deletions(-)
 create mode 100644 arch/mips/generic/yamon-dt.c
 create mode 100644 arch/mips/include/asm/yamon-dt.h

diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index a606b3f9196c6..446b7c68133d7 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -9,9 +9,17 @@ config LEGACY_BOARDS
 	  kernel is booted without being provided with an FDT via the UHI
 	  boot protocol.
 
+config YAMON_DT_SHIM
+	bool
+	help
+	  Select this from your board if the board uses the YAMON bootloader
+	  and you wish to include code which helps translate various
+	  YAMON-provided environment variables into a device tree properties.
+
 config LEGACY_BOARD_SEAD3
 	bool "Support MIPS SEAD-3 boards"
 	select LEGACY_BOARDS
+	select YAMON_DT_SHIM
 	help
 	  Enable this to include support for booting on MIPS SEAD-3 FPGA-based
 	  development boards, which boot using a legacy boot protocol.
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile
index acb9b6d62b16b..56b3ea565ed9b 100644
--- a/arch/mips/generic/Makefile
+++ b/arch/mips/generic/Makefile
@@ -12,5 +12,6 @@ obj-y += init.o
 obj-y += irq.o
 obj-y += proc.o
 
+obj-$(CONFIG_YAMON_DT_SHIM)		+= yamon-dt.o
 obj-$(CONFIG_LEGACY_BOARD_SEAD3)	+= board-sead3.o
 obj-$(CONFIG_KEXEC)			+= kexec.o
diff --git a/arch/mips/generic/board-sead3.c b/arch/mips/generic/board-sead3.c
index f4ae0584a33bd..63fdc98738bae 100644
--- a/arch/mips/generic/board-sead3.c
+++ b/arch/mips/generic/board-sead3.c
@@ -17,6 +17,7 @@
 #include <asm/fw/fw.h>
 #include <asm/io.h>
 #include <asm/machine.h>
+#include <asm/yamon-dt.h>
 
 #define SEAD_CONFIG			CKSEG1ADDR(0x1b100110)
 #define SEAD_CONFIG_GIC_PRESENT		BIT(1)
@@ -33,98 +34,6 @@ static __init bool sead3_detect(void)
 	return (rev & MIPS_REVISION_MACHINE) == MIPS_REVISION_MACHINE_SEAD3;
 }
 
-static __init int append_cmdline(void *fdt)
-{
-	int err, chosen_off;
-
-	/* find or add chosen node */
-	chosen_off = fdt_path_offset(fdt, "/chosen");
-	if (chosen_off == -FDT_ERR_NOTFOUND)
-		chosen_off = fdt_path_offset(fdt, "/chosen@0");
-	if (chosen_off == -FDT_ERR_NOTFOUND)
-		chosen_off = fdt_add_subnode(fdt, 0, "chosen");
-	if (chosen_off < 0) {
-		pr_err("Unable to find or add DT chosen node: %d\n",
-		       chosen_off);
-		return chosen_off;
-	}
-
-	err = fdt_setprop_string(fdt, chosen_off, "bootargs", fw_getcmdline());
-	if (err) {
-		pr_err("Unable to set bootargs property: %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
-static __init int append_memory(void *fdt)
-{
-	unsigned long phys_memsize, memsize;
-	__be32 mem_array[2];
-	int err, mem_off;
-	char *var;
-
-	/* find memory size from the bootloader environment */
-	var = fw_getenv("memsize");
-	if (var) {
-		err = kstrtoul(var, 0, &phys_memsize);
-		if (err) {
-			pr_err("Failed to read memsize env variable '%s'\n",
-			       var);
-			return -EINVAL;
-		}
-	} else {
-		pr_warn("The bootloader didn't provide memsize: defaulting to 32MB\n");
-		phys_memsize = 32 << 20;
-	}
-
-	/* default to using all available RAM */
-	memsize = phys_memsize;
-
-	/* allow the user to override the usable memory */
-	var = strstr(arcs_cmdline, "memsize=");
-	if (var)
-		memsize = memparse(var + strlen("memsize="), NULL);
-
-	/* if the user says there's more RAM than we thought, believe them */
-	phys_memsize = max_t(unsigned long, phys_memsize, memsize);
-
-	/* find or add a memory node */
-	mem_off = fdt_path_offset(fdt, "/memory");
-	if (mem_off == -FDT_ERR_NOTFOUND)
-		mem_off = fdt_add_subnode(fdt, 0, "memory");
-	if (mem_off < 0) {
-		pr_err("Unable to find or add memory DT node: %d\n", mem_off);
-		return mem_off;
-	}
-
-	err = fdt_setprop_string(fdt, mem_off, "device_type", "memory");
-	if (err) {
-		pr_err("Unable to set memory node device_type: %d\n", err);
-		return err;
-	}
-
-	mem_array[0] = 0;
-	mem_array[1] = cpu_to_be32(phys_memsize);
-	err = fdt_setprop(fdt, mem_off, "reg", mem_array, sizeof(mem_array));
-	if (err) {
-		pr_err("Unable to set memory regs property: %d\n", err);
-		return err;
-	}
-
-	mem_array[0] = 0;
-	mem_array[1] = cpu_to_be32(memsize);
-	err = fdt_setprop(fdt, mem_off, "linux,usable-memory",
-			  mem_array, sizeof(mem_array));
-	if (err) {
-		pr_err("Unable to set linux,usable-memory property: %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
 static __init int remove_gic(void *fdt)
 {
 	const unsigned int cpu_ehci_int = 2;
@@ -214,85 +123,6 @@ static __init int remove_gic(void *fdt)
 	return 0;
 }
 
-static __init int serial_config(void *fdt)
-{
-	const char *yamontty, *mode_var;
-	char mode_var_name[9], path[18], parity;
-	unsigned int uart, baud, stop_bits;
-	bool hw_flow;
-	int chosen_off, err;
-
-	yamontty = fw_getenv("yamontty");
-	if (!yamontty || !strcmp(yamontty, "tty0")) {
-		uart = 0;
-	} else if (!strcmp(yamontty, "tty1")) {
-		uart = 1;
-	} else {
-		pr_warn("yamontty environment variable '%s' invalid\n",
-			yamontty);
-		uart = 0;
-	}
-
-	baud = stop_bits = 0;
-	parity = 0;
-	hw_flow = false;
-
-	snprintf(mode_var_name, sizeof(mode_var_name), "modetty%u", uart);
-	mode_var = fw_getenv(mode_var_name);
-	if (mode_var) {
-		while (mode_var[0] >= '0' && mode_var[0] <= '9') {
-			baud *= 10;
-			baud += mode_var[0] - '0';
-			mode_var++;
-		}
-		if (mode_var[0] == ',')
-			mode_var++;
-		if (mode_var[0])
-			parity = mode_var[0];
-		if (mode_var[0] == ',')
-			mode_var++;
-		if (mode_var[0])
-			stop_bits = mode_var[0] - '0';
-		if (mode_var[0] == ',')
-			mode_var++;
-		if (!strcmp(mode_var, "hw"))
-			hw_flow = true;
-	}
-
-	if (!baud)
-		baud = 38400;
-
-	if (parity != 'e' && parity != 'n' && parity != 'o')
-		parity = 'n';
-
-	if (stop_bits != 7 && stop_bits != 8)
-		stop_bits = 8;
-
-	WARN_ON(snprintf(path, sizeof(path), "uart%u:%u%c%u%s",
-			 uart, baud, parity, stop_bits,
-			 hw_flow ? "r" : "") >= sizeof(path));
-
-	/* find or add chosen node */
-	chosen_off = fdt_path_offset(fdt, "/chosen");
-	if (chosen_off == -FDT_ERR_NOTFOUND)
-		chosen_off = fdt_path_offset(fdt, "/chosen@0");
-	if (chosen_off == -FDT_ERR_NOTFOUND)
-		chosen_off = fdt_add_subnode(fdt, 0, "chosen");
-	if (chosen_off < 0) {
-		pr_err("Unable to find or add DT chosen node: %d\n",
-		       chosen_off);
-		return chosen_off;
-	}
-
-	err = fdt_setprop_string(fdt, chosen_off, "stdout-path", path);
-	if (err) {
-		pr_err("Unable to set stdout-path property: %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
 static __init const void *sead3_fixup_fdt(const void *fdt,
 					  const void *match_data)
 {
@@ -311,11 +141,11 @@ static __init const void *sead3_fixup_fdt(const void *fdt,
 	if (err)
 		panic("Unable to open FDT: %d", err);
 
-	err = append_cmdline(fdt_buf);
+	err = yamon_dt_append_cmdline(fdt_buf);
 	if (err)
 		panic("Unable to patch FDT: %d", err);
 
-	err = append_memory(fdt_buf);
+	err = yamon_dt_append_memory(fdt_buf);
 	if (err)
 		panic("Unable to patch FDT: %d", err);
 
@@ -323,7 +153,7 @@ static __init const void *sead3_fixup_fdt(const void *fdt,
 	if (err)
 		panic("Unable to patch FDT: %d", err);
 
-	err = serial_config(fdt_buf);
+	err = yamon_dt_serial_config(fdt_buf);
 	if (err)
 		panic("Unable to patch FDT: %d", err);
 
diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c
new file mode 100644
index 0000000000000..9a0c8da5a796f
--- /dev/null
+++ b/arch/mips/generic/yamon-dt.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#define pr_fmt(fmt) "yamon-dt: " fmt
+
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+#include <linux/printk.h>
+
+#include <asm/fw/fw.h>
+
+__init int yamon_dt_append_cmdline(void *fdt)
+{
+	int err, chosen_off;
+
+	/* find or add chosen node */
+	chosen_off = fdt_path_offset(fdt, "/chosen");
+	if (chosen_off == -FDT_ERR_NOTFOUND)
+		chosen_off = fdt_path_offset(fdt, "/chosen@0");
+	if (chosen_off == -FDT_ERR_NOTFOUND)
+		chosen_off = fdt_add_subnode(fdt, 0, "chosen");
+	if (chosen_off < 0) {
+		pr_err("Unable to find or add DT chosen node: %d\n",
+		       chosen_off);
+		return chosen_off;
+	}
+
+	err = fdt_setprop_string(fdt, chosen_off, "bootargs", fw_getcmdline());
+	if (err) {
+		pr_err("Unable to set bootargs property: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+__init int yamon_dt_append_memory(void *fdt)
+{
+	unsigned long phys_memsize, memsize;
+	__be32 mem_array[2];
+	int err, mem_off;
+	char *var;
+
+	/* find memory size from the bootloader environment */
+	var = fw_getenv("memsize");
+	if (var) {
+		err = kstrtoul(var, 0, &phys_memsize);
+		if (err) {
+			pr_err("Failed to read memsize env variable '%s'\n",
+			       var);
+			return -EINVAL;
+		}
+	} else {
+		pr_warn("The bootloader didn't provide memsize: defaulting to 32MB\n");
+		phys_memsize = 32 << 20;
+	}
+
+	/* default to using all available RAM */
+	memsize = phys_memsize;
+
+	/* allow the user to override the usable memory */
+	var = strstr(arcs_cmdline, "memsize=");
+	if (var)
+		memsize = memparse(var + strlen("memsize="), NULL);
+
+	/* if the user says there's more RAM than we thought, believe them */
+	phys_memsize = max_t(unsigned long, phys_memsize, memsize);
+
+	/* find or add a memory node */
+	mem_off = fdt_path_offset(fdt, "/memory");
+	if (mem_off == -FDT_ERR_NOTFOUND)
+		mem_off = fdt_add_subnode(fdt, 0, "memory");
+	if (mem_off < 0) {
+		pr_err("Unable to find or add memory DT node: %d\n", mem_off);
+		return mem_off;
+	}
+
+	err = fdt_setprop_string(fdt, mem_off, "device_type", "memory");
+	if (err) {
+		pr_err("Unable to set memory node device_type: %d\n", err);
+		return err;
+	}
+
+	mem_array[0] = 0;
+	mem_array[1] = cpu_to_be32(phys_memsize);
+	err = fdt_setprop(fdt, mem_off, "reg", mem_array, sizeof(mem_array));
+	if (err) {
+		pr_err("Unable to set memory regs property: %d\n", err);
+		return err;
+	}
+
+	mem_array[0] = 0;
+	mem_array[1] = cpu_to_be32(memsize);
+	err = fdt_setprop(fdt, mem_off, "linux,usable-memory",
+			  mem_array, sizeof(mem_array));
+	if (err) {
+		pr_err("Unable to set linux,usable-memory property: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+__init int yamon_dt_serial_config(void *fdt)
+{
+	const char *yamontty, *mode_var;
+	char mode_var_name[9], path[18], parity;
+	unsigned int uart, baud, stop_bits;
+	bool hw_flow;
+	int chosen_off, err;
+
+	yamontty = fw_getenv("yamontty");
+	if (!yamontty || !strcmp(yamontty, "tty0")) {
+		uart = 0;
+	} else if (!strcmp(yamontty, "tty1")) {
+		uart = 1;
+	} else {
+		pr_warn("yamontty environment variable '%s' invalid\n",
+			yamontty);
+		uart = 0;
+	}
+
+	baud = stop_bits = 0;
+	parity = 0;
+	hw_flow = false;
+
+	snprintf(mode_var_name, sizeof(mode_var_name), "modetty%u", uart);
+	mode_var = fw_getenv(mode_var_name);
+	if (mode_var) {
+		while (mode_var[0] >= '0' && mode_var[0] <= '9') {
+			baud *= 10;
+			baud += mode_var[0] - '0';
+			mode_var++;
+		}
+		if (mode_var[0] == ',')
+			mode_var++;
+		if (mode_var[0])
+			parity = mode_var[0];
+		if (mode_var[0] == ',')
+			mode_var++;
+		if (mode_var[0])
+			stop_bits = mode_var[0] - '0';
+		if (mode_var[0] == ',')
+			mode_var++;
+		if (!strcmp(mode_var, "hw"))
+			hw_flow = true;
+	}
+
+	if (!baud)
+		baud = 38400;
+
+	if (parity != 'e' && parity != 'n' && parity != 'o')
+		parity = 'n';
+
+	if (stop_bits != 7 && stop_bits != 8)
+		stop_bits = 8;
+
+	WARN_ON(snprintf(path, sizeof(path), "uart%u:%u%c%u%s",
+			 uart, baud, parity, stop_bits,
+			 hw_flow ? "r" : "") >= sizeof(path));
+
+	/* find or add chosen node */
+	chosen_off = fdt_path_offset(fdt, "/chosen");
+	if (chosen_off == -FDT_ERR_NOTFOUND)
+		chosen_off = fdt_path_offset(fdt, "/chosen@0");
+	if (chosen_off == -FDT_ERR_NOTFOUND)
+		chosen_off = fdt_add_subnode(fdt, 0, "chosen");
+	if (chosen_off < 0) {
+		pr_err("Unable to find or add DT chosen node: %d\n",
+		       chosen_off);
+		return chosen_off;
+	}
+
+	err = fdt_setprop_string(fdt, chosen_off, "stdout-path", path);
+	if (err) {
+		pr_err("Unable to set stdout-path property: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
diff --git a/arch/mips/include/asm/yamon-dt.h b/arch/mips/include/asm/yamon-dt.h
new file mode 100644
index 0000000000000..3f3367de48369
--- /dev/null
+++ b/arch/mips/include/asm/yamon-dt.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __MIPS_ASM_YAMON_DT_H__
+#define __MIPS_ASM_YAMON_DT_H__
+
+/**
+ * yamon_dt_append_cmdline() - Append YAMON-provided command line to /chosen
+ * @fdt: the FDT blob
+ *
+ * Write the YAMON-provided command line to the bootargs property of the
+ * /chosen node in @fdt.
+ *
+ * Return: 0 on success, else -errno
+ */
+extern __init int yamon_dt_append_cmdline(void *fdt);
+
+/**
+ * yamon_dt_append_memory() - Append YAMON-provided memory info to /memory
+ * @fdt: the FDT blob
+ *
+ * Generate a /memory node in @fdt based upon memory size information provided
+ * by YAMON in its environment.
+ *
+ * Return: 0 on success, else -errno
+ */
+extern __init int yamon_dt_append_memory(void *fdt);
+
+/**
+ * yamon_dt_serial_config() - Append YAMON-provided serial config to /chosen
+ * @fdt: the FDT blob
+ *
+ * Generate a stdout-path property in the /chosen node of @fdt, based upon
+ * information provided in the YAMON environment about the UART configuration
+ * of the system.
+ *
+ * Return: 0 on success, else -errno
+ */
+extern __init int yamon_dt_serial_config(void *fdt);
+
+#endif /* __MIPS_ASM_YAMON_DT_H__ */
-- 
GitLab


From f41d2430bbd6b64ee934915a1856fa406677d55e Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:52 -0700
Subject: [PATCH 0473/1958] MIPS: generic/yamon-dt: Support > 256MB of RAM

YAMON can expose more than 256MB of RAM to Linux on Malta by passing an
ememsize environment variable with the full size, but the kernel then
needs to be careful to choose the corresponding physical memory regions,
avoiding the IO memory window. This is platform dependent, and on Malta
it also depends on the memory layout which varies between system
controllers.

Extend yamon_dt_amend_memory() to generically handle this by taking
[e]memsize bytes of memory from an array of memory regions passed in as
a new parameter. Board code provides this array as appropriate depending
on its own memory map.

[paul.burton@imgtec.com: SEAD-3 supports 384MB DDR from 0]

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16182/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/generic/board-sead3.c  | 17 +++++-
 arch/mips/generic/yamon-dt.c     | 92 ++++++++++++++++++++++++--------
 arch/mips/include/asm/yamon-dt.h | 22 ++++++--
 3 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/arch/mips/generic/board-sead3.c b/arch/mips/generic/board-sead3.c
index 63fdc98738bae..97186a3a5d219 100644
--- a/arch/mips/generic/board-sead3.c
+++ b/arch/mips/generic/board-sead3.c
@@ -13,6 +13,7 @@
 #include <linux/errno.h>
 #include <linux/libfdt.h>
 #include <linux/printk.h>
+#include <linux/sizes.h>
 
 #include <asm/fw/fw.h>
 #include <asm/io.h>
@@ -26,6 +27,15 @@
 #define MIPS_REVISION_MACHINE		(0xf << 4)
 #define MIPS_REVISION_MACHINE_SEAD3	(0x4 << 4)
 
+/*
+ * Maximum 384MB RAM at physical address 0, preceding any I/O.
+ */
+static struct yamon_mem_region mem_regions[] __initdata = {
+	/* start	size */
+	{ 0,		SZ_256M + SZ_128M },
+	{}
+};
+
 static __init bool sead3_detect(void)
 {
 	uint32_t rev;
@@ -34,6 +44,11 @@ static __init bool sead3_detect(void)
 	return (rev & MIPS_REVISION_MACHINE) == MIPS_REVISION_MACHINE_SEAD3;
 }
 
+static __init int append_memory(void *fdt)
+{
+	return yamon_dt_append_memory(fdt, mem_regions);
+}
+
 static __init int remove_gic(void *fdt)
 {
 	const unsigned int cpu_ehci_int = 2;
@@ -145,7 +160,7 @@ static __init const void *sead3_fixup_fdt(const void *fdt,
 	if (err)
 		panic("Unable to patch FDT: %d", err);
 
-	err = yamon_dt_append_memory(fdt_buf);
+	err = append_memory(fdt_buf);
 	if (err)
 		panic("Unable to patch FDT: %d", err);
 
diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c
index 9a0c8da5a796f..8e36a5baaa7e5 100644
--- a/arch/mips/generic/yamon-dt.c
+++ b/arch/mips/generic/yamon-dt.c
@@ -17,6 +17,9 @@
 #include <linux/printk.h>
 
 #include <asm/fw/fw.h>
+#include <asm/yamon-dt.h>
+
+#define MAX_MEM_ARRAY_ENTRIES	2
 
 __init int yamon_dt_append_cmdline(void *fdt)
 {
@@ -43,23 +46,64 @@ __init int yamon_dt_append_cmdline(void *fdt)
 	return 0;
 }
 
-__init int yamon_dt_append_memory(void *fdt)
+static unsigned int __init gen_fdt_mem_array(
+					const struct yamon_mem_region *regions,
+					__be32 *mem_array,
+					unsigned int max_entries,
+					unsigned long memsize)
+{
+	const struct yamon_mem_region *mr;
+	unsigned long size;
+	unsigned int entries = 0;
+
+	for (mr = regions; mr->size && memsize; ++mr) {
+		if (entries >= max_entries) {
+			pr_warn("Number of regions exceeds max %u\n",
+				max_entries);
+			break;
+		}
+
+		/* How much of the remaining RAM fits in the next region? */
+		size = min_t(unsigned long, memsize, mr->size);
+		memsize -= size;
+
+		/* Emit a memory region */
+		*(mem_array++) = cpu_to_be32(mr->start);
+		*(mem_array++) = cpu_to_be32(size);
+		++entries;
+
+		/* Discard the next mr->discard bytes */
+		memsize -= min_t(unsigned long, memsize, mr->discard);
+	}
+	return entries;
+}
+
+__init int yamon_dt_append_memory(void *fdt,
+				  const struct yamon_mem_region *regions)
 {
 	unsigned long phys_memsize, memsize;
-	__be32 mem_array[2];
-	int err, mem_off;
-	char *var;
+	__be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES];
+	unsigned int mem_entries;
+	int i, err, mem_off;
+	char *var, param_name[10], *var_names[] = {
+		"ememsize", "memsize",
+	};
 
 	/* find memory size from the bootloader environment */
-	var = fw_getenv("memsize");
-	if (var) {
+	for (i = 0; i < ARRAY_SIZE(var_names); i++) {
+		var = fw_getenv(var_names[i]);
+		if (!var)
+			continue;
+
 		err = kstrtoul(var, 0, &phys_memsize);
-		if (err) {
-			pr_err("Failed to read memsize env variable '%s'\n",
-			       var);
-			return -EINVAL;
-		}
-	} else {
+		if (!err)
+			break;
+
+		pr_warn("Failed to read the '%s' env variable '%s'\n",
+			var_names[i], var);
+	}
+
+	if (!phys_memsize) {
 		pr_warn("The bootloader didn't provide memsize: defaulting to 32MB\n");
 		phys_memsize = 32 << 20;
 	}
@@ -68,9 +112,14 @@ __init int yamon_dt_append_memory(void *fdt)
 	memsize = phys_memsize;
 
 	/* allow the user to override the usable memory */
-	var = strstr(arcs_cmdline, "memsize=");
-	if (var)
-		memsize = memparse(var + strlen("memsize="), NULL);
+	for (i = 0; i < ARRAY_SIZE(var_names); i++) {
+		snprintf(param_name, sizeof(param_name), "%s=", var_names[i]);
+		var = strstr(arcs_cmdline, param_name);
+		if (!var)
+			continue;
+
+		memsize = memparse(var + strlen(param_name), NULL);
+	}
 
 	/* if the user says there's more RAM than we thought, believe them */
 	phys_memsize = max_t(unsigned long, phys_memsize, memsize);
@@ -90,18 +139,19 @@ __init int yamon_dt_append_memory(void *fdt)
 		return err;
 	}
 
-	mem_array[0] = 0;
-	mem_array[1] = cpu_to_be32(phys_memsize);
-	err = fdt_setprop(fdt, mem_off, "reg", mem_array, sizeof(mem_array));
+	mem_entries = gen_fdt_mem_array(regions, mem_array,
+					MAX_MEM_ARRAY_ENTRIES, phys_memsize);
+	err = fdt_setprop(fdt, mem_off, "reg",
+			  mem_array, mem_entries * 2 * sizeof(mem_array[0]));
 	if (err) {
 		pr_err("Unable to set memory regs property: %d\n", err);
 		return err;
 	}
 
-	mem_array[0] = 0;
-	mem_array[1] = cpu_to_be32(memsize);
+	mem_entries = gen_fdt_mem_array(regions, mem_array,
+					MAX_MEM_ARRAY_ENTRIES, memsize);
 	err = fdt_setprop(fdt, mem_off, "linux,usable-memory",
-			  mem_array, sizeof(mem_array));
+			  mem_array, mem_entries * 2 * sizeof(mem_array[0]));
 	if (err) {
 		pr_err("Unable to set linux,usable-memory property: %d\n", err);
 		return err;
diff --git a/arch/mips/include/asm/yamon-dt.h b/arch/mips/include/asm/yamon-dt.h
index 3f3367de48369..485cfe3e45e12 100644
--- a/arch/mips/include/asm/yamon-dt.h
+++ b/arch/mips/include/asm/yamon-dt.h
@@ -11,6 +11,20 @@
 #ifndef __MIPS_ASM_YAMON_DT_H__
 #define __MIPS_ASM_YAMON_DT_H__
 
+#include <linux/types.h>
+
+/**
+ * struct yamon_mem_region - Represents a contiguous range of physical RAM.
+ * @start:	Start physical address.
+ * @size:	Maximum size of region.
+ * @discard:	Length of additional memory to discard after the region.
+ */
+struct yamon_mem_region {
+	phys_addr_t	start;
+	phys_addr_t	size;
+	phys_addr_t	discard;
+};
+
 /**
  * yamon_dt_append_cmdline() - Append YAMON-provided command line to /chosen
  * @fdt: the FDT blob
@@ -24,14 +38,16 @@ extern __init int yamon_dt_append_cmdline(void *fdt);
 
 /**
  * yamon_dt_append_memory() - Append YAMON-provided memory info to /memory
- * @fdt: the FDT blob
+ * @fdt:	the FDT blob
+ * @regions:	zero size terminated array of physical memory regions
  *
  * Generate a /memory node in @fdt based upon memory size information provided
- * by YAMON in its environment.
+ * by YAMON in its environment and the @regions array.
  *
  * Return: 0 on success, else -errno
  */
-extern __init int yamon_dt_append_memory(void *fdt);
+extern __init int yamon_dt_append_memory(void *fdt,
+					const struct yamon_mem_region *regions);
 
 /**
  * yamon_dt_serial_config() - Append YAMON-provided serial config to /chosen
-- 
GitLab


From c3d62fc6a058d1024f3ad0525a251e9d6c5203ed Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:53 -0700
Subject: [PATCH 0474/1958] MIPS: generic/yamon-dt: Use serial* rather than
 uart* aliases

Name aliases in the SEAD-3 device tree serial0 & serial1, rather than
uart0 & uart1. This allows the core serial code to make use of the
aliases to ensure that the UARTs are consistently numbered as expected
rather than having the numbering depend upon probe order.

When translating YAMON-provided serial configuration to a device tree
stdout-path property adjust accordingly, such that we continue to
reference a valid alias.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16183/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/boot/dts/mti/sead3.dts | 6 +++---
 arch/mips/generic/yamon-dt.c     | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/mips/boot/dts/mti/sead3.dts b/arch/mips/boot/dts/mti/sead3.dts
index 1bf58f841bbb7..f327791cbcb5c 100644
--- a/arch/mips/boot/dts/mti/sead3.dts
+++ b/arch/mips/boot/dts/mti/sead3.dts
@@ -14,12 +14,12 @@ / {
 	interrupt-parent = <&gic>;
 
 	chosen {
-		stdout-path = "uart1:115200";
+		stdout-path = "serial1:115200";
 	};
 
 	aliases {
-		uart0 = &uart0;
-		uart1 = &uart1;
+		serial0 = &uart0;
+		serial1 = &uart1;
 	};
 
 	cpus {
diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c
index 8e36a5baaa7e5..6077bca9b364b 100644
--- a/arch/mips/generic/yamon-dt.c
+++ b/arch/mips/generic/yamon-dt.c
@@ -163,7 +163,7 @@ __init int yamon_dt_append_memory(void *fdt,
 __init int yamon_dt_serial_config(void *fdt)
 {
 	const char *yamontty, *mode_var;
-	char mode_var_name[9], path[18], parity;
+	char mode_var_name[9], path[20], parity;
 	unsigned int uart, baud, stop_bits;
 	bool hw_flow;
 	int chosen_off, err;
@@ -214,7 +214,7 @@ __init int yamon_dt_serial_config(void *fdt)
 	if (stop_bits != 7 && stop_bits != 8)
 		stop_bits = 8;
 
-	WARN_ON(snprintf(path, sizeof(path), "uart%u:%u%c%u%s",
+	WARN_ON(snprintf(path, sizeof(path), "serial%u:%u%c%u%s",
 			 uart, baud, parity, stop_bits,
 			 hw_flow ? "r" : "") >= sizeof(path));
 
-- 
GitLab


From e889dfca12ce95cdeaa50f66d1f33ad8fed4ca58 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:54 -0700
Subject: [PATCH 0475/1958] MIPS: generic: Abstract FDT fixup application

Introduce an apply_mips_fdt_fixups() function which can apply fixups to
an FDT based upon an array of fixup descriptions. This abstracts that
functionality such that legacy board code can apply FDT fixups without
requiring lots of duplication.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16184/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/generic/board-sead3.c | 33 +++++++++++----------------------
 arch/mips/generic/init.c        | 27 +++++++++++++++++++++++++++
 arch/mips/include/asm/machine.h | 31 +++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/arch/mips/generic/board-sead3.c b/arch/mips/generic/board-sead3.c
index 97186a3a5d219..39e11bd249cf8 100644
--- a/arch/mips/generic/board-sead3.c
+++ b/arch/mips/generic/board-sead3.c
@@ -138,6 +138,14 @@ static __init int remove_gic(void *fdt)
 	return 0;
 }
 
+static const struct mips_fdt_fixup sead3_fdt_fixups[] __initconst = {
+	{ yamon_dt_append_cmdline, "append command line" },
+	{ append_memory, "append memory" },
+	{ remove_gic, "remove GIC when not present" },
+	{ yamon_dt_serial_config, "append serial configuration" },
+	{ },
+};
+
 static __init const void *sead3_fixup_fdt(const void *fdt,
 					  const void *match_data)
 {
@@ -152,29 +160,10 @@ static __init const void *sead3_fixup_fdt(const void *fdt,
 
 	fw_init_cmdline();
 
-	err = fdt_open_into(fdt, fdt_buf, sizeof(fdt_buf));
-	if (err)
-		panic("Unable to open FDT: %d", err);
-
-	err = yamon_dt_append_cmdline(fdt_buf);
-	if (err)
-		panic("Unable to patch FDT: %d", err);
-
-	err = append_memory(fdt_buf);
-	if (err)
-		panic("Unable to patch FDT: %d", err);
-
-	err = remove_gic(fdt_buf);
-	if (err)
-		panic("Unable to patch FDT: %d", err);
-
-	err = yamon_dt_serial_config(fdt_buf);
-	if (err)
-		panic("Unable to patch FDT: %d", err);
-
-	err = fdt_pack(fdt_buf);
+	err = apply_mips_fdt_fixups(fdt_buf, sizeof(fdt_buf),
+				    fdt, sead3_fdt_fixups);
 	if (err)
-		panic("Unable to pack FDT: %d\n", err);
+		panic("Unable to fixup FDT: %d", err);
 
 	return fdt_buf;
 }
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index 4af619215410a..4a9a1edbfb298 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -122,6 +122,33 @@ void __init device_tree_init(void)
 		err = register_up_smp_ops();
 }
 
+int __init apply_mips_fdt_fixups(void *fdt_out, size_t fdt_out_size,
+				 const void *fdt_in,
+				 const struct mips_fdt_fixup *fixups)
+{
+	int err;
+
+	err = fdt_open_into(fdt_in, fdt_out, fdt_out_size);
+	if (err) {
+		pr_err("Failed to open FDT\n");
+		return err;
+	}
+
+	for (; fixups->apply; fixups++) {
+		err = fixups->apply(fdt_out);
+		if (err) {
+			pr_err("Failed to apply FDT fixup \"%s\"\n",
+			       fixups->description);
+			return err;
+		}
+	}
+
+	err = fdt_pack(fdt_out);
+	if (err)
+		pr_err("Failed to pack FDT\n");
+	return err;
+}
+
 void __init plat_time_init(void)
 {
 	struct device_node *np;
diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h
index 6b444cd9526fe..ecb6c73354841 100644
--- a/arch/mips/include/asm/machine.h
+++ b/arch/mips/include/asm/machine.h
@@ -60,4 +60,35 @@ mips_machine_is_compatible(const struct mips_machine *mach, const void *fdt)
 	return NULL;
 }
 
+/**
+ * struct mips_fdt_fixup - Describe a fixup to apply to an FDT
+ * @apply: applies the fixup to @fdt, returns zero on success else -errno
+ * @description: a short description of the fixup
+ *
+ * Describes a fixup applied to an FDT blob by the @apply function. The
+ * @description field provides a short description of the fixup intended for
+ * use in error messages if the @apply function returns non-zero.
+ */
+struct mips_fdt_fixup {
+	int (*apply)(void *fdt);
+	const char *description;
+};
+
+/**
+ * apply_mips_fdt_fixups() - apply fixups to an FDT blob
+ * @fdt_out: buffer in which to place the fixed-up FDT
+ * @fdt_out_size: the size of the @fdt_out buffer
+ * @fdt_in: the FDT blob
+ * @fixups: pointer to an array of fixups to be applied
+ *
+ * Loop through the array of fixups pointed to by @fixups, calling the apply
+ * function on each until either one returns an error or we reach the end of
+ * the list as indicated by an entry with a NULL apply field.
+ *
+ * Return: zero on success, else -errno
+ */
+extern int __init apply_mips_fdt_fixups(void *fdt_out, size_t fdt_out_size,
+					const void *fdt_in,
+					const struct mips_fdt_fixup *fixups);
+
 #endif /* __MIPS_ASM_MACHINE_H__ */
-- 
GitLab


From ae7ce6b1e0a98132e2ecb2f64ebaa8c535b6b9f5 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:55 -0700
Subject: [PATCH 0476/1958] MIPS: generic: Set RTC_ALWAYS_BCD to 0

Drivers for the mc146818 RTC generally check control registers to
determine whether a value is encoded as binary or as a binary coded
decimal. Setting RTC_ALWAYS_BCD to 1 effectively bypasses these checks
and causes drivers to always expect binary coded decimal values,
regardless of control register values.

This does not seem like a sane default - defaulting to 0 allows the
drivers to check control registers to determine encoding type & allows
the driver to work generically with both binary & BCD encodings. Set
this in mach-generic/mc146818rtc.h such that the generic kernel, or
platforms which don't provide a custom mc146818rtc.h, can have an RTC
driver which works with both encodings.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16185/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-generic/mc146818rtc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/mach-generic/mc146818rtc.h b/arch/mips/include/asm/mach-generic/mc146818rtc.h
index 0b9a942f079d0..9c72e540ff561 100644
--- a/arch/mips/include/asm/mach-generic/mc146818rtc.h
+++ b/arch/mips/include/asm/mach-generic/mc146818rtc.h
@@ -27,7 +27,7 @@ static inline void CMOS_WRITE(unsigned char data, unsigned long addr)
 	outb_p(data, RTC_PORT(1));
 }
 
-#define RTC_ALWAYS_BCD	1
+#define RTC_ALWAYS_BCD	0
 
 #ifndef mc146818_decode_year
 #define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1900)
-- 
GitLab


From 032a469b1e6ef02209308a5b107c10beb4b12fb6 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:56 -0700
Subject: [PATCH 0477/1958] MIPS: generic: Add a MAINTAINERS entry

Add an entry to MAINTAINERS for the generic platform code, such that
relevant people, starting with myself, can be CC'd on patches.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16186/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 09b5ab6a8a5ce..bcf2d258c0dd6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8489,6 +8489,12 @@ F:	Documentation/devicetree/bindings/mips/
 F:	Documentation/mips/
 F:	arch/mips/
 
+MIPS GENERIC PLATFORM
+M:	Paul Burton <paul.burton@imgtec.com>
+L:	linux-mips@linux-mips.org
+S:	Supported
+F:	arch/mips/generic/
+
 MIPS/LOONGSON1 ARCHITECTURE
 M:	Keguang Zhang <keguang.zhang@gmail.com>
 L:	linux-mips@linux-mips.org
-- 
GitLab


From fbdc674ba33c3791b315a546019e570e3e94e599 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:57 -0700
Subject: [PATCH 0478/1958] MIPS: SEAD-3: Set interrupt-parent per-device, not
 at root node

The SEAD-3 board may be configured with or without a MIPS Global
Interrupt Controller (GIC). Because of this we have a device tree with a
default case of a GIC present, and code to fixup the device tree based
upon a configuration register that indicates the presence of the GIC.

In order to keep this DT fixup code simple, the interrupt-parent
property was specified at the root node of the SEAD-3 DT, allowing the
fixup code to simply change this property to the phandle of the CPU
interrupt controller if a GIC is not present & affect all
interrupt-using devices at once. This however causes a problem if we do
have a GIC & the device tree is used as-is, because the interrupt-parent
property of the root node applies to the CPU interrupt controller node.
This causes a cycle when of_irq_init() attempts to probe interrupt
controllers in order and boots fail due to a lack of configured
interrupts, with this message printed on the kernel console:

[    0.000000] OF: of_irq_init: children remain, but no parents

Fix this by removing the interrupt-parent property from the DT root node
& instead setting it for each device which uses interrupts, ensuring
that the CPU interrupt controller node has no interrupt-parent &
allowing of_irq_init() to identify it as the root interrupt controller.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reported-by: Keng Koh <keng.koh@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16187/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/boot/dts/mti/sead3.dts |  5 ++++-
 arch/mips/generic/board-sead3.c  | 26 ++++++++++++++++++++------
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/arch/mips/boot/dts/mti/sead3.dts b/arch/mips/boot/dts/mti/sead3.dts
index f327791cbcb5c..cabe256f9a684 100644
--- a/arch/mips/boot/dts/mti/sead3.dts
+++ b/arch/mips/boot/dts/mti/sead3.dts
@@ -11,7 +11,6 @@ / {
 	#size-cells = <1>;
 	compatible = "mti,sead-3";
 	model = "MIPS SEAD-3";
-	interrupt-parent = <&gic>;
 
 	chosen {
 		stdout-path = "serial1:115200";
@@ -60,6 +59,7 @@ ehci@1b200000 {
 		compatible = "generic-ehci";
 		reg = <0x1b200000 0x1000>;
 
+		interrupt-parent = <&gic>;
 		interrupts = <0>; /* GIC 0 or CPU 6 */
 
 		has-transaction-translator;
@@ -222,6 +222,7 @@ uart0: uart@1f000900 {
 
 		clock-frequency = <14745600>;
 
+		interrupt-parent = <&gic>;
 		interrupts = <3>; /* GIC 3 or CPU 4 */
 
 		no-loopback-test;
@@ -236,6 +237,7 @@ uart1: uart@1f000800 {
 
 		clock-frequency = <14745600>;
 
+		interrupt-parent = <&gic>;
 		interrupts = <2>; /* GIC 2 or CPU 4 */
 
 		no-loopback-test;
@@ -246,6 +248,7 @@ eth@1f010000 {
 		reg = <0x1f010000 0x10000>;
 		reg-io-width = <4>;
 
+		interrupt-parent = <&gic>;
 		interrupts = <0>; /* GIC 0 or CPU 6 */
 
 		phy-mode = "mii";
diff --git a/arch/mips/generic/board-sead3.c b/arch/mips/generic/board-sead3.c
index 39e11bd249cf8..f109a6b9fdd08 100644
--- a/arch/mips/generic/board-sead3.c
+++ b/arch/mips/generic/board-sead3.c
@@ -87,14 +87,16 @@ static __init int remove_gic(void *fdt)
 		return -EINVAL;
 	}
 
-	err = fdt_setprop_u32(fdt, 0, "interrupt-parent", cpu_phandle);
-	if (err) {
-		pr_err("unable to set root interrupt-parent: %d\n", err);
-		return err;
-	}
-
 	uart_off = fdt_node_offset_by_compatible(fdt, -1, "ns16550a");
 	while (uart_off >= 0) {
+		err = fdt_setprop_u32(fdt, uart_off, "interrupt-parent",
+				      cpu_phandle);
+		if (err) {
+			pr_warn("unable to set UART interrupt-parent: %d\n",
+				err);
+			return err;
+		}
+
 		err = fdt_setprop_u32(fdt, uart_off, "interrupts",
 				      cpu_uart_int);
 		if (err) {
@@ -117,6 +119,12 @@ static __init int remove_gic(void *fdt)
 		return eth_off;
 	}
 
+	err = fdt_setprop_u32(fdt, eth_off, "interrupt-parent", cpu_phandle);
+	if (err) {
+		pr_err("unable to set ethernet interrupt-parent: %d\n", err);
+		return err;
+	}
+
 	err = fdt_setprop_u32(fdt, eth_off, "interrupts", cpu_eth_int);
 	if (err) {
 		pr_err("unable to set ethernet interrupts property: %d\n", err);
@@ -129,6 +137,12 @@ static __init int remove_gic(void *fdt)
 		return ehci_off;
 	}
 
+	err = fdt_setprop_u32(fdt, ehci_off, "interrupt-parent", cpu_phandle);
+	if (err) {
+		pr_err("unable to set EHCI interrupt-parent: %d\n", err);
+		return err;
+	}
+
 	err = fdt_setprop_u32(fdt, ehci_off, "interrupts", cpu_ehci_int);
 	if (err) {
 		pr_err("unable to set EHCI interrupts property: %d\n", err);
-- 
GitLab


From d3f616346def161cfb0e4153692713f066755639 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 12:29:59 -0700
Subject: [PATCH 0479/1958] MIPS: SEAD-3: Fix GIC interrupt specifiers

The various interrupt specifiers in the device tree are not in a valid
format for the MIPS GIC interrupt controller binding. Where each
interrupt should provide 3 values - GIC_LOCAL or GIC_SHARED, the
pin number & the type of interrupt - the device tree was only providing
the pin number. This causes interrupts for those devices to not be used
when a GIC is present. SEAD-3 systems without a GIC are unaffected since
the DT fixup code generates interrupt specifiers that are valid for the
CPU interrupt controller.

Fix this by adding the GIC_SHARED & IRQ_TYPE_LEVEL_HIGH values to each
interrupt specifier.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: c11e3b48dbc3 ("MIPS: SEAD3: Probe UARTs using DT")
Fixes: a34e93882de4 ("MIPS: SEAD3: Probe ethernet controller using DT")
Fixes: 7afd2a5aec2e ("MIPS: SEAD3: Probe EHCI controller using DT")
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org  # v4.9+
Patchwork: https://patchwork.linux-mips.org/patch/16189/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/boot/dts/mti/sead3.dts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/mips/boot/dts/mti/sead3.dts b/arch/mips/boot/dts/mti/sead3.dts
index cabe256f9a684..4f8bc83c29605 100644
--- a/arch/mips/boot/dts/mti/sead3.dts
+++ b/arch/mips/boot/dts/mti/sead3.dts
@@ -60,7 +60,7 @@ ehci@1b200000 {
 		reg = <0x1b200000 0x1000>;
 
 		interrupt-parent = <&gic>;
-		interrupts = <0>; /* GIC 0 or CPU 6 */
+		interrupts = <GIC_SHARED 0 IRQ_TYPE_LEVEL_HIGH>; /* GIC 0 or CPU 6 */
 
 		has-transaction-translator;
 	};
@@ -223,7 +223,7 @@ uart0: uart@1f000900 {
 		clock-frequency = <14745600>;
 
 		interrupt-parent = <&gic>;
-		interrupts = <3>; /* GIC 3 or CPU 4 */
+		interrupts = <GIC_SHARED 3 IRQ_TYPE_LEVEL_HIGH>; /* GIC 3 or CPU 4 */
 
 		no-loopback-test;
 	};
@@ -238,7 +238,7 @@ uart1: uart@1f000800 {
 		clock-frequency = <14745600>;
 
 		interrupt-parent = <&gic>;
-		interrupts = <2>; /* GIC 2 or CPU 4 */
+		interrupts = <GIC_SHARED 2 IRQ_TYPE_LEVEL_HIGH>; /* GIC 2 or CPU 4 */
 
 		no-loopback-test;
 	};
@@ -249,7 +249,7 @@ eth@1f010000 {
 		reg-io-width = <4>;
 
 		interrupt-parent = <&gic>;
-		interrupts = <0>; /* GIC 0 or CPU 6 */
+		interrupts = <GIC_SHARED 0 IRQ_TYPE_LEVEL_HIGH>; /* GIC 0 or CPU 6 */
 
 		phy-mode = "mii";
 		smsc,irq-push-pull;
-- 
GitLab


From 0a00024d7a779b283db2a02130ffa46f47634d0c Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 22 Jun 2017 23:06:48 +0800
Subject: [PATCH 0480/1958] MIPS: Loongson: Add Loongson-3A R3 basic support

Loongson-3A R3 is very similar to Loongson-3A R2.

All Loongson-3 CPU family:

Code-name       Brand-name       PRId
Loongson-3A R1  Loongson-3A1000  0x6305
Loongson-3A R2  Loongson-3A2000  0x6308
Loongson-3A R3  Loongson-3A3000  0x6309
Loongson-3B R1  Loongson-3B1000  0x6306
Loongson-3B R2  Loongson-3B1500  0x6307

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16585/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu.h           |  1 +
 arch/mips/kernel/cpu-probe.c          |  6 ++++++
 arch/mips/loongson64/common/env.c     |  1 +
 arch/mips/loongson64/loongson-3/smp.c |  5 +++--
 drivers/platform/mips/cpu_hwmon.c     | 17 +++++++++++++----
 5 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 3069359b01204..53b8b1f490846 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -248,6 +248,7 @@
 #define PRID_REV_LOONGSON3B_R1	0x0006
 #define PRID_REV_LOONGSON3B_R2	0x0007
 #define PRID_REV_LOONGSON3A_R2	0x0008
+#define PRID_REV_LOONGSON3A_R3	0x0009
 
 /*
  * Older processors used to encode processor version and revision in two
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 353ade2c130a3..09462bba629fa 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1836,6 +1836,12 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 			set_elf_platform(cpu, "loongson3a");
 			set_isa(c, MIPS_CPU_ISA_M64R2);
 			break;
+		case PRID_REV_LOONGSON3A_R3:
+			c->cputype = CPU_LOONGSON3;
+			__cpu_name[cpu] = "ICT Loongson-3";
+			set_elf_platform(cpu, "loongson3a");
+			set_isa(c, MIPS_CPU_ISA_M64R2);
+			break;
 		}
 
 		decode_configs(c);
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index 6afa218502670..4707abfe9d641 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -193,6 +193,7 @@ void __init prom_init_env(void)
 			break;
 		case PRID_REV_LOONGSON3A_R1:
 		case PRID_REV_LOONGSON3A_R2:
+		case PRID_REV_LOONGSON3A_R3:
 			cpu_clock_freq = 900000000;
 			break;
 		case PRID_REV_LOONGSON3B_R1:
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 64659fc739405..1629743ba96aa 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -503,7 +503,7 @@ static void loongson3a_r1_play_dead(int *state_addr)
 		: "a1");
 }
 
-static void loongson3a_r2_play_dead(int *state_addr)
+static void loongson3a_r2r3_play_dead(int *state_addr)
 {
 	register int val;
 	register long cpuid, core, node, count;
@@ -664,8 +664,9 @@ void play_dead(void)
 			(void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead);
 		break;
 	case PRID_REV_LOONGSON3A_R2:
+	case PRID_REV_LOONGSON3A_R3:
 		play_dead_at_ckseg1 =
-			(void *)CKSEG1ADDR((unsigned long)loongson3a_r2_play_dead);
+			(void *)CKSEG1ADDR((unsigned long)loongson3a_r2r3_play_dead);
 		break;
 	case PRID_REV_LOONGSON3B_R1:
 	case PRID_REV_LOONGSON3B_R2:
diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c
index 4300a558d0f39..46ab7d86ae1c8 100644
--- a/drivers/platform/mips/cpu_hwmon.c
+++ b/drivers/platform/mips/cpu_hwmon.c
@@ -17,14 +17,23 @@
  */
 int loongson3_cpu_temp(int cpu)
 {
-	u32 reg;
+	u32 reg, prid_rev;
 
 	reg = LOONGSON_CHIPTEMP(cpu);
-	if ((read_c0_prid() & PRID_REV_MASK) == PRID_REV_LOONGSON3A_R1)
+	prid_rev = read_c0_prid() & PRID_REV_MASK;
+	switch (prid_rev) {
+	case PRID_REV_LOONGSON3A_R1:
 		reg = (reg >> 8) & 0xff;
-	else
+		break;
+	case PRID_REV_LOONGSON3A_R2:
+	case PRID_REV_LOONGSON3B_R1:
+	case PRID_REV_LOONGSON3B_R2:
 		reg = ((reg >> 8) & 0xff) - 100;
-
+		break;
+	case PRID_REV_LOONGSON3A_R3:
+		reg = (reg & 0xffff)*731/0x4000 - 273;
+		break;
+	}
 	return (int)reg * 1000;
 }
 
-- 
GitLab


From aaffaa8a3b5950c47e5f7573c34bc47de8894a18 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 27 Jun 2017 18:16:47 +0200
Subject: [PATCH 0481/1958] iommu/iova: Don't disable preempt around
 this_cpu_ptr()

Commit 583248e6620a ("iommu/iova: Disable preemption around use of
this_cpu_ptr()") disables preemption while accessing a per-CPU variable.
This does keep lockdep quiet. However I don't see the point why it is
bad if we get migrated after its access to another CPU.
__iova_rcache_insert() and __iova_rcache_get() immediately locks the
variable after obtaining it - before accessing its members.
_If_ we get migrated away after retrieving the address of cpu_rcache
before taking the lock then the *other* task on the same CPU will
retrieve the same address of cpu_rcache and will spin on the lock.

alloc_iova_fast() disables preemption while invoking
free_cpu_cached_iovas() on each CPU. The function itself uses
per_cpu_ptr() which does not trigger a warning (like this_cpu_ptr()
does). It _could_ make sense to use get_online_cpus() instead but the we
have a hotplug notifier for CPU down (and none for up) so we are good.

Cc: Joerg Roedel <joro@8bytes.org>
Cc: iommu@lists.linux-foundation.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 5c88ba70e4e0f..f0ff0aa040819 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 
 static bool iova_rcache_insert(struct iova_domain *iovad,
 			       unsigned long pfn,
@@ -398,10 +399,8 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 
 		/* Try replenishing IOVAs by flushing rcache. */
 		flushed_rcache = true;
-		preempt_disable();
 		for_each_online_cpu(cpu)
 			free_cpu_cached_iovas(cpu, iovad);
-		preempt_enable();
 		goto retry;
 	}
 
@@ -729,7 +728,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 	bool can_insert = false;
 	unsigned long flags;
 
-	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -759,7 +758,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-	put_cpu_ptr(rcache->cpu_rcaches);
 
 	if (mag_to_free) {
 		iova_magazine_free_pfns(mag_to_free, iovad);
@@ -793,7 +791,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 	bool has_pfn = false;
 	unsigned long flags;
 
-	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_empty(cpu_rcache->loaded)) {
@@ -815,7 +813,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-	put_cpu_ptr(rcache->cpu_rcaches);
 
 	return iova_pfn;
 }
-- 
GitLab


From 58c4a95f90839624b67f67acdb8a129f4383b569 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 27 Jun 2017 18:16:48 +0200
Subject: [PATCH 0482/1958] iommu/vt-d: Don't disable preemption while
 accessing deferred_flush()

get_cpu() disables preemption and returns the current CPU number. The
CPU number is only used once while retrieving the address of the local's
CPU deferred_flush pointer.
We can instead use raw_cpu_ptr() while we remain preemptible. The worst
thing that can happen is that flush_unmaps_timeout() is invoked multiple
times: once by taskA after seeing HIGH_WATER_MARK and then preempted to
another CPU and then by taskB which saw HIGH_WATER_MARK on the same CPU
as taskA. It is also likely that ->size got from HIGH_WATER_MARK to 0
right after its read because another CPU invoked flush_unmaps_timeout()
for this CPU.
The access to flush_data is protected by a spinlock so even if we get
migrated to another CPU or preempted - the data structure is protected.

While at it, I marked deferred_flush static since I can't find a
reference to it outside of this file.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: iommu@lists.linux-foundation.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ee9c258d3ae0c..0ca985b418ec9 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -481,7 +481,7 @@ struct deferred_flush_data {
 	struct deferred_flush_table *tables;
 };
 
-DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
+static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
 
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
@@ -3710,10 +3710,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
 	struct intel_iommu *iommu;
 	struct deferred_flush_entry *entry;
 	struct deferred_flush_data *flush_data;
-	unsigned int cpuid;
 
-	cpuid = get_cpu();
-	flush_data = per_cpu_ptr(&deferred_flush, cpuid);
+	flush_data = raw_cpu_ptr(&deferred_flush);
 
 	/* Flush all CPUs' entries to avoid deferring too much.  If
 	 * this becomes a bottleneck, can just flush us, and rely on
@@ -3746,8 +3744,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
 	}
 	flush_data->size++;
 	spin_unlock_irqrestore(&flush_data->lock, flags);
-
-	put_cpu();
 }
 
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
-- 
GitLab


From 0929deca40bbdd7e821aada2906ac67882cfbf28 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 15 Jun 2017 15:11:51 +0200
Subject: [PATCH 0483/1958] iommu/s390: Use iommu_group_get_for_dev() in
 s390_iommu_add_device()

The iommu_group_get_for_dev() function also attaches the
device to its group, so this code doesn't need to be in the
iommu driver.

Further by using this function the driver can make use of
default domains in the future.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 179e636a4d916..8788640756a73 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -165,20 +165,14 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
 
 static int s390_iommu_add_device(struct device *dev)
 {
-	struct iommu_group *group;
-	int rc;
+	struct iommu_group *group = iommu_group_get_for_dev(dev);
 
-	group = iommu_group_get(dev);
-	if (!group) {
-		group = iommu_group_alloc();
-		if (IS_ERR(group))
-			return PTR_ERR(group);
-	}
+	if (IS_ERR(group))
+		return PTR_ERR(group);
 
-	rc = iommu_group_add_device(group, dev);
 	iommu_group_put(group);
 
-	return rc;
+	return 0;
 }
 
 static void s390_iommu_remove_device(struct device *dev)
@@ -344,6 +338,7 @@ static struct iommu_ops s390_iommu_ops = {
 	.iova_to_phys = s390_iommu_iova_to_phys,
 	.add_device = s390_iommu_add_device,
 	.remove_device = s390_iommu_remove_device,
+	.device_group = generic_device_group,
 	.pgsize_bitmap = S390_IOMMU_PGSIZES,
 };
 
-- 
GitLab


From 7f7a2304aabc4a8102bbbbeed2ec9eaee4a480c2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 28 Jun 2017 12:45:31 +0200
Subject: [PATCH 0484/1958] iommu: Return ERR_PTR() values from device_group
 call-backs

The generic device_group call-backs in iommu.c return NULL
in case of error. Since they are getting ERR_PTR values from
iommu_group_alloc(), just pass them up instead.

Reported-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index cf7ca7e70777d..de09e1e35830f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -915,13 +915,7 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
  */
 struct iommu_group *generic_device_group(struct device *dev)
 {
-	struct iommu_group *group;
-
-	group = iommu_group_alloc();
-	if (IS_ERR(group))
-		return NULL;
-
-	return group;
+	return iommu_group_alloc();
 }
 
 /*
@@ -988,11 +982,7 @@ struct iommu_group *pci_device_group(struct device *dev)
 		return group;
 
 	/* No shared group found, allocate new */
-	group = iommu_group_alloc();
-	if (IS_ERR(group))
-		return NULL;
-
-	return group;
+	return iommu_group_alloc();
 }
 
 /**
-- 
GitLab


From 8faf5e5a12c511410de1590cf310ec331c5ec7b1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 28 Jun 2017 12:50:16 +0200
Subject: [PATCH 0485/1958] iommu/omap: Return ERR_PTR in device_group
 call-back

Make sure that the device_group callback returns an ERR_PTR
instead of NULL.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 95dfca36ccb99..641e035cf8666 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1309,7 +1309,7 @@ static void omap_iommu_remove_device(struct device *dev)
 static struct iommu_group *omap_iommu_device_group(struct device *dev)
 {
 	struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
-	struct iommu_group *group = NULL;
+	struct iommu_group *group = ERR_PTR(-EINVAL);
 
 	if (arch_data->iommu_dev)
 		group = arch_data->iommu_dev->group;
-- 
GitLab


From 72dcac633475a5b331cf21f3525467d0e123395a Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 28 Jun 2017 12:52:48 +0200
Subject: [PATCH 0486/1958] iommu: Warn once when device_group callback returns
 NULL

This callback should never return NULL. Print a warning if
that happens so that we notice and can fix it.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index de09e1e35830f..3f6ea160afed3 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1010,6 +1010,9 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 	if (ops && ops->device_group)
 		group = ops->device_group(dev);
 
+	if (WARN_ON_ONCE(group == NULL))
+		return ERR_PTR(-EINVAL);
+
 	if (IS_ERR(group))
 		return group;
 
-- 
GitLab


From 01e1932a1748e20b69ba23d0a01db5eb3a525782 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 28 Jun 2017 16:39:32 +0530
Subject: [PATCH 0487/1958] iommu/vt-d: Constify intel_dma_ops

Most dma_map_ops structures are never modified. Constify these
structures such that these can be write-protected.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0ca985b418ec9..1a79a4ec6f090 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3954,7 +3954,7 @@ static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
 	return !dma_addr;
 }
 
-struct dma_map_ops intel_dma_ops = {
+const struct dma_map_ops intel_dma_ops = {
 	.alloc = intel_alloc_coherent,
 	.free = intel_free_coherent,
 	.map_sg = intel_map_sg,
-- 
GitLab


From 87ff091c4061eae16c799af51030b539afec97ef Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 28 Jun 2017 12:44:35 -0500
Subject: [PATCH 0488/1958] ipmi:ssif: Check dev before setting drvdata

dev can be NULL.

Reported-by: Austin Christ <austinwc@codeaurora.org>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 61434830e641f..971ecda336578 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1777,7 +1777,8 @@ static int new_ssif_client(int addr, char *adapter_name,
 	addr_info->addr_src = addr_src;
 	addr_info->dev = dev;
 
-	dev_set_drvdata(dev, addr_info);
+	if (dev)
+		dev_set_drvdata(dev, addr_info);
 
 	list_add_tail(&addr_info->link, &ssif_infos);
 
-- 
GitLab


From 06eb8a56af23ae32e90fdd6b27fec30930364b52 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 Jun 2017 14:00:40 -0400
Subject: [PATCH 0489/1958] sunrpc: Disable splice for krb5i

Running a multi-threaded 8KB fio test (70/30 mix), three or four out
of twelve of the jobs fail when using krb5i. The failure is an EIO
on a read.

Troubleshooting confirmed the EIO results when the client fails to
verify the MIC of an NFS READ reply. Bruce suggested the problem
could be due to the data payload changing between the time the
reply's MIC was computed on the server and the time the reply was
actually sent.

krb5p gets around this problem by disabling RQ_SPLICE_OK. Use the
same mechanism for krb5i RPCs.

"iozone -i0 -i1 -s128m -y1k -az -I", export is tmpfs, mount is
sec=krb5i,vers=3,proto=rdma. The important numbers are the
read / reread column.

Here's without the RQ_SPLICE_OK patch:

              kB  reclen    write  rewrite    read    reread
          131072       1     7546     7929     8396     8267
          131072       2    14375    14600    15843    15639
          131072       4    19280    19248    21303    21410
          131072       8    32350    31772    35199    34883
          131072      16    36748    37477    49365    51706
          131072      32    55669    56059    57475    57389
          131072      64    74599    75190    74903    75550
          131072     128    99810   101446   102828   102724
          131072     256   122042   122612   124806   125026
          131072     512   137614   138004   141412   141267
          131072    1024   146601   148774   151356   151409
          131072    2048   180684   181727   293140   292840
          131072    4096   206907   207658   552964   549029
          131072    8192   223982   224360   454493   473469
          131072   16384   228927   228390   654734   632607

And here's with it:

              kB  reclen    write  rewrite    read    reread
          131072       1     7700     7365     7958     8011
          131072       2    13211    13303    14937    14414
          131072       4    19001    19265    20544    20657
          131072       8    30883    31097    34255    33566
          131072      16    36868    34908    51499    49944
          131072      32    56428    55535    58710    56952
          131072      64    73507    74676    75619    74378
          131072     128   100324   101442   103276   102736
          131072     256   122517   122995   124639   124150
          131072     512   137317   139007   140530   140830
          131072    1024   146807   148923   151246   151072
          131072    2048   179656   180732   292631   292034
          131072    4096   206216   208583   543355   541951
          131072    8192   223738   224273   494201   489372
          131072   16384   229313   229840   691719   668427

I would say that there is not much difference in this test.

For good measure, here's the same test with sec=krb5p:

              kB  reclen    write  rewrite    read    reread
          131072       1     5982     5881     6137     6218
          131072       2    10216    10252    10850    10932
          131072       4    12236    12575    15375    15526
          131072       8    15461    15462    23821    22351
          131072      16    25677    25811    27529    27640
          131072      32    31903    32354    34063    33857
          131072      64    42989    43188    45635    45561
          131072     128    52848    53210    56144    56141
          131072     256    59123    59214    62691    62933
          131072     512    63140    63277    66887    67025
          131072    1024    65255    65299    69213    69140
          131072    2048    76454    76555   133767   133862
          131072    4096    84726    84883   251925   250702
          131072    8192    89491    89482   270821   276085
          131072   16384    91572    91597   361768   336868

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=307
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/svcauth_gss.c | 8 ++++++++
 net/sunrpc/svc.c                  | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a54a7a3d28f53..7b1ee5a0b03cd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -838,6 +838,14 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 	struct xdr_netobj mic;
 	struct xdr_buf integ_buf;
 
+	/* NFS READ normally uses splice to send data in-place. However
+	 * the data in cache can change after the reply's MIC is computed
+	 * but before the RPC reply is sent. To prevent the client from
+	 * rejecting the server-computed MIC in this somewhat rare case,
+	 * do not use splice with the GSS integrity service.
+	 */
+	clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
 	/* Did we already verify the signature on the original pass through? */
 	if (rqstp->rq_deferred)
 		return 0;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 45b4f2d2e3bdf..85ce0db5b0a69 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1165,7 +1165,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	if (argv->iov_len < 6*4)
 		goto err_short_len;
 
-	/* Will be turned off only in gss privacy case: */
+	/* Will be turned off by GSS integrity and privacy services */
 	set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
 	/* Will be turned off only when NFSv4 Sessions are used */
 	set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
-- 
GitLab


From 91a08eae7979417bf10c98f149c6ea28e6632114 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:17:15 -0400
Subject: [PATCH 0490/1958] svcrdma: Squelch disconnection messages

The server displays "svcrdma: failed to post Send WR (-107)" in the
kernel log when the client disconnects. This could flood the server's
log, so remove the message.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1736337f3a557..5ba6d915c9067 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -313,13 +313,17 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
 	dma_addr = ib_dma_map_page(dev, virt_to_page(base),
 				   offset, len, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(dev, dma_addr))
-		return -EIO;
+		goto out_maperr;
 
 	ctxt->sge[sge_no].addr = dma_addr;
 	ctxt->sge[sge_no].length = len;
 	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
 	svc_rdma_count_mappings(rdma, ctxt);
 	return 0;
+
+out_maperr:
+	pr_err("svcrdma: failed to map buffer\n");
+	return -EIO;
 }
 
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
@@ -334,13 +338,17 @@ static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
 
 	dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(dev, dma_addr))
-		return -EIO;
+		goto out_maperr;
 
 	ctxt->sge[sge_no].addr = dma_addr;
 	ctxt->sge[sge_no].length = len;
 	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
 	svc_rdma_count_mappings(rdma, ctxt);
 	return 0;
+
+out_maperr:
+	pr_err("svcrdma: failed to map page\n");
+	return -EIO;
 }
 
 /**
@@ -547,7 +555,6 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 	return 0;
 
 err:
-	pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 1);
 	return ret;
-- 
GitLab


From 107c1d0a991abe632a051de697c5acc33c03fd96 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:17:24 -0400
Subject: [PATCH 0491/1958] svcrdma: Avoid Send Queue overflow

Sanity case: Catch the case where more Work Requests are being
posted to the Send Queue than there are Send Queue Entries.

This might happen if a client sends a chunk with more segments than
there are SQEs for the transport. The server can't send that reply,
so the transport will deadlock unless the client drops the RPC.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_rw.c     | 5 +++++
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 0cf6202776933..3b35364d9a6ba 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -232,6 +232,9 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 	struct ib_cqe *cqe;
 	int ret;
 
+	if (cc->cc_sqecount > rdma->sc_sq_depth)
+		return -EINVAL;
+
 	first_wr = NULL;
 	cqe = &cc->cc_cqe;
 	list_for_each(tmp, &cc->cc_rwctxts) {
@@ -425,6 +428,7 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
  *
  * Returns a non-negative number of bytes the chunk consumed, or
  *	%-E2BIG if the payload was larger than the Write chunk,
+ *	%-EINVAL if client provided too many segments,
  *	%-ENOMEM if rdma_rw context pool was exhausted,
  *	%-ENOTCONN if posting failed (connection is lost),
  *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
@@ -465,6 +469,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
  *
  * Returns a non-negative number of bytes the chunk consumed, or
  *	%-E2BIG if the payload was larger than the Reply chunk,
+ *	%-EINVAL if client provided too many segments,
  *	%-ENOMEM if rdma_rw context pool was exhausted,
  *	%-ENOTCONN if posting failed (connection is lost),
  *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 5ba6d915c9067..19fd01e4b6900 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -684,7 +684,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	return 0;
 
  err2:
-	if (ret != -E2BIG)
+	if (ret != -E2BIG || ret != -EINVAL)
 		goto err1;
 
 	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
-- 
GitLab


From a80a32341fbabd4276165a9ce4fa4c80168c0bef Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:17:35 -0400
Subject: [PATCH 0492/1958] svcrdma: Remove svc_rdma_marshal.c

svc_rdma_marshal.c has one remaining exported function --
svc_rdma_xdr_decode_req -- and it has a single call site. Take
the same approach as the sendto path, and move this function
into the source file where it is called.

This is a refactoring change only.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h         |   3 -
 net/sunrpc/xprtrdma/Makefile            |   4 +-
 net/sunrpc/xprtrdma/svc_rdma_marshal.c  | 168 ------------------------
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 126 ++++++++++++++++++
 4 files changed, 128 insertions(+), 173 deletions(-)
 delete mode 100644 net/sunrpc/xprtrdma/svc_rdma_marshal.c

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f3787d800ba46..3ca991657889f 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -185,9 +185,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
 				    __be32 *rdma_resp,
 				    struct xdr_buf *rcvbuf);
 
-/* svc_rdma_marshal.c */
-extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
-
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
 extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index c1ae8142ab734..b8213ddce2f2b 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 rpcrdma-y := transport.o rpc_rdma.o verbs.o \
 	fmr_ops.o frwr_ops.o \
 	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
-	svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
-	svc_rdma_rw.o module.o
+	svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
+	module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
deleted file mode 100644
index bdcf7d85a3dc0..0000000000000
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2016 Oracle. All rights reserved.
- * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the BSD-type
- * license below:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *      Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *
- *      Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following
- *      disclaimer in the documentation and/or other materials provided
- *      with the distribution.
- *
- *      Neither the name of the Network Appliance, Inc. nor the names of
- *      its contributors may be used to endorse or promote products
- *      derived from this software without specific prior written
- *      permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Tom Tucker <tom@opengridcomputing.com>
- */
-
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/debug.h>
-#include <asm/unaligned.h>
-#include <linux/sunrpc/rpc_rdma.h>
-#include <linux/sunrpc/svc_rdma.h>
-
-#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
-
-static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	while (*p++ != xdr_zero) {
-		next = p + rpcrdma_readchunk_maxsz - 1;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	while (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	if (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-/**
- * svc_rdma_xdr_decode_req - Parse incoming RPC-over-RDMA header
- * @rq_arg: Receive buffer
- *
- * On entry, xdr->head[0].iov_base points to first byte in the
- * RPC-over-RDMA header.
- *
- * On successful exit, head[0] points to first byte past the
- * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
- * The length of the RPC-over-RDMA header is returned.
- */
-int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
-{
-	__be32 *p, *end, *rdma_argp;
-	unsigned int hdr_len;
-
-	/* Verify that there's enough bytes for header + something */
-	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
-		goto out_short;
-
-	rdma_argp = rq_arg->head[0].iov_base;
-	if (*(rdma_argp + 1) != rpcrdma_version)
-		goto out_version;
-
-	switch (*(rdma_argp + 3)) {
-	case rdma_msg:
-	case rdma_nomsg:
-		break;
-
-	case rdma_done:
-		goto out_drop;
-
-	case rdma_error:
-		goto out_drop;
-
-	default:
-		goto out_proc;
-	}
-
-	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
-	p = xdr_check_read_list(rdma_argp + 4, end);
-	if (!p)
-		goto out_inval;
-	p = xdr_check_write_list(p, end);
-	if (!p)
-		goto out_inval;
-	p = xdr_check_reply_chunk(p, end);
-	if (!p)
-		goto out_inval;
-	if (p > end)
-		goto out_inval;
-
-	rq_arg->head[0].iov_base = p;
-	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
-	rq_arg->head[0].iov_len -= hdr_len;
-	return hdr_len;
-
-out_short:
-	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
-	return -EINVAL;
-
-out_version:
-	dprintk("svcrdma: bad xprt version: %u\n",
-		be32_to_cpup(rdma_argp + 1));
-	return -EPROTONOSUPPORT;
-
-out_drop:
-	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
-	return 0;
-
-out_proc:
-	dprintk("svcrdma: bad rdma procedure (%u)\n",
-		be32_to_cpup(rdma_argp + 3));
-	return -EINVAL;
-
-out_inval:
-	dprintk("svcrdma: failed to parse transport header\n");
-	return -EINVAL;
-}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 27a99bf5b1a6f..55ad335bbef1b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2016, 2017 Oracle. All rights reserved.
  * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
@@ -40,6 +41,7 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
+#include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/spinlock.h>
@@ -115,6 +117,130 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
+static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
+{
+	__be32 *next;
+
+	while (*p++ != xdr_zero) {
+		next = p + rpcrdma_readchunk_maxsz - 1;
+		if (next > end)
+			return NULL;
+		p = next;
+	}
+	return p;
+}
+
+static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
+{
+	__be32 *next;
+
+	while (*p++ != xdr_zero) {
+		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+		if (next > end)
+			return NULL;
+		p = next;
+	}
+	return p;
+}
+
+static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
+{
+	__be32 *next;
+
+	if (*p++ != xdr_zero) {
+		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+		if (next > end)
+			return NULL;
+		p = next;
+	}
+	return p;
+}
+
+/* On entry, xdr->head[0].iov_base points to first byte in the
+ * RPC-over-RDMA header.
+ *
+ * On successful exit, head[0] points to first byte past the
+ * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
+ * The length of the RPC-over-RDMA header is returned.
+ *
+ * Assumptions:
+ * - The transport header is entirely contained in the head iovec.
+ */
+static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
+{
+	__be32 *p, *end, *rdma_argp;
+	unsigned int hdr_len;
+	char *proc;
+
+	/* Verify that there's enough bytes for header + something */
+	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
+		goto out_short;
+
+	rdma_argp = rq_arg->head[0].iov_base;
+	if (*(rdma_argp + 1) != rpcrdma_version)
+		goto out_version;
+
+	switch (*(rdma_argp + 3)) {
+	case rdma_msg:
+		proc = "RDMA_MSG";
+		break;
+	case rdma_nomsg:
+		proc = "RDMA_NOMSG";
+		break;
+
+	case rdma_done:
+		goto out_drop;
+
+	case rdma_error:
+		goto out_drop;
+
+	default:
+		goto out_proc;
+	}
+
+	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
+	p = xdr_check_read_list(rdma_argp + 4, end);
+	if (!p)
+		goto out_inval;
+	p = xdr_check_write_list(p, end);
+	if (!p)
+		goto out_inval;
+	p = xdr_check_reply_chunk(p, end);
+	if (!p)
+		goto out_inval;
+	if (p > end)
+		goto out_inval;
+
+	rq_arg->head[0].iov_base = p;
+	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
+	rq_arg->head[0].iov_len -= hdr_len;
+	dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n",
+		proc, be32_to_cpup(rdma_argp), hdr_len);
+	return hdr_len;
+
+out_short:
+	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+	return -EINVAL;
+
+out_version:
+	dprintk("svcrdma: bad xprt version: %u\n",
+		be32_to_cpup(rdma_argp + 1));
+	return -EPROTONOSUPPORT;
+
+out_drop:
+	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+	return 0;
+
+out_proc:
+	dprintk("svcrdma: bad rdma procedure (%u)\n",
+		be32_to_cpup(rdma_argp + 3));
+	return -EINVAL;
+
+out_inval:
+	dprintk("svcrdma: failed to parse transport header\n");
+	return -EINVAL;
+}
+
 /* Issue an RDMA_READ using the local lkey to map the data sink */
 int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
 			struct svc_rqst *rqstp,
-- 
GitLab


From e77340e00300df9b6591d686f186eea60c67206f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:17:44 -0400
Subject: [PATCH 0493/1958] svcrdma: Improve Read chunk sanity checking

Identify malformed transport headers and unsupported chunk
combinations as early as possible.

- Reject RPC-over-RDMA messages that contain more than one Read chunk,
  since this implementation currently does not support more than one
  per RPC transaction.

- Ensure that segment lengths are not crazy.

- Remove the segment count check. With a 1KB inline threshold, the
  largest number of Read segments that can be conveyed is about 40
  (for a RDMA_NOMSG Call message). This is nowhere near
  RPCSVC_MAXPAGES. As far as I can tell, that was just a sanity
  check and does not enforce an implementation limit.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 55 +++++++++++++++++--------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 55ad335bbef1b..885ad9503ee0a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -117,15 +117,47 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
-static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
+/* This accommodates the largest possible Position-Zero
+ * Read chunk or Reply chunk, in one segment.
+ */
+#define MAX_BYTES_SPECIAL_SEG	((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
+
+/* Sanity check the Read list.
+ *
+ * Implementation limits:
+ * - This implementation supports only one Read chunk.
+ *
+ * Sanity checks:
+ * - Read list does not overflow buffer.
+ * - Segment size limited by largest NFS data payload.
+ *
+ * The segment count is limited to how many segments can
+ * fit in the transport header without overflowing the
+ * buffer. That's about 40 Read segments for a 1KB inline
+ * threshold.
+ *
+ * Returns pointer to the following Write list.
+ */
+static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
 {
-	__be32 *next;
+	u32 position;
+	bool first;
 
+	first = true;
 	while (*p++ != xdr_zero) {
-		next = p + rpcrdma_readchunk_maxsz - 1;
-		if (next > end)
+		if (first) {
+			position = be32_to_cpup(p++);
+			first = false;
+		} else if (be32_to_cpup(p++) != position) {
+			return NULL;
+		}
+		p++;	/* handle */
+		if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
+			return NULL;
+		p += 2;	/* offset */
+
+		if (p > end)
 			return NULL;
-		p = next;
 	}
 	return p;
 }
@@ -478,16 +510,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 	return ret;
 }
 
-static unsigned int
-rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch)
-{
-	unsigned int count;
-
-	for (count = 0; ch->rc_discrim != xdr_zero; ch++)
-		count++;
-	return count;
-}
-
 /* If there was additional inline content, append it to the end of arg.pages.
  * Tail copy has to be done after the reader function has determined how many
  * pages are needed for RDMA READ.
@@ -567,9 +589,6 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
 	if (!ch)
 		return 0;
 
-	if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES)
-		return -EINVAL;
-
 	/* The request is completed when the RDMA_READs complete. The
 	 * head context keeps all the pages that comprise the
 	 * request.
-- 
GitLab


From 3c22f326074d2306041b0c5c9df516464349564d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:17:52 -0400
Subject: [PATCH 0494/1958] svcrdma: Improve Write chunk sanity checking

Identify malformed transport headers and unsupported chunk
combinations as early as possible.

- Reject RPC-over-RDMA messages that contain more than one Write
chunk, since this implementation does not support more than one per
message.

- Ensure that segment lengths are not crazy.

- Ensure that the chunk's segment count is not crazy.

With a 1KB inline threshold, the largest number of Write segments
that can be conveyed is about 60 (for a RDMA_NOMSG Reply message).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 52 ++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 885ad9503ee0a..cf8be18f297a1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -117,6 +117,11 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
+/* This accommodates the largest possible Write chunk,
+ * in one segment.
+ */
+#define MAX_BYTES_WRITE_SEG	((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
+
 /* This accommodates the largest possible Position-Zero
  * Read chunk or Reply chunk, in one segment.
  */
@@ -162,15 +167,52 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
 	return p;
 }
 
-static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
+/* The segment count is limited to how many segments can
+ * fit in the transport header without overflowing the
+ * buffer. That's about 60 Write segments for a 1KB inline
+ * threshold.
+ */
+static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
+				     u32 maxlen)
 {
-	__be32 *next;
+	u32 i, segcount;
+
+	segcount = be32_to_cpup(p++);
+	for (i = 0; i < segcount; i++) {
+		p++;	/* handle */
+		if (be32_to_cpup(p++) > maxlen)
+			return NULL;
+		p += 2;	/* offset */
+
+		if (p > end)
+			return NULL;
+	}
+
+	return p;
+}
 
+/* Sanity check the Write list.
+ *
+ * Implementation limits:
+ * - This implementation supports only one Write chunk.
+ *
+ * Sanity checks:
+ * - Write list does not overflow buffer.
+ * - Segment size limited by largest NFS data payload.
+ *
+ * Returns pointer to the following Reply chunk.
+ */
+static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
+{
+	u32 chcount;
+
+	chcount = 0;
 	while (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
+		p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
+		if (!p)
+			return NULL;
+		if (chcount++ > 1)
 			return NULL;
-		p = next;
 	}
 	return p;
 }
-- 
GitLab


From ca5c76aba7502d52a6019358ec04bd4d734037d7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:00 -0400
Subject: [PATCH 0495/1958] svcrdma: Improve Reply chunk sanity checking

Identify malformed transport headers and unsupported chunk
combinations as early as possible.

- Ensure that segment lengths are not crazy.

- Ensure that the Reply chunk's segment count is not crazy.

With a 1KB inline threshold, the largest number of Write segments
that can be conveyed is about 60 (for a RDMA_NOMSG Reply message).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index cf8be18f297a1..b48089314f859 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -217,15 +217,20 @@ static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
 	return p;
 }
 
-static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
+/* Sanity check the Reply chunk.
+ *
+ * Sanity checks:
+ * - Reply chunk does not overflow buffer.
+ * - Segment size limited by largest NFS data payload.
+ *
+ * Returns pointer to the following RPC header.
+ */
+static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
 {
-	__be32 *next;
-
 	if (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
+		p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
+		if (!p)
 			return NULL;
-		p = next;
 	}
 	return p;
 }
-- 
GitLab


From 2d6491a56c76f2d6c22aaa710e2a4d04ad41529b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:08 -0400
Subject: [PATCH 0496/1958] svcrdma: Don't account for Receive queue
 "starvation"

>From what I can tell, calling ->recvfrom when there is no work to do
is a normal part of operation. This is the only way svc_recv can
tell when there is no more data ready to receive on the transport.

Neither the TCP nor the UDP transport implementations have a
"starve" metric.

The cost of receive starvation accounting is bumping an atomic, which
results in extra (IMO unnecessary) bus traffic between CPU sockets,
while holding a spin lock.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index b48089314f859..1452bd02d857f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -844,9 +844,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	struct svc_xprt *xprt = rqstp->rq_xprt;
 	struct svcxprt_rdma *rdma_xprt =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-	struct svc_rdma_op_ctxt *ctxt = NULL;
+	struct svc_rdma_op_ctxt *ctxt;
 	struct rpcrdma_msg *rmsgp;
-	int ret = 0;
+	int ret;
 
 	dprintk("svcrdma: rqstp=%p\n", rqstp);
 
@@ -863,21 +863,13 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 					struct svc_rdma_op_ctxt, list);
 		list_del(&ctxt->list);
 	} else {
-		atomic_inc(&rdma_stat_rq_starve);
+		/* No new incoming requests, terminate the loop */
 		clear_bit(XPT_DATA, &xprt->xpt_flags);
-		ctxt = NULL;
+		spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+		return 0;
 	}
 	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
-	if (!ctxt) {
-		/* This is the EAGAIN path. The svc_recv routine will
-		 * return -EAGAIN, the nfsd thread will go to call into
-		 * svc_recv again and we shouldn't be on the active
-		 * transport list
-		 */
-		if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
-			goto defer;
-		goto out;
-	}
+
 	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
 		ctxt, rdma_xprt, rqstp);
 	atomic_inc(&rdma_stat_recv);
@@ -920,7 +912,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		+ rqstp->rq_arg.page_len
 		+ rqstp->rq_arg.tail[0].iov_len;
 	svc_rdma_put_context(ctxt, 0);
- out:
 	dprintk("svcrdma: ret=%d, rq_arg.len=%u, "
 		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n",
 		ret, rqstp->rq_arg.len,
-- 
GitLab


From 811642d8d8a82c0cce8dc2debfdaf23c5a144839 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Mon, 19 Jun 2017 09:10:32 -0600
Subject: [PATCH 0497/1958] vfio: Fix group release deadlock

If vfio_iommu_group_notifier() acquires a group reference and that
reference becomes the last reference to the group, then vfio_group_put
introduces a deadlock code path where we're trying to unregister from
the iommu notifier chain from within a callout of that chain.  Use a
work_struct to release this reference asynchronously.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/vfio/vfio.c | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 6a49485eb49da..54dd2fbf83d92 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -423,6 +423,34 @@ static void vfio_group_put(struct vfio_group *group)
 	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
 }
 
+struct vfio_group_put_work {
+	struct work_struct work;
+	struct vfio_group *group;
+};
+
+static void vfio_group_put_bg(struct work_struct *work)
+{
+	struct vfio_group_put_work *do_work;
+
+	do_work = container_of(work, struct vfio_group_put_work, work);
+
+	vfio_group_put(do_work->group);
+	kfree(do_work);
+}
+
+static void vfio_group_schedule_put(struct vfio_group *group)
+{
+	struct vfio_group_put_work *do_work;
+
+	do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
+	if (WARN_ON(!do_work))
+		return;
+
+	INIT_WORK(&do_work->work, vfio_group_put_bg);
+	do_work->group = group;
+	schedule_work(&do_work->work);
+}
+
 /* Assume group_lock or group reference is held */
 static void vfio_group_get(struct vfio_group *group)
 {
@@ -762,7 +790,14 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
 		break;
 	}
 
-	vfio_group_put(group);
+	/*
+	 * If we're the last reference to the group, the group will be
+	 * released, which includes unregistering the iommu group notifier.
+	 * We hold a read-lock on that notifier list, unregistering needs
+	 * a write-lock... deadlock.  Release our reference asynchronously
+	 * to avoid that situation.
+	 */
+	vfio_group_schedule_put(group);
 	return NOTIFY_OK;
 }
 
-- 
GitLab


From e323369b2e204da4dc771bbddceef986f4bf85d5 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 28 Jun 2017 13:49:52 -0600
Subject: [PATCH 0498/1958] kvm-vfio: Decouple only when we match a group

Unset-KVM and decrement-assignment only when we find the group in our
list.  Otherwise we can get out of sync if the user triggers this for
groups that aren't currently on our list.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
---
 virt/kvm/vfio.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 37d9118fd84be..6e002d0f31910 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -246,21 +246,20 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 				continue;
 
 			list_del(&kvg->node);
+			kvm_arch_end_assignment(dev->kvm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+			kvm_spapr_tce_release_vfio_group(dev->kvm,
+							 kvg->vfio_group);
+#endif
+			kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 			kvm_vfio_group_put_external_user(kvg->vfio_group);
 			kfree(kvg);
 			ret = 0;
 			break;
 		}
 
-		kvm_arch_end_assignment(dev->kvm);
-
 		mutex_unlock(&kv->lock);
 
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-		kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group);
-#endif
-		kvm_vfio_group_set_kvm(vfio_group, NULL);
-
 		kvm_vfio_group_put_external_user(vfio_group);
 
 		kvm_vfio_update_coherency(dev);
-- 
GitLab


From 5d6dee80a1e94cc284d03e06d930e60e8d3ecf7d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 28 Jun 2017 13:50:05 -0600
Subject: [PATCH 0499/1958] vfio: New external user group/file match

At the point where the kvm-vfio pseudo device wants to release its
vfio group reference, we can't always acquire a new reference to make
that happen.  The group can be in a state where we wouldn't allow a
new reference to be added.  This new helper function allows a caller
to match a file to a group to facilitate this.  Given a file and
group, report if they match.  Thus the caller needs to already have a
group reference to match to the file.  This allows the deletion of a
group without acquiring a new reference.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/vfio/vfio.c  |  9 +++++++++
 include/linux/vfio.h |  2 ++
 virt/kvm/vfio.c      | 27 +++++++++++++++++++--------
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 54dd2fbf83d92..7597a377eb4e4 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1776,6 +1776,15 @@ void vfio_group_put_external_user(struct vfio_group *group)
 }
 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
 
+bool vfio_external_group_match_file(struct vfio_group *test_group,
+				    struct file *filep)
+{
+	struct vfio_group *group = filep->private_data;
+
+	return (filep->f_op == &vfio_group_fops) && (group == test_group);
+}
+EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
+
 int vfio_external_user_iommu_id(struct vfio_group *group)
 {
 	return iommu_group_id(group->iommu_group);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index edf9b2cad277d..9b34d0af5d275 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver(
  */
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
+extern bool vfio_external_group_match_file(struct vfio_group *group,
+					   struct file *filep);
 extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 6e002d0f31910..d99850c462a18 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -51,6 +51,22 @@ static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
 	return vfio_group;
 }
 
+static bool kvm_vfio_external_group_match_file(struct vfio_group *group,
+					       struct file *filep)
+{
+	bool ret, (*fn)(struct vfio_group *, struct file *);
+
+	fn = symbol_get(vfio_external_group_match_file);
+	if (!fn)
+		return false;
+
+	ret = fn(group, filep);
+
+	symbol_put(vfio_external_group_match_file);
+
+	return ret;
+}
+
 static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
 {
 	void (*fn)(struct vfio_group *);
@@ -231,18 +247,13 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 		if (!f.file)
 			return -EBADF;
 
-		vfio_group = kvm_vfio_group_get_external_user(f.file);
-		fdput(f);
-
-		if (IS_ERR(vfio_group))
-			return PTR_ERR(vfio_group);
-
 		ret = -ENOENT;
 
 		mutex_lock(&kv->lock);
 
 		list_for_each_entry(kvg, &kv->group_list, node) {
-			if (kvg->vfio_group != vfio_group)
+			if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
+								f.file))
 				continue;
 
 			list_del(&kvg->node);
@@ -260,7 +271,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 
 		mutex_unlock(&kv->lock);
 
-		kvm_vfio_group_put_external_user(vfio_group);
+		fdput(f);
 
 		kvm_vfio_update_coherency(dev);
 
-- 
GitLab


From b392ee07999aa1f19b3a845fad47ec4275341f71 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 22 Jun 2017 23:06:50 +0800
Subject: [PATCH 0500/1958] MIPS: Loongson: Add NMI handler support

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16587/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/loongson64/common/init.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c
index 9b987fe98b5b0..6ef17120722f5 100644
--- a/arch/mips/loongson64/common/init.c
+++ b/arch/mips/loongson64/common/init.c
@@ -10,13 +10,25 @@
 
 #include <linux/bootmem.h>
 #include <asm/bootinfo.h>
+#include <asm/traps.h>
 #include <asm/smp-ops.h>
+#include <asm/cacheflush.h>
 
 #include <loongson.h>
 
 /* Loongson CPU address windows config space base address */
 unsigned long __maybe_unused _loongson_addrwincfg_base;
 
+static void __init mips_nmi_setup(void)
+{
+	void *base;
+	extern char except_vec_nmi;
+
+	base = (void *)(CAC_BASE + 0x380);
+	memcpy(base, &except_vec_nmi, 0x80);
+	flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
+}
+
 void __init prom_init(void)
 {
 #ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
@@ -40,6 +52,7 @@ void __init prom_init(void)
 	/*init the uart base address */
 	prom_init_uart_base();
 	register_smp_ops(&loongson3_smp_ops);
+	board_nmi_handler_setup = mips_nmi_setup;
 }
 
 void __init prom_free_prom_memory(void)
-- 
GitLab


From 99b0b5a3a1e994247e7533de0fd7e4d13ead0ddd Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 22 Jun 2017 23:06:51 +0800
Subject: [PATCH 0501/1958] MIPS: Loongson-3: Support 4 packages in CPU Hwmon
 driver

Loongson-3 machines may have as many as 4 physical packages.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16588/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/platform/mips/cpu_hwmon.c | 119 +++++++++++++++---------------
 1 file changed, 58 insertions(+), 61 deletions(-)

diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c
index 46ab7d86ae1c8..322de58eebaf5 100644
--- a/drivers/platform/mips/cpu_hwmon.c
+++ b/drivers/platform/mips/cpu_hwmon.c
@@ -37,6 +37,7 @@ int loongson3_cpu_temp(int cpu)
 	return (int)reg * 1000;
 }
 
+static int nr_packages;
 static struct device *cpu_hwmon_dev;
 
 static ssize_t get_hwmon_name(struct device *dev,
@@ -60,88 +61,74 @@ static ssize_t get_hwmon_name(struct device *dev,
 	return sprintf(buf, "cpu-hwmon\n");
 }
 
-static ssize_t get_cpu0_temp(struct device *dev,
+static ssize_t get_cpu_temp(struct device *dev,
 			struct device_attribute *attr, char *buf);
-static ssize_t get_cpu1_temp(struct device *dev,
-			struct device_attribute *attr, char *buf);
-static ssize_t cpu0_temp_label(struct device *dev,
-			struct device_attribute *attr, char *buf);
-static ssize_t cpu1_temp_label(struct device *dev,
+static ssize_t cpu_temp_label(struct device *dev,
 			struct device_attribute *attr, char *buf);
 
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, get_cpu0_temp, NULL, 1);
-static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, cpu0_temp_label, NULL, 1);
-static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, get_cpu1_temp, NULL, 2);
-static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, cpu1_temp_label, NULL, 2);
-
-static const struct attribute *hwmon_cputemp1[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_label.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute *hwmon_cputemp2[] = {
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_label.dev_attr.attr,
-	NULL
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, get_cpu_temp, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, cpu_temp_label, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, get_cpu_temp, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, cpu_temp_label, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, get_cpu_temp, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, cpu_temp_label, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, get_cpu_temp, NULL, 4);
+static SENSOR_DEVICE_ATTR(temp4_label, S_IRUGO, cpu_temp_label, NULL, 4);
+
+static const struct attribute *hwmon_cputemp[4][3] = {
+	{
+		&sensor_dev_attr_temp1_input.dev_attr.attr,
+		&sensor_dev_attr_temp1_label.dev_attr.attr,
+		NULL
+	},
+	{
+		&sensor_dev_attr_temp2_input.dev_attr.attr,
+		&sensor_dev_attr_temp2_label.dev_attr.attr,
+		NULL
+	},
+	{
+		&sensor_dev_attr_temp3_input.dev_attr.attr,
+		&sensor_dev_attr_temp3_label.dev_attr.attr,
+		NULL
+	},
+	{
+		&sensor_dev_attr_temp4_input.dev_attr.attr,
+		&sensor_dev_attr_temp4_label.dev_attr.attr,
+		NULL
+	}
 };
 
-static ssize_t cpu0_temp_label(struct device *dev,
+static ssize_t cpu_temp_label(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "CPU 0 Temperature\n");
+	int id = (to_sensor_dev_attr(attr))->index - 1;
+	return sprintf(buf, "CPU %d Temperature\n", id);
 }
 
-static ssize_t cpu1_temp_label(struct device *dev,
+static ssize_t get_cpu_temp(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "CPU 1 Temperature\n");
-}
-
-static ssize_t get_cpu0_temp(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	int value = loongson3_cpu_temp(0);
-	return sprintf(buf, "%d\n", value);
-}
-
-static ssize_t get_cpu1_temp(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	int value = loongson3_cpu_temp(1);
+	int id = (to_sensor_dev_attr(attr))->index - 1;
+	int value = loongson3_cpu_temp(id);
 	return sprintf(buf, "%d\n", value);
 }
 
 static int create_sysfs_cputemp_files(struct kobject *kobj)
 {
-	int ret;
-
-	ret = sysfs_create_files(kobj, hwmon_cputemp1);
-	if (ret)
-		goto sysfs_create_temp1_fail;
-
-	if (loongson_sysconf.nr_cpus <= loongson_sysconf.cores_per_package)
-		return 0;
-
-	ret = sysfs_create_files(kobj, hwmon_cputemp2);
-	if (ret)
-		goto sysfs_create_temp2_fail;
+	int i, ret = 0;
 
-	return 0;
+	for (i=0; i<nr_packages; i++)
+		ret = sysfs_create_files(kobj, hwmon_cputemp[i]);
 
-sysfs_create_temp2_fail:
-	sysfs_remove_files(kobj, hwmon_cputemp1);
-
-sysfs_create_temp1_fail:
-	return -1;
+	return ret;
 }
 
 static void remove_sysfs_cputemp_files(struct kobject *kobj)
 {
-	sysfs_remove_files(&cpu_hwmon_dev->kobj, hwmon_cputemp1);
+	int i;
 
-	if (loongson_sysconf.nr_cpus > loongson_sysconf.cores_per_package)
-		sysfs_remove_files(&cpu_hwmon_dev->kobj, hwmon_cputemp2);
+	for (i=0; i<nr_packages; i++)
+		sysfs_remove_files(kobj, hwmon_cputemp[i]);
 }
 
 #define CPU_THERMAL_THRESHOLD 90000
@@ -149,8 +136,15 @@ static struct delayed_work thermal_work;
 
 static void do_thermal_timer(struct work_struct *work)
 {
-	int value = loongson3_cpu_temp(0);
-	if (value <= CPU_THERMAL_THRESHOLD)
+	int i, value, temp_max = 0;
+
+	for (i=0; i<nr_packages; i++) {
+		value = loongson3_cpu_temp(i);
+		if (value > temp_max)
+			temp_max = value;
+	}
+
+	if (temp_max <= CPU_THERMAL_THRESHOLD)
 		schedule_delayed_work(&thermal_work, msecs_to_jiffies(5000));
 	else
 		orderly_poweroff(true);
@@ -169,6 +163,9 @@ static int __init loongson_hwmon_init(void)
 		goto fail_hwmon_device_register;
 	}
 
+	nr_packages = loongson_sysconf.nr_cpus /
+		loongson_sysconf.cores_per_package;
+
 	ret = sysfs_create_group(&cpu_hwmon_dev->kobj,
 				&cpu_hwmon_attribute_group);
 	if (ret) {
-- 
GitLab


From e1b88ca8d72193e48bac026b19b8c686cc7fea25 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 22 Jun 2017 23:06:52 +0800
Subject: [PATCH 0502/1958] MIPS: Loongson-3: IRQ balancing for PCI devices

IRQ0 (HPET), IRQ1 (Keyboard), IRQ2 (Cascade), IRQ7 (SCI), IRQ8 (RTC)
and IRQ12 (Mouse) are handled by core-0 locally. Other PCI IRQs (3, 4,
5, 6, 14, 15) are balanced by all cores from Node-0. This can improve
I/O performance significantly.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16589/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/loongson64/loongson-3/irq.c | 19 +++++++++++++++++--
 arch/mips/loongson64/loongson-3/smp.c | 18 +++++++++++++++++-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c
index 548f759454dce..2e6e205ed9b9e 100644
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ b/arch/mips/loongson64/loongson-3/irq.c
@@ -9,17 +9,32 @@
 
 #include "smp.h"
 
+extern void loongson3_send_irq_by_ipi(int cpu, int irqs);
 unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15};
+unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12;
 
 static void ht_irqdispatch(void)
 {
-	unsigned int i, irq;
+	unsigned int i, irq, irq0, irq1;
+	static unsigned int dest_cpu = 0;
 
 	irq = LOONGSON_HT1_INT_VECTOR(0);
 	LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */
 
+	irq0 = irq & local_irq;  /* handled by local core */
+	irq1 = irq & ~local_irq; /* balanced by other cores */
+
+	if (dest_cpu == 0 || !cpu_online(dest_cpu))
+		irq0 |= irq1;
+	else
+		loongson3_send_irq_by_ipi(dest_cpu, irq1);
+
+	dest_cpu = dest_cpu + 1;
+	if (dest_cpu >= num_possible_cpus() || cpu_data[dest_cpu].package > 0)
+		dest_cpu = 0;
+
 	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
-		if (irq & (0x1 << ht_irq[i]))
+		if (irq0 & (0x1 << ht_irq[i]))
 			do_IRQ(ht_irq[i]);
 	}
 }
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 1629743ba96aa..b7a355c3c4081 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -254,13 +254,21 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
 		loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(i)]);
 }
 
+#define IPI_IRQ_OFFSET 6
+
+void loongson3_send_irq_by_ipi(int cpu, int irqs)
+{
+	loongson3_ipi_write32(irqs << IPI_IRQ_OFFSET, ipi_set0_regs[cpu_logical_map(cpu)]);
+}
+
 void loongson3_ipi_interrupt(struct pt_regs *regs)
 {
 	int i, cpu = smp_processor_id();
-	unsigned int action, c0count;
+	unsigned int action, c0count, irqs;
 
 	/* Load the ipi register to figure out what we're supposed to do */
 	action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]);
+	irqs = action >> IPI_IRQ_OFFSET;
 
 	/* Clear the ipi register to clear the interrupt */
 	loongson3_ipi_write32((u32)action, ipi_clear0_regs[cpu_logical_map(cpu)]);
@@ -282,6 +290,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
 			core0_c0count[i] = c0count;
 		__wbflush(); /* Let others see the result ASAP */
 	}
+
+	if (irqs) {
+		int irq;
+		while ((irq = ffs(irqs))) {
+			do_IRQ(irq-1);
+			irqs &= ~(1<<(irq-1));
+		}
+	}
 }
 
 #define MAX_LOOPS 800
-- 
GitLab


From ecc38a0968ec3e0605079e49d276d9a4186abdb7 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 22 Jun 2017 23:06:53 +0800
Subject: [PATCH 0503/1958] MIPS: Loongson-3: support irq_set_affinity() in
 i8259 chip

With this patch we can set irq affinity via procfs, so as to improve
network performance.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16590/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/loongson64/loongson-3/irq.c | 67 +++++++++++++++++++++------
 1 file changed, 54 insertions(+), 13 deletions(-)

diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c
index 2e6e205ed9b9e..7202e52cd0469 100644
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ b/arch/mips/loongson64/loongson-3/irq.c
@@ -10,32 +10,68 @@
 #include "smp.h"
 
 extern void loongson3_send_irq_by_ipi(int cpu, int irqs);
+
+unsigned int irq_cpu[16] = {[0 ... 15] = -1};
 unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15};
 unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12;
 
+int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
+			  bool force)
+{
+	unsigned int cpu;
+	struct cpumask new_affinity;
+
+	/* I/O devices are connected on package-0 */
+	cpumask_copy(&new_affinity, affinity);
+	for_each_cpu(cpu, affinity)
+		if (cpu_data[cpu].package > 0)
+			cpumask_clear_cpu(cpu, &new_affinity);
+
+	if (cpumask_empty(&new_affinity))
+		return -EINVAL;
+
+	cpumask_copy(d->common->affinity, &new_affinity);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
 static void ht_irqdispatch(void)
 {
-	unsigned int i, irq, irq0, irq1;
-	static unsigned int dest_cpu = 0;
+	unsigned int i, irq;
+	struct irq_data *irqd;
+	struct cpumask affinity;
 
 	irq = LOONGSON_HT1_INT_VECTOR(0);
 	LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */
 
-	irq0 = irq & local_irq;  /* handled by local core */
-	irq1 = irq & ~local_irq; /* balanced by other cores */
+	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
+		if (!(irq & (0x1 << ht_irq[i])))
+			continue;
 
-	if (dest_cpu == 0 || !cpu_online(dest_cpu))
-		irq0 |= irq1;
-	else
-		loongson3_send_irq_by_ipi(dest_cpu, irq1);
+		/* handled by local core */
+		if (local_irq & (0x1 << ht_irq[i])) {
+			do_IRQ(ht_irq[i]);
+			continue;
+		}
 
-	dest_cpu = dest_cpu + 1;
-	if (dest_cpu >= num_possible_cpus() || cpu_data[dest_cpu].package > 0)
-		dest_cpu = 0;
+		irqd = irq_get_irq_data(ht_irq[i]);
+		cpumask_and(&affinity, irqd->common->affinity, cpu_active_mask);
+		if (cpumask_empty(&affinity)) {
+			do_IRQ(ht_irq[i]);
+			continue;
+		}
 
-	for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
-		if (irq0 & (0x1 << ht_irq[i]))
+		irq_cpu[ht_irq[i]] = cpumask_next(irq_cpu[ht_irq[i]], &affinity);
+		if (irq_cpu[ht_irq[i]] >= nr_cpu_ids)
+			irq_cpu[ht_irq[i]] = cpumask_first(&affinity);
+
+		if (irq_cpu[ht_irq[i]] == 0) {
 			do_IRQ(ht_irq[i]);
+			continue;
+		}
+
+		/* balanced by other cores */
+		loongson3_send_irq_by_ipi(irq_cpu[ht_irq[i]], (0x1 << ht_irq[i]));
 	}
 }
 
@@ -135,11 +171,16 @@ void irq_router_init(void)
 
 void __init mach_init_irq(void)
 {
+	struct irq_chip *chip;
+
 	clear_c0_status(ST0_IM | ST0_BEV);
 
 	irq_router_init();
 	mips_cpu_irq_init();
 	init_i8259_irqs();
+	chip = irq_get_chip(I8259A_IRQ_BASE);
+	chip->irq_set_affinity = plat_set_irq_affinity;
+
 	irq_set_chip_and_handler(LOONGSON_UART_IRQ,
 			&loongson_irq_chip, handle_level_irq);
 
-- 
GitLab


From b9c4dc2cf9af62bc0d2d8504c15175aeac49ad53 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 22 Jun 2017 23:06:54 +0800
Subject: [PATCH 0504/1958] MIPS: Loogson: Make enum loongson_cpu_type more
 clear

Sort enum loongson_cpu_type in a more reasonable manner, this makes the
CPU names more clear and extensible. Those already defined enum values
are renamed to Legacy_* for compatibility.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16591/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../include/asm/mach-loongson64/boot_param.h  | 22 ++++++++++++++-----
 arch/mips/loongson64/common/env.c             | 11 +++++++---
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h
index d3f3258b7cd4a..9f9bb9c537852 100644
--- a/arch/mips/include/asm/mach-loongson64/boot_param.h
+++ b/arch/mips/include/asm/mach-loongson64/boot_param.h
@@ -27,12 +27,22 @@ struct efi_memory_map_loongson {
 } __packed;
 
 enum loongson_cpu_type {
-	Loongson_2E = 0,
-	Loongson_2F = 1,
-	Loongson_3A = 2,
-	Loongson_3B = 3,
-	Loongson_1A = 4,
-	Loongson_1B = 5
+	Legacy_2E = 0x0,
+	Legacy_2F = 0x1,
+	Legacy_3A = 0x2,
+	Legacy_3B = 0x3,
+	Legacy_1A = 0x4,
+	Legacy_1B = 0x5,
+	Legacy_2G = 0x6,
+	Legacy_2H = 0x7,
+	Loongson_1A = 0x100,
+	Loongson_1B = 0x101,
+	Loongson_2E = 0x200,
+	Loongson_2F = 0x201,
+	Loongson_2G = 0x202,
+	Loongson_2H = 0x203,
+	Loongson_3A = 0x300,
+	Loongson_3B = 0x301
 };
 
 /*
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index 4707abfe9d641..1e8a955ae5a82 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -90,7 +90,9 @@ void __init prom_init_env(void)
 
 	cpu_clock_freq = ecpu->cpu_clock_freq;
 	loongson_sysconf.cputype = ecpu->cputype;
-	if (ecpu->cputype == Loongson_3A) {
+	switch (ecpu->cputype) {
+	case Legacy_3A:
+	case Loongson_3A:
 		loongson_sysconf.cores_per_node = 4;
 		loongson_sysconf.cores_per_package = 4;
 		smp_group[0] = 0x900000003ff01000;
@@ -111,7 +113,9 @@ void __init prom_init_env(void)
 		loongson_freqctrl[3] = 0x900030001fe001d0;
 		loongson_sysconf.ht_control_base = 0x90000EFDFB000000;
 		loongson_sysconf.workarounds = WORKAROUND_CPUFREQ;
-	} else if (ecpu->cputype == Loongson_3B) {
+		break;
+	case Legacy_3B:
+	case Loongson_3B:
 		loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */
 		loongson_sysconf.cores_per_package = 8;
 		smp_group[0] = 0x900000003ff01000;
@@ -132,7 +136,8 @@ void __init prom_init_env(void)
 		loongson_freqctrl[3] = 0x900060001fe001d0;
 		loongson_sysconf.ht_control_base = 0x90001EFDFB000000;
 		loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG;
-	} else {
+		break;
+	default:
 		loongson_sysconf.cores_per_node = 1;
 		loongson_sysconf.cores_per_package = 1;
 		loongson_chipcfg[0] = 0x900000001fe00180;
-- 
GitLab


From 296a7624f5b292af610d728e7e347fda341a985e Mon Sep 17 00:00:00 2001
From: Miodrag Dinic <miodrag.dinic@imgtec.com>
Date: Mon, 19 Jun 2017 17:50:08 +0200
Subject: [PATCH 0505/1958] MIPS: cmdline: Add support for 'memmap' parameter

Implement support for parsing 'memmap' kernel command line parameter.

This patch covers parsing of the following two formats for 'memmap'
parameter values:

  - nn[KMG]@ss[KMG]
  - nn[KMG]$ss[KMG]

  ([KMG] = K M or G (kilo, mega, giga))

These two allowed formats for parameter value are already documented
in file kernel-parameters.txt in Documentation/admin-guide folder.
Some architectures already support them, but Mips did not prior to
this patch.

Excerpt from Documentation/admin-guide/kernel-parameters.txt:

memmap=nn[KMG]@ss[KMG]
    [KNL] Force usage of a specific region of memory.
    Region of memory to be used is from ss to ss+nn.

memmap=nn[KMG]$ss[KMG]
    Mark specific memory as reserved.
    Region of memory to be reserved is from ss to ss+nn.
    Example: Exclude memory from 0x18690000-0x1869ffff
        memmap=64K$0x18690000
        or
        memmap=0x10000$0x18690000

There is no need to update this documentation file with respect to
this patch.

Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: James.Hogan@imgtec.com
Cc: Paul.Burton@imgtec.com
Cc: Raghu.Gandham@imgtec.com
Cc: Leonid.Yegoshin@imgtec.com
Cc: Douglas.Leung@imgtec.com
Cc: Petar.Jovanovic@imgtec.com
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16508/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/setup.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 01d1dbde5fbf1..fe39397267651 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -670,6 +670,46 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
+static int __init early_parse_memmap(char *p)
+{
+	char *oldp;
+	u64 start_at, mem_size;
+
+	if (!p)
+		return -EINVAL;
+
+	if (!strncmp(p, "exactmap", 8)) {
+		pr_err("\"memmap=exactmap\" invalid on MIPS\n");
+		return 0;
+	}
+
+	oldp = p;
+	mem_size = memparse(p, &p);
+	if (p == oldp)
+		return -EINVAL;
+
+	if (*p == '@') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, BOOT_MEM_RAM);
+	} else if (*p == '#') {
+		pr_err("\"memmap=nn#ss\" (force ACPI data) invalid on MIPS\n");
+		return -EINVAL;
+	} else if (*p == '$') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, BOOT_MEM_RESERVED);
+	} else {
+		pr_err("\"memmap\" invalid format!\n");
+		return -EINVAL;
+	}
+
+	if (*p == '\0') {
+		usermem = 1;
+		return 0;
+	} else
+		return -EINVAL;
+}
+early_param("memmap", early_parse_memmap);
+
 #ifdef CONFIG_PROC_VMCORE
 unsigned long setup_elfcorehdr, setup_elfcorehdr_size;
 static int __init early_parse_elfcorehdr(char *p)
-- 
GitLab


From 21855a6e5b44999fb9f5493cc2c8c9eed5f32876 Mon Sep 17 00:00:00 2001
From: Miodrag Dinic <miodrag.dinic@imgtec.com>
Date: Mon, 19 Jun 2017 17:50:09 +0200
Subject: [PATCH 0506/1958] MIPS: build: Fix "-modd-spreg" switch usage when
 compiling for mips32r6

Add "-modd-spreg" when compiling the kernel for mips32r6 target.

This makes sure the kernel builds properly even with toolchains that
use "-mno-odd-spreg" by default. This is the case with Android gcc.
Prior to this patch, kernel builds using gcc for Android failed with
following error messages, if target architecture is set to mips32r6:

arch/mips/kernel/r4k_switch.S: Assembler messages:
.../r4k_switch.S:210: Error: float register should be even, was 1
.../r4k_switch.S:212: Error: float register should be even, was 3
.../r4k_switch.S:214: Error: float register should be even, was 5
.../r4k_switch.S:216: Error: float register should be even, was 7
.../r4k_switch.S:218: Error: float register should be even, was 9
.../r4k_switch.S:220: Error: float register should be even, was 11
.../r4k_switch.S:222: Error: float register should be even, was 13
.../r4k_switch.S:224: Error: float register should be even, was 15
.../r4k_switch.S:226: Error: float register should be even, was 17
.../r4k_switch.S:228: Error: float register should be even, was 19
.../r4k_switch.S:230: Error: float register should be even, was 21
.../r4k_switch.S:232: Error: float register should be even, was 23
.../r4k_switch.S:234: Error: float register should be even, was 25
.../r4k_switch.S:236: Error: float register should be even, was 27
.../r4k_switch.S:238: Error: float register should be even, was 29
.../r4k_switch.S:240: Error: float register should be even, was 31
make[2]: *** [arch/mips/kernel/r4k_switch.o] Error 1

Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: James.Hogan@imgtec.com
Cc: Paul.Burton@imgtec.com
Cc: Raghu.Gandham@imgtec.com
Cc: Leonid.Yegoshin@imgtec.com
Cc: Douglas.Leung@imgtec.com
Cc: Petar.Jovanovic@imgtec.com
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16509/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 02a1787c888c0..04343625b9292 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -160,7 +160,7 @@ cflags-$(CONFIG_CPU_MIPS32_R1)	+= $(call cc-option,-march=mips32,-mips32 -U_MIPS
 			-Wa,-mips32 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS32_R2)	+= $(call cc-option,-march=mips32r2,-mips32r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS32) \
 			-Wa,-mips32r2 -Wa,--trap
-cflags-$(CONFIG_CPU_MIPS32_R6)	+= -march=mips32r6 -Wa,--trap
+cflags-$(CONFIG_CPU_MIPS32_R6)	+= -march=mips32r6 -Wa,--trap -modd-spreg
 cflags-$(CONFIG_CPU_MIPS64_R1)	+= $(call cc-option,-march=mips64,-mips64 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
 			-Wa,-mips64 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R2)	+= $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
-- 
GitLab


From 3daf281f2ce3b80cc1abf04f355f066e37f69451 Mon Sep 17 00:00:00 2001
From: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Date: Mon, 19 Jun 2017 17:50:10 +0200
Subject: [PATCH 0507/1958] MIPS: R6: Fix PREF instruction usage by memcpy for
 MIPS R6

Disable usage of PREF instruction usage by memcpy for MIPS R6.

MIPS R6 redefines PREF instruction with smaller offset than
ordinary MIPS. However, the memcpy code uses PREF instruction
with offsets bigger than +-256 bytes.

Malta kernels already disable usage of PREF for memcpy.

This was found during adaptation of MIPS R6 for virtual board
used by Android emulator.

Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtech.com>
Cc: James.Hogan@imgtec.com
Cc: Paul.Burton@imgtec.com
Cc: Raghu.Gandham@imgtec.com
Cc: Leonid.Yegoshin@imgtec.com
Cc: Douglas.Leung@imgtec.com
Cc: Petar.Jovanovic@imgtec.com
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16510/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/lib/memcpy.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 3114a2ed1f4e5..03e3304d6ae58 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -28,6 +28,9 @@
 #ifdef CONFIG_MIPS_MALTA
 #undef CONFIG_CPU_HAS_PREFETCH
 #endif
+#ifdef CONFIG_CPU_MIPSR6
+#undef CONFIG_CPU_HAS_PREFETCH
+#endif
 
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-- 
GitLab


From 3f88ec633362efa454ca4ea289d4ad91cd44a976 Mon Sep 17 00:00:00 2001
From: Miodrag Dinic <miodrag.dinic@imgtec.com>
Date: Mon, 19 Jun 2017 17:50:11 +0200
Subject: [PATCH 0508/1958] MIPS: unaligned: Add DSP lwx & lhx missaligned
 access support

Add handling of missaligned access for DSP load instructions
lwx & lhx.

Since DSP instructions share SPECIAL3 opcode with other non-DSP
instructions, necessary logic was inserted for distinguishing
between instructions with SPECIAL3 opcode. For that purpose,
the instruction format for DSP instructions is added to
arch/mips/include/uapi/asm/inst.h.

Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtech.com>
Cc: James.Hogan@imgtec.com
Cc: Paul.Burton@imgtec.com
Cc: Raghu.Gandham@imgtec.com
Cc: Leonid.Yegoshin@imgtec.com
Cc: Douglas.Leung@imgtec.com
Cc: Petar.Jovanovic@imgtec.com
Cc: Goran.Ferenc@imgtec.com
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16511/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/uapi/asm/inst.h |  11 ++
 arch/mips/kernel/unaligned.c      | 174 +++++++++++++++++-------------
 2 files changed, 111 insertions(+), 74 deletions(-)

diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index e5f538584b8e1..d618975359263 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -762,6 +762,16 @@ struct msa_mi10_format {		/* MSA MI10 */
 	;))))))
 };
 
+struct dsp_format {		/* SPEC3 DSP format instructions */
+	__BITFIELD_FIELD(unsigned int opcode : 6,
+	__BITFIELD_FIELD(unsigned int base : 5,
+	__BITFIELD_FIELD(unsigned int index : 5,
+	__BITFIELD_FIELD(unsigned int rd : 5,
+	__BITFIELD_FIELD(unsigned int op : 5,
+	__BITFIELD_FIELD(unsigned int func : 6,
+	;))))))
+};
+
 struct spec3_format {   /* SPEC3 */
 	__BITFIELD_FIELD(unsigned int opcode:6,
 	__BITFIELD_FIELD(unsigned int rs:5,
@@ -1053,6 +1063,7 @@ union mips_instruction {
 	struct b_format b_format;
 	struct ps_format ps_format;
 	struct v_format v_format;
+	struct dsp_format dsp_format;
 	struct spec3_format spec3_format;
 	struct fb_format fb_format;
 	struct fp0_format fp0_format;
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index f806ee56e6393..67946bb98dd02 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -939,88 +939,114 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		 * The remaining opcodes are the ones that are really of
 		 * interest.
 		 */
-#ifdef CONFIG_EVA
 	case spec3_op:
-		/*
-		 * we can land here only from kernel accessing user memory,
-		 * so we need to "switch" the address limit to user space, so
-		 * address check can work properly.
-		 */
-		seg = get_fs();
-		set_fs(USER_DS);
-		switch (insn.spec3_format.func) {
-		case lhe_op:
-			if (!access_ok(VERIFY_READ, addr, 2)) {
-				set_fs(seg);
-				goto sigbus;
-			}
-			LoadHWE(addr, value, res);
-			if (res) {
-				set_fs(seg);
-				goto fault;
-			}
-			compute_return_epc(regs);
-			regs->regs[insn.spec3_format.rt] = value;
-			break;
-		case lwe_op:
-			if (!access_ok(VERIFY_READ, addr, 4)) {
-				set_fs(seg);
-				goto sigbus;
+		if (insn.dsp_format.func == lx_op) {
+			switch (insn.dsp_format.op) {
+			case lwx_op:
+				if (!access_ok(VERIFY_READ, addr, 4))
+					goto sigbus;
+				LoadW(addr, value, res);
+				if (res)
+					goto fault;
+				compute_return_epc(regs);
+				regs->regs[insn.dsp_format.rd] = value;
+				break;
+			case lhx_op:
+				if (!access_ok(VERIFY_READ, addr, 2))
+					goto sigbus;
+				LoadHW(addr, value, res);
+				if (res)
+					goto fault;
+				compute_return_epc(regs);
+				regs->regs[insn.dsp_format.rd] = value;
+				break;
+			default:
+				goto sigill;
 			}
+		}
+#ifdef CONFIG_EVA
+		else {
+			/*
+			 * we can land here only from kernel accessing user
+			 * memory, so we need to "switch" the address limit to
+			 * user space, so that address check can work properly.
+			 */
+			seg = get_fs();
+			set_fs(USER_DS);
+			switch (insn.spec3_format.func) {
+			case lhe_op:
+				if (!access_ok(VERIFY_READ, addr, 2)) {
+					set_fs(seg);
+					goto sigbus;
+				}
+				LoadHWE(addr, value, res);
+				if (res) {
+					set_fs(seg);
+					goto fault;
+				}
+				compute_return_epc(regs);
+				regs->regs[insn.spec3_format.rt] = value;
+				break;
+			case lwe_op:
+				if (!access_ok(VERIFY_READ, addr, 4)) {
+					set_fs(seg);
+					goto sigbus;
+				}
 				LoadWE(addr, value, res);
-			if (res) {
-				set_fs(seg);
-				goto fault;
-			}
-			compute_return_epc(regs);
-			regs->regs[insn.spec3_format.rt] = value;
-			break;
-		case lhue_op:
-			if (!access_ok(VERIFY_READ, addr, 2)) {
-				set_fs(seg);
-				goto sigbus;
-			}
-			LoadHWUE(addr, value, res);
-			if (res) {
-				set_fs(seg);
-				goto fault;
-			}
-			compute_return_epc(regs);
-			regs->regs[insn.spec3_format.rt] = value;
-			break;
-		case she_op:
-			if (!access_ok(VERIFY_WRITE, addr, 2)) {
-				set_fs(seg);
-				goto sigbus;
-			}
-			compute_return_epc(regs);
-			value = regs->regs[insn.spec3_format.rt];
-			StoreHWE(addr, value, res);
-			if (res) {
-				set_fs(seg);
-				goto fault;
-			}
-			break;
-		case swe_op:
-			if (!access_ok(VERIFY_WRITE, addr, 4)) {
-				set_fs(seg);
-				goto sigbus;
-			}
-			compute_return_epc(regs);
-			value = regs->regs[insn.spec3_format.rt];
-			StoreWE(addr, value, res);
-			if (res) {
+				if (res) {
+					set_fs(seg);
+					goto fault;
+				}
+				compute_return_epc(regs);
+				regs->regs[insn.spec3_format.rt] = value;
+				break;
+			case lhue_op:
+				if (!access_ok(VERIFY_READ, addr, 2)) {
+					set_fs(seg);
+					goto sigbus;
+				}
+				LoadHWUE(addr, value, res);
+				if (res) {
+					set_fs(seg);
+					goto fault;
+				}
+				compute_return_epc(regs);
+				regs->regs[insn.spec3_format.rt] = value;
+				break;
+			case she_op:
+				if (!access_ok(VERIFY_WRITE, addr, 2)) {
+					set_fs(seg);
+					goto sigbus;
+				}
+				compute_return_epc(regs);
+				value = regs->regs[insn.spec3_format.rt];
+				StoreHWE(addr, value, res);
+				if (res) {
+					set_fs(seg);
+					goto fault;
+				}
+				break;
+			case swe_op:
+				if (!access_ok(VERIFY_WRITE, addr, 4)) {
+					set_fs(seg);
+					goto sigbus;
+				}
+				compute_return_epc(regs);
+				value = regs->regs[insn.spec3_format.rt];
+				StoreWE(addr, value, res);
+				if (res) {
+					set_fs(seg);
+					goto fault;
+				}
+				break;
+			default:
 				set_fs(seg);
-				goto fault;
+				goto sigill;
 			}
-			break;
-		default:
 			set_fs(seg);
-			goto sigill;
 		}
-		set_fs(seg);
-		break;
 #endif
+		break;
 	case lh_op:
 		if (!access_ok(VERIFY_READ, addr, 2))
 			goto sigbus;
-- 
GitLab


From 6b1e76297c4ad4b906fdf054460e4e56914f6e34 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:33 -0700
Subject: [PATCH 0509/1958] MIPS: cmpxchg: Unify R10000_LLSC_WAR &
 non-R10000_LLSC_WAR cases

Prior to this patch the xchg & cmpxchg functions have duplicated code
which is for all intents & purposes identical apart from use of a
branch-likely instruction in the R10000_LLSC_WAR case & a regular branch
instruction in the non-R10000_LLSC_WAR case.

This patch removes the duplication, declaring a __scbeqz macro to select
the branch instruction suitable for use when checking the result of an
sc instruction & making use of it to unify the 2 cases.

In __xchg_u{32,64}() this means writing the branch in asm, where it was
previously being done in C as a do...while loop for the
non-R10000_LLSC_WAR case. As this is a single instruction, and adds
consistency with the R10000_LLSC_WAR cases & the cmpxchg() code, this
seems worthwhile.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16348/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 80 +++++++++------------------------
 1 file changed, 22 insertions(+), 58 deletions(-)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index b71ab4a5fd508..0582c01d229d8 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -13,44 +13,38 @@
 #include <asm/compiler.h>
 #include <asm/war.h>
 
+/*
+ * Using a branch-likely instruction to check the result of an sc instruction
+ * works around a bug present in R10000 CPUs prior to revision 3.0 that could
+ * cause ll-sc sequences to execute non-atomically.
+ */
+#if R10000_LLSC_WAR
+# define __scbeqz "beqzl"
+#else
+# define __scbeqz "beqz"
+#endif
+
 static inline unsigned long __xchg_u32(volatile int * m, unsigned int val)
 {
 	__u32 retval;
 
 	smp_mb__before_llsc();
 
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
+	if (kernel_uses_llsc) {
 		unsigned long dummy;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	" MIPS_ISA_ARCH_LEVEL "			\n"
 		"1:	ll	%0, %3			# xchg_u32	\n"
 		"	.set	mips0					\n"
 		"	move	%2, %z4					\n"
-		"	.set	arch=r4000				\n"
+		"	.set	" MIPS_ISA_ARCH_LEVEL "			\n"
 		"	sc	%2, %1					\n"
-		"	beqzl	%2, 1b					\n"
+		"\t" __scbeqz "	%2, 1b					\n"
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 		: "memory");
-	} else if (kernel_uses_llsc) {
-		unsigned long dummy;
-
-		do {
-			__asm__ __volatile__(
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	ll	%0, %3		# xchg_u32	\n"
-			"	.set	mips0				\n"
-			"	move	%2, %z4				\n"
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	sc	%2, %1				\n"
-			"	.set	mips0				\n"
-			: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m),
-			  "=&r" (dummy)
-			: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
-			: "memory");
-		} while (unlikely(!dummy));
 	} else {
 		unsigned long flags;
 
@@ -72,34 +66,19 @@ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val)
 
 	smp_mb__before_llsc();
 
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {
+	if (kernel_uses_llsc) {
 		unsigned long dummy;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	" MIPS_ISA_ARCH_LEVEL "			\n"
 		"1:	lld	%0, %3			# xchg_u64	\n"
 		"	move	%2, %z4					\n"
 		"	scd	%2, %1					\n"
-		"	beqzl	%2, 1b					\n"
+		"\t" __scbeqz "	%2, 1b					\n"
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
 		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 		: "memory");
-	} else if (kernel_uses_llsc) {
-		unsigned long dummy;
-
-		do {
-			__asm__ __volatile__(
-			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
-			"	lld	%0, %3		# xchg_u64	\n"
-			"	move	%2, %z4				\n"
-			"	scd	%2, %1				\n"
-			"	.set	mips0				\n"
-			: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m),
-			  "=&r" (dummy)
-			: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
-			: "memory");
-		} while (unlikely(!dummy));
 	} else {
 		unsigned long flags;
 
@@ -142,24 +121,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 ({									\
 	__typeof(*(m)) __ret;						\
 									\
-	if (kernel_uses_llsc && R10000_LLSC_WAR) {			\
-		__asm__ __volatile__(					\
-		"	.set	push				\n"	\
-		"	.set	noat				\n"	\
-		"	.set	arch=r4000			\n"	\
-		"1:	" ld "	%0, %2		# __cmpxchg_asm \n"	\
-		"	bne	%0, %z3, 2f			\n"	\
-		"	.set	mips0				\n"	\
-		"	move	$1, %z4				\n"	\
-		"	.set	arch=r4000			\n"	\
-		"	" st "	$1, %1				\n"	\
-		"	beqzl	$1, 1b				\n"	\
-		"2:						\n"	\
-		"	.set	pop				\n"	\
-		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
-		: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)		\
-		: "memory");						\
-	} else if (kernel_uses_llsc) {					\
+	if (kernel_uses_llsc) {						\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -170,7 +132,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 		"	move	$1, %z4				\n"	\
 		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
 		"	" st "	$1, %1				\n"	\
-		"	beqz	$1, 1b				\n"	\
+		"\t" __scbeqz "	$1, 1b				\n"	\
 		"	.set	pop				\n"	\
 		"2:						\n"	\
 		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
@@ -245,4 +207,6 @@ extern void __cmpxchg_called_with_bad_pointer(void);
 #define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n))
 #endif
 
+#undef __scbeqz
+
 #endif /* __ASM_CMPXCHG_H */
-- 
GitLab


From 5154f3b4194910a4216866e7535c37e5bcb17800 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:34 -0700
Subject: [PATCH 0510/1958] MIPS: cmpxchg: Pull xchg() asm into a macro

Use a macro to generate the 32 & 64 bit variants of the backing code for
xchg(), much as is already done for cmpxchg(). This removes the
duplication that could previously be found in __xchg_u32() &
__xchg_u64().

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16349/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 81 ++++++++++++++-------------------
 1 file changed, 33 insertions(+), 48 deletions(-)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 0582c01d229d8..a19359649b60b 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -24,36 +24,43 @@
 # define __scbeqz "beqz"
 #endif
 
+#define __xchg_asm(ld, st, m, val)					\
+({									\
+	__typeof(*(m)) __ret;						\
+									\
+	if (kernel_uses_llsc) {						\
+		__asm__ __volatile__(					\
+		"	.set	push				\n"	\
+		"	.set	noat				\n"	\
+		"	.set	" MIPS_ISA_ARCH_LEVEL "		\n"	\
+		"1:	" ld "	%0, %2		# __xchg_asm	\n"	\
+		"	.set	mips0				\n"	\
+		"	move	$1, %z3				\n"	\
+		"	.set	" MIPS_ISA_ARCH_LEVEL "		\n"	\
+		"	" st "	$1, %1				\n"	\
+		"\t" __scbeqz "	$1, 1b				\n"	\
+		"	.set	pop				\n"	\
+		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
+		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)			\
+		: "memory");						\
+	} else {							\
+		unsigned long __flags;					\
+									\
+		raw_local_irq_save(__flags);				\
+		__ret = *m;						\
+		*m = val;						\
+		raw_local_irq_restore(__flags);				\
+	}								\
+									\
+	__ret;								\
+})
+
 static inline unsigned long __xchg_u32(volatile int * m, unsigned int val)
 {
 	__u32 retval;
 
 	smp_mb__before_llsc();
-
-	if (kernel_uses_llsc) {
-		unsigned long dummy;
-
-		__asm__ __volatile__(
-		"	.set	" MIPS_ISA_ARCH_LEVEL "			\n"
-		"1:	ll	%0, %3			# xchg_u32	\n"
-		"	.set	mips0					\n"
-		"	move	%2, %z4					\n"
-		"	.set	" MIPS_ISA_ARCH_LEVEL "			\n"
-		"	sc	%2, %1					\n"
-		"\t" __scbeqz "	%2, 1b					\n"
-		"	.set	mips0					\n"
-		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
-		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
-		: "memory");
-	} else {
-		unsigned long flags;
-
-		raw_local_irq_save(flags);
-		retval = *m;
-		*m = val;
-		raw_local_irq_restore(flags);	/* implies memory barrier  */
-	}
-
+	retval = __xchg_asm("ll", "sc", m, val);
 	smp_llsc_mb();
 
 	return retval;
@@ -65,29 +72,7 @@ static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val)
 	__u64 retval;
 
 	smp_mb__before_llsc();
-
-	if (kernel_uses_llsc) {
-		unsigned long dummy;
-
-		__asm__ __volatile__(
-		"	.set	" MIPS_ISA_ARCH_LEVEL "			\n"
-		"1:	lld	%0, %3			# xchg_u64	\n"
-		"	move	%2, %z4					\n"
-		"	scd	%2, %1					\n"
-		"\t" __scbeqz "	%2, 1b					\n"
-		"	.set	mips0					\n"
-		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
-		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
-		: "memory");
-	} else {
-		unsigned long flags;
-
-		raw_local_irq_save(flags);
-		retval = *m;
-		*m = val;
-		raw_local_irq_restore(flags);	/* implies memory barrier  */
-	}
-
+	retval = __xchg_asm("lld", "scd", m, val);
 	smp_llsc_mb();
 
 	return retval;
-- 
GitLab


From 77299db802d4a7ae43f7ca246adbff0420bc9f5a Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:35 -0700
Subject: [PATCH 0511/1958] MIPS: cmpxchg: Use __compiletime_error() for bad
 cmpxchg() pointers

Our cmpxchg() implementation relies upon generating a call to a function
which doesn't really exist (__cmpxchg_called_with_bad_pointer) to create
a link failure in cases where cmpxchg() is called with a pointer to a
value of an unsupported size.

The __compiletime_error macro can be used to decorate a function such
that a call to it generates a compile-time, rather than a link-time,
error. This patch uses __compiletime_error to cause bad cmpxchg() calls
to error out at compile time rather than link time, allowing errors to
occur more quickly & making it easier to spot where the problem comes
from.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16350/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index a19359649b60b..ee0214e00ab1c 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -137,10 +137,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 })
 
 /*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid cmpxchg().
+ * This function doesn't exist, so if it is called you'll either:
+ *
+ * - Get an error at compile-time due to __compiletime_error, if supported by
+ *   your compiler.
+ *
+ * or:
+ *
+ * - Get an error at link-time due to the call to the missing function.
  */
-extern void __cmpxchg_called_with_bad_pointer(void);
+extern void __cmpxchg_called_with_bad_pointer(void)
+	__compiletime_error("Bad argument size for cmpxchg");
 
 #define __cmpxchg(ptr, old, new, pre_barrier, post_barrier)		\
 ({									\
-- 
GitLab


From d15dc68c1143e249c36612c15bf4e930637b47c3 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:36 -0700
Subject: [PATCH 0512/1958] MIPS: cmpxchg: Error out on unsupported xchg()
 calls

xchg() has up until now simply returned the x parameter in cases where
it is called with a pointer to a value of an unsupported size. This will
often cause the calling code to hit a failure path, presuming that the
value of x differs from the content of the memory pointed at by ptr, but
we can do better by producing a compile-time or link-time error such
that unsupported calls to xchg() are detectable earlier than runtime.

This patch does this in the same was as is already done for cmpxchg(),
using a call to a missing function annotated with __compiletime_error().

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16351/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index ee0214e00ab1c..fe652c3e5d8cb 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -24,6 +24,21 @@
 # define __scbeqz "beqz"
 #endif
 
+/*
+ * These functions doesn't exist, so if they are called you'll either:
+ *
+ * - Get an error at compile-time due to __compiletime_error, if supported by
+ *   your compiler.
+ *
+ * or:
+ *
+ * - Get an error at link-time due to the call to the missing function.
+ */
+extern void __cmpxchg_called_with_bad_pointer(void)
+	__compiletime_error("Bad argument size for cmpxchg");
+extern unsigned long __xchg_called_with_bad_pointer(void)
+	__compiletime_error("Bad argument size for xchg");
+
 #define __xchg_asm(ld, st, m, val)					\
 ({									\
 	__typeof(*(m)) __ret;						\
@@ -89,9 +104,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 		return __xchg_u32(ptr, x);
 	case 8:
 		return __xchg_u64(ptr, x);
+	default:
+		return __xchg_called_with_bad_pointer();
 	}
-
-	return x;
 }
 
 #define xchg(ptr, x)							\
@@ -136,19 +151,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 	__ret;								\
 })
 
-/*
- * This function doesn't exist, so if it is called you'll either:
- *
- * - Get an error at compile-time due to __compiletime_error, if supported by
- *   your compiler.
- *
- * or:
- *
- * - Get an error at link-time due to the call to the missing function.
- */
-extern void __cmpxchg_called_with_bad_pointer(void)
-	__compiletime_error("Bad argument size for cmpxchg");
-
 #define __cmpxchg(ptr, old, new, pre_barrier, post_barrier)		\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
-- 
GitLab


From 62c6081dca75d6bec1198ed5a1ae50968b323a8c Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:37 -0700
Subject: [PATCH 0513/1958] MIPS: cmpxchg: Drop __xchg_u{32,64} functions

The __xchg_u32() & __xchg_u64() functions now add very little value.
This patch therefore removes them, by:

  - Moving memory barriers out of them & into xchg(), which also removes
    the duplication & readies us to support xchg_relaxed() if we wish to.

  - Calling __xchg_asm() directly from __xchg().

  - Performing the check for CONFIG_64BIT being enabled in the size=8
    case of __xchg().

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16352/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 48 ++++++++++++---------------------
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index fe652c3e5d8cb..e9c1e97bc29da 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -70,40 +70,18 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
 	__ret;								\
 })
 
-static inline unsigned long __xchg_u32(volatile int * m, unsigned int val)
-{
-	__u32 retval;
-
-	smp_mb__before_llsc();
-	retval = __xchg_asm("ll", "sc", m, val);
-	smp_llsc_mb();
-
-	return retval;
-}
-
-#ifdef CONFIG_64BIT
-static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val)
-{
-	__u64 retval;
-
-	smp_mb__before_llsc();
-	retval = __xchg_asm("lld", "scd", m, val);
-	smp_llsc_mb();
-
-	return retval;
-}
-#else
-extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 val);
-#define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels
-#endif
-
 static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
 {
 	switch (size) {
 	case 4:
-		return __xchg_u32(ptr, x);
+		return __xchg_asm("ll", "sc", (volatile u32 *)ptr, x);
+
 	case 8:
-		return __xchg_u64(ptr, x);
+		if (!IS_ENABLED(CONFIG_64BIT))
+			return __xchg_called_with_bad_pointer();
+
+		return __xchg_asm("lld", "scd", (volatile u64 *)ptr, x);
+
 	default:
 		return __xchg_called_with_bad_pointer();
 	}
@@ -111,10 +89,18 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 
 #define xchg(ptr, x)							\
 ({									\
+	__typeof__(*(ptr)) __res;					\
+									\
 	BUILD_BUG_ON(sizeof(*(ptr)) & ~0xc);				\
 									\
-	((__typeof__(*(ptr)))						\
-		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))));	\
+	smp_mb__before_llsc();						\
+									\
+	__res = (__typeof__(*(ptr)))					\
+		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)));	\
+									\
+	smp_llsc_mb();							\
+									\
+	__res;								\
 })
 
 #define __cmpxchg_asm(ld, st, m, old, new)				\
-- 
GitLab


From 8263db4d7768448cb06adbbdd14c613a1ea09830 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:38 -0700
Subject: [PATCH 0514/1958] MIPS: cmpxchg: Implement __cmpxchg() as a function

Replace the macro definition of __cmpxchg() with an inline function,
which is easier to read & modify. The cmpxchg() & cmpxchg_local() macros
are adjusted to call the new __cmpxchg() function.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16353/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 59 ++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index e9c1e97bc29da..516cb66f066b2 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -34,7 +34,7 @@
  *
  * - Get an error at link-time due to the call to the missing function.
  */
-extern void __cmpxchg_called_with_bad_pointer(void)
+extern unsigned long __cmpxchg_called_with_bad_pointer(void)
 	__compiletime_error("Bad argument size for cmpxchg");
 extern unsigned long __xchg_called_with_bad_pointer(void)
 	__compiletime_error("Bad argument size for xchg");
@@ -137,38 +137,43 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 	__ret;								\
 })
 
-#define __cmpxchg(ptr, old, new, pre_barrier, post_barrier)		\
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, unsigned int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_asm("ll", "sc", (volatile u32 *)ptr, old, new);
+
+	case 8:
+		/* lld/scd are only available for MIPS64 */
+		if (!IS_ENABLED(CONFIG_64BIT))
+			return __cmpxchg_called_with_bad_pointer();
+
+		return __cmpxchg_asm("lld", "scd", (volatile u64 *)ptr, old, new);
+
+	default:
+		return __cmpxchg_called_with_bad_pointer();
+	}
+}
+
+#define cmpxchg_local(ptr, old, new)					\
+	((__typeof__(*(ptr)))						\
+		__cmpxchg((ptr),					\
+			  (unsigned long)(__typeof__(*(ptr)))(old),	\
+			  (unsigned long)(__typeof__(*(ptr)))(new),	\
+			  sizeof(*(ptr))))
+
+#define cmpxchg(ptr, old, new)						\
 ({									\
-	__typeof__(ptr) __ptr = (ptr);					\
-	__typeof__(*(ptr)) __old = (old);				\
-	__typeof__(*(ptr)) __new = (new);				\
-	__typeof__(*(ptr)) __res = 0;					\
-									\
-	pre_barrier;							\
-									\
-	switch (sizeof(*(__ptr))) {					\
-	case 4:								\
-		__res = __cmpxchg_asm("ll", "sc", __ptr, __old, __new); \
-		break;							\
-	case 8:								\
-		if (sizeof(long) == 8) {				\
-			__res = __cmpxchg_asm("lld", "scd", __ptr,	\
-					   __old, __new);		\
-			break;						\
-		}							\
-	default:							\
-		__cmpxchg_called_with_bad_pointer();			\
-		break;							\
-	}								\
+	__typeof__(*(ptr)) __res;					\
 									\
-	post_barrier;							\
+	smp_mb__before_llsc();						\
+	__res = cmpxchg_local((ptr), (old), (new));			\
+	smp_llsc_mb();							\
 									\
 	__res;								\
 })
 
-#define cmpxchg(ptr, old, new)		__cmpxchg(ptr, old, new, smp_mb__before_llsc(), smp_llsc_mb())
-#define cmpxchg_local(ptr, old, new)	__cmpxchg(ptr, old, new, , )
-
 #ifdef CONFIG_64BIT
 #define cmpxchg64_local(ptr, o, n)					\
   ({									\
-- 
GitLab


From b70eb30056dc84568f3d32440d9be6a558025843 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:39 -0700
Subject: [PATCH 0515/1958] MIPS: cmpxchg: Implement 1 byte & 2 byte xchg()

Implement 1 & 2 byte xchg() using read-modify-write atop a 4 byte
cmpxchg(). This allows us to support these atomic operations despite the
MIPS ISA only providing for 4 & 8 byte atomic operations.

This is required in order to support queued spinlocks (qspinlock) in a
later patch, since these make use of a 2 byte xchg() in their slow path.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16354/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h |  9 ++++--
 arch/mips/kernel/Makefile       |  2 +-
 arch/mips/kernel/cmpxchg.c      | 52 +++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 3 deletions(-)
 create mode 100644 arch/mips/kernel/cmpxchg.c

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 516cb66f066b2..a633bf8456899 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -70,9 +70,16 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
 	__ret;								\
 })
 
+extern unsigned long __xchg_small(volatile void *ptr, unsigned long val,
+				  unsigned int size);
+
 static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
 {
 	switch (size) {
+	case 1:
+	case 2:
+		return __xchg_small(ptr, x, size);
+
 	case 4:
 		return __xchg_asm("ll", "sc", (volatile u32 *)ptr, x);
 
@@ -91,8 +98,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 ({									\
 	__typeof__(*(ptr)) __res;					\
 									\
-	BUILD_BUG_ON(sizeof(*(ptr)) & ~0xc);				\
-									\
 	smp_mb__before_llsc();						\
 									\
 	__res = (__typeof__(*(ptr)))					\
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index f0edd7e8a0b78..46c0581256f1d 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -4,7 +4,7 @@
 
 extra-y		:= head.o vmlinux.lds
 
-obj-y		+= cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
+obj-y		+= cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
 		   process.o prom.o ptrace.o reset.o setup.o signal.o \
 		   syscall.o time.o topology.o traps.o unaligned.o watch.o \
 		   vdso.o cacheinfo.o
diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c
new file mode 100644
index 0000000000000..5acfbf9fb2c5a
--- /dev/null
+++ b/arch/mips/kernel/cmpxchg.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2017 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <asm/cmpxchg.h>
+
+unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size)
+{
+	u32 old32, new32, load32, mask;
+	volatile u32 *ptr32;
+	unsigned int shift;
+
+	/* Check that ptr is naturally aligned */
+	WARN_ON((unsigned long)ptr & (size - 1));
+
+	/* Mask value to the correct size. */
+	mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+	val &= mask;
+
+	/*
+	 * Calculate a shift & mask that correspond to the value we wish to
+	 * exchange within the naturally aligned 4 byte integerthat includes
+	 * it.
+	 */
+	shift = (unsigned long)ptr & 0x3;
+	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		shift ^= sizeof(u32) - size;
+	shift *= BITS_PER_BYTE;
+	mask <<= shift;
+
+	/*
+	 * Calculate a pointer to the naturally aligned 4 byte integer that
+	 * includes our byte of interest, and load its value.
+	 */
+	ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3);
+	load32 = *ptr32;
+
+	do {
+		old32 = load32;
+		new32 = (load32 & ~mask) | (val << shift);
+		load32 = cmpxchg(ptr32, old32, new32);
+	} while (load32 != old32);
+
+	return (load32 & mask) >> shift;
+}
-- 
GitLab


From 3ba7f44d2b19166b34031db48ce613d1bddbd384 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:40 -0700
Subject: [PATCH 0516/1958] MIPS: cmpxchg: Implement 1 byte & 2 byte cmpxchg()

Implement support for 1 & 2 byte cmpxchg() using read-modify-write atop
a 4 byte cmpxchg(). This allows us to support these atomic operations
despite the MIPS ISA only providing 4 & 8 byte atomic operations.

This is required in order to support queued rwlocks (qrwlock) in a later
patch, since these make use of a 1 byte cmpxchg() in their slow path.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16355/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h |  7 ++++
 arch/mips/kernel/cmpxchg.c      | 57 +++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index a633bf8456899..a633f61c55454 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -142,10 +142,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 	__ret;								\
 })
 
+extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
+				     unsigned long new, unsigned int size);
+
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 				      unsigned long new, unsigned int size)
 {
 	switch (size) {
+	case 1:
+	case 2:
+		return __cmpxchg_small(ptr, old, new, size);
+
 	case 4:
 		return __cmpxchg_asm("ll", "sc", (volatile u32 *)ptr, old, new);
 
diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c
index 5acfbf9fb2c5a..7730f1d3434f4 100644
--- a/arch/mips/kernel/cmpxchg.c
+++ b/arch/mips/kernel/cmpxchg.c
@@ -50,3 +50,60 @@ unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int s
 
 	return (load32 & mask) >> shift;
 }
+
+unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
+			      unsigned long new, unsigned int size)
+{
+	u32 mask, old32, new32, load32;
+	volatile u32 *ptr32;
+	unsigned int shift;
+	u8 load;
+
+	/* Check that ptr is naturally aligned */
+	WARN_ON((unsigned long)ptr & (size - 1));
+
+	/* Mask inputs to the correct size. */
+	mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+	old &= mask;
+	new &= mask;
+
+	/*
+	 * Calculate a shift & mask that correspond to the value we wish to
+	 * compare & exchange within the naturally aligned 4 byte integer
+	 * that includes it.
+	 */
+	shift = (unsigned long)ptr & 0x3;
+	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		shift ^= sizeof(u32) - size;
+	shift *= BITS_PER_BYTE;
+	mask <<= shift;
+
+	/*
+	 * Calculate a pointer to the naturally aligned 4 byte integer that
+	 * includes our byte of interest, and load its value.
+	 */
+	ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3);
+	load32 = *ptr32;
+
+	while (true) {
+		/*
+		 * Ensure the byte we want to exchange matches the expected
+		 * old value, and if not then bail.
+		 */
+		load = (load32 & mask) >> shift;
+		if (load != old)
+			return load;
+
+		/*
+		 * Calculate the old & new values of the naturally aligned
+		 * 4 byte integer that include the byte we want to exchange.
+		 * Attempt to exchange the old value for the new value, and
+		 * return if we succeed.
+		 */
+		old32 = (load32 & ~mask) | (old << shift);
+		new32 = (load32 & ~mask) | (new << shift);
+		load32 = cmpxchg(ptr32, old32, new32);
+		if (load32 == old32)
+			return old;
+	}
+}
-- 
GitLab


From 4843cf8d3be4d45f609c865698b63374ae935042 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:41 -0700
Subject: [PATCH 0517/1958] MIPS: cmpxchg: Rearrange __xchg() arguments to
 match xchg()

The __xchg() function declares its first 2 arguments in reverse order
compared to the xchg() macro, which is confusing & serves no purpose.
Reorder the arguments such that __xchg() & xchg() match.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16356/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index a633f61c55454..903f3bf48419c 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -73,7 +73,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
 extern unsigned long __xchg_small(volatile void *ptr, unsigned long val,
 				  unsigned int size);
 
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
+				   int size)
 {
 	switch (size) {
 	case 1:
@@ -101,7 +102,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 	smp_mb__before_llsc();						\
 									\
 	__res = (__typeof__(*(ptr)))					\
-		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)));	\
+		__xchg((ptr), (unsigned long)(x), sizeof(*(ptr)));	\
 									\
 	smp_llsc_mb();							\
 									\
-- 
GitLab


From 25da4e9dedbfa1630cc87903dcced5b249b936ef Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:42 -0700
Subject: [PATCH 0518/1958] MIPS: Use queued read/write locks (qrwlock)

This patch switches MIPS to make use of generically implemented queued
read/write locks, rather than the custom implementation used previously.
This allows us to drop a whole load of inline assembly, share more
generic code, and is also a performance win.

Results from running the AIM7 short workload on a MIPS Creator Ci40 (ie.
2 core 2 thread interAptiv CPU clocked at 546MHz) with v4.12-rc4
pistachio_defconfig, with ftrace disabled due to a current bug, and both
with & without use of queued rwlocks & spinlocks:

  Forks | v4.12-rc4 | +qlocks  | Change
 -------|-----------|----------|--------
     10 | 52630.32  | 53316.31 | +1.01%
     20 | 51777.80  | 52623.15 | +1.02%
     30 | 51645.92  | 52517.26 | +1.02%
     40 | 51634.88  | 52419.89 | +1.02%
     50 | 51506.75  | 52307.81 | +1.02%
     60 | 51500.74  | 52322.72 | +1.02%
     70 | 51434.81  | 52288.60 | +1.02%
     80 | 51423.22  | 52434.85 | +1.02%
     90 | 51428.65  | 52410.10 | +1.02%

The kernels used for these tests also had my "MIPS: Hardcode cpu_has_*
where known at compile time due to ISA" patch applied, which allows the
kernel_uses_llsc checks in cmpxchg() & xchg() to be optimised away at
compile time.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16357/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                      |   1 +
 arch/mips/include/asm/Kbuild           |   1 +
 arch/mips/include/asm/spinlock.h       | 216 +------------------------
 arch/mips/include/asm/spinlock_types.h |  10 +-
 4 files changed, 4 insertions(+), 224 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 9891a1285f25e..7c2a64b321790 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -11,6 +11,7 @@ config MIPS
 	select ARCH_SUPPORTS_UPROBES
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
+	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 2535c7b4c4821..ae6cb47e9d222 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -12,6 +12,7 @@ generic-y += mm-arch-hooks.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += qrwlock.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index a8df44d60607b..3e7afff196cd5 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -13,6 +13,7 @@
 
 #include <asm/barrier.h>
 #include <asm/processor.h>
+#include <asm/qrwlock.h>
 #include <asm/compiler.h>
 #include <asm/war.h>
 
@@ -220,221 +221,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 	return tmp;
 }
 
-/*
- * Read-write spinlocks, allowing multiple readers but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts but no interrupt
- * writers. For those circumstances we can "mix" irq-safe locks - any writer
- * needs to get a irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-
-/*
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_read_can_lock(rw)	((rw)->lock >= 0)
-
-/*
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_write_can_lock(rw) (!(rw)->lock)
-
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
-	unsigned int tmp;
-
-	if (R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"	.set	noreorder	# arch_read_lock	\n"
-		"1:	ll	%1, %2					\n"
-		"	bltz	%1, 1b					\n"
-		"	 addu	%1, 1					\n"
-		"	sc	%1, %0					\n"
-		"	beqzl	%1, 1b					\n"
-		"	 nop						\n"
-		"	.set	reorder					\n"
-		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
-		: GCC_OFF_SMALL_ASM() (rw->lock)
-		: "memory");
-	} else {
-		do {
-			__asm__ __volatile__(
-			"1:	ll	%1, %2	# arch_read_lock	\n"
-			"	bltz	%1, 1b				\n"
-			"	 addu	%1, 1				\n"
-			"2:	sc	%1, %0				\n"
-			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
-			: GCC_OFF_SMALL_ASM() (rw->lock)
-			: "memory");
-		} while (unlikely(!tmp));
-	}
-
-	smp_llsc_mb();
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-	unsigned int tmp;
-
-	smp_mb__before_llsc();
-
-	if (R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"1:	ll	%1, %2		# arch_read_unlock	\n"
-		"	addiu	%1, -1					\n"
-		"	sc	%1, %0					\n"
-		"	beqzl	%1, 1b					\n"
-		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
-		: GCC_OFF_SMALL_ASM() (rw->lock)
-		: "memory");
-	} else {
-		do {
-			__asm__ __volatile__(
-			"1:	ll	%1, %2	# arch_read_unlock	\n"
-			"	addiu	%1, -1				\n"
-			"	sc	%1, %0				\n"
-			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
-			: GCC_OFF_SMALL_ASM() (rw->lock)
-			: "memory");
-		} while (unlikely(!tmp));
-	}
-}
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
-	unsigned int tmp;
-
-	if (R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"	.set	noreorder	# arch_write_lock	\n"
-		"1:	ll	%1, %2					\n"
-		"	bnez	%1, 1b					\n"
-		"	 lui	%1, 0x8000				\n"
-		"	sc	%1, %0					\n"
-		"	beqzl	%1, 1b					\n"
-		"	 nop						\n"
-		"	.set	reorder					\n"
-		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
-		: GCC_OFF_SMALL_ASM() (rw->lock)
-		: "memory");
-	} else {
-		do {
-			__asm__ __volatile__(
-			"1:	ll	%1, %2	# arch_write_lock	\n"
-			"	bnez	%1, 1b				\n"
-			"	 lui	%1, 0x8000			\n"
-			"2:	sc	%1, %0				\n"
-			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
-			: GCC_OFF_SMALL_ASM() (rw->lock)
-			: "memory");
-		} while (unlikely(!tmp));
-	}
-
-	smp_llsc_mb();
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-	smp_mb__before_llsc();
-
-	__asm__ __volatile__(
-	"				# arch_write_unlock	\n"
-	"	sw	$0, %0					\n"
-	: "=m" (rw->lock)
-	: "m" (rw->lock)
-	: "memory");
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-	unsigned int tmp;
-	int ret;
-
-	if (R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"	.set	noreorder	# arch_read_trylock	\n"
-		"	li	%2, 0					\n"
-		"1:	ll	%1, %3					\n"
-		"	bltz	%1, 2f					\n"
-		"	 addu	%1, 1					\n"
-		"	sc	%1, %0					\n"
-		"	.set	reorder					\n"
-		"	beqzl	%1, 1b					\n"
-		"	 nop						\n"
-		__WEAK_LLSC_MB
-		"	li	%2, 1					\n"
-		"2:							\n"
-		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
-		: GCC_OFF_SMALL_ASM() (rw->lock)
-		: "memory");
-	} else {
-		__asm__ __volatile__(
-		"	.set	noreorder	# arch_read_trylock	\n"
-		"	li	%2, 0					\n"
-		"1:	ll	%1, %3					\n"
-		"	bltz	%1, 2f					\n"
-		"	 addu	%1, 1					\n"
-		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
-		"	 nop						\n"
-		"	.set	reorder					\n"
-		__WEAK_LLSC_MB
-		"	li	%2, 1					\n"
-		"2:	.insn						\n"
-		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
-		: GCC_OFF_SMALL_ASM() (rw->lock)
-		: "memory");
-	}
-
-	return ret;
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-	unsigned int tmp;
-	int ret;
-
-	if (R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"	.set	noreorder	# arch_write_trylock	\n"
-		"	li	%2, 0					\n"
-		"1:	ll	%1, %3					\n"
-		"	bnez	%1, 2f					\n"
-		"	 lui	%1, 0x8000				\n"
-		"	sc	%1, %0					\n"
-		"	beqzl	%1, 1b					\n"
-		"	 nop						\n"
-		__WEAK_LLSC_MB
-		"	li	%2, 1					\n"
-		"	.set	reorder					\n"
-		"2:							\n"
-		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
-		: GCC_OFF_SMALL_ASM() (rw->lock)
-		: "memory");
-	} else {
-		do {
-			__asm__ __volatile__(
-			"	ll	%1, %3	# arch_write_trylock	\n"
-			"	li	%2, 0				\n"
-			"	bnez	%1, 2f				\n"
-			"	lui	%1, 0x8000			\n"
-			"	sc	%1, %0				\n"
-			"	li	%2, 1				\n"
-			"2:	.insn					\n"
-			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp),
-			  "=&r" (ret)
-			: GCC_OFF_SMALL_ASM() (rw->lock)
-			: "memory");
-		} while (unlikely(!tmp));
-
-		smp_llsc_mb();
-	}
-
-	return ret;
-}
-
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h
index 9b2528e612c0c..3d38bfad9b490 100644
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
 #ifndef _ASM_SPINLOCK_TYPES_H
 #define _ASM_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
 #include <linux/types.h>
 
 #include <asm/byteorder.h>
@@ -28,10 +24,6 @@ typedef union {
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ .lock = 0 }
 
-typedef struct {
-	volatile unsigned int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+#include <asm-generic/qrwlock_types.h>
 
 #endif
-- 
GitLab


From 0b17c9670590148656645be57f62f279f0d3ad52 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 9 Jun 2017 17:26:43 -0700
Subject: [PATCH 0519/1958] MIPS: Use queued spinlocks (qspinlock)

This patch switches MIPS to make use of generically implemented queued
spinlocks, rather than the ticket spinlocks used previously. This allows
us to drop a whole load of inline assembly, share more generic code, and
is also a performance win.

Results from running the AIM7 short workload on a MIPS Creator Ci40 (ie.
2 core 2 thread interAptiv CPU clocked at 546MHz) with v4.12-rc4
pistachio_defconfig, with ftrace disabled due to a current bug, and both
with & without use of queued rwlocks & spinlocks:

  Forks | v4.12-rc4 | +qlocks  | Change
 -------|-----------|----------|--------
     10 | 52630.32  | 53316.31 | +1.01%
     20 | 51777.80  | 52623.15 | +1.02%
     30 | 51645.92  | 52517.26 | +1.02%
     40 | 51634.88  | 52419.89 | +1.02%
     50 | 51506.75  | 52307.81 | +1.02%
     60 | 51500.74  | 52322.72 | +1.02%
     70 | 51434.81  | 52288.60 | +1.02%
     80 | 51423.22  | 52434.85 | +1.02%
     90 | 51428.65  | 52410.10 | +1.02%

The kernels used for these tests also had my "MIPS: Hardcode cpu_has_*
where known at compile time due to ISA" patch applied, which allows the
kernel_uses_llsc checks in cmpxchg() & xchg() to be optimised away at
compile time.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16358/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                      |   1 +
 arch/mips/include/asm/Kbuild           |   1 +
 arch/mips/include/asm/spinlock.h       | 210 +------------------------
 arch/mips/include/asm/spinlock_types.h |  24 +--
 4 files changed, 4 insertions(+), 232 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 7c2a64b321790..ef1cb20cfbb37 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -12,6 +12,7 @@ config MIPS
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
 	select ARCH_USE_QUEUED_RWLOCKS
+	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index ae6cb47e9d222..7c8aab23bce8d 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += qrwlock.h
+generic-y += qspinlock.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 3e7afff196cd5..a7d21da16b6a0 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -9,217 +9,9 @@
 #ifndef _ASM_SPINLOCK_H
 #define _ASM_SPINLOCK_H
 
-#include <linux/compiler.h>
-
-#include <asm/barrier.h>
 #include <asm/processor.h>
 #include <asm/qrwlock.h>
-#include <asm/compiler.h>
-#include <asm/war.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- *
- * Simple spin lock operations.	 There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * These are fair FIFO ticket locks
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-
-
-/*
- * Ticket locks are conceptually two parts, one indicating the current head of
- * the queue, and the other indicating the current tail. The lock is acquired
- * by atomically noting the tail and incrementing it by one (thus adding
- * ourself to the queue and noting our position), then waiting until the head
- * becomes equal to the the initial value of the tail.
- */
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	u32 counters = ACCESS_ONCE(lock->lock);
-
-	return ((counters >> 16) ^ counters) & 0xffff;
-}
-
-static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return lock.h.serving_now == lock.h.ticket;
-}
-
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u16 owner = READ_ONCE(lock->h.serving_now);
-	smp_rmb();
-	for (;;) {
-		arch_spinlock_t tmp = READ_ONCE(*lock);
-
-		if (tmp.h.serving_now == tmp.h.ticket ||
-		    tmp.h.serving_now != owner)
-			break;
-
-		cpu_relax();
-	}
-	smp_acquire__after_ctrl_dep();
-}
-
-static inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	u32 counters = ACCESS_ONCE(lock->lock);
-
-	return (((counters >> 16) - counters) & 0xffff) > 1;
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	int my_ticket;
-	int tmp;
-	int inc = 0x10000;
-
-	if (R10000_LLSC_WAR) {
-		__asm__ __volatile__ (
-		"	.set push		# arch_spin_lock	\n"
-		"	.set noreorder					\n"
-		"							\n"
-		"1:	ll	%[ticket], %[ticket_ptr]		\n"
-		"	addu	%[my_ticket], %[ticket], %[inc]		\n"
-		"	sc	%[my_ticket], %[ticket_ptr]		\n"
-		"	beqzl	%[my_ticket], 1b			\n"
-		"	 nop						\n"
-		"	srl	%[my_ticket], %[ticket], 16		\n"
-		"	andi	%[ticket], %[ticket], 0xffff		\n"
-		"	bne	%[ticket], %[my_ticket], 4f		\n"
-		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
-		"2:							\n"
-		"	.subsection 2					\n"
-		"4:	andi	%[ticket], %[ticket], 0xffff		\n"
-		"	sll	%[ticket], 5				\n"
-		"							\n"
-		"6:	bnez	%[ticket], 6b				\n"
-		"	 subu	%[ticket], 1				\n"
-		"							\n"
-		"	lhu	%[ticket], %[serving_now_ptr]		\n"
-		"	beq	%[ticket], %[my_ticket], 2b		\n"
-		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
-		"	b	4b					\n"
-		"	 subu	%[ticket], %[ticket], 1			\n"
-		"	.previous					\n"
-		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
-		  [serving_now_ptr] "+m" (lock->h.serving_now),
-		  [ticket] "=&r" (tmp),
-		  [my_ticket] "=&r" (my_ticket)
-		: [inc] "r" (inc));
-	} else {
-		__asm__ __volatile__ (
-		"	.set push		# arch_spin_lock	\n"
-		"	.set noreorder					\n"
-		"							\n"
-		"1:	ll	%[ticket], %[ticket_ptr]		\n"
-		"	addu	%[my_ticket], %[ticket], %[inc]		\n"
-		"	sc	%[my_ticket], %[ticket_ptr]		\n"
-		"	beqz	%[my_ticket], 1b			\n"
-		"	 srl	%[my_ticket], %[ticket], 16		\n"
-		"	andi	%[ticket], %[ticket], 0xffff		\n"
-		"	bne	%[ticket], %[my_ticket], 4f		\n"
-		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
-		"2:	.insn						\n"
-		"	.subsection 2					\n"
-		"4:	andi	%[ticket], %[ticket], 0xffff		\n"
-		"	sll	%[ticket], 5				\n"
-		"							\n"
-		"6:	bnez	%[ticket], 6b				\n"
-		"	 subu	%[ticket], 1				\n"
-		"							\n"
-		"	lhu	%[ticket], %[serving_now_ptr]		\n"
-		"	beq	%[ticket], %[my_ticket], 2b		\n"
-		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
-		"	b	4b					\n"
-		"	 subu	%[ticket], %[ticket], 1			\n"
-		"	.previous					\n"
-		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
-		  [serving_now_ptr] "+m" (lock->h.serving_now),
-		  [ticket] "=&r" (tmp),
-		  [my_ticket] "=&r" (my_ticket)
-		: [inc] "r" (inc));
-	}
-
-	smp_llsc_mb();
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	unsigned int serving_now = lock->h.serving_now + 1;
-	wmb();
-	lock->h.serving_now = (u16)serving_now;
-	nudge_writes();
-}
-
-static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	int tmp, tmp2, tmp3;
-	int inc = 0x10000;
-
-	if (R10000_LLSC_WAR) {
-		__asm__ __volatile__ (
-		"	.set push		# arch_spin_trylock	\n"
-		"	.set noreorder					\n"
-		"							\n"
-		"1:	ll	%[ticket], %[ticket_ptr]		\n"
-		"	srl	%[my_ticket], %[ticket], 16		\n"
-		"	andi	%[now_serving], %[ticket], 0xffff	\n"
-		"	bne	%[my_ticket], %[now_serving], 3f	\n"
-		"	 addu	%[ticket], %[ticket], %[inc]		\n"
-		"	sc	%[ticket], %[ticket_ptr]		\n"
-		"	beqzl	%[ticket], 1b				\n"
-		"	 li	%[ticket], 1				\n"
-		"2:							\n"
-		"	.subsection 2					\n"
-		"3:	b	2b					\n"
-		"	 li	%[ticket], 0				\n"
-		"	.previous					\n"
-		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
-		  [ticket] "=&r" (tmp),
-		  [my_ticket] "=&r" (tmp2),
-		  [now_serving] "=&r" (tmp3)
-		: [inc] "r" (inc));
-	} else {
-		__asm__ __volatile__ (
-		"	.set push		# arch_spin_trylock	\n"
-		"	.set noreorder					\n"
-		"							\n"
-		"1:	ll	%[ticket], %[ticket_ptr]		\n"
-		"	srl	%[my_ticket], %[ticket], 16		\n"
-		"	andi	%[now_serving], %[ticket], 0xffff	\n"
-		"	bne	%[my_ticket], %[now_serving], 3f	\n"
-		"	 addu	%[ticket], %[ticket], %[inc]		\n"
-		"	sc	%[ticket], %[ticket_ptr]		\n"
-		"	beqz	%[ticket], 1b				\n"
-		"	 li	%[ticket], 1				\n"
-		"2:	.insn						\n"
-		"	.subsection 2					\n"
-		"3:	b	2b					\n"
-		"	 li	%[ticket], 0				\n"
-		"	.previous					\n"
-		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
-		  [ticket] "=&r" (tmp),
-		  [my_ticket] "=&r" (tmp2),
-		  [now_serving] "=&r" (tmp3)
-		: [inc] "r" (inc));
-	}
-
-	smp_llsc_mb();
-
-	return tmp;
-}
+#include <asm/qspinlock.h>
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h
index 3d38bfad9b490..177e722eb96cc 100644
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -1,29 +1,7 @@
 #ifndef _ASM_SPINLOCK_TYPES_H
 #define _ASM_SPINLOCK_TYPES_H
 
-#include <linux/types.h>
-
-#include <asm/byteorder.h>
-
-typedef union {
-	/*
-	 * bits	 0..15 : serving_now
-	 * bits 16..31 : ticket
-	 */
-	u32 lock;
-	struct {
-#ifdef __BIG_ENDIAN
-		u16 ticket;
-		u16 serving_now;
-#else
-		u16 serving_now;
-		u16 ticket;
-#endif
-	} h;
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ .lock = 0 }
-
+#include <asm-generic/qspinlock_types.h>
 #include <asm-generic/qrwlock_types.h>
 
 #endif
-- 
GitLab


From 13769ebad0c42738831787e27c7c7f982e7da579 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:05:08 +0100
Subject: [PATCH 0520/1958] MIPS: math-emu: Prevent wrong ISA mode instruction
 emulation

Terminate FPU emulation immediately whenever an ISA mode switch has been
observed.  This is so that we do not interpret machine code in the wrong
mode, for example when a regular MIPS FPU instruction has been placed in
a delay slot of a jump that switches into the MIPS16 mode, as with the
following code (taken from a GCC test suite case):

00400650 <set_fast_math>:
  400650:	3c020100 	lui	v0,0x100
  400654:	03e00008 	jr	ra
  400658:	44c2f800 	ctc1	v0,c1_fcsr
  40065c:	00000000 	nop

[...]

004012d0 <__libc_csu_init>:
  4012d0:	f000 6a02 	li	v0,2
  4012d4:	f150 0b1c 	la	v1,3f9430 <_DYNAMIC-0x6df0>
  4012d8:	f400 3240 	sll	v0,16
  4012dc:	e269      	addu	v0,v1
  4012de:	659a      	move	gp,v0
  4012e0:	f00c 64f6 	save	a0-a2,48,ra,s0-s1
  4012e4:	673c      	move	s1,gp
  4012e6:	f010 9978 	lw	v1,-32744(s1)
  4012ea:	d204      	sw	v0,16(sp)
  4012ec:	eb40      	jalr	v1
  4012ee:	653b      	move	t9,v1
  4012f0:	f010 997c 	lw	v1,-32740(s1)
  4012f4:	f030 9920 	lw	s1,-32736(s1)
  4012f8:	e32f      	subu	v1,s1
  4012fa:	326b      	sra	v0,v1,2
  4012fc:	d206      	sw	v0,24(sp)
  4012fe:	220c      	beqz	v0,401318 <__libc_csu_init+0x48>
  401300:	6800      	li	s0,0
  401302:	99e0      	lw	a3,0(s1)
  401304:	4801      	addiu	s0,1
  401306:	960e      	lw	a2,56(sp)
  401308:	4904      	addiu	s1,4
  40130a:	950d      	lw	a1,52(sp)
  40130c:	940c      	lw	a0,48(sp)
  40130e:	ef40      	jalr	a3
  401310:	653f      	move	t9,a3
  401312:	9206      	lw	v0,24(sp)
  401314:	ea0a      	cmp	v0,s0
  401316:	61f5      	btnez	401302 <__libc_csu_init+0x32>
  401318:	6476      	restore	48,ra,s0-s1
  40131a:	e8a0      	jrc	ra

Here `set_fast_math' is called from `40130e' (`40130f' with the ISA bit)
and emulation triggers for the CTC1 instruction.  As it is in a jump
delay slot emulation continues from `401312' (`401313' with the ISA
bit).  However we have no path to handle MIPS16 FPU code emulation,
because there are no MIPS16 FPU instructions.  So the default emulation
path is taken, interpreting a 32-bit word fetched by `get_user' from
`401313' as a regular MIPS instruction, which is:

  401313:	f5ea0a92	sdc1	$f10,2706(t7)

This makes the FPU emulator proceed with the supposed SDC1 instruction
and consequently makes the program considered here terminate with
SIGSEGV.

A similar although less severe issue exists with pure-microMIPS
processors in the case where similarly an FPU instruction is emulated in
a delay slot of a register jump that (incorrectly) switches into the
regular MIPS mode.  A subsequent instruction fetch from the jump's
target is supposed to cause an Address Error exception, however instead
we proceed with regular MIPS FPU emulation.

For simplicity then, always terminate the emulation loop whenever a mode
change is detected, denoted by an ISA mode bit flip.  As from commit
377cb1b6c16a ("MIPS: Disable MIPS16/microMIPS crap for platforms not
supporting these ASEs.") the result of `get_isa16_mode' can be hardcoded
to 0, so we need to examine the ISA mode bit by hand.

This complements commit 102cedc32a6e ("MIPS: microMIPS: Floating point
support.") which added JALX decoding to FPU emulation.

Fixes: 102cedc32a6e ("MIPS: microMIPS: Floating point support.")
Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.9+
Patchwork: https://patchwork.linux-mips.org/patch/16393/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/math-emu/cp1emu.c | 38 +++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index f12fde10c8ad3..261915265942a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -2524,6 +2524,35 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	return 0;
 }
 
+/*
+ * Emulate FPU instructions.
+ *
+ * If we use FPU hardware, then we have been typically called to handle
+ * an unimplemented operation, such as where an operand is a NaN or
+ * denormalized.  In that case exit the emulation loop after a single
+ * iteration so as to let hardware execute any subsequent instructions.
+ *
+ * If we have no FPU hardware or it has been disabled, then continue
+ * emulating floating-point instructions until one of these conditions
+ * has occurred:
+ *
+ * - a non-FPU instruction has been encountered,
+ *
+ * - an attempt to emulate has ended with a signal,
+ *
+ * - the ISA mode has been switched.
+ *
+ * We need to terminate the emulation loop if we got switched to the
+ * MIPS16 mode, whether supported or not, so that we do not attempt
+ * to emulate a MIPS16 instruction as a regular MIPS FPU instruction.
+ * Similarly if we got switched to the microMIPS mode and only the
+ * regular MIPS mode is supported, so that we do not attempt to emulate
+ * a microMIPS instruction as a regular MIPS FPU instruction.  Or if
+ * we got switched to the regular MIPS mode and only the microMIPS mode
+ * is supported, so that we do not attempt to emulate a regular MIPS
+ * instruction that should cause an Address Error exception instead.
+ * For simplicity we always terminate upon an ISA mode switch.
+ */
 int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	int has_fpu, void *__user *fault_addr)
 {
@@ -2609,6 +2638,15 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			break;
 		if (sig)
 			break;
+		/*
+		 * We have to check for the ISA bit explicitly here,
+		 * because `get_isa16_mode' may return 0 if support
+		 * for code compression has been globally disabled,
+		 * or otherwise we may produce the wrong signal or
+		 * even proceed successfully where we must not.
+		 */
+		if ((xcp->cp0_epc ^ prevepc) & 0x1)
+			break;
 
 		cond_resched();
 	} while (xcp->cp0_epc > prevepc);
-- 
GitLab


From a9db101b735a9d49295326ae41f610f6da62b08c Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:06:19 +0100
Subject: [PATCH 0521/1958] MIPS: Actually decode JALX in
 `__compute_return_epc_for_insn'

Complement commit fb6883e5809c ("MIPS: microMIPS: Support handling of
delay slots.") and actually decode the regular MIPS JALX major
instruction opcode, the handling of which has been added with the said
commit for EPC calculation in `__compute_return_epc_for_insn'.

Fixes: fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.")
Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.9+
Patchwork: https://patchwork.linux-mips.org/patch/16394/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/branch.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index f702a459a8300..40cc3def36a4f 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -556,6 +556,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 	/*
 	 * These are unconditional and in j_format.
 	 */
+	case jalx_op:
 	case jal_op:
 		regs->regs[31] = regs->cp0_epc + 8;
 	case j_op:
-- 
GitLab


From 11a3799dbeb620bf0400b1fda5cc2c6bea55f20a Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:07:34 +0100
Subject: [PATCH 0522/1958] MIPS: Fix unaligned PC interpretation in
 `compute_return_epc'

Fix a regression introduced with commit fb6883e5809c ("MIPS: microMIPS:
Support handling of delay slots.") and defer to `__compute_return_epc'
if the ISA bit is set in EPC with non-MIPS16, non-microMIPS hardware,
which will then arrange for a SIGBUS due to an unaligned instruction
reference.  Returning EPC here is never correct as the API defines this
function's result to be either a negative error code on failure or one
of 0 and BRANCH_LIKELY_TAKEN on success.

Fixes: fb6883e5809c ("MIPS: microMIPS: Support handling of delay slots.")
Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.9+
Patchwork: https://patchwork.linux-mips.org/patch/16395/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/branch.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h
index de781cf54bc7a..da80878f2c0dc 100644
--- a/arch/mips/include/asm/branch.h
+++ b/arch/mips/include/asm/branch.h
@@ -74,10 +74,7 @@ static inline int compute_return_epc(struct pt_regs *regs)
 			return __microMIPS_compute_return_epc(regs);
 		if (cpu_has_mips16)
 			return __MIPS16e_compute_return_epc(regs);
-		return regs->cp0_epc;
-	}
-
-	if (!delay_slot(regs)) {
+	} else if (!delay_slot(regs)) {
 		regs->cp0_epc += 4;
 		return 0;
 	}
-- 
GitLab


From 7b82c1058ac1f8f8b9f2b8786b1f710a57a870a8 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:08:29 +0100
Subject: [PATCH 0523/1958] MIPS: Send SIGILL for BPOSGE32 in
 `__compute_return_epc_for_insn'

Fix commit e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") and
send SIGILL rather than SIGBUS whenever an unimplemented BPOSGE32 DSP
ASE instruction has been encountered in `__compute_return_epc_for_insn'
as our Reserved Instruction exception handler would in response to an
attempt to actually execute the instruction.  Sending SIGBUS only makes
sense for the unaligned PC case, since moved to `__compute_return_epc'.
Adjust function documentation accordingly, correct formatting and use
`pr_info' rather than `printk' as the other exit path already does.

Fixes: e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.")
Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 2.6.14+
Patchwork: https://patchwork.linux-mips.org/patch/16396/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/branch.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 40cc3def36a4f..81b5608acd5c8 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -399,7 +399,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs)
  *
  * @regs:	Pointer to pt_regs
  * @insn:	branch instruction to decode
- * @returns:	-EFAULT on error and forces SIGBUS, and on success
+ * @returns:	-EFAULT on error and forces SIGILL, and on success
  *		returns 0 or BRANCH_LIKELY_TAKEN as appropriate after
  *		evaluating the branch.
  *
@@ -832,8 +832,9 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 	return ret;
 
 sigill_dsp:
-	printk("%s: DSP branch but not DSP ASE - sending SIGBUS.\n", current->comm);
-	force_sig(SIGBUS, current);
+	pr_info("%s: DSP branch but not DSP ASE - sending SIGILL.\n",
+		current->comm);
+	force_sig(SIGILL, current);
 	return -EFAULT;
 sigill_r6:
 	pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n",
-- 
GitLab


From 1f4edde422961397cf4470b347958c13c6a740bb Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:09:23 +0100
Subject: [PATCH 0524/1958] MIPS: Rename `sigill_r6' to `sigill_r2r6' in
 `__compute_return_epc_for_insn'

Use the more accurate `sigill_r2r6' name for the label used in the case
of sending SIGILL in the absence of the instruction emulator for an
earlier ISA level instruction that has been removed as from the R6 ISA,
so that the `sigill_r6' name is freed for the situation where an R6
instruction is not supposed to be interpreted, because the executing
processor does not support the R6 ISA.

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.19+
Patchwork: https://patchwork.linux-mips.org/patch/16397/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/branch.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 81b5608acd5c8..df5c32b38408d 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -431,7 +431,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 			/* Fall through */
 		case jr_op:
 			if (NO_R6EMU && insn.r_format.func == jr_op)
-				goto sigill_r6;
+				goto sigill_r2r6;
 			regs->cp0_epc = regs->regs[insn.r_format.rs];
 			break;
 		}
@@ -446,7 +446,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		switch (insn.i_format.rt) {
 		case bltzl_op:
 			if (NO_R6EMU)
-				goto sigill_r6;
+				goto sigill_r2r6;
 		case bltz_op:
 			if ((long)regs->regs[insn.i_format.rs] < 0) {
 				epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -459,7 +459,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 
 		case bgezl_op:
 			if (NO_R6EMU)
-				goto sigill_r6;
+				goto sigill_r2r6;
 		case bgez_op:
 			if ((long)regs->regs[insn.i_format.rs] >= 0) {
 				epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -574,7 +574,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 	 */
 	case beql_op:
 		if (NO_R6EMU)
-			goto sigill_r6;
+			goto sigill_r2r6;
 	case beq_op:
 		if (regs->regs[insn.i_format.rs] ==
 		    regs->regs[insn.i_format.rt]) {
@@ -588,7 +588,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 
 	case bnel_op:
 		if (NO_R6EMU)
-			goto sigill_r6;
+			goto sigill_r2r6;
 	case bne_op:
 		if (regs->regs[insn.i_format.rs] !=
 		    regs->regs[insn.i_format.rt]) {
@@ -602,7 +602,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 
 	case blezl_op: /* not really i_format */
 		if (!insn.i_format.rt && NO_R6EMU)
-			goto sigill_r6;
+			goto sigill_r2r6;
 	case blez_op:
 		/*
 		 * Compact branches for R6 for the
@@ -637,7 +637,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 
 	case bgtzl_op:
 		if (!insn.i_format.rt && NO_R6EMU)
-			goto sigill_r6;
+			goto sigill_r2r6;
 	case bgtz_op:
 		/*
 		 * Compact branches for R6 for the
@@ -836,7 +836,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		current->comm);
 	force_sig(SIGILL, current);
 	return -EFAULT;
-sigill_r6:
+sigill_r2r6:
 	pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n",
 		current->comm);
 	force_sig(SIGILL, current);
-- 
GitLab


From fef40be6da856afead4177aaa9d869a66fb3381f Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:12:53 +0100
Subject: [PATCH 0525/1958] MIPS: Send SIGILL for linked branches in
 `__compute_return_epc_for_insn'

Fix commit 319824eabc3f ("MIPS: kernel: branch: Do not emulate the
branch likelies on MIPS R6") and also send SIGILL rather than returning
-SIGILL for BLTZAL, BLTZALL, BGEZAL and BGEZALL instruction encodings no
longer supported in R6, except where emulated.  Returning -SIGILL is
never correct as the API defines this function's result upon error to be
-EFAULT and a signal actually issued.

Fixes: 319824eabc3f ("MIPS: kernel: branch: Do not emulate the branch likelies on MIPS R6")
Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.19+
Patchwork: https://patchwork.linux-mips.org/patch/16398/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/branch.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index df5c32b38408d..64c8360e3d62e 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -473,10 +473,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		case bltzal_op:
 		case bltzall_op:
 			if (NO_R6EMU && (insn.i_format.rs ||
-			    insn.i_format.rt == bltzall_op)) {
-				ret = -SIGILL;
-				break;
-			}
+			    insn.i_format.rt == bltzall_op))
+				goto sigill_r2r6;
 			regs->regs[31] = epc + 8;
 			/*
 			 * OK we are here either because we hit a NAL
@@ -507,10 +505,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		case bgezal_op:
 		case bgezall_op:
 			if (NO_R6EMU && (insn.i_format.rs ||
-			    insn.i_format.rt == bgezall_op)) {
-				ret = -SIGILL;
-				break;
-			}
+			    insn.i_format.rt == bgezall_op))
+				goto sigill_r2r6;
 			regs->regs[31] = epc + 8;
 			/*
 			 * OK we are here either because we hit a BAL
-- 
GitLab


From a60b1a5bf88a250f1a77977c0224e502c901c77b Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:14:12 +0100
Subject: [PATCH 0526/1958] MIPS: Send SIGILL for R6 branches in
 `__compute_return_epc_for_insn'

Fix:

* commit 8467ca0122e2 ("MIPS: Emulate the new MIPS R6 branch compact
(BC) instruction"),

* commit 84fef630127a ("MIPS: Emulate the new MIPS R6 BALC
instruction"),

* commit 69b9a2fd05a3 ("MIPS: Emulate the new MIPS R6 BEQZC and JIC
instructions"),

* commit 28d6f93d201d ("MIPS: Emulate the new MIPS R6 BNEZC and JIALC
instructions"),

* commit c893ce38b265 ("MIPS: Emulate the new MIPS R6 BOVC, BEQC and
BEQZALC instructions")

and send SIGILL rather than returning -SIGILL for R6 branch and jump
instructions.  Returning -SIGILL is never correct as the API defines
this function's result upon error to be -EFAULT and a signal actually
issued.

Fixes: 8467ca0122e2 ("MIPS: Emulate the new MIPS R6 branch compact (BC) instruction")
Fixes: 84fef630127a ("MIPS: Emulate the new MIPS R6 BALC instruction")
Fixes: 69b9a2fd05a3 ("MIPS: Emulate the new MIPS R6 BEQZC and JIC instructions")
Fixes: 28d6f93d201d ("MIPS: Emulate the new MIPS R6 BNEZC and JIALC instructions")
Fixes: c893ce38b265 ("MIPS: Emulate the new MIPS R6 BOVC, BEQC and BEQZALC instructions")
Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.19+
Patchwork: https://patchwork.linux-mips.org/patch/16399/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/branch.c | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 64c8360e3d62e..6d536e5581e10 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -771,35 +771,27 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 #else
 	case bc6_op:
 		/* Only valid for MIPS R6 */
-		if (!cpu_has_mips_r6) {
-			ret = -SIGILL;
-			break;
-		}
+		if (!cpu_has_mips_r6)
+			goto sigill_r6;
 		regs->cp0_epc += 8;
 		break;
 	case balc6_op:
-		if (!cpu_has_mips_r6) {
-			ret = -SIGILL;
-			break;
-		}
+		if (!cpu_has_mips_r6)
+			goto sigill_r6;
 		/* Compact branch: BALC */
 		regs->regs[31] = epc + 4;
 		epc += 4 + (insn.i_format.simmediate << 2);
 		regs->cp0_epc = epc;
 		break;
 	case pop66_op:
-		if (!cpu_has_mips_r6) {
-			ret = -SIGILL;
-			break;
-		}
+		if (!cpu_has_mips_r6)
+			goto sigill_r6;
 		/* Compact branch: BEQZC || JIC */
 		regs->cp0_epc += 8;
 		break;
 	case pop76_op:
-		if (!cpu_has_mips_r6) {
-			ret = -SIGILL;
-			break;
-		}
+		if (!cpu_has_mips_r6)
+			goto sigill_r6;
 		/* Compact branch: BNEZC || JIALC */
 		if (!insn.i_format.rs) {
 			/* JIALC: set $31/ra */
@@ -811,10 +803,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 	case pop10_op:
 	case pop30_op:
 		/* Only valid for MIPS R6 */
-		if (!cpu_has_mips_r6) {
-			ret = -SIGILL;
-			break;
-		}
+		if (!cpu_has_mips_r6)
+			goto sigill_r6;
 		/*
 		 * Compact branches:
 		 * bovc, beqc, beqzalc, bnvc, bnec, bnezlac
@@ -837,6 +827,11 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		current->comm);
 	force_sig(SIGILL, current);
 	return -EFAULT;
+sigill_r6:
+	pr_info("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n",
+		current->comm);
+	force_sig(SIGILL, current);
+	return -EFAULT;
 }
 EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn);
 
-- 
GitLab


From 27fe2200dad2de8207a694024a7b9037dff1b280 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:15:22 +0100
Subject: [PATCH 0527/1958] MIPS: Fix a typo: s/preset/present/ in r2-to-r6
 emulation error message

This is a user-visible message, so we want it to be spelled correctly.

Fixes: 5f9f41c474be ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6")
Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.19+
Patchwork: https://patchwork.linux-mips.org/patch/16400/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/branch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 6d536e5581e10..e53379c689b2d 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -823,7 +823,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 	force_sig(SIGILL, current);
 	return -EFAULT;
 sigill_r2r6:
-	pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n",
+	pr_info("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n",
 		current->comm);
 	force_sig(SIGILL, current);
 	return -EFAULT;
-- 
GitLab


From 70f743d141d3b14ca904581dfd1bd50dbe685c4f Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:16:15 +0100
Subject: [PATCH 0528/1958] MIPS: math-emu: For MFHC1/MTHC1 also return SIGILL
 right away

Update commit 1ac944007bed ("MIPS: math-emu: Add mfhc1 & mthc1
support.") and like done throughout `cop1Emulate' for other cases also
for the MFHC1 and MTHC1 instructions return SIGILL right away rather
than jumping to a single `return' statement.

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16401/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/math-emu/cp1emu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 261915265942a..f08a7b4facb9d 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1142,7 +1142,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 
 		case mfhc_op:
 			if (!cpu_has_mips_r2_r6)
-				goto sigill;
+				return SIGILL;
 
 			/* copregister rd -> gpr[rt] */
 			if (MIPSInst_RT(ir) != 0) {
@@ -1153,7 +1153,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 
 		case mthc_op:
 			if (!cpu_has_mips_r2_r6)
-				goto sigill;
+				return SIGILL;
 
 			/* copregister rd <- gpr[rt] */
 			SITOHREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir));
@@ -1376,7 +1376,6 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 				xcp->regs[MIPSInst_RS(ir)];
 		break;
 	default:
-sigill:
 		return SIGILL;
 	}
 
-- 
GitLab


From f259fe295ef07aafadf3316f58c4ac4eddfeccf1 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Fri, 16 Jun 2017 00:18:11 +0100
Subject: [PATCH 0529/1958] MIPS: Use `pr_debug' for messages from
 `__compute_return_epc_for_insn'

Reduce the log level for branch emulation error messages issued before
sending SIGILL by `__compute_return_epc_for_insn' as these are triggered
by user software and are not an event that would normally require any
attention.  The same signal sent from elsewhere does not actually leave
any trace in the kernel log at all.

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16402/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/branch.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index e53379c689b2d..b79ed9af98860 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -818,18 +818,18 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 	return ret;
 
 sigill_dsp:
-	pr_info("%s: DSP branch but not DSP ASE - sending SIGILL.\n",
-		current->comm);
+	pr_debug("%s: DSP branch but not DSP ASE - sending SIGILL.\n",
+		 current->comm);
 	force_sig(SIGILL, current);
 	return -EFAULT;
 sigill_r2r6:
-	pr_info("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n",
-		current->comm);
+	pr_debug("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n",
+		 current->comm);
 	force_sig(SIGILL, current);
 	return -EFAULT;
 sigill_r6:
-	pr_info("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n",
-		current->comm);
+	pr_debug("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n",
+		 current->comm);
 	force_sig(SIGILL, current);
 	return -EFAULT;
 }
-- 
GitLab


From 9b03d8abe06d92195712fd489b2e8983de27fa68 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 14:48:49 -0700
Subject: [PATCH 0530/1958] MIPS: Skip IPI setup if we only have 1 CPU

If we're running on a system with only 1 possible CPU then it makes no
sense to reserve or initialise IPIs since we'll never use them. Avoid
doing so.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16192/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index aba1afb64b620..770d4d1516cbb 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -335,6 +335,9 @@ int mips_smp_ipi_free(const struct cpumask *mask)
 
 static int __init mips_smp_ipi_init(void)
 {
+	if (num_possible_cpus() == 1)
+		return 0;
+
 	mips_smp_ipi_allocate(cpu_possible_mask);
 
 	call_desc = irq_to_desc(call_virq);
-- 
GitLab


From 516db1c61f3fd4328361699a2c74781ab1dbf84c Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 14:48:50 -0700
Subject: [PATCH 0531/1958] MIPS: CM: Avoid per-core locking with CM3 & higher

CM3 provides a GCR_CL_OTHER register per VP, rather than only per core.
This means that we don't need to prevent other VPs within a core from
racing with code that makes use of the core-other register region.

Reduce locking overhead by demoting the per-core spinlock providing
protection for CM2.5 & lower to a per-CPU/per-VP spinlock for CM3 &
higher.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16193/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/mips-cm.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index 659e6d3ae335b..99bb74dd12ce8 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -265,15 +265,34 @@ void mips_cm_lock_other(unsigned int core, unsigned int vp)
 	u32 val;
 
 	preempt_disable();
-	curr_core = current_cpu_data.core;
-	spin_lock_irqsave(&per_cpu(cm_core_lock, curr_core),
-			  per_cpu(cm_core_lock_flags, curr_core));
 
 	if (mips_cm_revision() >= CM_REV_CM3) {
 		val = core << CM3_GCR_Cx_OTHER_CORE_SHF;
 		val |= vp << CM3_GCR_Cx_OTHER_VP_SHF;
+
+		/*
+		 * We need to disable interrupts in SMP systems in order to
+		 * ensure that we don't interrupt the caller with code which
+		 * may modify the redirect register. We do so here in a
+		 * slightly obscure way by using a spin lock, since this has
+		 * the neat property of also catching any nested uses of
+		 * mips_cm_lock_other() leading to a deadlock or a nice warning
+		 * with lockdep enabled.
+		 */
+		spin_lock_irqsave(this_cpu_ptr(&cm_core_lock),
+				  *this_cpu_ptr(&cm_core_lock_flags));
 	} else {
 		BUG_ON(vp != 0);
+
+		/*
+		 * We only have a GCR_CL_OTHER per core in systems with
+		 * CM 2.5 & older, so have to ensure other VP(E)s don't
+		 * race with us.
+		 */
+		curr_core = current_cpu_data.core;
+		spin_lock_irqsave(&per_cpu(cm_core_lock, curr_core),
+				  per_cpu(cm_core_lock_flags, curr_core));
+
 		val = core << CM_GCR_Cx_OTHER_CORENUM_SHF;
 	}
 
@@ -288,10 +307,17 @@ void mips_cm_lock_other(unsigned int core, unsigned int vp)
 
 void mips_cm_unlock_other(void)
 {
-	unsigned curr_core = current_cpu_data.core;
+	unsigned int curr_core;
+
+	if (mips_cm_revision() < CM_REV_CM3) {
+		curr_core = current_cpu_data.core;
+		spin_unlock_irqrestore(&per_cpu(cm_core_lock, curr_core),
+				       per_cpu(cm_core_lock_flags, curr_core));
+	} else {
+		spin_unlock_irqrestore(this_cpu_ptr(&cm_core_lock),
+				       *this_cpu_ptr(&cm_core_lock_flags));
+	}
 
-	spin_unlock_irqrestore(&per_cpu(cm_core_lock, curr_core),
-			       per_cpu(cm_core_lock_flags, curr_core));
 	preempt_enable();
 }
 
-- 
GitLab


From 2f93a60c3d829da07764eafd922beb40e7317aa3 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 14:48:51 -0700
Subject: [PATCH 0532/1958] MIPS: CM: WARN on attempt to lock invalid VP, not
 BUG

Rather than using BUG_ON in the case of an invalid attempt to lock
access to a non-zero VP on a pre-CM3 system, use WARN_ON so that we have
even the slightest chance of recovery.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16194/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/mips-cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index 99bb74dd12ce8..cb0c57f860d46 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -282,7 +282,7 @@ void mips_cm_lock_other(unsigned int core, unsigned int vp)
 		spin_lock_irqsave(this_cpu_ptr(&cm_core_lock),
 				  *this_cpu_ptr(&cm_core_lock_flags));
 	} else {
-		BUG_ON(vp != 0);
+		WARN_ON(vp != 0);
 
 		/*
 		 * We only have a GCR_CL_OTHER per core in systems with
-- 
GitLab


From c8b7712c34d0604e2540608731bd5e9202c1139e Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 14:48:52 -0700
Subject: [PATCH 0533/1958] MIPS: CPS: Select CONFIG_SYS_SUPPORTS_SCHED_SMT for
 MIPSr6

Prior to MIPSr6 multithreading is only supported if CONFIG_MIPS_MT_SMP
is enabled, so CONFIG_MIPS_MT_SMP selects CONFIG_SYS_SUPPORTS_SCHED_SMT.
With MIPSr6 the CONFIG_MIPS_CPS SMP implementation always supports
multithreading, so have it select CONFIG_SYS_SUPPORTS_SCHED_SMT in order
to allow the scheduler to make better informed decisions on
multithreaded MIPSr6 systems (for example those using I6400 or I6500
CPUs).

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16195/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ef1cb20cfbb37..a986116dabb53 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2381,6 +2381,7 @@ config MIPS_CPS
 	select SMP
 	select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
 	select SYS_SUPPORTS_HOTPLUG_CPU
+	select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6
 	select SYS_SUPPORTS_SMP
 	select WEAK_ORDERING
 	help
-- 
GitLab


From 5570ba2ee920de4e7760a2802b842771845b2c32 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 14:48:53 -0700
Subject: [PATCH 0534/1958] MIPS: CPS: Prevent multi-core with dcache aliasing

Systems using the MIPS Coherence Manager (CM) cannot support multi-core
SMP with dcache aliasing. This is because CPU caches are VIPT, but
interventions in CM-based systems provide only the physical address to
remote caches. This means that interventions may behave incorrectly in
the presence of an aliasing dcache, since the physical address used
when handling an intervention may lead to operation on an aliased cache
line rather than the correct line.

Prevent us from running into this issue by refusing to boot secondary
cores in systems where dcache aliasing may occur.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16196/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp-cps.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 36954ddd0b9f5..90ecd099c4b09 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -142,9 +142,11 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
 
 	/* Warn the user if the CCA prevents multi-core */
 	ncores = mips_cm_numcores();
-	if (cca_unsuitable && ncores > 1) {
-		pr_warn("Using only one core due to unsuitable CCA 0x%x\n",
-			cca);
+	if ((cca_unsuitable || cpu_has_dc_aliases) && ncores > 1) {
+		pr_warn("Using only one core due to %s%s%s\n",
+			cca_unsuitable ? "unsuitable CCA" : "",
+			(cca_unsuitable && cpu_has_dc_aliases) ? " & " : "",
+			cpu_has_dc_aliases ? "dcache aliasing" : "");
 
 		for_each_present_cpu(c) {
 			if (cpu_data[c].core)
-- 
GitLab


From 4ad755c9e39c0eeae16f96b97602f1954f582c66 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 14:48:54 -0700
Subject: [PATCH 0535/1958] MIPS: CPS: Handle cores not powering down more
 gracefully

If we get into a state where a core that ought to power down isn't doing
so then the current result is that another CPU gets stuck inside
cps_cpu_die() waiting for CPU that ought to be powering down to do so.
The best case scenario is that we then trigger RCU stall messages or
lockup messages, but neither makes it particularly clear what's
happening.

Handle this more gracefully by introducing a timeout beyond which we
warn the user that the core didn't power down & stop waiting for it.
This at least allows the CPU running cps_cpu_die() to continue normally,
and hopefully presuming the CPU that powered back up is doing nothing
harmful the system will continue functioning as normal.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16197/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp-cps.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 90ecd099c4b09..f832e99ad4c38 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -490,6 +490,7 @@ static void cps_cpu_die(unsigned int cpu)
 {
 	unsigned core = cpu_data[cpu].core;
 	unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]);
+	ktime_t fail_time;
 	unsigned stat;
 	int err;
 
@@ -516,6 +517,7 @@ static void cps_cpu_die(unsigned int cpu)
 		 * state, the latter happening when a JTAG probe is connected
 		 * in which case the CPC will refuse to power down the core.
 		 */
+		fail_time = ktime_add_ms(ktime_get(), 2000);
 		do {
 			mips_cm_lock_other(core, 0);
 			mips_cpc_lock_other(core);
@@ -523,9 +525,28 @@ static void cps_cpu_die(unsigned int cpu)
 			stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK;
 			mips_cpc_unlock_other();
 			mips_cm_unlock_other();
-		} while (stat != CPC_Cx_STAT_CONF_SEQSTATE_D0 &&
-			 stat != CPC_Cx_STAT_CONF_SEQSTATE_D2 &&
-			 stat != CPC_Cx_STAT_CONF_SEQSTATE_U2);
+
+			if (stat == CPC_Cx_STAT_CONF_SEQSTATE_D0 ||
+			    stat == CPC_Cx_STAT_CONF_SEQSTATE_D2 ||
+			    stat == CPC_Cx_STAT_CONF_SEQSTATE_U2)
+				break;
+
+			/*
+			 * The core ought to have powered down, but didn't &
+			 * now we don't really know what state it's in. It's
+			 * likely that its _pwr_up pin has been wired to logic
+			 * 1 & it powered back up as soon as we powered it
+			 * down...
+			 *
+			 * The best we can do is warn the user & continue in
+			 * the hope that the core is doing nothing harmful &
+			 * might behave properly if we online it later.
+			 */
+			if (WARN(ktime_after(ktime_get(), fail_time),
+				 "CPU%u hasn't powered down, seq. state %u\n",
+				 cpu, stat >> CPC_Cx_STAT_CONF_SEQSTATE_SHF))
+				break;
+		} while (1);
 
 		/* Indicate the core is powered off */
 		bitmap_clear(core_power, core, 1);
-- 
GitLab


From fa7a3b4a7217b40bf58c4f38e5ee573b43a8aa2f Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 14:48:55 -0700
Subject: [PATCH 0536/1958] MIPS: CPS: Handle spurious VP starts more
 gracefully

On pre-r6 systems with the MT ASE the CPS SMP code included checks to
halt the VPE running mips_cps_boot_vpes() if its bit in the struct
core_boot_config vpe_mask field is clear. This was largely done in order
to allow us to start arbitrary VPEs within a core despite the fact that
hardware is typically configured to run only VPE0 after powering up a
core. VPE0 would start the desired other VPEs, halt itself, and the fact
that VPE0 started would be largely hidden & irrelevant.

In MIPSr6 multithreading we have control over which VPs start executing
when a core powers up via the cores CPC registers accessed remotely
through the redirect block. For this reason the MIPSr6 multithreading
path in mips_cps_boot_vpes() hasn't bothered up until now to handle
halting the VP running it.

However it is possible to power up cores entirely in hardware by using a
pwr_up pin associated with the core. Unfortunately some systems wire
this pin to a logic 1, which means that it is possible for a core to
power up at a point that software doesn't expect. The result is that we
generally go execute the kernel on a CPU that ought not to be running &
the results can be unpredictable.

Handle this case by stopping VPs that we don't expect to be running in
mips_cps_boot_vpes() - with this change even if a core powers up it will
do nothing useful & all VPs within it will stop running before they
proceed to run general kernel code & do any damage. Ideally we would
produce some sort of warning here, but given the stage of core bringup
this happens at that would be non-trivial. We also will only hit this if
a core starts up after being offlined via hotplug, and when that happens
we will already produce a warning that the CPU didn't power down in
cps_cpu_die() which seems sufficient.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16198/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/cps-vec.S | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index a00e87b0256d3..b849fe6aad941 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -22,6 +22,7 @@
 #define GCR_CL_COHERENCE_OFS	0x2008
 #define GCR_CL_ID_OFS		0x2028
 
+#define CPC_CL_VC_STOP_OFS	0x2020
 #define CPC_CL_VC_RUN_OFS	0x2028
 
 .extern mips_cm_base
@@ -376,8 +377,12 @@ LEAF(mips_cps_boot_vpes)
 	PTR_LI	t2, UNCAC_BASE
 	PTR_ADD	t1, t1, t2
 
-	/* Set VC_RUN to the VPE mask */
+	/* Start any other VPs that ought to be running */
 	PTR_S	ta2, CPC_CL_VC_RUN_OFS(t1)
+
+	/* Ensure this VP stops running if it shouldn't be */
+	not	ta2
+	PTR_S	ta2, CPC_CL_VC_STOP_OFS(t1)
 	ehb
 
 #elif defined(CONFIG_MIPS_MT)
-- 
GitLab


From e7bc8557428f069eaa613b3676ea6931c0f7fe43 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 15:38:01 -0700
Subject: [PATCH 0537/1958] MIPS: Add CPU shared FTLB feature detection

Some systems share FTLB RAMs or entries between sibling CPUs (ie.
hardware threads, or VP(E)s, within a core). These properties require
kernel handling in various places. As a start this patch introduces
cpu_has_shared_ftlb_ram & cpu_has_shared_ftlb_entries feature macros
which we set appropriately for I6400 & I6500 CPUs. Further patches will
make use of these macros as appropriate.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16202/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu-features.h | 41 ++++++++++++++++++++++++++++
 arch/mips/include/asm/cpu.h          |  4 +++
 arch/mips/kernel/cpu-probe.c         | 11 ++++++++
 3 files changed, 56 insertions(+)

diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 494d382741426..d6ea8e7c51079 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -487,6 +487,47 @@
 # define cpu_has_perf		(cpu_data[0].options & MIPS_CPU_PERF)
 #endif
 
+#if defined(CONFIG_SMP) && defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+/*
+ * Some systems share FTLB RAMs between threads within a core (siblings in
+ * kernel parlance). This means that FTLB entries may become invalid at almost
+ * any point when an entry is evicted due to a sibling thread writing an entry
+ * to the shared FTLB RAM.
+ *
+ * This is only relevant to SMP systems, and the only systems that exhibit this
+ * property implement MIPSr6 or higher so we constrain support for this to
+ * kernels that will run on such systems.
+ */
+# ifndef cpu_has_shared_ftlb_ram
+#  define cpu_has_shared_ftlb_ram \
+	(current_cpu_data.options & MIPS_CPU_SHARED_FTLB_RAM)
+# endif
+
+/*
+ * Some systems take this a step further & share FTLB entries between siblings.
+ * This is implemented as TLB writes happening as usual, but if an entry
+ * written by a sibling exists in the shared FTLB for a translation which would
+ * otherwise cause a TLB refill exception then the CPU will use the entry
+ * written by its sibling rather than triggering a refill & writing a matching
+ * TLB entry for itself.
+ *
+ * This is naturally only valid if a TLB entry is known to be suitable for use
+ * on all siblings in a CPU, and so it only takes effect when MMIDs are in use
+ * rather than ASIDs or when a TLB entry is marked global.
+ */
+# ifndef cpu_has_shared_ftlb_entries
+#  define cpu_has_shared_ftlb_entries \
+	(current_cpu_data.options & MIPS_CPU_SHARED_FTLB_ENTRIES)
+# endif
+#endif /* SMP && __mips_isa_rev >= 6 */
+
+#ifndef cpu_has_shared_ftlb_ram
+# define cpu_has_shared_ftlb_ram 0
+#endif
+#ifndef cpu_has_shared_ftlb_entries
+# define cpu_has_shared_ftlb_entries 0
+#endif
+
 /*
  * Guest capabilities
  */
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 53b8b1f490846..ce798594c868a 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -418,6 +418,10 @@ enum cpu_type_enum {
 #define MIPS_CPU_GUESTID	MBIT_ULL(51)	/* CPU uses VZ ASE GuestID feature */
 #define MIPS_CPU_DRG		MBIT_ULL(52)	/* CPU has VZ Direct Root to Guest (DRG) */
 #define MIPS_CPU_UFR		MBIT_ULL(53)	/* CPU supports User mode FR switching */
+#define MIPS_CPU_SHARED_FTLB_RAM \
+				MBIT_ULL(54)	/* CPU shares FTLB RAM with another */
+#define MIPS_CPU_SHARED_FTLB_ENTRIES \
+				MBIT_ULL(55)	/* CPU shares FTLB entries with another */
 
 /*
  * CPU ASE encodings
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 09462bba629fa..3f0d43ce994ab 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1653,6 +1653,17 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 	decode_configs(c);
 
 	spram_config();
+
+	switch (__get_cpu_type(c->cputype)) {
+	case CPU_I6500:
+		c->options |= MIPS_CPU_SHARED_FTLB_ENTRIES;
+		/* fall-through */
+	case CPU_I6400:
+		c->options |= MIPS_CPU_SHARED_FTLB_RAM;
+		/* fall-through */
+	default:
+		break;
+	}
 }
 
 static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu)
-- 
GitLab


From f39878cc5b09c75d35eaf52131e920b872e3feb4 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 15:38:02 -0700
Subject: [PATCH 0538/1958] MIPS: Handle tlbex-tlbp race condition

In systems where there are multiple actors updating the TLB, the
potential exists for a race condition wherein a CPU hits a TLB exception
but by the time it reaches a TLBP instruction the affected TLB entry may
have been replaced. This can happen if, for example, a CPU shares the
TLB between hardware threads (VPs) within a core and one of them
replaces the entry that another has just taken a TLB exception for.

We handle this race in the case of the Hardware Table Walker (HTW) being
the other actor already, but didn't take into account the potential for
multiple threads racing. Include the code for aborting TLB exception
handling in affected multi-threaded systems, those being the I6400 &
I6500 CPUs which share TLB entries between VPs.

In the case of using RiXi without dedicated exceptions we have never
handled this race even for HTW. This patch adds WARN()s to these cases
which ought never to be hit because all CPUs with either HTW or shared
FTLB RAMs also include dedicated RiXi exceptions, but the WARN()s will
ensure this is always the case.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16203/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/tlbex.c | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index ed1c5297547af..e6499209b81cb 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -2015,6 +2015,26 @@ static void build_r3000_tlb_modify_handler(void)
 }
 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
 
+static bool cpu_has_tlbex_tlbp_race(void)
+{
+	/*
+	 * When a Hardware Table Walker is running it can replace TLB entries
+	 * at any time, leading to a race between it & the CPU.
+	 */
+	if (cpu_has_htw)
+		return true;
+
+	/*
+	 * If the CPU shares FTLB RAM with its siblings then our entry may be
+	 * replaced at any time by a sibling performing a write to the FTLB.
+	 */
+	if (cpu_has_shared_ftlb_ram)
+		return true;
+
+	/* In all other cases there ought to be no race condition to handle */
+	return false;
+}
+
 /*
  * R4000 style TLB load/store/modify handlers.
  */
@@ -2051,7 +2071,7 @@ build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l,
 	iPTE_LW(p, wr.r1, wr.r2); /* get even pte */
 	if (!m4kc_tlbp_war()) {
 		build_tlb_probe_entry(p);
-		if (cpu_has_htw) {
+		if (cpu_has_tlbex_tlbp_race()) {
 			/* race condition happens, leaving */
 			uasm_i_ehb(p);
 			uasm_i_mfc0(p, wr.r3, C0_INDEX);
@@ -2125,6 +2145,14 @@ static void build_r4000_tlb_load_handler(void)
 		}
 		uasm_i_nop(&p);
 
+		/*
+		 * Warn if something may race with us & replace the TLB entry
+		 * before we read it here. Everything with such races should
+		 * also have dedicated RiXi exception handlers, so this
+		 * shouldn't be hit.
+		 */
+		WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path");
+
 		uasm_i_tlbr(&p);
 
 		switch (current_cpu_type()) {
@@ -2192,6 +2220,14 @@ static void build_r4000_tlb_load_handler(void)
 		}
 		uasm_i_nop(&p);
 
+		/*
+		 * Warn if something may race with us & replace the TLB entry
+		 * before we read it here. Everything with such races should
+		 * also have dedicated RiXi exception handlers, so this
+		 * shouldn't be hit.
+		 */
+		WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path");
+
 		uasm_i_tlbr(&p);
 
 		switch (current_cpu_type()) {
-- 
GitLab


From cebf8c0f4f4e378f5e82606023b92ffbb1ad6048 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 15:38:03 -0700
Subject: [PATCH 0539/1958] MIPS: Allow storing pgd in C0_CONTEXT for MIPSr6

CONFIG_MIPS_PGD_C0_CONTEXT, which allows a pointer to the page directory
to be stored in the cop0 Context register when enabled, was previously
only allowed for MIPSr2. MIPSr6 is just as able to make use of it, so
allow it there too.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16204/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a986116dabb53..609986e8b21e0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2073,7 +2073,7 @@ config CPU_SUPPORTS_UNCACHED_ACCELERATED
 	bool
 config MIPS_PGD_C0_CONTEXT
 	bool
-	default y if 64BIT && CPU_MIPSR2 && !CPU_XLP
+	default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP
 
 #
 # Set to y for ptrace access to watch registers.
-- 
GitLab


From 5f930860e70a4761f026d3e3a9f7787eac3c4f60 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 2 Jun 2017 15:38:04 -0700
Subject: [PATCH 0540/1958] MIPS: Use current_cpu_type() in m4kc_tlbp_war()

Use current_cpu_type() to check for 4Kc processors instead of checking
the PRID directly. This will allow for the 4Kc case to be optimised out
of kernels that can't run on 4KC processors, thanks to __get_cpu_type()
and its unreachable() call.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16205/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/tlbex.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index e6499209b81cb..5aadc69c8ce39 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -153,8 +153,7 @@ static int scratchpad_offset(int i)
  */
 static int m4kc_tlbp_war(void)
 {
-	return (current_cpu_data.processor_id & 0xffff00) ==
-	       (PRID_COMP_MIPS | PRID_IMP_4KC);
+	return current_cpu_type() == CPU_4KC;
 }
 
 /* Handle labels (which must be positive integers). */
-- 
GitLab


From 8ec7f15b8cca4f790df5cdf33f26e2926d4ee2fd Mon Sep 17 00:00:00 2001
From: Goran Ferenc <goran.ferenc@imgtec.com>
Date: Wed, 28 Jun 2017 17:55:28 +0200
Subject: [PATCH 0541/1958] MIPS: VDSO: Fix conversions in
 do_monotonic()/do_monotonic_coarse()

Fix incorrect calculation in do_monotonic() and do_monotonic_coarse()
function that in turn caused incorrect values returned by the vdso
version of system call clock_gettime() on mips64 if its system clock
ID parameter was CLOCK_MONOTONIC or CLOCK_MONOTONIC_COARSE.

Consider these variables and their types on mips32 and mips64:

tk->wall_to_monotonic.tv_sec  s64, s64   (kernel/vdso.c)
vdso_data.wall_to_mono_sec    u32, u32   (kernel/vdso.c)
to_mono_sec                   u32, u32   (vdso/gettimeofday.c)
ts->tv_sec                    s32, s64   (vdso/gettimeofday.c)

For mips64 case, u32 vdso_data.wall_to_mono_sec variable is updated
from the 64-bit signed variable tk->wall_to_monotonic.tv_sec
(kernel/vdso.c:76) which is a negative number holding the time passed
from 1970-01-01 to the time boot started. This 64-bit signed value is
currently around 47+ years, in seconds. For instance, let this value
be:

-1489757461

or

11111111111111111111111111111111 10100111001101000001101011101011

By updating 32-bit vdso_data.wall_to_mono_sec variable, we lose upper
32 bits (signed 1's).

to_mono_sec variable is a parameter of do_monotonic() and
do_monotonic_coarse() functions which holds vdso_data.wall_to_mono_sec
value. Its value needs to be added (or subtracted considering it holds
negative value from the tk->wall_to_monotonic.tv_sec) to the current
time passed from 1970-01-01 (ts->tv_sec), which is again something like
47+ years, but increased by the time passed from the boot to the
current time. ts->tv_sec is 32-bit long in case of 32-bit architecture
and 64-bit long in case of 64-bit architecture. Consider the update of
ts->tv_sec (vdso/gettimeofday.c:55 & 167):

ts->tv_sec += to_mono_sec;

mips32 case: This update will be performed correctly, since both
ts->tv_sec and to_mono_sec are 32-bit long and the sign in to_mono_sec
is preserved. Implicit conversion from u32 to s32 will be done
correctly.

mips64 case: This update will be wrong, since the implicit conversion
will not be done correctly. The reason is that the conversion will be
from u32 to s64. This is because to_mono_sec is 32-bit long for both
mips32 and mips64 cases and s64..33 bits of converted to_mono_sec
variable will be zeros.

So, in order to make MIPS64 implementation work properly for
MONOTONIC and MONOTONIC_COARSE clock ids on mips64, the size of
wall_to_mono_sec variable in mips_vdso_data union and respective
parameters in do_monotonic() and do_monotonic_coarse() functions
should be changed from u32 to u64. Because of consistency, this
size change from u32 and u64 is also done for wall_to_mono_nsec
variable and corresponding function parameters.

As far as similar situations for other architectures are concerned,
let's take a look at arm. Arm has two distinct vdso_data structures
for 32-bit & 64-bit cases, and arm's wall_to_mono_sec and
wall_to_mono_nsec are u32 for 32-bit and u64 for 64-bit cases.
On the other hand, MIPS has only one structure (mips_vdso_data),
hence the need for changing the size of above mentioned parameters.

Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: Douglas Leung <douglas.leung@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Petar Jovanovic <petar.jovanovic@imgtec.com>
Cc: Raghu Gandham <raghu.gandham@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16638/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/vdso.h  | 4 ++--
 arch/mips/vdso/gettimeofday.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/vdso.h b/arch/mips/include/asm/vdso.h
index 8f4ca5dd992b1..b7cd6cf77b83e 100644
--- a/arch/mips/include/asm/vdso.h
+++ b/arch/mips/include/asm/vdso.h
@@ -79,8 +79,8 @@ union mips_vdso_data {
 	struct {
 		u64 xtime_sec;
 		u64 xtime_nsec;
-		u32 wall_to_mono_sec;
-		u32 wall_to_mono_nsec;
+		u64 wall_to_mono_sec;
+		u64 wall_to_mono_nsec;
 		u32 seq_count;
 		u32 cs_shift;
 		u8 clock_mode;
diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c
index ce89c9e294f93..fd7d433970bf6 100644
--- a/arch/mips/vdso/gettimeofday.c
+++ b/arch/mips/vdso/gettimeofday.c
@@ -39,8 +39,8 @@ static __always_inline int do_monotonic_coarse(struct timespec *ts,
 					       const union mips_vdso_data *data)
 {
 	u32 start_seq;
-	u32 to_mono_sec;
-	u32 to_mono_nsec;
+	u64 to_mono_sec;
+	u64 to_mono_nsec;
 
 	do {
 		start_seq = vdso_data_read_begin(data);
@@ -148,8 +148,8 @@ static __always_inline int do_monotonic(struct timespec *ts,
 {
 	u32 start_seq;
 	u64 ns;
-	u32 to_mono_sec;
-	u32 to_mono_nsec;
+	u64 to_mono_sec;
+	u64 to_mono_nsec;
 
 	do {
 		start_seq = vdso_data_read_begin(data);
-- 
GitLab


From 180902e08f051f72c89ffa366f4e4f7a8e9c753e Mon Sep 17 00:00:00 2001
From: Goran Ferenc <goran.ferenc@imgtec.com>
Date: Wed, 28 Jun 2017 17:55:29 +0200
Subject: [PATCH 0542/1958] MIPS: VDSO: Add implementation of clock_gettime()
 fallback

This patch adds clock_gettime_fallback() function that wraps assembly
invocation of clock_gettime() syscall using __NR_clock_gettime.

This function is used if pure VDSO implementation of clock_gettime()
does not succeed for any reason. For example, it is called if the
clkid parameter of clock_gettime() is not one of the clkids listed
in the switch-case block of the function __vdso_clock_gettime()
(one such case for clkid is CLOCK_BOOTIME).

If syscall invocation via __NR_clock_gettime fails, register a3 will
be set. So, after the syscall, register a3 is tested and the return
value is negated if it's set.

Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: Douglas Leung <douglas.leung@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Petar Jovanovic <petar.jovanovic@imgtec.com>
Cc: Raghu Gandham <raghu.gandham@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16639/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/vdso/gettimeofday.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c
index fd7d433970bf6..5f6337545ee2a 100644
--- a/arch/mips/vdso/gettimeofday.c
+++ b/arch/mips/vdso/gettimeofday.c
@@ -20,6 +20,24 @@
 #include <asm/unistd.h>
 #include <asm/vdso.h>
 
+static __always_inline long clock_gettime_fallback(clockid_t _clkid,
+					   struct timespec *_ts)
+{
+	register struct timespec *ts asm("a1") = _ts;
+	register clockid_t clkid asm("a0") = _clkid;
+	register long ret asm("v0");
+	register long nr asm("v0") = __NR_clock_gettime;
+	register long error asm("a3");
+
+	asm volatile(
+	"       syscall\n"
+	: "=r" (ret), "=r" (error)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return error ? -ret : ret;
+}
+
 static __always_inline int do_realtime_coarse(struct timespec *ts,
 					      const union mips_vdso_data *data)
 {
@@ -207,7 +225,7 @@ int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
 {
 	const union mips_vdso_data *data = get_vdso_data();
-	int ret;
+	int ret = -1;
 
 	switch (clkid) {
 	case CLOCK_REALTIME_COARSE:
@@ -223,10 +241,11 @@ int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
 		ret = do_monotonic(ts, data);
 		break;
 	default:
-		ret = -ENOSYS;
 		break;
 	}
 
-	/* If we return -ENOSYS libc should fall back to a syscall. */
+	if (ret)
+		ret = clock_gettime_fallback(clkid, ts);
+
 	return ret;
 }
-- 
GitLab


From 0b523a85e134d41f57ddd8c5193bd9f0a5e20b0d Mon Sep 17 00:00:00 2001
From: Goran Ferenc <goran.ferenc@imgtec.com>
Date: Wed, 28 Jun 2017 17:55:30 +0200
Subject: [PATCH 0543/1958] MIPS: VDSO: Add implementation of gettimeofday()
 fallback

This patch adds gettimeofday_fallback() function that wraps assembly
invocation of gettimeofday() syscall using __NR_gettimeofday.

This function is used if pure VDSO implementation gettimeofday()
does not succeed for any reason. Its imeplementation is enclosed in
"#ifdef CONFIG_MIPS_CLOCK_VSYSCALL" to be in sync with the similar
arrangement for __vdso_gettimeofday().

If syscall invocation via __NR_gettimeofday fails, register a3 will
be set. So, after the syscall, register a3 is tested and the return
valuem is negated if it's set.

Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: Douglas Leung <douglas.leung@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Petar Jovanovic <petar.jovanovic@imgtec.com>
Cc: Raghu Gandham <raghu.gandham@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16640/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/vdso/gettimeofday.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c
index 5f6337545ee2a..23305bf6c7a26 100644
--- a/arch/mips/vdso/gettimeofday.c
+++ b/arch/mips/vdso/gettimeofday.c
@@ -20,6 +20,28 @@
 #include <asm/unistd.h>
 #include <asm/vdso.h>
 
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+static __always_inline long gettimeofday_fallback(struct timeval *_tv,
+					  struct timezone *_tz)
+{
+	register struct timezone *tz asm("a1") = _tz;
+	register struct timeval *tv asm("a0") = _tv;
+	register long ret asm("v0");
+	register long nr asm("v0") = __NR_gettimeofday;
+	register long error asm("a3");
+
+	asm volatile(
+	"       syscall\n"
+	: "=r" (ret), "=r" (error)
+	: "r" (tv), "r" (tz), "r" (nr)
+	: "memory");
+
+	return error ? -ret : ret;
+}
+
+#endif
+
 static __always_inline long clock_gettime_fallback(clockid_t _clkid,
 					   struct timespec *_ts)
 {
@@ -205,7 +227,7 @@ int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 
 	ret = do_realtime(&ts, data);
 	if (ret)
-		return ret;
+		return gettimeofday_fallback(tv, tz);
 
 	if (tv) {
 		tv->tv_sec = ts.tv_sec;
-- 
GitLab


From bdb94f6e824d5bd1577c3f80cbe0c6b4beab5a5c Mon Sep 17 00:00:00 2001
From: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Date: Wed, 28 Jun 2017 17:55:31 +0200
Subject: [PATCH 0544/1958] MIPS: VDSO: Fix a mismatch between comment and
 preprocessor constant

Sync the comment with its preprocessor constant counterpart.

Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Miodrag Dinic <miodrag.dinic@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Petar Jovanovic <petar.jovanovic@imgtec.com>
Cc: Raghu Gandham <raghu.gandham@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16641/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/vdso/gettimeofday.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c
index 23305bf6c7a26..974276e828b2c 100644
--- a/arch/mips/vdso/gettimeofday.c
+++ b/arch/mips/vdso/gettimeofday.c
@@ -242,7 +242,7 @@ int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 	return 0;
 }
 
-#endif /* CONFIG_CLKSRC_MIPS_GIC */
+#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
 
 int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
 {
-- 
GitLab


From 1e7a75f74a193f92eb9e5da20d1fb1b388241631 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 23 Jun 2017 10:43:30 +0530
Subject: [PATCH 0545/1958] char: ipmi: constify bmc_dev_attr_group and
 bmc_device_type

File size before:
   text	   data	    bss	    dec	    hex	filename
  25678	   1024	     92	  26794	   68aa	drivers/char/ipmi/ipmi_msghandler.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  25806	    896	     92	  26794	   68aa	drivers/char/ipmi/ipmi_msghandler.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 49a7e9685e77f..810b138f5897b 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -2397,7 +2397,7 @@ static umode_t bmc_dev_attr_is_visible(struct kobject *kobj,
 	return mode;
 }
 
-static struct attribute_group bmc_dev_attr_group = {
+static const struct attribute_group bmc_dev_attr_group = {
 	.attrs		= bmc_dev_attrs,
 	.is_visible	= bmc_dev_attr_is_visible,
 };
@@ -2407,7 +2407,7 @@ static const struct attribute_group *bmc_dev_attr_groups[] = {
 	NULL
 };
 
-static struct device_type bmc_device_type = {
+static const struct device_type bmc_device_type = {
 	.groups		= bmc_dev_attr_groups,
 };
 
-- 
GitLab


From 0d76d6e1eede5f2aa13695cb4c9d763bb3555e3e Mon Sep 17 00:00:00 2001
From: Willy WOLFF <willy.mh.wolff@gmail.com>
Date: Sat, 24 Jun 2017 14:06:03 +0100
Subject: [PATCH 0546/1958] thermal: fix source code documentation for
 parameters

Some parameters are not documented, or not present at all, in thermal
governors code.

Signed-off-by: Willy Wolff <willy.mh.wolff@gmail.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/fair_share.c | 1 +
 drivers/thermal/step_wise.c  | 3 +--
 drivers/thermal/user_space.c | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 68bd1b5691185..d3469fbc52072 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -71,6 +71,7 @@ static long get_target_state(struct thermal_zone_device *tz,
 /**
  * fair_share_throttle - throttles devices associated with the given zone
  * @tz - thermal_zone_device
+ * @trip - trip point index
  *
  * Throttling Logic: This uses three parameters to calculate the new
  * throttle state of the cooling devices associated with the given zone.
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index bcef2e7c4ec96..be95826631b72 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -186,8 +186,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 /**
  * step_wise_throttle - throttles devices associated with the given zone
  * @tz - thermal_zone_device
- * @trip - the trip point
- * @trip_type - type of the trip point
+ * @trip - trip point index
  *
  * Throttling Logic: This uses the trend of the thermal zone to throttle.
  * If the thermal zone is 'heating up' this throttles all the cooling
diff --git a/drivers/thermal/user_space.c b/drivers/thermal/user_space.c
index c908150c268df..8e92a06ef48a2 100644
--- a/drivers/thermal/user_space.c
+++ b/drivers/thermal/user_space.c
@@ -24,12 +24,13 @@
 
 #include <linux/thermal.h>
 #include <linux/slab.h>
+
 #include "thermal_core.h"
 
 /**
  * notify_user_space - Notifies user space about thermal events
  * @tz - thermal_zone_device
- * @trip - Trip point index
+ * @trip - trip point index
  *
  * This function notifies the user space through UEvents.
  */
-- 
GitLab


From 86326031e3d4dec828d966c19fe9fe1371d0a0a8 Mon Sep 17 00:00:00 2001
From: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Date: Tue, 30 May 2017 23:14:58 +0530
Subject: [PATCH 0547/1958] Thermal/int340x: Fix few typos and kernel-doc style

This patch fix the few typos in function header of
acpi_parse_trt. Also, fix the typo in kernel debug
message for acpi_parse_art.

Signed-off-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/int340x_thermal/acpi_thermal_rel.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
index 2c2ec7666eb18..51ceb80212a7a 100644
--- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
@@ -62,8 +62,8 @@ static int acpi_thermal_rel_release(struct inode *inode, struct file *file)
  * acpi_parse_trt - Thermal Relationship Table _TRT for passive cooling
  *
  * @handle: ACPI handle of the device contains _TRT
- * @art_count: the number of valid entries resulted from parsing _TRT
- * @artp: pointer to pointer of array of art entries in parsing result
+ * @trt_count: the number of valid entries resulted from parsing _TRT
+ * @trtp: pointer to pointer of array of _TRT entries in parsing result
  * @create_dev: whether to create platform devices for target and source
  *
  */
@@ -208,7 +208,7 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
 		if (art->target) {
 			result = acpi_bus_get_device(art->target, &adev);
 			if (result)
-				pr_warn("Failed to get source ACPI device\n");
+				pr_warn("Failed to get target ACPI device\n");
 		}
 	}
 
-- 
GitLab


From 4ca0e75e46b6ee408bdc334fff449323d476812c Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 6 Jun 2017 16:00:41 -0700
Subject: [PATCH 0548/1958] thermal: int340x: check for sensor when PTYP is
 missing

For INT3403 sensor PTYP field is mandatory. But some platforms didn't
have this field for sensors. This cause load failure for int3403 driver.

This change checks for the presence of _TMP method and if present, then
treats this device as a sensor.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/int340x_thermal/int3403_thermal.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
index c4890c9437eba..8a7f24dd9315e 100644
--- a/drivers/thermal/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c
@@ -238,8 +238,16 @@ static int int3403_add(struct platform_device *pdev)
 	status = acpi_evaluate_integer(priv->adev->handle, "PTYP",
 				       NULL, &priv->type);
 	if (ACPI_FAILURE(status)) {
-		result = -EINVAL;
-		goto err;
+		unsigned long long tmp;
+
+		status = acpi_evaluate_integer(priv->adev->handle, "_TMP",
+					       NULL, &tmp);
+		if (ACPI_FAILURE(status)) {
+			result = -EINVAL;
+			goto err;
+		} else {
+			priv->type = INT3403_TYPE_SENSOR;
+		}
 	}
 
 	platform_set_drvdata(pdev, priv);
-- 
GitLab


From 5cd82b757795228516bf60a0552d1a40fa8adeb2 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Tue, 13 Jun 2017 10:15:26 +0800
Subject: [PATCH 0549/1958] drm/i915/gvt: Make function dpy_reg_mmio_readx safe

The dpy_reg_mmio_read_x functions directly copy 4 bytes data to the
target address with considering the length. If may cause the target
memory corrupted if the requested length less than 4 bytes. Fix it
for safety even we already have some checking to avoid this happen.
And for convince, the 3 functions are merged.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 35 ++++++++++++++++-------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 437a35f41b62a..a9114aa1eddb1 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -366,21 +366,24 @@ static int lcpll_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 static int dpy_reg_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
-	*(u32 *)p_data = (1 << 17);
-	return 0;
-}
-
-static int dpy_reg_mmio_read_2(struct intel_vgpu *vgpu, unsigned int offset,
-		void *p_data, unsigned int bytes)
-{
-	*(u32 *)p_data = 3;
-	return 0;
-}
+	switch (offset) {
+	case 0xe651c:
+	case 0xe661c:
+	case 0xe671c:
+	case 0xe681c:
+		vgpu_vreg(vgpu, offset) = 1 << 17;
+		break;
+	case 0xe6c04:
+		vgpu_vreg(vgpu, offset) = 0x3;
+		break;
+	case 0xe6e1c:
+		vgpu_vreg(vgpu, offset) = 0x2f << 16;
+		break;
+	default:
+		return -EINVAL;
+	}
 
-static int dpy_reg_mmio_read_3(struct intel_vgpu *vgpu, unsigned int offset,
-		void *p_data, unsigned int bytes)
-{
-	*(u32 *)p_data = (0x2f << 16);
+	read_vreg(vgpu, offset, p_data, bytes);
 	return 0;
 }
 
@@ -1991,8 +1994,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(0xe661c, D_ALL, dpy_reg_mmio_read, NULL);
 	MMIO_DH(0xe671c, D_ALL, dpy_reg_mmio_read, NULL);
 	MMIO_DH(0xe681c, D_ALL, dpy_reg_mmio_read, NULL);
-	MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read_2, NULL);
-	MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read_3, NULL);
+	MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read, NULL);
+	MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read, NULL);
 
 	MMIO_RO(PCH_PORT_HOTPLUG, D_ALL, 0,
 		PORTA_HOTPLUG_STATUS_MASK
-- 
GitLab


From 96b5b19459b3c2aed2872bac42cbe19edfae710f Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 28 Jun 2017 18:32:31 -0700
Subject: [PATCH 0550/1958] module: make the modinfo name const

This can be accomplished by making blacklisted() also accept const.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
[jeyu: fix typo]
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 kernel/module.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index afc6ede7bcdf3..d072877075576 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -302,7 +302,7 @@ int unregister_module_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL(unregister_module_notifier);
 
 struct load_info {
-	char *name;
+	const char *name;
 	Elf_Ehdr *hdr;
 	unsigned long len;
 	Elf_Shdr *sechdrs;
@@ -3265,7 +3265,7 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
 
 /* module_blacklist is a comma-separated list of module names */
 static char *module_blacklist;
-static bool blacklisted(char *module_name)
+static bool blacklisted(const char *module_name)
 {
 	const char *p;
 	size_t len;
-- 
GitLab


From 138b87fa43b629ed14429e2e056fe4d5ef132921 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 29 Jun 2017 15:37:20 +0300
Subject: [PATCH 0551/1958] drm: vblank: Fix vblank timestamp update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 3fcdcb270936 ("drm/vblank: Switch to bool in_vblank_irq in
get_vblank_timestamp") inverted a condition by mistake that resulted in
vblank timestamps always being 0 on hardware without a vblank counter.
Fix it.

Fixes: 3fcdcb270936 ("drm/vblank: Switch to bool in_vblank_irq in get_vblank_timestamp")
Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170629123720.27173-1-laurent.pinchart+renesas@ideasonboard.com
---
 drivers/gpu/drm/drm_vblank.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index 463e4d81fb0dd..e9f33cd805dd6 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -242,7 +242,7 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe,
 	 * Otherwise reinitialize delayed at next vblank interrupt and assign 0
 	 * for now, to mark the vblanktimestamp as invalid.
 	 */
-	if (!rc && in_vblank_irq)
+	if (!rc && !in_vblank_irq)
 		t_vblank = (struct timeval) {0, 0};
 
 	store_vblank(dev, pipe, diff, &t_vblank, cur_vblank);
-- 
GitLab


From 4495ec6d770e1bca7a04e93ac453ab6720c56c5d Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Fri, 30 Jun 2017 07:18:08 -0500
Subject: [PATCH 0552/1958] ipmi:ssif: Add missing unlock in error branch

When getting flags, a response to a different message would
result in a deadlock because of a missing unlock.  Add that
unlock and a comment.  Found by static analysis.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable@vger.kernel.org # 3.19
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 971ecda336578..0aea3bcb61584 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -766,6 +766,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			       result, len, data[2]);
 		} else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
 			   || data[1] != IPMI_GET_MSG_FLAGS_CMD) {
+			/*
+			 * Don't abort here, maybe it was a queued
+			 * response to a previous command.
+			 */
+			ipmi_ssif_unlock_cond(ssif_info, flags);
 			pr_warn(PFX "Invalid response getting flags: %x %x\n",
 				data[0], data[1]);
 		} else {
-- 
GitLab


From 3859a271a003aba01e45b85c9d8b355eb7bf25f9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 28 Oct 2016 01:22:25 -0700
Subject: [PATCH 0553/1958] randstruct: Mark various structs for randomization

This marks many critical kernel structures for randomization. These are
structures that have been targeted in the past in security exploits, or
contain functions pointers, pointers to function pointer tables, lists,
workqueues, ref-counters, credentials, permissions, or are otherwise
sensitive. This initial list was extracted from Brad Spengler/PaX Team's
code in the last public patch of grsecurity/PaX based on my understanding
of the code. Changes or omissions from the original code are mine and
don't reflect the original grsecurity/PaX code.

Left out of this list is task_struct, which requires special handling
and will be covered in a subsequent patch.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/include/asm/processor.h |  2 +-
 fs/mount.h                       |  4 ++--
 fs/namei.c                       |  2 +-
 fs/proc/internal.h               |  6 +++---
 include/linux/binfmts.h          |  4 ++--
 include/linux/cdev.h             |  2 +-
 include/linux/cred.h             |  4 ++--
 include/linux/dcache.h           |  2 +-
 include/linux/fs.h               | 17 +++++++++--------
 include/linux/fs_struct.h        |  2 +-
 include/linux/ipc.h              |  2 +-
 include/linux/ipc_namespace.h    |  2 +-
 include/linux/key-type.h         |  4 ++--
 include/linux/kmod.h             |  2 +-
 include/linux/kobject.h          |  2 +-
 include/linux/lsm_hooks.h        |  4 ++--
 include/linux/mm_types.h         |  4 ++--
 include/linux/module.h           |  4 ++--
 include/linux/mount.h            |  2 +-
 include/linux/msg.h              |  2 +-
 include/linux/path.h             |  2 +-
 include/linux/pid_namespace.h    |  2 +-
 include/linux/proc_ns.h          |  2 +-
 include/linux/sched.h            |  2 +-
 include/linux/sched/signal.h     |  2 +-
 include/linux/sem.h              |  2 +-
 include/linux/shm.h              |  2 +-
 include/linux/sysctl.h           |  2 +-
 include/linux/tty.h              |  2 +-
 include/linux/tty_driver.h       |  4 ++--
 include/linux/user_namespace.h   |  2 +-
 include/linux/utsname.h          |  2 +-
 include/net/af_unix.h            |  2 +-
 include/net/neighbour.h          |  2 +-
 include/net/net_namespace.h      |  2 +-
 include/net/sock.h               |  2 +-
 kernel/futex.c                   |  4 ++--
 security/keys/internal.h         |  2 +-
 38 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402a..e2335edb9fc56 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -129,7 +129,7 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;
-};
+} __randomize_layout;
 
 struct cpuid_regs {
 	u32 eax, ebx, ecx, edx;
diff --git a/fs/mount.h b/fs/mount.h
index bf1fda6eed8f3..e406b286fba11 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -16,7 +16,7 @@ struct mnt_namespace {
 	u64 event;
 	unsigned int		mounts; /* # of mounts in the namespace */
 	unsigned int		pending_mounts;
-};
+} __randomize_layout;
 
 struct mnt_pcp {
 	int mnt_count;
@@ -68,7 +68,7 @@ struct mount {
 	struct hlist_head mnt_pins;
 	struct fs_pin mnt_umount;
 	struct dentry *mnt_ex_mountpoint;
-};
+} __randomize_layout;
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
 
diff --git a/fs/namei.c b/fs/namei.c
index 6571a5f5112ed..1764620ac383e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -524,7 +524,7 @@ struct nameidata {
 	struct inode	*link_inode;
 	unsigned	root_seq;
 	int		dfd;
-};
+} __randomize_layout;
 
 static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
 {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c5ae09b6c726a..07b16318223fc 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -51,7 +51,7 @@ struct proc_dir_entry {
 	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
 	u8 namelen;
 	char name[];
-};
+} __randomize_layout;
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
@@ -70,7 +70,7 @@ struct proc_inode {
 	struct list_head sysctl_inodes;
 	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
-};
+} __randomize_layout;
 
 /*
  * General functions
@@ -279,7 +279,7 @@ struct proc_maps_private {
 #ifdef CONFIG_NUMA
 	struct mempolicy *task_mempolicy;
 #endif
-};
+} __randomize_layout;
 
 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode);
 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 05488da3aee9d..3ae9013eeaaa4 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -46,7 +46,7 @@ struct linux_binprm {
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
-};
+} __randomize_layout;
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT)
@@ -81,7 +81,7 @@ struct linux_binfmt {
 	int (*load_shlib)(struct file *);
 	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
-};
+} __randomize_layout;
 
 extern void __register_binfmt(struct linux_binfmt *fmt, int insert);
 
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index 408bc09ce497b..cb28eb21e3ca5 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -17,7 +17,7 @@ struct cdev {
 	struct list_head list;
 	dev_t dev;
 	unsigned int count;
-};
+} __randomize_layout;
 
 void cdev_init(struct cdev *, const struct file_operations *);
 
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b03e7d049a64f..82c8a9e1aabbf 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -31,7 +31,7 @@ struct group_info {
 	atomic_t	usage;
 	int		ngroups;
 	kgid_t		gid[0];
-};
+} __randomize_layout;
 
 /**
  * get_group_info - Get a reference to a group info structure
@@ -145,7 +145,7 @@ struct cred {
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
 	struct rcu_head	rcu;		/* RCU deletion hook */
-};
+} __randomize_layout;
 
 extern void __put_cred(struct cred *);
 extern void exit_creds(struct task_struct *);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d2e38dc6172c0..7eb262e13d3c4 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -113,7 +113,7 @@ struct dentry {
 		struct hlist_bl_node d_in_lookup_hash;	/* only for in-lookup ones */
 	 	struct rcu_head d_rcu;
 	} d_u;
-};
+} __randomize_layout;
 
 /*
  * dentry->d_lock spinlock nesting subclasses:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b26542..8f28143486c40 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -275,7 +275,7 @@ struct kiocb {
 	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
 	void			*private;
 	int			ki_flags;
-};
+} __randomize_layout;
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
 {
@@ -392,7 +392,7 @@ struct address_space {
 	gfp_t			gfp_mask;	/* implicit gfp mask for allocations */
 	struct list_head	private_list;	/* ditto */
 	void			*private_data;	/* ditto */
-} __attribute__((aligned(sizeof(long))));
+} __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
 	 * On most architectures that alignment is already the case; but
 	 * must be enforced here for CRIS, to let the least significant bit
@@ -435,7 +435,7 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
-};
+} __randomize_layout;
 
 /*
  * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
@@ -653,7 +653,7 @@ struct inode {
 #endif
 
 	void			*i_private; /* fs or device private pointer */
-};
+} __randomize_layout;
 
 static inline unsigned int i_blocksize(const struct inode *node)
 {
@@ -868,7 +868,8 @@ struct file {
 	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
-} __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
+} __randomize_layout
+  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
 struct file_handle {
 	__u32 handle_bytes;
@@ -1005,7 +1006,7 @@ struct file_lock {
 			int state;		/* state of grant or error if -ve */
 		} afs;
 	} fl_u;
-};
+} __randomize_layout;
 
 struct file_lock_context {
 	spinlock_t		flc_lock;
@@ -1404,7 +1405,7 @@ struct super_block {
 
 	spinlock_t		s_inode_wblist_lock;
 	struct list_head	s_inodes_wb;	/* writeback inodes */
-};
+} __randomize_layout;
 
 /* Helper functions so that in most cases filesystems will
  * not need to deal directly with kuid_t and kgid_t and can
@@ -1690,7 +1691,7 @@ struct file_operations {
 			u64);
 	ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
 			u64);
-};
+} __randomize_layout;
 
 struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 0efc3e62843ae..7a026240cbb1b 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -12,7 +12,7 @@ struct fs_struct {
 	int umask;
 	int in_exec;
 	struct path root, pwd;
-};
+} __randomize_layout;
 
 extern struct kmem_cache *fs_cachep;
 
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 71fd92d81b266..ea0eb0b5f98cd 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -20,6 +20,6 @@ struct kern_ipc_perm {
 	umode_t		mode;
 	unsigned long	seq;
 	void		*security;
-} ____cacheline_aligned_in_smp;
+} ____cacheline_aligned_in_smp __randomize_layout;
 
 #endif /* _LINUX_IPC_H */
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 848e5796400e7..65327ee0936b3 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -61,7 +61,7 @@ struct ipc_namespace {
 	struct ucounts *ucounts;
 
 	struct ns_common ns;
-};
+} __randomize_layout;
 
 extern struct ipc_namespace init_ipc_ns;
 extern spinlock_t mq_lock;
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 8496cf64575c6..9520fc3c3b9ab 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -45,7 +45,7 @@ struct key_preparsed_payload {
 	size_t		datalen;	/* Raw datalen */
 	size_t		quotalen;	/* Quota length for proposed payload */
 	time_t		expiry;		/* Expiry time of key */
-};
+} __randomize_layout;
 
 typedef int (*request_key_actor_t)(struct key_construction *key,
 				   const char *op, void *aux);
@@ -158,7 +158,7 @@ struct key_type {
 	/* internal fields */
 	struct list_head	link;		/* link in types list */
 	struct lock_class_key	lock_class;	/* key->sem lock class */
-};
+} __randomize_layout;
 
 extern struct key_type key_type_keyring;
 
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index c4e441e00db57..655082c88fd93 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -64,7 +64,7 @@ struct subprocess_info {
 	int (*init)(struct subprocess_info *info, struct cred *new);
 	void (*cleanup)(struct subprocess_info *info);
 	void *data;
-};
+} __randomize_layout;
 
 extern int
 call_usermodehelper(const char *path, char **argv, char **envp, int wait);
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index ca85cb80e99a6..0845133503178 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -172,7 +172,7 @@ struct kset {
 	spinlock_t list_lock;
 	struct kobject kobj;
 	const struct kset_uevent_ops *uevent_ops;
-};
+} __randomize_layout;
 
 extern void kset_init(struct kset *kset);
 extern int __must_check kset_register(struct kset *kset);
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 080f34e66017a..565163fc9ad43 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1876,7 +1876,7 @@ struct security_hook_heads {
 	struct list_head audit_rule_match;
 	struct list_head audit_rule_free;
 #endif /* CONFIG_AUDIT */
-};
+} __randomize_layout;
 
 /*
  * Security module hook list structure.
@@ -1887,7 +1887,7 @@ struct security_hook_list {
 	struct list_head		*head;
 	union security_list_options	hook;
 	char				*lsm;
-};
+} __randomize_layout;
 
 /*
  * Initializing a security_hook_list structure takes
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 45cdb27791a33..ff151814a02d9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -342,7 +342,7 @@ struct vm_area_struct {
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
-};
+} __randomize_layout;
 
 struct core_thread {
 	struct task_struct *task;
@@ -500,7 +500,7 @@ struct mm_struct {
 	atomic_long_t hugetlb_usage;
 #endif
 	struct work_struct async_put_work;
-};
+} __randomize_layout;
 
 extern struct mm_struct init_mm;
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 21f56393602f2..d93111d7def6c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -45,7 +45,7 @@ struct module_kobject {
 	struct kobject *drivers_dir;
 	struct module_param_attrs *mp;
 	struct completion *kobj_completion;
-};
+} __randomize_layout;
 
 struct module_attribute {
 	struct attribute attr;
@@ -475,7 +475,7 @@ struct module {
 	ctor_fn_t *ctors;
 	unsigned int num_ctors;
 #endif
-} ____cacheline_aligned;
+} ____cacheline_aligned __randomize_layout;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
 #endif
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 8e0352af06b78..1ce85e6fd95f7 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -67,7 +67,7 @@ struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
 	int mnt_flags;
-};
+} __randomize_layout;
 
 struct file; /* forward dec */
 struct path;
diff --git a/include/linux/msg.h b/include/linux/msg.h
index f3f302f9c1975..a001305f5a799 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -29,7 +29,7 @@ struct msg_queue {
 	struct list_head q_messages;
 	struct list_head q_receivers;
 	struct list_head q_senders;
-};
+} __randomize_layout;
 
 /* Helper routines for sys_msgsnd and sys_msgrcv */
 extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
diff --git a/include/linux/path.h b/include/linux/path.h
index d1372186f4315..cde895cc4af4c 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -7,7 +7,7 @@ struct vfsmount;
 struct path {
 	struct vfsmount *mnt;
 	struct dentry *dentry;
-};
+} __randomize_layout;
 
 extern void path_get(const struct path *);
 extern void path_put(const struct path *);
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c2a989dee8763..b09136f88cf45 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -52,7 +52,7 @@ struct pid_namespace {
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
 	struct ns_common ns;
-};
+} __randomize_layout;
 
 extern struct pid_namespace init_pid_ns;
 
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 58ab28d81fc2e..06844b54dfc17 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -21,7 +21,7 @@ struct proc_ns_operations {
 	int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
 	struct user_namespace *(*owner)(struct ns_common *ns);
 	struct ns_common *(*get_parent)(struct ns_common *ns);
-};
+} __randomize_layout;
 
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b69fc6502013..f833254fce009 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -408,7 +408,7 @@ struct sched_rt_entity {
 	/* rq "owned" by this entity/group: */
 	struct rt_rq			*my_q;
 #endif
-};
+} __randomize_layout;
 
 struct sched_dl_entity {
 	struct rb_node			rb_node;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index c06d63b3a5838..2a0dd40b15dba 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -222,7 +222,7 @@ struct signal_struct {
 	struct mutex cred_guard_mutex;	/* guard against foreign influences on
 					 * credential calculations
 					 * (notably. ptrace) */
-};
+} __randomize_layout;
 
 /*
  * Bits in flags field of signal_struct.
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9edec926e9d96..23bcbdfad4a6a 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
 	unsigned int		use_global_lock;/* >0: global lock required */
-};
+} __randomize_layout;
 
 #ifdef CONFIG_SYSVIPC
 
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 04e8818296251..0fb7061ec54c1 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -22,7 +22,7 @@ struct shmid_kernel /* private to the kernel */
 	/* The task created the shm object.  NULL if the task is dead. */
 	struct task_struct	*shm_creator;
 	struct list_head	shm_clist;	/* list by creator */
-};
+} __randomize_layout;
 
 /* shm_mode upper byte flags */
 #define	SHM_DEST	01000	/* segment will be destroyed on last detach */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d07816def05..9ddeef2c03e22 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -117,7 +117,7 @@ struct ctl_table
 	struct ctl_table_poll *poll;
 	void *extra1;
 	void *extra2;
-};
+} __randomize_layout;
 
 struct ctl_node {
 	struct rb_node node;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d07cd2105a6c6..73f8d0977bb0b 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -333,7 +333,7 @@ struct tty_struct {
 	/* If the tty has a pending do_SAK, queue it here - akpm */
 	struct work_struct SAK_work;
 	struct tty_port *port;
-};
+} __randomize_layout;
 
 /* Each of a tty's open files has private_data pointing to tty_file_private */
 struct tty_file_private {
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index b742b5e47cc20..00b2213f6a355 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -291,7 +291,7 @@ struct tty_operations {
 	void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
 #endif
 	const struct file_operations *proc_fops;
-};
+} __randomize_layout;
 
 struct tty_driver {
 	int	magic;		/* magic number for this structure */
@@ -325,7 +325,7 @@ struct tty_driver {
 
 	const struct tty_operations *ops;
 	struct list_head tty_drivers;
-};
+} __randomize_layout;
 
 extern struct list_head tty_drivers;
 
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 32354b4b4b2ba..b3575ce291483 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -66,7 +66,7 @@ struct user_namespace {
 #endif
 	struct ucounts		*ucounts;
 	int ucount_max[UCOUNT_COUNTS];
-};
+} __randomize_layout;
 
 struct ucounts {
 	struct hlist_node node;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 60f0bb83b313a..da826ed059cfd 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -26,7 +26,7 @@ struct uts_namespace {
 	struct user_namespace *user_ns;
 	struct ucounts *ucounts;
 	struct ns_common ns;
-};
+} __randomize_layout;
 extern struct uts_namespace init_uts_ns;
 
 #ifdef CONFIG_UTS_NS
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd60eccb59a67..64e2a1e24a2cf 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -36,7 +36,7 @@ struct unix_skb_parms {
 	u32			secid;		/* Security ID		*/
 #endif
 	u32			consumed;
-};
+} __randomize_layout;
 
 #define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
 
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e4dd3a2140341..a62959d2b3f7b 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -155,7 +155,7 @@ struct neighbour {
 	struct rcu_head		rcu;
 	struct net_device	*dev;
 	u8			primary_key[0];
-};
+} __randomize_layout;
 
 struct neigh_ops {
 	int			family;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fe80bb48ab1f0..a224196d16ac9 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -147,7 +147,7 @@ struct net {
 #endif
 	struct sock		*diag_nlsk;
 	atomic_t		fnhe_genid;
-};
+} __randomize_layout;
 
 #include <linux/seq_file_net.h>
 
diff --git a/include/net/sock.h b/include/net/sock.h
index f33e3d134e0b7..d349297db9e9e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1113,7 +1113,7 @@ struct proto {
 	atomic_t		socks;
 #endif
 	int			(*diag_destroy)(struct sock *sk, int err);
-};
+} __randomize_layout;
 
 int proto_register(struct proto *prot, int alloc_slab);
 void proto_unregister(struct proto *prot);
diff --git a/kernel/futex.c b/kernel/futex.c
index 357348a6cf6b4..5616511abf39d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -212,7 +212,7 @@ struct futex_pi_state {
 	atomic_t refcount;
 
 	union futex_key key;
-};
+} __randomize_layout;
 
 /**
  * struct futex_q - The hashed futex queue entry, one per waiting task
@@ -246,7 +246,7 @@ struct futex_q {
 	struct rt_mutex_waiter *rt_waiter;
 	union futex_key *requeue_pi_key;
 	u32 bitset;
-};
+} __randomize_layout;
 
 static const struct futex_q futex_q_init = {
 	/* list gets initialized in queue_me()*/
diff --git a/security/keys/internal.h b/security/keys/internal.h
index c0f8682eba69a..6494954e9980a 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -197,7 +197,7 @@ struct request_key_auth {
 	void			*callout_info;
 	size_t			callout_len;
 	pid_t			pid;
-};
+} __randomize_layout;
 
 extern struct key_type key_type_request_key_auth;
 extern struct key *request_key_auth_new(struct key *target,
-- 
GitLab


From 29e48ce87f1eaaa4b1fe3d9af90c586ac2d1fb74 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 5 Apr 2017 22:43:33 -0700
Subject: [PATCH 0554/1958] task_struct: Allow randomized layout

This marks most of the layout of task_struct as randomizable, but leaves
thread_info and scheduler state untouched at the start, and thread_struct
untouched at the end.

Other parts of the kernel use unnamed structures, but the 0-day builder
using gcc-4.4 blows up on static initializers. Officially, it's documented
as only working on gcc 4.6 and later, which further confuses me:
	https://gcc.gnu.org/wiki/C11Status
The structure layout randomization already requires gcc 4.7, but instead
of depending on the plugin being enabled, just check the gcc versions
for wider build testing. At Linus's suggestion, the marking is hidden
in a macro to reduce how ugly it looks. Additionally, indenting is left
unchanged since it would make things harder to read.

Randomization of task_struct is modified from Brad Spengler/PaX Team's
code in the last public patch of grsecurity/PaX based on my understanding
of the code. Changes or omissions from the original code are mine and
don't reflect the original grsecurity/PaX code.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler-gcc.h | 13 ++++++++++++-
 include/linux/compiler.h     |  5 +++++
 include/linux/sched.h        | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7deaae3dc87d7..c4a66c036692c 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -231,6 +231,7 @@
 #endif /* GCC_VERSION >= 40500 */
 
 #if GCC_VERSION >= 40600
+
 /*
  * When used with Link Time Optimization, gcc can optimize away C functions or
  * variables which are referenced only from assembly code.  __visible tells the
@@ -238,7 +239,17 @@
  * this.
  */
 #define __visible	__attribute__((externally_visible))
-#endif
+
+/*
+ * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only
+ * possible since GCC 4.6. To provide as much build testing coverage
+ * as possible, this is used for all GCC 4.6+ builds, and not just on
+ * RANDSTRUCT_PLUGIN builds.
+ */
+#define randomized_struct_fields_start	struct {
+#define randomized_struct_fields_end	} __randomize_layout;
+
+#endif /* GCC_VERSION >= 40600 */
 
 
 #if GCC_VERSION >= 40900 && !defined(__CHECKER__)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 55ee9ee814f8f..0b4ac3e8c63e8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -456,6 +456,11 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __no_randomize_layout
 #endif
 
+#ifndef randomized_struct_fields_start
+# define randomized_struct_fields_start
+# define randomized_struct_fields_end
+#endif
+
 /*
  * Tell gcc if a function is cold. The compiler will assume any path
  * directly leading to the call is unlikely.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f833254fce009..e2ad3531e7fea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -490,6 +490,13 @@ struct task_struct {
 #endif
 	/* -1 unrunnable, 0 runnable, >0 stopped: */
 	volatile long			state;
+
+	/*
+	 * This begins the randomizable portion of task_struct. Only
+	 * scheduling-critical items should be added above here.
+	 */
+	randomized_struct_fields_start
+
 	void				*stack;
 	atomic_t			usage;
 	/* Per task flags (PF_*), defined further below: */
@@ -1051,6 +1058,13 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+
+	/*
+	 * New fields for task_struct should be added above here, so that
+	 * they are included in the randomized portion of task_struct.
+	 */
+	randomized_struct_fields_end
+
 	/* CPU-specific state of this task: */
 	struct thread_struct		thread;
 
-- 
GitLab


From 8acdf5055974e49d337d51ac7011449cfd7b7d05 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 28 Oct 2016 00:45:16 -0700
Subject: [PATCH 0555/1958] randstruct: opt-out externally exposed function
 pointer structs

Some function pointer structures are used externally to the kernel, like
the paravirt structures. These should never be randomized, so mark them
as such, in preparation for enabling randstruct's automatic selection
of all-function-pointer structures.

These markings are verbatim from Brad Spengler/PaX Team's code in the
last public patch of grsecurity/PaX based on my understanding of the
code. Changes or omissions from the original code are mine and don't
reflect the original grsecurity/PaX code.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/include/asm/cacheflush.h     |  2 +-
 arch/x86/include/asm/paravirt_types.h | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d69bebf697e76..74504b154256e 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -116,7 +116,7 @@ struct cpu_cache_fns {
 	void (*dma_unmap_area)(const void *, size_t, int);
 
 	void (*dma_flush_range)(const void *, const void *);
-};
+} __no_randomize_layout;
 
 /*
  * Select the calling method
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7465d6fe336f5..96c7e3cf43fae 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -83,7 +83,7 @@ struct pv_init_ops {
 	 */
 	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
 			  unsigned long addr, unsigned len);
-};
+} __no_randomize_layout;
 
 
 struct pv_lazy_ops {
@@ -91,12 +91,12 @@ struct pv_lazy_ops {
 	void (*enter)(void);
 	void (*leave)(void);
 	void (*flush)(void);
-};
+} __no_randomize_layout;
 
 struct pv_time_ops {
 	unsigned long long (*sched_clock)(void);
 	unsigned long long (*steal_clock)(int cpu);
-};
+} __no_randomize_layout;
 
 struct pv_cpu_ops {
 	/* hooks for various privileged instructions */
@@ -175,7 +175,7 @@ struct pv_cpu_ops {
 
 	void (*start_context_switch)(struct task_struct *prev);
 	void (*end_context_switch)(struct task_struct *next);
-};
+} __no_randomize_layout;
 
 struct pv_irq_ops {
 	/*
@@ -198,7 +198,7 @@ struct pv_irq_ops {
 #ifdef CONFIG_X86_64
 	void (*adjust_exception_frame)(void);
 #endif
-};
+} __no_randomize_layout;
 
 struct pv_mmu_ops {
 	unsigned long (*read_cr2)(void);
@@ -306,7 +306,7 @@ struct pv_mmu_ops {
 	   an mfn.  We can tell which is which from the index. */
 	void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
 			   phys_addr_t phys, pgprot_t flags);
-};
+} __no_randomize_layout;
 
 struct arch_spinlock;
 #ifdef CONFIG_SMP
@@ -323,7 +323,7 @@ struct pv_lock_ops {
 	void (*kick)(int cpu);
 
 	struct paravirt_callee_save vcpu_is_preempted;
-};
+} __no_randomize_layout;
 
 /* This contains all the paravirt structures: we get a convenient
  * number for each function using the offset which we use to indicate
@@ -335,7 +335,7 @@ struct paravirt_patch_template {
 	struct pv_irq_ops pv_irq_ops;
 	struct pv_mmu_ops pv_mmu_ops;
 	struct pv_lock_ops pv_lock_ops;
-};
+} __no_randomize_layout;
 
 extern struct pv_info pv_info;
 extern struct pv_init_ops pv_init_ops;
-- 
GitLab


From e3bdc8d7623d5875403ad40443e7b049ae200fcd Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 6 Jun 2017 15:12:37 +0530
Subject: [PATCH 0556/1958] thermal: imx: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/imx_thermal.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index f7ec39f46ee4d..4798b4b1fd77f 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -660,8 +660,11 @@ static int imx_thermal_resume(struct device *dev)
 {
 	struct imx_thermal_data *data = dev_get_drvdata(dev);
 	struct regmap *map = data->tempmon;
+	int ret;
 
-	clk_prepare_enable(data->thermal_clk);
+	ret = clk_prepare_enable(data->thermal_clk);
+	if (ret)
+		return ret;
 	/* Enabled thermal sensor after resume */
 	regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
-- 
GitLab


From 919054fdfc8adf58c5512fe9872eb53ea0f5525d Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 6 Jun 2017 15:04:46 +0530
Subject: [PATCH 0557/1958] thermal: hisilicon: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/hisi_thermal.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index f6429666a1cfe..9c3ce341eb975 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -397,8 +397,11 @@ static int hisi_thermal_suspend(struct device *dev)
 static int hisi_thermal_resume(struct device *dev)
 {
 	struct hisi_thermal_data *data = dev_get_drvdata(dev);
+	int ret;
 
-	clk_prepare_enable(data->clk);
+	ret = clk_prepare_enable(data->clk);
+	if (ret)
+		return ret;
 
 	data->irq_enabled = true;
 	hisi_thermal_enable_bind_irq_sensor(data);
-- 
GitLab


From 1fe3854a83b580727c9464b37b62ba77ead1d6f6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 14 Jun 2017 12:13:27 +0300
Subject: [PATCH 0558/1958] thermal: bcm2835: fix an error code in probe()

This causes a static checker because we're passing a valid pointer to
PTR_ERR().  "err" is already the correct error code, so we can just
delete this line.

Fixes: bcb7dd9ef206 ("thermal: bcm2835: add thermal driver for bcm2835 SoC")
Acked-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/broadcom/bcm2835_thermal.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c
index 0ecf80890c84f..e6863c8416627 100644
--- a/drivers/thermal/broadcom/bcm2835_thermal.c
+++ b/drivers/thermal/broadcom/bcm2835_thermal.c
@@ -245,7 +245,6 @@ static int bcm2835_thermal_probe(struct platform_device *pdev)
 		 */
 		err = tz->ops->get_trip_temp(tz, 0, &trip_temp);
 		if (err < 0) {
-			err = PTR_ERR(tz);
 			dev_err(&pdev->dev,
 				"Not able to read trip_temp: %d\n",
 				err);
-- 
GitLab


From d04c241c669209667dd8f44335591246e601139a Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Fri, 30 Jun 2017 09:46:07 -0700
Subject: [PATCH 0559/1958] xfs: Check for m_errortag initialization in
 xfs_errortag_test

While adding error injection into IO completion, I notice the lack of
initialization check in xfs_errortag_test(), make the error injection
mechanism unable to be used there.

IO completion is executed a few times before the error injection
mechanism is initialized, so to be safer, make xfs_errortag_test() check
if the errortag is properly initialized.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_error.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 26c32bc5cd34f..2f4feb959bfb4 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -233,6 +233,17 @@ xfs_errortag_test(
 {
 	unsigned int		randfactor;
 
+	/*
+	 * To be able to use error injection anywhere, we need to ensure error
+	 * injection mechanism is already initialized.
+	 *
+	 * Code paths like I/O completion can be called before the
+	 * initialization is complete, but be able to inject errors in such
+	 * places is still useful.
+	 */
+	if (!mp->m_errortag)
+		return false;
+
 	ASSERT(error_tag < XFS_ERRTAG_MAX);
 	randfactor = mp->m_errortag[error_tag];
 	if (!randfactor || prandom_u32() % randfactor)
-- 
GitLab


From bda250dbaf39f67f8910e183853e4e6a9e5ce899 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 29 Jun 2017 12:28:36 -0700
Subject: [PATCH 0560/1958] xfs: rewrite xfs_dq_get_next_id using
 xfs_iext_lookup_extent

This goes straight to a single lookup in the extent list and avoids a
roundtrip through two layers that don't add any value for the simple
quoata file that just has data or holes and no page cache, delayed
allocation, unwritten extent or COW fork (which btw, doesn't seem to
be handled by the existing SEEK HOLE/DATA code).

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_dquot.c | 66 ++++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 44 deletions(-)

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 79668142afc1f..8848b397e7a81 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -701,21 +701,18 @@ xfs_qm_dqread(
  */
 static int
 xfs_dq_get_next_id(
-	xfs_mount_t		*mp,
+	struct xfs_mount	*mp,
 	uint			type,
-	xfs_dqid_t		*id,
-	loff_t			eof)
+	xfs_dqid_t		*id)
 {
-	struct xfs_inode	*quotip;
+	struct xfs_inode	*quotip = xfs_quota_inode(mp, type);
+	xfs_dqid_t		next_id = *id + 1; /* simple advance */
+	uint			lock_flags;
+	struct xfs_bmbt_irec	got;
+	xfs_extnum_t		idx;
 	xfs_fsblock_t		start;
-	loff_t			offset;
-	uint			lock;
-	xfs_dqid_t		next_id;
 	int			error = 0;
 
-	/* Simple advance */
-	next_id = *id + 1;
-
 	/* If we'd wrap past the max ID, stop */
 	if (next_id < *id)
 		return -ENOENT;
@@ -729,23 +726,20 @@ xfs_dq_get_next_id(
 	/* Nope, next_id is now past the current chunk, so find the next one */
 	start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
 
-	quotip = xfs_quota_inode(mp, type);
-	lock = xfs_ilock_data_map_shared(quotip);
-
-	offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
-				      eof, SEEK_DATA);
-	if (offset < 0)
-		error = offset;
-
-	xfs_iunlock(quotip, lock);
+	lock_flags = xfs_ilock_data_map_shared(quotip);
+	if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
+		if (error)
+			return error;
+	}
 
-	/* -ENXIO is essentially "no more data" */
-	if (error)
-		return (error == -ENXIO ? -ENOENT: error);
+	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got))
+		*id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
+	else
+		error = -ENOENT;
+	xfs_iunlock(quotip, lock_flags);
 
-	/* Convert next data offset back to a quota id */
-	*id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
-	return 0;
+	return error;
 }
 
 /*
@@ -768,7 +762,6 @@ xfs_qm_dqget(
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 	struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
 	struct xfs_dquot	*dqp;
-	loff_t			eof = 0;
 	int			error;
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -796,21 +789,6 @@ xfs_qm_dqget(
 	}
 #endif
 
-	/* Get the end of the quota file if we need it */
-	if (flags & XFS_QMOPT_DQNEXT) {
-		struct xfs_inode	*quotip;
-		xfs_fileoff_t		last;
-		uint			lock_mode;
-
-		quotip = xfs_quota_inode(mp, type);
-		lock_mode = xfs_ilock_data_map_shared(quotip);
-		error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
-		xfs_iunlock(quotip, lock_mode);
-		if (error)
-			return error;
-		eof = XFS_FSB_TO_B(mp, last);
-	}
-
 restart:
 	mutex_lock(&qi->qi_tree_lock);
 	dqp = radix_tree_lookup(tree, id);
@@ -829,7 +807,7 @@ xfs_qm_dqget(
 			if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
 				xfs_dqunlock(dqp);
 				mutex_unlock(&qi->qi_tree_lock);
-				error = xfs_dq_get_next_id(mp, type, &id, eof);
+				error = xfs_dq_get_next_id(mp, type, &id);
 				if (error)
 					return error;
 				goto restart;
@@ -864,7 +842,7 @@ xfs_qm_dqget(
 
 	/* If we are asked to find next active id, keep looking */
 	if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
-		error = xfs_dq_get_next_id(mp, type, &id, eof);
+		error = xfs_dq_get_next_id(mp, type, &id);
 		if (!error)
 			goto restart;
 	}
@@ -923,7 +901,7 @@ xfs_qm_dqget(
 	if (flags & XFS_QMOPT_DQNEXT) {
 		if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
 			xfs_qm_dqput(dqp);
-			error = xfs_dq_get_next_id(mp, type, &id, eof);
+			error = xfs_dq_get_next_id(mp, type, &id);
 			if (error)
 				return error;
 			goto restart;
-- 
GitLab


From 7175a11214f02e6184690c17cf5366012b667531 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 29 Jun 2017 11:43:19 -0700
Subject: [PATCH 0561/1958] xfs: remove a whitespace-only line from
 xfs_fs_get_nextdqblk

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_quotaops.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index f82d79a8c694a..de9493253edfb 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -269,7 +269,6 @@ xfs_fs_get_nextdqblk(
 	/* ID may be different, so convert back what we got */
 	*qid = make_kqid(current_user_ns(), qid->type, id);
 	return 0;
-	
 }
 
 STATIC int
-- 
GitLab


From d554b5e1ca64d23e4f839e6531490fee8479fbaf Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 27 Jun 2017 22:27:57 -0400
Subject: [PATCH 0562/1958] nvme: Quirks for PM1725 controllers

PM1725 controllers have a couple of quirks that need to be handled in
the driver:

 - I/O queue depth must be limited to 64 entries on controllers that do
   not report MQES.

 - The host interface registers go offline briefly while resetting the
   chip. Thus a delay is needed before checking whether the controller
   is ready.

Note that the admin queue depth is also limited to 64 on older versions
of this board. Since our NVME_AQ_DEPTH is now 32 that is no longer an
issue.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 32a98e2740adf..343263bcb49a8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1908,6 +1908,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
 			"set queue depth=%u to work around controller resets\n",
 			dev->q_depth);
+	} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+		   (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+		   NVME_CAP_MQES(cap) == 0) {
+		dev->q_depth = 64;
+		dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+                        "set queue depth=%u\n", dev->q_depth);
 	}
 
 	/*
@@ -2454,6 +2460,10 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x144d, 0xa821),   /* Samsung PM1725 */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
-- 
GitLab


From d858e5f04e58a42a6e0c8ec74ea15e3ea4bb45d0 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 24 Apr 2017 10:58:29 +0300
Subject: [PATCH 0563/1958] nvme: move queue_count to the nvme_ctrl

All all transports use the queue_count in exactly the same, so move it to
the generic struct nvme_ctrl. In the future it will also be maintained by
the core.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-By: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c     | 35 +++++++++++++++++-----------------
 drivers/nvme/host/nvme.h   |  1 +
 drivers/nvme/host/pci.c    | 15 +++++++--------
 drivers/nvme/host/rdma.c   | 39 +++++++++++++++++++-------------------
 drivers/nvme/target/loop.c | 15 +++++++--------
 5 files changed, 51 insertions(+), 54 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index ed87214fdc0e4..7eb006427caf8 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -148,7 +148,6 @@ struct nvme_fc_ctrl {
 	struct device		*dev;
 	struct nvme_fc_lport	*lport;
 	struct nvme_fc_rport	*rport;
-	u32			queue_count;
 	u32			cnum;
 
 	u64			association_id;
@@ -1614,7 +1613,7 @@ nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_fc_free_queue(&ctrl->queues[i]);
 }
 
@@ -1635,10 +1634,10 @@ __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
 static void
 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1];
+	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
 	int i;
 
-	for (i = ctrl->queue_count - 1; i >= 1; i--, queue--)
+	for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
 		__nvme_fc_delete_hw_queue(ctrl, queue, i);
 }
 
@@ -1648,7 +1647,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 	struct nvme_fc_queue *queue = &ctrl->queues[1];
 	int i, ret;
 
-	for (i = 1; i < ctrl->queue_count; i++, queue++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
 		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
 		if (ret)
 			goto delete_queues;
@@ -1667,7 +1666,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 {
 	int i, ret = 0;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
 					(qsize / 5));
 		if (ret)
@@ -1685,7 +1684,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
 }
 
@@ -2187,7 +2186,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 		return ret;
 	}
 
-	ctrl->queue_count = opts->nr_io_queues + 1;
+	ctrl->ctrl.queue_count = opts->nr_io_queues + 1;
 	if (!opts->nr_io_queues)
 		return 0;
 
@@ -2204,7 +2203,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 						sizeof(struct scatterlist)) +
 					ctrl->lport->ops->fcprqst_priv_sz;
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -2258,7 +2257,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	}
 
 	/* check for io queues existing */
-	if (ctrl->queue_count == 1)
+	if (ctrl->ctrl.queue_count == 1)
 		return 0;
 
 	nvme_fc_init_io_queues(ctrl);
@@ -2381,7 +2380,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * Create the io queues
 	 */
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		if (ctrl->ctrl.state == NVME_CTRL_NEW)
 			ret = nvme_fc_create_io_queues(ctrl);
 		else
@@ -2395,7 +2394,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
@@ -2447,7 +2446,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * io requests back to the block layer as part of normal completions
 	 * (but with error status).
 	 */
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -2702,18 +2701,18 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
-	ctrl->queue_count = min_t(unsigned int,
+	ctrl->ctrl.queue_count = min_t(unsigned int,
 				opts->nr_io_queues,
 				lport->ops->max_hw_queues);
-	opts->nr_io_queues = ctrl->queue_count;	/* so opts has valid value */
-	ctrl->queue_count++;	/* +1 for admin queue */
+	opts->nr_io_queues = ctrl->ctrl.queue_count;	/* so opts has valid value */
+	ctrl->ctrl.queue_count++;	/* +1 for admin queue */
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ret = -ENOMEM;
-	ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue),
-				GFP_KERNEL);
+	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
+				sizeof(struct nvme_fc_queue), GFP_KERNEL);
 	if (!ctrl->queues)
 		goto out_free_ida;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d70ff0fdd36bd..6c51d92b7fab3 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -142,6 +142,7 @@ struct nvme_ctrl {
 	u16 cntlid;
 
 	u32 ctrl_config;
+	u32 queue_count;
 
 	u32 page_size;
 	u32 max_hw_sectors;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 343263bcb49a8..6b50c9096fe41 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -74,7 +74,6 @@ struct nvme_dev {
 	struct device *dev;
 	struct dma_pool *prp_page_pool;
 	struct dma_pool *prp_small_pool;
-	unsigned queue_count;
 	unsigned online_queues;
 	unsigned max_qid;
 	int q_depth;
@@ -1099,9 +1098,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
 {
 	int i;
 
-	for (i = dev->queue_count - 1; i >= lowest; i--) {
+	for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
 		struct nvme_queue *nvmeq = dev->queues[i];
-		dev->queue_count--;
+		dev->ctrl.queue_count--;
 		dev->queues[i] = NULL;
 		nvme_free_queue(nvmeq);
 	}
@@ -1221,7 +1220,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->qid = qid;
 	nvmeq->cq_vector = -1;
 	dev->queues[qid] = nvmeq;
-	dev->queue_count++;
+	dev->ctrl.queue_count++;
 
 	return nvmeq;
 
@@ -1441,7 +1440,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 	unsigned i, max;
 	int ret = 0;
 
-	for (i = dev->queue_count; i <= dev->max_qid; i++) {
+	for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
 		/* vector == qid - 1, match nvme_create_queue */
 		if (!nvme_alloc_queue(dev, i, dev->q_depth,
 		     pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
@@ -1450,7 +1449,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 		}
 	}
 
-	max = min(dev->max_qid, dev->queue_count - 1);
+	max = min(dev->max_qid, dev->ctrl.queue_count - 1);
 	for (i = dev->online_queues; i <= max; i++) {
 		ret = nvme_create_queue(dev->queues[i], i);
 		if (ret)
@@ -2001,7 +2000,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_stop_queues(&dev->ctrl);
 
 	queues = dev->online_queues - 1;
-	for (i = dev->queue_count - 1; i > 0; i--)
+	for (i = dev->ctrl.queue_count - 1; i > 0; i--)
 		nvme_suspend_queue(dev->queues[i]);
 
 	if (dead) {
@@ -2009,7 +2008,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		 * probe, before the admin queue is configured. Thus,
 		 * queue_count can be 0 here.
 		 */
-		if (dev->queue_count)
+		if (dev->ctrl.queue_count)
 			nvme_suspend_queue(dev->queues[0]);
 	} else {
 		nvme_disable_io_queues(dev, queues);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6d4119dfbdaac..c4bacad3fe23f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -103,7 +103,6 @@ struct nvme_rdma_queue {
 struct nvme_rdma_ctrl {
 	/* read only in the hot path */
 	struct nvme_rdma_queue	*queues;
-	u32			queue_count;
 
 	/* other member variables */
 	struct blk_mq_tag_set	tag_set;
@@ -349,7 +348,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_rdma_ctrl *ctrl = data;
 	struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-	BUG_ON(hctx_idx >= ctrl->queue_count);
+	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
 	hctx->driver_data = queue;
 	return 0;
@@ -587,7 +586,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
 }
 
@@ -595,7 +594,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	int i, ret = 0;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret) {
 			dev_info(ctrl->ctrl.device,
@@ -623,14 +622,14 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
 	if (ret)
 		return ret;
 
-	ctrl->queue_count = nr_io_queues + 1;
-	if (ctrl->queue_count < 2)
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
+	if (ctrl->ctrl.queue_count < 2)
 		return 0;
 
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_rdma_init_queue(ctrl, i,
 					   ctrl->ctrl.opts->queue_size);
 		if (ret) {
@@ -705,7 +704,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	++ctrl->ctrl.nr_reconnects;
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_rdma_free_io_queues(ctrl);
 
 		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -735,7 +734,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	nvme_start_keep_alive(&ctrl->ctrl);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
 			goto requeue;
@@ -749,7 +748,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	WARN_ON_ONCE(!changed);
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
@@ -772,15 +771,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	nvme_stop_keep_alive(&ctrl->ctrl);
 
-	for (i = 0; i < ctrl->queue_count; i++)
+	for (i = 0; i < ctrl->ctrl.queue_count; i++)
 		clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
 
-	if (ctrl->queue_count > 1)
+	if (ctrl->ctrl.queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 
 	/* We must take care of fastfail/requeue all our inflight requests */
-	if (ctrl->queue_count > 1)
+	if (ctrl->ctrl.queue_count > 1)
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
@@ -1624,7 +1623,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->reconnect_work);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
@@ -1716,7 +1715,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		goto del_dead_ctrl;
 	}
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
 		if (ret)
 			goto del_dead_ctrl;
@@ -1733,7 +1732,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
@@ -1785,7 +1784,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
 		SG_CHUNK_SIZE * sizeof(struct scatterlist);
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -1863,12 +1862,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
-	ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+	ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ret = -ENOMEM;
-	ctrl->queues = kcalloc(ctrl->queue_count, sizeof(*ctrl->queues),
+	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
 				GFP_KERNEL);
 	if (!ctrl->queues)
 		goto out_uninit_ctrl;
@@ -1925,7 +1924,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	if (opts->nr_io_queues) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 5f55c683b338c..edf0e2ab19e39 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -44,7 +44,6 @@ struct nvme_loop_iod {
 
 struct nvme_loop_ctrl {
 	struct nvme_loop_queue	*queues;
-	u32			queue_count;
 
 	struct blk_mq_tag_set	admin_tag_set;
 
@@ -241,7 +240,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_loop_ctrl *ctrl = data;
 	struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-	BUG_ON(hctx_idx >= ctrl->queue_count);
+	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
 	hctx->driver_data = queue;
 	return 0;
@@ -307,7 +306,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
 }
 
@@ -330,7 +329,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
 		if (ret)
 			goto out_destroy_queues;
 
-		ctrl->queue_count++;
+		ctrl->ctrl.queue_count++;
 	}
 
 	return 0;
@@ -344,7 +343,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
 {
 	int i, ret;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
 			return ret;
@@ -372,7 +371,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
 	if (error)
 		return error;
-	ctrl->queue_count = 1;
+	ctrl->ctrl.queue_count = 1;
 
 	error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
 	if (error)
@@ -426,7 +425,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
 	nvme_stop_keep_alive(&ctrl->ctrl);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
@@ -559,7 +558,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
 		SG_CHUNK_SIZE * sizeof(struct scatterlist);
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 	ctrl->ctrl.tagset = &ctrl->tag_set;
 
-- 
GitLab


From 20d0dfe65afd3fb59d14720570a6921eb6bf5c1f Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 27 Jun 2017 22:16:38 +0300
Subject: [PATCH 0564/1958] nvme: move ctrl cap to struct nvme_ctrl

All transports use either a private cache of controller cap or an on-stack
copy, move it to the generic struct nvme_ctrl. In the future it will also
be maintained by the core.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c     |  8 +++-----
 drivers/nvme/host/nvme.h   |  1 +
 drivers/nvme/host/pci.c    | 20 +++++++++-----------
 drivers/nvme/host/rdma.c   | 10 +++++-----
 drivers/nvme/target/loop.c |  7 +++----
 5 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7eb006427caf8..2f990d9790379 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -152,8 +152,6 @@ struct nvme_fc_ctrl {
 
 	u64			association_id;
 
-	u64			cap;
-
 	struct list_head	ctrl_list;	/* rport->ctrl_list */
 
 	struct blk_mq_tag_set	admin_tag_set;
@@ -2328,7 +2326,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * prior connection values
 	 */
 
-	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
 	if (ret) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -2336,9 +2334,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
 
-	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
 		goto out_disconnect_admin_queue;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 6c51d92b7fab3..e0b83311d5deb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -144,6 +144,7 @@ struct nvme_ctrl {
 	u32 ctrl_config;
 	u32 queue_count;
 
+	u64 cap;
 	u32 page_size;
 	u32 max_hw_sectors;
 	u16 oncs;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6b50c9096fe41..fe3907a082ba5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1144,8 +1144,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 	if (shutdown)
 		nvme_shutdown_ctrl(&dev->ctrl);
 	else
-		nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
-						dev->bar + NVME_REG_CAP));
+		nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
 	spin_lock_irq(&nvmeq->q_lock);
 	nvme_process_cq(nvmeq);
@@ -1388,7 +1387,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
-	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 	struct nvme_queue *nvmeq;
 
 	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
@@ -1396,13 +1394,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		return result;
 
 	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
-						NVME_CAP_NSSRC(cap) : 0;
+				NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
 
 	if (dev->subsystem &&
 	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
 		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
 
-	result = nvme_disable_ctrl(&dev->ctrl, cap);
+	result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 	if (result < 0)
 		return result;
 
@@ -1421,7 +1419,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
 	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
 
-	result = nvme_enable_ctrl(&dev->ctrl, cap);
+	result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
 	if (result)
 		return result;
 
@@ -1865,7 +1863,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 
 static int nvme_pci_enable(struct nvme_dev *dev)
 {
-	u64 cap;
 	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -1892,10 +1889,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	if (result < 0)
 		return result;
 
-	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
-	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+	dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+				NVME_Q_DEPTH);
+	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
 
 	/*
@@ -1909,7 +1907,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 			dev->q_depth);
 	} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
 		   (pdev->device == 0xa821 || pdev->device == 0xa822) &&
-		   NVME_CAP_MQES(cap) == 0) {
+		   NVME_CAP_MQES(dev->ctrl.cap) == 0) {
 		dev->q_depth = 64;
 		dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
                         "set queue depth=%u\n", dev->q_depth);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c4bacad3fe23f..2ab0cdb4d881c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -118,7 +118,6 @@ struct nvme_rdma_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct nvme_rdma_device	*device;
 
-	u64			cap;
 	u32			max_fr_pages;
 
 	struct sockaddr_storage addr;
@@ -728,7 +727,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
 		goto requeue;
 
@@ -1573,7 +1572,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP,
+			&ctrl->ctrl.cap);
 	if (error) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -1581,9 +1581,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (error)
 		goto out_cleanup_queue;
 
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index edf0e2ab19e39..568ed86256968 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -48,7 +48,6 @@ struct nvme_loop_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 
 	struct list_head	list;
-	u64			cap;
 	struct blk_mq_tag_set	tag_set;
 	struct nvme_loop_iod	async_event_iod;
 	struct nvme_ctrl	ctrl;
@@ -387,7 +386,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
 	if (error) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -395,9 +394,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (error)
 		goto out_cleanup_queue;
 
-- 
GitLab


From 01ad0990467eaa17ae17db7376a4f02739f558c0 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 1 May 2017 00:27:17 +0300
Subject: [PATCH 0565/1958] nvme-pci: rename to nvme_pci_configure_admin_queue

we are going to need the name for the core routine...

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fe3907a082ba5..eb729ff70e7d1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1383,7 +1383,7 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
 	return 0;
 }
 
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
@@ -2096,7 +2096,7 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	result = nvme_configure_admin_queue(dev);
+	result = nvme_pci_configure_admin_queue(dev);
 	if (result)
 		goto out;
 
-- 
GitLab


From 334fd34d76f237c0ee58dfc400d2c4e34d660544 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 29 Jun 2017 11:43:20 -0700
Subject: [PATCH 0566/1958] vfs: Add page_cache_seek_hole_data helper

Both ext4 and xfs implement seeking for the next hole or piece of data
in unwritten extents by scanning the page cache, and both versions share
the same bug when iterating the buffers of a page: the start offset into
the page isn't taken into account, so when a page fits more than two
filesystem blocks, things will go wrong.  For example, on a filesystem
with a block size of 1k, the following command will fail:

  xfs_io -f -c "falloc 0 4k" \
            -c "pwrite 1k 1k" \
            -c "pwrite 3k 1k" \
            -c "seek -a -r 0" foo

In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048,
SEEK_DATA) will return the correct result.

Introduce a generic vfs helper for seeking in the page cache that gets
this right.  The next commits will replace the filesystem specific
implementations.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: dropped the export]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/buffer.c                 | 124 ++++++++++++++++++++++++++++++++++++
 include/linux/buffer_head.h |   2 +
 2 files changed, 126 insertions(+)

diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0f..b3674eb7c9c0a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3492,6 +3492,130 @@ int bh_submit_read(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(bh_submit_read);
 
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
+ *
+ * Returns the offset within the file on success, and -ENOENT otherwise.
+ */
+static loff_t
+page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
+{
+	loff_t offset = page_offset(page);
+	struct buffer_head *bh, *head;
+	bool seek_data = whence == SEEK_DATA;
+
+	if (lastoff < offset)
+		lastoff = offset;
+
+	bh = head = page_buffers(page);
+	do {
+		offset += bh->b_size;
+		if (lastoff >= offset)
+			continue;
+
+		/*
+		 * Unwritten extents that have data in the page cache covering
+		 * them can be identified by the BH_Unwritten state flag.
+		 * Pages with multiple buffers might have a mix of holes, data
+		 * and unwritten extents - any buffer with valid data in it
+		 * should have BH_Uptodate flag set on it.
+		 */
+
+		if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
+			return lastoff;
+
+		lastoff = offset;
+	} while ((bh = bh->b_this_page) != head);
+	return -ENOENT;
+}
+
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
+ *
+ * Within unwritten extents, the page cache determines which parts are holes
+ * and which are data: unwritten and uptodate buffer heads count as data;
+ * everything else counts as a hole.
+ *
+ * Returns the resulting offset on successs, and -ENOENT otherwise.
+ */
+loff_t
+page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
+			  int whence)
+{
+	pgoff_t index = offset >> PAGE_SHIFT;
+	pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
+	loff_t lastoff = offset;
+	struct pagevec pvec;
+
+	if (length <= 0)
+		return -ENOENT;
+
+	pagevec_init(&pvec, 0);
+
+	do {
+		unsigned want, nr_pages, i;
+
+		want = min_t(unsigned, end - index, PAGEVEC_SIZE);
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or
+			 * even swizzled back from swapper_space to tmpfs file
+			 * mapping.  However, page->index will not change
+			 * because we have a reference on the page.
+                         *
+			 * If current page offset is beyond where we've ended,
+			 * we've found a hole.
+                         */
+			if (whence == SEEK_HOLE &&
+			    lastoff < page_offset(page))
+				goto check_range;
+
+			/* Searching done if the page index is out of range. */
+			if (page->index >= end)
+				goto not_found;
+
+			lock_page(page);
+			if (likely(page->mapping == inode->i_mapping) &&
+			    page_has_buffers(page)) {
+				lastoff = page_seek_hole_data(page, lastoff, whence);
+				if (lastoff >= 0) {
+					unlock_page(page);
+					goto check_range;
+				}
+			}
+			unlock_page(page);
+			lastoff = page_offset(page) + PAGE_SIZE;
+		}
+
+		/* Searching done if fewer pages returned than wanted. */
+		if (nr_pages < want)
+			break;
+
+		index = pvec.pages[i - 1]->index + 1;
+		pagevec_release(&pvec);
+	} while (index < end);
+
+	/* When no page at lastoff and we are not done, we found a hole. */
+	if (whence != SEEK_HOLE)
+		goto not_found;
+
+check_range:
+	if (lastoff < offset + length)
+		goto out;
+not_found:
+	lastoff = -ENOENT;
+out:
+	pagevec_release(&pvec);
+	return lastoff;
+}
+
 void __init buffer_init(void)
 {
 	unsigned long nrpages;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index bd029e52ef5ee..ad4e024ce17ef 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -201,6 +201,8 @@ void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
 int bh_uptodate_or_lock(struct buffer_head *bh);
 int bh_submit_read(struct buffer_head *bh);
+loff_t page_cache_seek_hole_data(struct inode *inode, loff_t offset,
+				 loff_t length, int whence);
 
 extern int buffer_heads_over_limit;
 
-- 
GitLab


From 0ed3b0d45fd39142e418220f518c8959c1a5f596 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 29 Jun 2017 11:43:21 -0700
Subject: [PATCH 0567/1958] vfs: Add iomap_seek_hole and iomap_seek_data
 helpers

Filesystems can use this for implementing lseek SEEK_HOLE / SEEK_DATA
support via iomap.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: split functions, coding style cleanups]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 94 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |  4 ++
 2 files changed, 98 insertions(+)

diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5a..432eed8f091f1 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -584,6 +584,100 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
 }
 EXPORT_SYMBOL_GPL(iomap_fiemap);
 
+static loff_t
+iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
+		      void *data, struct iomap *iomap)
+{
+	switch (iomap->type) {
+	case IOMAP_UNWRITTEN:
+		offset = page_cache_seek_hole_data(inode, offset, length,
+						   SEEK_HOLE);
+		if (offset < 0)
+			return length;
+		/* fall through */
+	case IOMAP_HOLE:
+		*(loff_t *)data = offset;
+		return 0;
+	default:
+		return length;
+	}
+}
+
+loff_t
+iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+	loff_t size = i_size_read(inode);
+	loff_t length = size - offset;
+	loff_t ret;
+
+	/* Nothing to be found beyond the end of the file. */
+	if (offset >= size)
+		return -ENXIO;
+
+	while (length > 0) {
+		ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
+				  &offset, iomap_seek_hole_actor);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			break;
+
+		offset += ret;
+		length -= ret;
+	}
+
+	return offset;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_hole);
+
+static loff_t
+iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
+		      void *data, struct iomap *iomap)
+{
+	switch (iomap->type) {
+	case IOMAP_HOLE:
+		return length;
+	case IOMAP_UNWRITTEN:
+		offset = page_cache_seek_hole_data(inode, offset, length,
+						   SEEK_DATA);
+		if (offset < 0)
+			return length;
+		/*FALLTHRU*/
+	default:
+		*(loff_t *)data = offset;
+		return 0;
+	}
+}
+
+loff_t
+iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+	loff_t size = i_size_read(inode);
+	loff_t length = size - offset;
+	loff_t ret;
+
+	/* Nothing to be found beyond the end of the file. */
+	if (offset >= size)
+		return -ENXIO;
+
+	while (length > 0) {
+		ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
+				  &offset, iomap_seek_data_actor);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			break;
+
+		offset += ret;
+		length -= ret;
+	}
+
+	if (length <= 0)
+		return -ENXIO;
+	return offset;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_data);
+
 /*
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e788da310..8a03f5dcd89b3 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -83,6 +83,10 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, const struct iomap_ops *ops);
+loff_t iomap_seek_hole(struct inode *inode, loff_t offset,
+		const struct iomap_ops *ops);
+loff_t iomap_seek_data(struct inode *inode, loff_t offset,
+		const struct iomap_ops *ops);
 
 /*
  * Flags for direct I/O ->end_io:
-- 
GitLab


From 9b2970aacfd9aa5d9bad377a554a002b398f882e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 29 Jun 2017 11:43:21 -0700
Subject: [PATCH 0568/1958] xfs: Switch to iomap for SEEK_HOLE / SEEK_DATA

Switch to the iomap_seek_hole and iomap_seek_data helpers for
implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the
code that isn't needed any more.

Based on patches from Andreas Gruenbacher <agruenba@redhat.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c  | 375 ++-------------------------------------------
 fs/xfs/xfs_inode.h |   3 -
 2 files changed, 14 insertions(+), 364 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8f9a491b25514..5b95e58c5bf22 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -954,378 +954,31 @@ xfs_file_readdir(
 	return xfs_readdir(NULL, ip, ctx, bufsize);
 }
 
-/*
- * This type is designed to indicate the type of offset we would like
- * to search from page cache for xfs_seek_hole_data().
- */
-enum {
-	HOLE_OFF = 0,
-	DATA_OFF,
-};
-
-/*
- * Lookup the desired type of offset from the given page.
- *
- * On success, return true and the offset argument will point to the
- * start of the region that was found.  Otherwise this function will
- * return false and keep the offset argument unchanged.
- */
-STATIC bool
-xfs_lookup_buffer_offset(
-	struct page		*page,
-	loff_t			*offset,
-	unsigned int		type)
-{
-	loff_t			lastoff = page_offset(page);
-	bool			found = false;
-	struct buffer_head	*bh, *head;
-
-	bh = head = page_buffers(page);
-	do {
-		/*
-		 * Unwritten extents that have data in the page
-		 * cache covering them can be identified by the
-		 * BH_Unwritten state flag.  Pages with multiple
-		 * buffers might have a mix of holes, data and
-		 * unwritten extents - any buffer with valid
-		 * data in it should have BH_Uptodate flag set
-		 * on it.
-		 */
-		if (buffer_unwritten(bh) ||
-		    buffer_uptodate(bh)) {
-			if (type == DATA_OFF)
-				found = true;
-		} else {
-			if (type == HOLE_OFF)
-				found = true;
-		}
-
-		if (found) {
-			*offset = lastoff;
-			break;
-		}
-		lastoff += bh->b_size;
-	} while ((bh = bh->b_this_page) != head);
-
-	return found;
-}
-
-/*
- * This routine is called to find out and return a data or hole offset
- * from the page cache for unwritten extents according to the desired
- * type for xfs_seek_hole_data().
- *
- * The argument offset is used to tell where we start to search from the
- * page cache.  Map is used to figure out the end points of the range to
- * lookup pages.
- *
- * Return true if the desired type of offset was found, and the argument
- * offset is filled with that address.  Otherwise, return false and keep
- * offset unchanged.
- */
-STATIC bool
-xfs_find_get_desired_pgoff(
-	struct inode		*inode,
-	struct xfs_bmbt_irec	*map,
-	unsigned int		type,
-	loff_t			*offset)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	struct pagevec		pvec;
-	pgoff_t			index;
-	pgoff_t			end;
-	loff_t			endoff;
-	loff_t			startoff = *offset;
-	loff_t			lastoff = startoff;
-	bool			found = false;
-
-	pagevec_init(&pvec, 0);
-
-	index = startoff >> PAGE_SHIFT;
-	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
-	end = (endoff - 1) >> PAGE_SHIFT;
-	do {
-		int		want;
-		unsigned	nr_pages;
-		unsigned int	i;
-
-		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
-		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
-					  want);
-		if (nr_pages == 0)
-			break;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page	*page = pvec.pages[i];
-			loff_t		b_offset;
-
-			/*
-			 * At this point, the page may be truncated or
-			 * invalidated (changing page->mapping to NULL),
-			 * or even swizzled back from swapper_space to tmpfs
-			 * file mapping. However, page->index will not change
-			 * because we have a reference on the page.
-			 *
-			 * If current page offset is beyond where we've ended,
-			 * we've found a hole.
-			 */
-			if (type == HOLE_OFF && lastoff < endoff &&
-			    lastoff < page_offset(pvec.pages[i])) {
-				found = true;
-				*offset = lastoff;
-				goto out;
-			}
-			/* Searching done if the page index is out of range. */
-			if (page->index > end)
-				goto out;
-
-			lock_page(page);
-			/*
-			 * Page truncated or invalidated(page->mapping == NULL).
-			 * We can freely skip it and proceed to check the next
-			 * page.
-			 */
-			if (unlikely(page->mapping != inode->i_mapping)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (!page_has_buffers(page)) {
-				unlock_page(page);
-				continue;
-			}
-
-			found = xfs_lookup_buffer_offset(page, &b_offset, type);
-			if (found) {
-				/*
-				 * The found offset may be less than the start
-				 * point to search if this is the first time to
-				 * come here.
-				 */
-				*offset = max_t(loff_t, startoff, b_offset);
-				unlock_page(page);
-				goto out;
-			}
-
-			/*
-			 * We either searching data but nothing was found, or
-			 * searching hole but found a data buffer.  In either
-			 * case, probably the next page contains the desired
-			 * things, update the last offset to it so.
-			 */
-			lastoff = page_offset(page) + PAGE_SIZE;
-			unlock_page(page);
-		}
-
-		/*
-		 * The number of returned pages less than our desired, search
-		 * done.
-		 */
-		if (nr_pages < want)
-			break;
-
-		index = pvec.pages[i - 1]->index + 1;
-		pagevec_release(&pvec);
-	} while (index <= end);
-
-	/* No page at lastoff and we are not done - we found a hole. */
-	if (type == HOLE_OFF && lastoff < endoff) {
-		*offset = lastoff;
-		found = true;
-	}
-out:
-	pagevec_release(&pvec);
-	return found;
-}
-
-/*
- * caller must lock inode with xfs_ilock_data_map_shared,
- * can we craft an appropriate ASSERT?
- *
- * end is because the VFS-level lseek interface is defined such that any
- * offset past i_size shall return -ENXIO, but we use this for quota code
- * which does not maintain i_size, and we want to SEEK_DATA past i_size.
- */
-loff_t
-__xfs_seek_hole_data(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end,
-	int			whence)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	loff_t			uninitialized_var(offset);
-	xfs_fileoff_t		fsbno;
-	xfs_filblks_t		lastbno;
-	int			error;
-
-	if (start >= end) {
-		error = -ENXIO;
-		goto out_error;
-	}
-
-	/*
-	 * Try to read extents from the first block indicated
-	 * by fsbno to the end block of the file.
-	 */
-	fsbno = XFS_B_TO_FSBT(mp, start);
-	lastbno = XFS_B_TO_FSB(mp, end);
-
-	for (;;) {
-		struct xfs_bmbt_irec	map[2];
-		int			nmap = 2;
-		unsigned int		i;
-
-		error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
-				       XFS_BMAPI_ENTIRE);
-		if (error)
-			goto out_error;
-
-		/* No extents at given offset, must be beyond EOF */
-		if (nmap == 0) {
-			error = -ENXIO;
-			goto out_error;
-		}
-
-		for (i = 0; i < nmap; i++) {
-			offset = max_t(loff_t, start,
-				       XFS_FSB_TO_B(mp, map[i].br_startoff));
-
-			/* Landed in the hole we wanted? */
-			if (whence == SEEK_HOLE &&
-			    map[i].br_startblock == HOLESTARTBLOCK)
-				goto out;
-
-			/* Landed in the data extent we wanted? */
-			if (whence == SEEK_DATA &&
-			    (map[i].br_startblock == DELAYSTARTBLOCK ||
-			     (map[i].br_state == XFS_EXT_NORM &&
-			      !isnullstartblock(map[i].br_startblock))))
-				goto out;
-
-			/*
-			 * Landed in an unwritten extent, try to search
-			 * for hole or data from page cache.
-			 */
-			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
-				if (xfs_find_get_desired_pgoff(inode, &map[i],
-				      whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
-							&offset))
-					goto out;
-			}
-		}
-
-		/*
-		 * We only received one extent out of the two requested. This
-		 * means we've hit EOF and didn't find what we are looking for.
-		 */
-		if (nmap == 1) {
-			/*
-			 * If we were looking for a hole, set offset to
-			 * the end of the file (i.e., there is an implicit
-			 * hole at the end of any file).
-		 	 */
-			if (whence == SEEK_HOLE) {
-				offset = end;
-				break;
-			}
-			/*
-			 * If we were looking for data, it's nowhere to be found
-			 */
-			ASSERT(whence == SEEK_DATA);
-			error = -ENXIO;
-			goto out_error;
-		}
-
-		ASSERT(i > 1);
-
-		/*
-		 * Nothing was found, proceed to the next round of search
-		 * if the next reading offset is not at or beyond EOF.
-		 */
-		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
-		start = XFS_FSB_TO_B(mp, fsbno);
-		if (start >= end) {
-			if (whence == SEEK_HOLE) {
-				offset = end;
-				break;
-			}
-			ASSERT(whence == SEEK_DATA);
-			error = -ENXIO;
-			goto out_error;
-		}
-	}
-
-out:
-	/*
-	 * If at this point we have found the hole we wanted, the returned
-	 * offset may be bigger than the file size as it may be aligned to
-	 * page boundary for unwritten extents.  We need to deal with this
-	 * situation in particular.
-	 */
-	if (whence == SEEK_HOLE)
-		offset = min_t(loff_t, offset, end);
-
-	return offset;
-
-out_error:
-	return error;
-}
-
-STATIC loff_t
-xfs_seek_hole_data(
-	struct file		*file,
-	loff_t			start,
-	int			whence)
-{
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	uint			lock;
-	loff_t			offset, end;
-	int			error = 0;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	lock = xfs_ilock_data_map_shared(ip);
-
-	end = i_size_read(inode);
-	offset = __xfs_seek_hole_data(inode, start, end, whence);
-	if (offset < 0) {
-		error = offset;
-		goto out_unlock;
-	}
-
-	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-
-out_unlock:
-	xfs_iunlock(ip, lock);
-
-	if (error)
-		return error;
-	return offset;
-}
-
 STATIC loff_t
 xfs_file_llseek(
 	struct file	*file,
 	loff_t		offset,
 	int		whence)
 {
+	struct inode		*inode = file->f_mapping->host;
+
+	if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
+		return -EIO;
+
 	switch (whence) {
-	case SEEK_END:
-	case SEEK_CUR:
-	case SEEK_SET:
+	default:
 		return generic_file_llseek(file, offset, whence);
 	case SEEK_HOLE:
+		offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
+		break;
 	case SEEK_DATA:
-		return xfs_seek_hole_data(file, offset, whence);
-	default:
-		return -EINVAL;
+		offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
+		break;
 	}
+
+	if (offset < 0)
+		return offset;
+	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 }
 
 /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 677d0bfe1c2d3..0ee453de239a1 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -445,9 +445,6 @@ int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 		     xfs_fsize_t isize, bool *did_zeroing);
 int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
 		bool *did_zero);
-loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,
-			     loff_t eof, int whence);
-
 
 /* from xfs_iops.c */
 extern void xfs_setup_inode(struct xfs_inode *ip);
-- 
GitLab


From 4086d90cffb8f48400d51fbab575fe50458512e3 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 29 Jun 2017 13:59:54 +0200
Subject: [PATCH 0569/1958] drm/atomic: Add missing drm_atomic_state_clear to
 atomic_remove_fb

All atomic state should be cleared when drm_modeset_backoff() is
called, because it drops all locks and the state becomes invalid.

The call to drm_atomic_state_clear was missing in atomic_remove_fb,
so add the missing call there.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170629115954.26029-1-maarten.lankhorst@linux.intel.com
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Fixes: db8f6403e88a ("drm: Convert drm_framebuffer_remove to atomic, v4.")
Cc: stable@vger.kernel.org # v4.12-rc1+
---
 drivers/gpu/drm/drm_framebuffer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index fc8ef42203ec0..b3ef4f1c2630f 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -832,6 +832,7 @@ static int atomic_remove_fb(struct drm_framebuffer *fb)
 		drm_atomic_clean_old_fb(dev, plane_mask, ret);
 
 	if (ret == -EDEADLK) {
+		drm_atomic_state_clear(state);
 		drm_modeset_backoff(&ctx);
 		goto retry;
 	}
-- 
GitLab


From d9c033a1343d91fb1c1eebe66823673c60b91915 Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Date: Mon, 22 May 2017 10:51:38 +0200
Subject: [PATCH 0570/1958] watchdog: bindings: dw_wdt: add reset lines

Document the reset lines holding the watchdog core in reset.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-watchdog@vger.kernel.org
Cc: devicetree@vger.kernel.org
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 Documentation/devicetree/bindings/watchdog/dw_wdt.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
index 08e16f684f2d7..eb0914420c7ca 100644
--- a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
@@ -10,6 +10,8 @@ Required Properties:
 Optional Properties:
 
 - interrupts	: The interrupt used for the watchdog timeout warning.
+- resets	: phandle pointing to the system reset controller with
+		line index for the watchdog.
 
 Example:
 
@@ -18,4 +20,5 @@ Example:
 		reg = <0xffd02000 0x1000>;
 		interrupts = <0 171 4>;
 		clocks = <&per_base_clk>;
+		resets = <&rst WDT0_RESET>;
 	};
-- 
GitLab


From 65a3b6935d920a37820226864eb607467e49ba50 Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Date: Mon, 22 May 2017 10:51:39 +0200
Subject: [PATCH 0571/1958] watchdog: dw_wdt: get reset lines from dt

The dw_wdt has an external reset line, that can keep the device in reset
and therefore rendering it useless and also is the only way of stopping
the watchdog once it was started.

Get the reset lines for this core from the devicetree. As these lines are
optional, use devm_reset_control_get_optional_shared. If the reset line
is not specified in the devicetree, the reset framework will just skip
deasserting and continue.
This way all users of the driver will continue to function without
any harm, even if the reset line is not specified in the devicetree.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Cc: linux-watchdog@vger.kernel.org
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/dw_wdt.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
index 914da3a4d3341..36be987ff9efc 100644
--- a/drivers/watchdog/dw_wdt.c
+++ b/drivers/watchdog/dw_wdt.c
@@ -29,6 +29,7 @@
 #include <linux/of.h>
 #include <linux/pm.h>
 #include <linux/platform_device.h>
+#include <linux/reset.h>
 #include <linux/watchdog.h>
 
 #define WDOG_CONTROL_REG_OFFSET		    0x00
@@ -54,6 +55,7 @@ struct dw_wdt {
 	struct clk		*clk;
 	unsigned long		rate;
 	struct watchdog_device	wdd;
+	struct reset_control	*rst;
 };
 
 #define to_dw_wdt(wdd)	container_of(wdd, struct dw_wdt, wdd)
@@ -234,6 +236,14 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 		goto out_disable_clk;
 	}
 
+	dw_wdt->rst = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
+	if (IS_ERR(dw_wdt->rst)) {
+		ret = PTR_ERR(dw_wdt->rst);
+		goto out_disable_clk;
+	}
+
+	reset_control_deassert(dw_wdt->rst);
+
 	wdd = &dw_wdt->wdd;
 	wdd->info = &dw_wdt_ident;
 	wdd->ops = &dw_wdt_ops;
@@ -279,6 +289,7 @@ static int dw_wdt_drv_remove(struct platform_device *pdev)
 	struct dw_wdt *dw_wdt = platform_get_drvdata(pdev);
 
 	watchdog_unregister_device(&dw_wdt->wdd);
+	reset_control_assert(dw_wdt->rst);
 	clk_disable_unprepare(dw_wdt->clk);
 
 	return 0;
-- 
GitLab


From 733403017399f18c1e31db2470b724e2605618b9 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 10 Jun 2017 21:04:32 -0700
Subject: [PATCH 0572/1958] watchdog: it87: Drop FSF mailing address

The FSF mailing address may change. Drop it.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/it87_wdt.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index b9878c41598fa..8c847f8c24705 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -24,10 +24,6 @@
  *	but WITHOUT ANY WARRANTY; without even the implied warranty of
  *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *	GNU General Public License for more details.
- *
- *	You should have received a copy of the GNU General Public License
- *	along with this program; if not, write to the Free Software
- *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-- 
GitLab


From 1d7b80394c48a56c705733cb6c044199ffbf1dfd Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 10 Jun 2017 21:04:33 -0700
Subject: [PATCH 0573/1958] watchdog: it87: Convert to use watchdog core
 infrastructure

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig    |   1 +
 drivers/watchdog/it87_wdt.c | 375 +++++++-----------------------------
 2 files changed, 72 insertions(+), 304 deletions(-)

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index e9da196279149..d347bb25ac2ee 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1069,6 +1069,7 @@ config IT8712F_WDT
 config IT87_WDT
 	tristate "IT87 Watchdog Timer"
 	depends on X86
+	select WATCHDOG_CORE
 	---help---
 	  This is the driver for the hardware watchdog on the ITE IT8620,
 	  IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726 and IT8728
diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index 8c847f8c24705..9a6523e70ffc0 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -32,26 +32,18 @@
 #include <linux/moduleparam.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/watchdog.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
-#include <linux/uaccess.h>
 #include <linux/io.h>
 
-
-#define WATCHDOG_VERSION	"1.14"
 #define WATCHDOG_NAME		"IT87 WDT"
-#define DRIVER_VERSION		WATCHDOG_NAME " driver, v" WATCHDOG_VERSION "\n"
-#define WD_MAGIC		'V'
 
 /* Defaults for Module Parameter */
 #define DEFAULT_NOGAMEPORT	0
 #define DEFAULT_NOCIR		0
-#define DEFAULT_EXCLUSIVE	1
 #define DEFAULT_TIMEOUT		60
 #define DEFAULT_TESTMODE	0
 #define DEFAULT_NOWAYOUT	WATCHDOG_NOWAYOUT
@@ -129,23 +121,17 @@
 #define GP_BASE_DEFAULT	0x0201
 
 /* wdt_status */
-#define WDTS_TIMER_RUN	0
-#define WDTS_DEV_OPEN	1
-#define WDTS_KEEPALIVE	2
-#define WDTS_LOCKED	3
-#define WDTS_USE_GP	4
-#define WDTS_EXPECTED	5
-#define WDTS_USE_CIR	6
-
-static	unsigned int base, gpact, ciract, max_units, chip_type;
-static	unsigned long wdt_status;
-
-static	int nogameport = DEFAULT_NOGAMEPORT;
-static int nocir      = DEFAULT_NOCIR;
-static	int exclusive  = DEFAULT_EXCLUSIVE;
-static	int timeout    = DEFAULT_TIMEOUT;
-static	int testmode   = DEFAULT_TESTMODE;
-static	bool nowayout   = DEFAULT_NOWAYOUT;
+#define WDTS_USE_GP	0
+#define WDTS_USE_CIR	1
+
+static unsigned int base, gpact, ciract, max_units, chip_type;
+static unsigned long wdt_status;
+
+static int nogameport  = DEFAULT_NOGAMEPORT;
+static int nocir       = DEFAULT_NOCIR;
+static unsigned int timeout = DEFAULT_TIMEOUT;
+static int testmode    = DEFAULT_TESTMODE;
+static bool nowayout    = DEFAULT_NOWAYOUT;
 
 module_param(nogameport, int, 0);
 MODULE_PARM_DESC(nogameport, "Forbid the activation of game port, default="
@@ -153,9 +139,6 @@ MODULE_PARM_DESC(nogameport, "Forbid the activation of game port, default="
 module_param(nocir, int, 0);
 MODULE_PARM_DESC(nocir, "Forbid the use of Consumer IR interrupts to reset timer, default="
 		__MODULE_STRING(DEFAULT_NOCIR));
-module_param(exclusive, int, 0);
-MODULE_PARM_DESC(exclusive, "Watchdog exclusive device open, default="
-		__MODULE_STRING(DEFAULT_EXCLUSIVE));
 module_param(timeout, int, 0);
 MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds, default="
 		__MODULE_STRING(DEFAULT_TIMEOUT));
@@ -227,7 +210,7 @@ static inline void superio_outw(int val, int reg)
 }
 
 /* Internal function, should be called after superio_select(GPIO) */
-static void wdt_update_timeout(void)
+static void _wdt_update_timeout(void)
 {
 	unsigned char cfg = WDT_KRST;
 	int tm = timeout;
@@ -249,6 +232,21 @@ static void wdt_update_timeout(void)
 		superio_outb(tm>>8, WDTVALMSB);
 }
 
+static int wdt_update_timeout(void)
+{
+	int ret;
+
+	ret = superio_enter();
+	if (ret)
+		return ret;
+
+	superio_select(GPIO);
+	_wdt_update_timeout();
+	superio_exit();
+
+	return 0;
+}
+
 static int wdt_round_time(int t)
 {
 	t += 59;
@@ -258,25 +256,22 @@ static int wdt_round_time(int t)
 
 /* watchdog timer handling */
 
-static void wdt_keepalive(void)
+static int wdt_keepalive(struct watchdog_device *wdd)
 {
+	int ret = 0;
+
 	if (test_bit(WDTS_USE_GP, &wdt_status))
 		inb(base);
 	else if (test_bit(WDTS_USE_CIR, &wdt_status))
 		/* The timer reloads with around 5 msec delay */
 		outb(0x55, CIR_DR(base));
-	else {
-		if (superio_enter())
-			return;
+	else
+		ret = wdt_update_timeout();
 
-		superio_select(GPIO);
-		wdt_update_timeout();
-		superio_exit();
-	}
-	set_bit(WDTS_KEEPALIVE, &wdt_status);
+	return ret;
 }
 
-static int wdt_start(void)
+static int wdt_start(struct watchdog_device *wdd)
 {
 	int ret = superio_enter();
 	if (ret)
@@ -287,14 +282,15 @@ static int wdt_start(void)
 		superio_outb(WDT_GAMEPORT, WDTCTRL);
 	else if (test_bit(WDTS_USE_CIR, &wdt_status))
 		superio_outb(WDT_CIRINT, WDTCTRL);
-	wdt_update_timeout();
+
+	_wdt_update_timeout();
 
 	superio_exit();
 
 	return 0;
 }
 
-static int wdt_stop(void)
+static int wdt_stop(struct watchdog_device *wdd)
 {
 	int ret = superio_enter();
 	if (ret)
@@ -321,280 +317,49 @@ static int wdt_stop(void)
  *	Used within WDIOC_SETTIMEOUT watchdog device ioctl.
  */
 
-static int wdt_set_timeout(int t)
+static int wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
 {
-	if (t < 1 || t > max_units * 60)
-		return -EINVAL;
+	int ret = 0;
 
 	if (t > max_units)
-		timeout = wdt_round_time(t);
-	else
-		timeout = t;
-
-	if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-		int ret = superio_enter();
-		if (ret)
-			return ret;
-
-		superio_select(GPIO);
-		wdt_update_timeout();
-		superio_exit();
-	}
-	return 0;
-}
-
-/**
- *	wdt_get_status - determines the status supported by watchdog ioctl
- *	@status: status returned to user space
- *
- *	The status bit of the device does not allow to distinguish
- *	between a regular system reset and a watchdog forced reset.
- *	But, in test mode it is useful, so it is supported through
- *	WDIOC_GETSTATUS watchdog ioctl. Additionally the driver
- *	reports the keepalive signal and the acception of the magic.
- *
- *	Used within WDIOC_GETSTATUS watchdog device ioctl.
- */
-
-static int wdt_get_status(int *status)
-{
-	*status = 0;
-	if (testmode) {
-		int ret = superio_enter();
-		if (ret)
-			return ret;
-
-		superio_select(GPIO);
-		if (superio_inb(WDTCTRL) & WDT_ZERO) {
-			superio_outb(0x00, WDTCTRL);
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-			*status |= WDIOF_CARDRESET;
-		}
-
-		superio_exit();
-	}
-	if (test_and_clear_bit(WDTS_KEEPALIVE, &wdt_status))
-		*status |= WDIOF_KEEPALIVEPING;
-	if (test_bit(WDTS_EXPECTED, &wdt_status))
-		*status |= WDIOF_MAGICCLOSE;
-	return 0;
-}
-
-/* /dev/watchdog handling */
-
-/**
- *	wdt_open - watchdog file_operations .open
- *	@inode: inode of the device
- *	@file: file handle to the device
- *
- *	The watchdog timer starts by opening the device.
- *
- *	Used within the file operation of the watchdog device.
- */
-
-static int wdt_open(struct inode *inode, struct file *file)
-{
-	if (exclusive && test_and_set_bit(WDTS_DEV_OPEN, &wdt_status))
-		return -EBUSY;
-	if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
-		int ret;
-		if (nowayout && !test_and_set_bit(WDTS_LOCKED, &wdt_status))
-			__module_get(THIS_MODULE);
-
-		ret = wdt_start();
-		if (ret) {
-			clear_bit(WDTS_LOCKED, &wdt_status);
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-			clear_bit(WDTS_DEV_OPEN, &wdt_status);
-			return ret;
-		}
-	}
-	return nonseekable_open(inode, file);
-}
+		t = wdt_round_time(t);
 
-/**
- *	wdt_release - watchdog file_operations .release
- *	@inode: inode of the device
- *	@file: file handle to the device
- *
- *	Closing the watchdog device either stops the watchdog timer
- *	or in the case, that nowayout is set or the magic character
- *	wasn't written, a critical warning about an running watchdog
- *	timer is given.
- *
- *	Used within the file operation of the watchdog device.
- */
+	wdd->timeout = t;
 
-static int wdt_release(struct inode *inode, struct file *file)
-{
-	if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-		if (test_and_clear_bit(WDTS_EXPECTED, &wdt_status)) {
-			int ret = wdt_stop();
-			if (ret) {
-				/*
-				 * Stop failed. Just keep the watchdog alive
-				 * and hope nothing bad happens.
-				 */
-				set_bit(WDTS_EXPECTED, &wdt_status);
-				wdt_keepalive();
-				return ret;
-			}
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-		} else {
-			wdt_keepalive();
-			pr_crit("unexpected close, not stopping watchdog!\n");
-		}
-	}
-	clear_bit(WDTS_DEV_OPEN, &wdt_status);
-	return 0;
-}
-
-/**
- *	wdt_write - watchdog file_operations .write
- *	@file: file handle to the watchdog
- *	@buf: buffer to write
- *	@count: count of bytes
- *	@ppos: pointer to the position to write. No seeks allowed
- *
- *	A write to a watchdog device is defined as a keepalive signal. Any
- *	write of data will do, as we don't define content meaning.
- *
- *	Used within the file operation of the watchdog device.
- */
+	if (watchdog_hw_running(wdd))
+		ret = wdt_update_timeout();
 
-static ssize_t wdt_write(struct file *file, const char __user *buf,
-			    size_t count, loff_t *ppos)
-{
-	if (count) {
-		clear_bit(WDTS_EXPECTED, &wdt_status);
-		wdt_keepalive();
-	}
-	if (!nowayout) {
-		size_t ofs;
-
-	/* note: just in case someone wrote the magic character long ago */
-		for (ofs = 0; ofs != count; ofs++) {
-			char c;
-			if (get_user(c, buf + ofs))
-				return -EFAULT;
-			if (c == WD_MAGIC)
-				set_bit(WDTS_EXPECTED, &wdt_status);
-		}
-	}
-	return count;
+	return ret;
 }
 
 static const struct watchdog_info ident = {
 	.options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING,
-	.firmware_version =	1,
+	.firmware_version = 1,
 	.identity = WATCHDOG_NAME,
 };
 
-/**
- *	wdt_ioctl - watchdog file_operations .unlocked_ioctl
- *	@file: file handle to the device
- *	@cmd: watchdog command
- *	@arg: argument pointer
- *
- *	The watchdog API defines a common set of functions for all watchdogs
- *	according to their available features.
- *
- *	Used within the file operation of the watchdog device.
- */
-
-static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int rc = 0, status, new_options, new_timeout;
-	union {
-		struct watchdog_info __user *ident;
-		int __user *i;
-	} uarg;
-
-	uarg.i = (int __user *)arg;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		return copy_to_user(uarg.ident,
-				    &ident, sizeof(ident)) ? -EFAULT : 0;
-
-	case WDIOC_GETSTATUS:
-		rc = wdt_get_status(&status);
-		if (rc)
-			return rc;
-		return put_user(status, uarg.i);
-
-	case WDIOC_GETBOOTSTATUS:
-		return put_user(0, uarg.i);
-
-	case WDIOC_KEEPALIVE:
-		wdt_keepalive();
-		return 0;
-
-	case WDIOC_SETOPTIONS:
-		if (get_user(new_options, uarg.i))
-			return -EFAULT;
-
-		switch (new_options) {
-		case WDIOS_DISABLECARD:
-			if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-				rc = wdt_stop();
-				if (rc)
-					return rc;
-			}
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-			return 0;
-
-		case WDIOS_ENABLECARD:
-			if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
-				rc = wdt_start();
-				if (rc) {
-					clear_bit(WDTS_TIMER_RUN, &wdt_status);
-					return rc;
-				}
-			}
-			return 0;
-
-		default:
-			return -EFAULT;
-		}
-
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_timeout, uarg.i))
-			return -EFAULT;
-		rc = wdt_set_timeout(new_timeout);
-	case WDIOC_GETTIMEOUT:
-		if (put_user(timeout, uarg.i))
-			return -EFAULT;
-		return rc;
+static struct watchdog_ops wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = wdt_start,
+	.stop = wdt_stop,
+	.ping = wdt_keepalive,
+	.set_timeout = wdt_set_timeout,
+};
 
-	default:
-		return -ENOTTY;
-	}
-}
+static struct watchdog_device wdt_dev = {
+	.info = &ident,
+	.ops = &wdt_ops,
+	.min_timeout = 1,
+};
 
 static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
-	void *unused)
+			  void *unused)
 {
 	if (code == SYS_DOWN || code == SYS_HALT)
-		wdt_stop();
+		wdt_stop(&wdt_dev);
 	return NOTIFY_DONE;
 }
 
-static const struct file_operations wdt_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.write		= wdt_write,
-	.unlocked_ioctl	= wdt_ioctl,
-	.open		= wdt_open,
-	.release	= wdt_release,
-};
-
-static struct miscdevice wdt_miscdev = {
-	.minor		= WATCHDOG_MINOR,
-	.name		= "watchdog",
-	.fops		= &wdt_fops,
-};
-
 static struct notifier_block wdt_notifier = {
 	.notifier_call = wdt_notify_sys,
 };
@@ -708,19 +473,15 @@ static int __init it87_wdt_init(void)
 	if (timeout > max_units)
 		timeout = wdt_round_time(timeout);
 
+	wdt_dev.timeout = timeout;
+	wdt_dev.max_timeout = max_units * 60;
+
 	rc = register_reboot_notifier(&wdt_notifier);
 	if (rc) {
 		pr_err("Cannot register reboot notifier (err=%d)\n", rc);
 		goto err_out_region;
 	}
 
-	rc = misc_register(&wdt_miscdev);
-	if (rc) {
-		pr_err("Cannot register miscdev on minor=%d (err=%d)\n",
-		       wdt_miscdev.minor, rc);
-		goto err_out_reboot;
-	}
-
 	/* Initialize CIR to use it as keepalive source */
 	if (test_bit(WDTS_USE_CIR, &wdt_status)) {
 		outb(0x00, CIR_RCR(base));
@@ -732,9 +493,15 @@ static int __init it87_wdt_init(void)
 		outb(0x09, CIR_IER(base));
 	}
 
-	pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d exclusive=%d nogameport=%d nocir=%d)\n",
+	rc = watchdog_register_device(&wdt_dev);
+	if (rc) {
+		pr_err("Cannot register watchdog device (err=%d)\n", rc);
+		goto err_out_reboot;
+	}
+
+	pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d nogameport=%d nocir=%d)\n",
 		chip_type, chip_rev, timeout,
-		nowayout, testmode, exclusive, nogameport, nocir);
+		nowayout, testmode, nogameport, nocir);
 
 	superio_exit();
 	return 0;
@@ -778,7 +545,7 @@ static void __exit it87_wdt_exit(void)
 		superio_exit();
 	}
 
-	misc_deregister(&wdt_miscdev);
+	watchdog_unregister_device(&wdt_dev);
 	unregister_reboot_notifier(&wdt_notifier);
 
 	if (test_bit(WDTS_USE_GP, &wdt_status))
-- 
GitLab


From 893dc8b5c9785e9ecf5ddfebbd78da6eddcdbac6 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 10 Jun 2017 21:04:34 -0700
Subject: [PATCH 0574/1958] watchdog: it87: Drop support for resetting watchdog
 though CIR and Game port

Resetting the watchdog timer on CIR interrupts or on game port interrupts
is not not supported on recent chips, and doesn't really tell if the system
is stable. On top of that, at least the bit to enable resetting the
watchdog through the game port is used differently on recent chips.
Drop resetting the watchdog on CIR  or game port interrupts to simplify
the code.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig    |   3 -
 drivers/watchdog/it87_wdt.c | 226 +++---------------------------------
 2 files changed, 19 insertions(+), 210 deletions(-)

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index d347bb25ac2ee..e5749515d3ce7 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1075,9 +1075,6 @@ config IT87_WDT
 	  IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726 and IT8728
 	  Super I/O chips.
 
-	  If the driver does not work, then make sure that the game port in
-	  the BIOS is enabled.
-
 	  This watchdog simply watches your kernel to make sure it doesn't
 	  freeze, and if it does, it reboots your computer after a certain
 	  amount of time.
diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index 9a6523e70ffc0..29aea28628c85 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -33,7 +33,6 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/ioport.h>
 #include <linux/watchdog.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
@@ -42,8 +41,6 @@
 #define WATCHDOG_NAME		"IT87 WDT"
 
 /* Defaults for Module Parameter */
-#define DEFAULT_NOGAMEPORT	0
-#define DEFAULT_NOCIR		0
 #define DEFAULT_TIMEOUT		60
 #define DEFAULT_TESTMODE	0
 #define DEFAULT_NOWAYOUT	WATCHDOG_NOWAYOUT
@@ -54,15 +51,11 @@
 
 /* Logical device Numbers LDN */
 #define GPIO		0x07
-#define GAMEPORT	0x09
-#define CIR		0x0a
 
 /* Configuration Registers and Functions */
 #define LDNREG		0x07
 #define CHIPID		0x20
 #define CHIPREV		0x22
-#define ACTREG		0x30
-#define BASEREG		0x60
 
 /* Chip Id numbers */
 #define NO_DEV_ID	0xffff
@@ -84,14 +77,6 @@
 #define WDTVALLSB	0x73
 #define WDTVALMSB	0x74
 
-/* GPIO Bits WDTCTRL */
-#define WDT_CIRINT	0x80
-#define WDT_MOUSEINT	0x40
-#define WDT_KYBINT	0x20
-#define WDT_GAMEPORT	0x10 /* not in it8718, it8720, it8721, it8728 */
-#define WDT_FORCE	0x02
-#define WDT_ZERO	0x01
-
 /* GPIO Bits WDTCFG */
 #define WDT_TOV1	0x80
 #define WDT_KRST	0x40
@@ -99,46 +84,12 @@
 #define WDT_PWROK	0x10 /* not in it8721 */
 #define WDT_INT_MASK	0x0f
 
-/* CIR Configuration Register LDN=0x0a */
-#define CIR_ILS		0x70
-
-/* The default Base address is not always available, we use this */
-#define CIR_BASE	0x0208
-
-/* CIR Controller */
-#define CIR_DR(b)	(b)
-#define CIR_IER(b)	(b + 1)
-#define CIR_RCR(b)	(b + 2)
-#define CIR_TCR1(b)	(b + 3)
-#define CIR_TCR2(b)	(b + 4)
-#define CIR_TSR(b)	(b + 5)
-#define CIR_RSR(b)	(b + 6)
-#define CIR_BDLR(b)	(b + 5)
-#define CIR_BDHR(b)	(b + 6)
-#define CIR_IIR(b)	(b + 7)
-
-/* Default Base address of Game port */
-#define GP_BASE_DEFAULT	0x0201
+static unsigned int max_units, chip_type;
 
-/* wdt_status */
-#define WDTS_USE_GP	0
-#define WDTS_USE_CIR	1
-
-static unsigned int base, gpact, ciract, max_units, chip_type;
-static unsigned long wdt_status;
-
-static int nogameport  = DEFAULT_NOGAMEPORT;
-static int nocir       = DEFAULT_NOCIR;
 static unsigned int timeout = DEFAULT_TIMEOUT;
-static int testmode    = DEFAULT_TESTMODE;
-static bool nowayout    = DEFAULT_NOWAYOUT;
-
-module_param(nogameport, int, 0);
-MODULE_PARM_DESC(nogameport, "Forbid the activation of game port, default="
-		__MODULE_STRING(DEFAULT_NOGAMEPORT));
-module_param(nocir, int, 0);
-MODULE_PARM_DESC(nocir, "Forbid the use of Consumer IR interrupts to reset timer, default="
-		__MODULE_STRING(DEFAULT_NOCIR));
+static int testmode = DEFAULT_TESTMODE;
+static bool nowayout = DEFAULT_NOWAYOUT;
+
 module_param(timeout, int, 0);
 MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds, default="
 		__MODULE_STRING(DEFAULT_TIMEOUT));
@@ -210,29 +161,28 @@ static inline void superio_outw(int val, int reg)
 }
 
 /* Internal function, should be called after superio_select(GPIO) */
-static void _wdt_update_timeout(void)
+static void _wdt_update_timeout(unsigned int t)
 {
 	unsigned char cfg = WDT_KRST;
-	int tm = timeout;
 
 	if (testmode)
 		cfg = 0;
 
-	if (tm <= max_units)
+	if (t <= max_units)
 		cfg |= WDT_TOV1;
 	else
-		tm /= 60;
+		t /= 60;
 
 	if (chip_type != IT8721_ID)
 		cfg |= WDT_PWROK;
 
 	superio_outb(cfg, WDTCFG);
-	superio_outb(tm, WDTVALLSB);
+	superio_outb(t, WDTVALLSB);
 	if (max_units > 255)
-		superio_outb(tm>>8, WDTVALMSB);
+		superio_outb(t >> 8, WDTVALMSB);
 }
 
-static int wdt_update_timeout(void)
+static int wdt_update_timeout(unsigned int t)
 {
 	int ret;
 
@@ -241,7 +191,7 @@ static int wdt_update_timeout(void)
 		return ret;
 
 	superio_select(GPIO);
-	_wdt_update_timeout();
+	_wdt_update_timeout(t);
 	superio_exit();
 
 	return 0;
@@ -256,55 +206,14 @@ static int wdt_round_time(int t)
 
 /* watchdog timer handling */
 
-static int wdt_keepalive(struct watchdog_device *wdd)
-{
-	int ret = 0;
-
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		inb(base);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status))
-		/* The timer reloads with around 5 msec delay */
-		outb(0x55, CIR_DR(base));
-	else
-		ret = wdt_update_timeout();
-
-	return ret;
-}
-
 static int wdt_start(struct watchdog_device *wdd)
 {
-	int ret = superio_enter();
-	if (ret)
-		return ret;
-
-	superio_select(GPIO);
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		superio_outb(WDT_GAMEPORT, WDTCTRL);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status))
-		superio_outb(WDT_CIRINT, WDTCTRL);
-
-	_wdt_update_timeout();
-
-	superio_exit();
-
-	return 0;
+	return wdt_update_timeout(wdd->timeout);
 }
 
 static int wdt_stop(struct watchdog_device *wdd)
 {
-	int ret = superio_enter();
-	if (ret)
-		return ret;
-
-	superio_select(GPIO);
-	superio_outb(0x00, WDTCTRL);
-	superio_outb(WDT_TOV1, WDTCFG);
-	superio_outb(0x00, WDTVALLSB);
-	if (max_units > 255)
-		superio_outb(0x00, WDTVALMSB);
-
-	superio_exit();
-	return 0;
+	return wdt_update_timeout(0);
 }
 
 /**
@@ -327,7 +236,7 @@ static int wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
 	wdd->timeout = t;
 
 	if (watchdog_hw_running(wdd))
-		ret = wdt_update_timeout();
+		ret = wdt_update_timeout(t);
 
 	return ret;
 }
@@ -342,7 +251,6 @@ static struct watchdog_ops wdt_ops = {
 	.owner = THIS_MODULE,
 	.start = wdt_start,
 	.stop = wdt_stop,
-	.ping = wdt_keepalive,
 	.set_timeout = wdt_set_timeout,
 };
 
@@ -366,12 +274,8 @@ static struct notifier_block wdt_notifier = {
 
 static int __init it87_wdt_init(void)
 {
-	int rc = 0;
-	int try_gameport = !nogameport;
 	u8  chip_rev;
-	int gp_rreq_fail = 0;
-
-	wdt_status = 0;
+	int rc;
 
 	rc = superio_enter();
 	if (rc)
@@ -399,7 +303,6 @@ static int __init it87_wdt_init(void)
 	case IT8728_ID:
 	case IT8783_ID:
 		max_units = 65535;
-		try_gameport = 0;
 		break;
 	case IT8705_ID:
 		pr_err("Unsupported Chip found, Chip %04x Revision %02x\n",
@@ -421,48 +324,7 @@ static int __init it87_wdt_init(void)
 	superio_select(GPIO);
 	superio_outb(WDT_TOV1, WDTCFG);
 	superio_outb(0x00, WDTCTRL);
-
-	/* First try to get Gameport support */
-	if (try_gameport) {
-		superio_select(GAMEPORT);
-		base = superio_inw(BASEREG);
-		if (!base) {
-			base = GP_BASE_DEFAULT;
-			superio_outw(base, BASEREG);
-		}
-		gpact = superio_inb(ACTREG);
-		superio_outb(0x01, ACTREG);
-		if (request_region(base, 1, WATCHDOG_NAME))
-			set_bit(WDTS_USE_GP, &wdt_status);
-		else
-			gp_rreq_fail = 1;
-	}
-
-	/* If we haven't Gameport support, try to get CIR support */
-	if (!nocir && !test_bit(WDTS_USE_GP, &wdt_status)) {
-		if (!request_region(CIR_BASE, 8, WATCHDOG_NAME)) {
-			if (gp_rreq_fail)
-				pr_err("I/O Address 0x%04x and 0x%04x already in use\n",
-				       base, CIR_BASE);
-			else
-				pr_err("I/O Address 0x%04x already in use\n",
-				       CIR_BASE);
-			rc = -EIO;
-			goto err_out;
-		}
-		base = CIR_BASE;
-
-		superio_select(CIR);
-		superio_outw(base, BASEREG);
-		superio_outb(0x00, CIR_ILS);
-		ciract = superio_inb(ACTREG);
-		superio_outb(0x01, ACTREG);
-		if (gp_rreq_fail) {
-			superio_select(GAMEPORT);
-			superio_outb(gpact, ACTREG);
-		}
-		set_bit(WDTS_USE_CIR, &wdt_status);
-	}
+	superio_exit();
 
 	if (timeout < 1 || timeout > max_units * 60) {
 		timeout = DEFAULT_TIMEOUT;
@@ -479,18 +341,7 @@ static int __init it87_wdt_init(void)
 	rc = register_reboot_notifier(&wdt_notifier);
 	if (rc) {
 		pr_err("Cannot register reboot notifier (err=%d)\n", rc);
-		goto err_out_region;
-	}
-
-	/* Initialize CIR to use it as keepalive source */
-	if (test_bit(WDTS_USE_CIR, &wdt_status)) {
-		outb(0x00, CIR_RCR(base));
-		outb(0xc0, CIR_TCR1(base));
-		outb(0x5c, CIR_TCR2(base));
-		outb(0x10, CIR_IER(base));
-		outb(0x00, CIR_BDHR(base));
-		outb(0x01, CIR_BDLR(base));
-		outb(0x09, CIR_IER(base));
+		return rc;
 	}
 
 	rc = watchdog_register_device(&wdt_dev);
@@ -499,59 +350,20 @@ static int __init it87_wdt_init(void)
 		goto err_out_reboot;
 	}
 
-	pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d nogameport=%d nocir=%d)\n",
-		chip_type, chip_rev, timeout,
-		nowayout, testmode, nogameport, nocir);
+	pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d)\n",
+		chip_type, chip_rev, timeout, nowayout, testmode);
 
-	superio_exit();
 	return 0;
 
 err_out_reboot:
 	unregister_reboot_notifier(&wdt_notifier);
-err_out_region:
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		release_region(base, 1);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status)) {
-		release_region(base, 8);
-		superio_select(CIR);
-		superio_outb(ciract, ACTREG);
-	}
-err_out:
-	if (try_gameport) {
-		superio_select(GAMEPORT);
-		superio_outb(gpact, ACTREG);
-	}
-
-	superio_exit();
 	return rc;
 }
 
 static void __exit it87_wdt_exit(void)
 {
-	if (superio_enter() == 0) {
-		superio_select(GPIO);
-		superio_outb(0x00, WDTCTRL);
-		superio_outb(0x00, WDTCFG);
-		superio_outb(0x00, WDTVALLSB);
-		if (max_units > 255)
-			superio_outb(0x00, WDTVALMSB);
-		if (test_bit(WDTS_USE_GP, &wdt_status)) {
-			superio_select(GAMEPORT);
-			superio_outb(gpact, ACTREG);
-		} else if (test_bit(WDTS_USE_CIR, &wdt_status)) {
-			superio_select(CIR);
-			superio_outb(ciract, ACTREG);
-		}
-		superio_exit();
-	}
-
 	watchdog_unregister_device(&wdt_dev);
 	unregister_reboot_notifier(&wdt_notifier);
-
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		release_region(base, 1);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status))
-		release_region(base, 8);
 }
 
 module_init(it87_wdt_init);
-- 
GitLab


From 1123c514b11201758674e2b34f297b94509d81db Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 10 Jun 2017 21:04:35 -0700
Subject: [PATCH 0575/1958] watchdog: it87: Use infrastructure to stop watchdog
 on reboot

Use watchdog_stop_on_reboot() to stop the watchdog on reboot instead
of registering a driver-specific notifier.

While at it, reorder remaining include files alphabetically.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/it87_wdt.c | 34 +++++-----------------------------
 1 file changed, 5 insertions(+), 29 deletions(-)

diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index 29aea28628c85..07f4727eb7f4a 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -28,15 +28,13 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/watchdog.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-#include <linux/io.h>
 
 #define WATCHDOG_NAME		"IT87 WDT"
 
@@ -260,18 +258,6 @@ static struct watchdog_device wdt_dev = {
 	.min_timeout = 1,
 };
 
-static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
-			  void *unused)
-{
-	if (code == SYS_DOWN || code == SYS_HALT)
-		wdt_stop(&wdt_dev);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block wdt_notifier = {
-	.notifier_call = wdt_notify_sys,
-};
-
 static int __init it87_wdt_init(void)
 {
 	u8  chip_rev;
@@ -338,32 +324,22 @@ static int __init it87_wdt_init(void)
 	wdt_dev.timeout = timeout;
 	wdt_dev.max_timeout = max_units * 60;
 
-	rc = register_reboot_notifier(&wdt_notifier);
-	if (rc) {
-		pr_err("Cannot register reboot notifier (err=%d)\n", rc);
-		return rc;
-	}
-
+	watchdog_stop_on_reboot(&wdt_dev);
 	rc = watchdog_register_device(&wdt_dev);
 	if (rc) {
 		pr_err("Cannot register watchdog device (err=%d)\n", rc);
-		goto err_out_reboot;
+		return rc;
 	}
 
 	pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d)\n",
 		chip_type, chip_rev, timeout, nowayout, testmode);
 
 	return 0;
-
-err_out_reboot:
-	unregister_reboot_notifier(&wdt_notifier);
-	return rc;
 }
 
 static void __exit it87_wdt_exit(void)
 {
 	watchdog_unregister_device(&wdt_dev);
-	unregister_reboot_notifier(&wdt_notifier);
 }
 
 module_init(it87_wdt_init);
-- 
GitLab


From cddda07c7b31828f08c18f5898df0e457d280ada Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 10 Jun 2017 21:04:36 -0700
Subject: [PATCH 0576/1958] watchdog: it87: Add support for various Super-IO
 chips

Add support for IT8607, IT8622, IT8625, IT8628, IT8655, IT8665,
and IT8686.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig    |  7 ++++---
 drivers/watchdog/it87_wdt.c | 19 +++++++++++++++++--
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index e5749515d3ce7..5ca9fb7c11802 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1071,9 +1071,10 @@ config IT87_WDT
 	depends on X86
 	select WATCHDOG_CORE
 	---help---
-	  This is the driver for the hardware watchdog on the ITE IT8620,
-	  IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726 and IT8728
-	  Super I/O chips.
+	  This is the driver for the hardware watchdog on the ITE IT8607,
+	  IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, IT8686, IT8702,
+	  IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, IT8728, and
+	  IT8783 Super I/O chips.
 
 	  This watchdog simply watches your kernel to make sure it doesn't
 	  freeze, and if it does, it reboots your computer after a certain
diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index 07f4727eb7f4a..dd1e7eaef50fc 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -12,8 +12,9 @@
  *		    http://www.ite.com.tw/
  *
  *	Support of the watchdog timers, which are available on
- *	IT8620, IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726,
- *	IT8728 and IT8783.
+ *	IT8607, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, IT8686,
+ *	IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, IT8728,
+ *	and IT8783.
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -57,7 +58,14 @@
 
 /* Chip Id numbers */
 #define NO_DEV_ID	0xffff
+#define IT8607_ID	0x8607
 #define IT8620_ID	0x8620
+#define IT8622_ID	0x8622
+#define IT8625_ID	0x8625
+#define IT8628_ID	0x8628
+#define IT8655_ID	0x8655
+#define IT8665_ID	0x8665
+#define IT8686_ID	0x8686
 #define IT8702_ID	0x8702
 #define IT8705_ID	0x8705
 #define IT8712_ID	0x8712
@@ -282,7 +290,14 @@ static int __init it87_wdt_init(void)
 	case IT8726_ID:
 		max_units = 65535;
 		break;
+	case IT8607_ID:
 	case IT8620_ID:
+	case IT8622_ID:
+	case IT8625_ID:
+	case IT8628_ID:
+	case IT8655_ID:
+	case IT8665_ID:
+	case IT8686_ID:
 	case IT8718_ID:
 	case IT8720_ID:
 	case IT8721_ID:
-- 
GitLab


From 65360944c19814a6d3c0a58c5a983f7198d29c51 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 7 Jun 2017 15:04:15 +0530
Subject: [PATCH 0577/1958] watchdog: meson: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/meson_gxbb_wdt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c
index 45d47664a00a1..69a5a57f14462 100644
--- a/drivers/watchdog/meson_gxbb_wdt.c
+++ b/drivers/watchdog/meson_gxbb_wdt.c
@@ -203,7 +203,9 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
 	if (IS_ERR(data->clk))
 		return PTR_ERR(data->clk);
 
-	clk_prepare_enable(data->clk);
+	ret = clk_prepare_enable(data->clk);
+	if (ret)
+		return ret;
 
 	platform_set_drvdata(pdev, data);
 
-- 
GitLab


From 8f11eb58ad5747bab622910cc2efaa45e6a21c1f Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 6 Jun 2017 15:47:53 +0530
Subject: [PATCH 0578/1958] watchdog: davinci: Handle return value of
 clk_prepare_enable

clk_prepare_enable() can fail here and we must check its return value.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/davinci_wdt.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index 0e731d797a2a3..1ba9ead7d75a3 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -173,7 +173,11 @@ static int davinci_wdt_probe(struct platform_device *pdev)
 		return PTR_ERR(davinci_wdt->clk);
 	}
 
-	clk_prepare_enable(davinci_wdt->clk);
+	ret = clk_prepare_enable(davinci_wdt->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare clock\n");
+		return ret;
+	}
 
 	platform_set_drvdata(pdev, davinci_wdt);
 
-- 
GitLab


From 737bcff5faaeea121ccf7c5885507d438c60f06b Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 6 Jun 2017 16:08:31 +0530
Subject: [PATCH 0579/1958] watchdog: davinci: Add missing
 clk_disable_unprepare().

davinci_wdt_probe() can fail here and we must disable clock.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/davinci_wdt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index 1ba9ead7d75a3..2f46487af86d0 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -202,8 +202,10 @@ static int davinci_wdt_probe(struct platform_device *pdev)
 		return PTR_ERR(davinci_wdt->base);
 
 	ret = watchdog_register_device(wdd);
-	if (ret < 0)
+	if (ret < 0) {
+		clk_disable_unprepare(davinci_wdt->clk);
 		dev_err(dev, "cannot register watchdog device\n");
+	}
 
 	return ret;
 }
-- 
GitLab


From 81667e9c8ad827365f2d1e8b924caf062a19c593 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 3 Jul 2017 13:54:28 +0300
Subject: [PATCH 0580/1958] mtd: nand: mtk: release lock on error path

We only want to hold the lock on the success path, not this error path.

Fixes: 7ec4a37c5d71 ("mtd: nand: mediatek: add support for different MTK NAND FLASH Controller IP")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/mtk_ecc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index f38e2bb1953e9..6c3a4aab0b487 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -273,8 +273,10 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 	mtk_ecc_wait_idle(ecc, op);
 
 	ret = mtk_ecc_config(ecc, config);
-	if (ret)
+	if (ret) {
+		mutex_unlock(&ecc->lock);
 		return ret;
+	}
 
 	if (config->mode != ECC_NFI_MODE || op != ECC_ENCODE) {
 		init_completion(&ecc->done);
-- 
GitLab


From d34f1f4800439c8330f05e9f1133317f3b1f8c38 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 12 Jun 2017 11:50:26 +0530
Subject: [PATCH 0581/1958] watchdog: bcm47xx_wdt: constify
 bcm47xx_wdt_hard_ops and bcm47xx_wdt_soft_ops

File size before:
   text	   data	    bss	    dec	    hex	filename
   1282	    388	      1	   1671	    687	drivers/watchdog/bcm47xx_wdt.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   1474	    196	      1	   1671	    687	drivers/watchdog/bcm47xx_wdt.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/bcm47xx_wdt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c
index 35725e21b18a6..236582809336b 100644
--- a/drivers/watchdog/bcm47xx_wdt.c
+++ b/drivers/watchdog/bcm47xx_wdt.c
@@ -97,7 +97,7 @@ static int bcm47xx_wdt_restart(struct watchdog_device *wdd,
 	return 0;
 }
 
-static struct watchdog_ops bcm47xx_wdt_hard_ops = {
+static const struct watchdog_ops bcm47xx_wdt_hard_ops = {
 	.owner		= THIS_MODULE,
 	.start		= bcm47xx_wdt_hard_start,
 	.stop		= bcm47xx_wdt_hard_stop,
@@ -168,7 +168,7 @@ static const struct watchdog_info bcm47xx_wdt_info = {
 				WDIOF_MAGICCLOSE,
 };
 
-static struct watchdog_ops bcm47xx_wdt_soft_ops = {
+static const struct watchdog_ops bcm47xx_wdt_soft_ops = {
 	.owner		= THIS_MODULE,
 	.start		= bcm47xx_wdt_soft_start,
 	.stop		= bcm47xx_wdt_soft_stop,
-- 
GitLab


From 935988c67406f35e63873ac63e2962664ee99bdc Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 12 Jun 2017 12:02:39 +0530
Subject: [PATCH 0582/1958] watchdog: zx2967: constify zx2967_wdt_ops.

File size before:
   text	   data	    bss	    dec	    hex	filename
    988	    288	      0	   1276	    4fc	drivers/watchdog/zx2967_wdt.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   1084	    192	      0	   1276	    4fc	drivers/watchdog/zx2967_wdt.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/zx2967_wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c
index c98252733c306..69ec5855584b1 100644
--- a/drivers/watchdog/zx2967_wdt.c
+++ b/drivers/watchdog/zx2967_wdt.c
@@ -154,7 +154,7 @@ static const struct watchdog_info zx2967_wdt_ident = {
 	.identity         =	"zx2967 watchdog",
 };
 
-static struct watchdog_ops zx2967_wdt_ops = {
+static const struct watchdog_ops zx2967_wdt_ops = {
 	.owner = THIS_MODULE,
 	.start = zx2967_wdt_start,
 	.stop = zx2967_wdt_stop,
-- 
GitLab


From 011e29e7d93d80c2529ba17109bc4e5d031ea2b1 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 20 Jun 2017 11:04:26 +0530
Subject: [PATCH 0583/1958] watchdog: cadence_wdt: make of_device_ids const.

of_device_ids are not supposed to change at runtime. All functions
working with of_device_ids provided by <linux/of.h> work with const
of_device_ids. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   1962	    612	      4	   2578	    a12	drivers/watchdog/cadence_wdt.o

File size after constify cdns_wdt_of_match:
   text	   data	    bss	    dec	    hex	filename
   2378	    196	      4	   2578	    a12	drivers/watchdog/cadence_wdt.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/cadence_wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
index 86e0b5d2e7616..05c000081e9da 100644
--- a/drivers/watchdog/cadence_wdt.c
+++ b/drivers/watchdog/cadence_wdt.c
@@ -458,7 +458,7 @@ static int __maybe_unused cdns_wdt_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(cdns_wdt_pm_ops, cdns_wdt_suspend, cdns_wdt_resume);
 
-static struct of_device_id cdns_wdt_of_match[] = {
+static const struct of_device_id cdns_wdt_of_match[] = {
 	{ .compatible = "cdns,wdt-r1p2", },
 	{ /* end of table */ }
 };
-- 
GitLab


From e75bf0ea77e9778e9a1975eb9b38698babcbde63 Mon Sep 17 00:00:00 2001
From: Keiji Hayashibara <hayashibara.keiji@socionext.com>
Date: Wed, 14 Jun 2017 16:53:43 +0900
Subject: [PATCH 0584/1958] dt-bindings: watchdog: add description for UniPhier
 WDT controller

Add uniphier-wdt dt-bindings documentation.

Signed-off-by: Keiji Hayashibara <hayashibara.keiji@socionext.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 .../bindings/watchdog/uniphier-wdt.txt        | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt

diff --git a/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt b/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt
new file mode 100644
index 0000000000000..bf6337546dd18
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt
@@ -0,0 +1,20 @@
+UniPhier watchdog timer controller
+
+This UniPhier watchdog timer controller must be under sysctrl node.
+
+Required properties:
+- compatible: should be "socionext,uniphier-wdt"
+
+Example:
+
+	sysctrl@61840000 {
+		compatible = "socionext,uniphier-ld11-sysctrl",
+			     "simple-mfd", "syscon";
+		reg = <0x61840000 0x4000>;
+
+		watchdog {
+			compatible = "socionext,uniphier-wdt";
+		}
+
+		other nodes ...
+	};
-- 
GitLab


From e7bf02895f06c0603af800c4bfce3ca4ede9147d Mon Sep 17 00:00:00 2001
From: Keiji Hayashibara <hayashibara.keiji@socionext.com>
Date: Wed, 14 Jun 2017 16:53:44 +0900
Subject: [PATCH 0585/1958] watchdog: uniphier: add UniPhier watchdog driver

Add a watchdog driver for Socionext UniPhier series SoC.
Note that the timeout value for this device must be a power
of 2 because of the specification.

Signed-off-by: Keiji Hayashibara <hayashibara.keiji@socionext.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 .../watchdog/watchdog-parameters.txt          |   6 +
 drivers/watchdog/Kconfig                      |  12 +
 drivers/watchdog/Makefile                     |   1 +
 drivers/watchdog/uniphier_wdt.c               | 268 ++++++++++++++++++
 4 files changed, 287 insertions(+)
 create mode 100644 drivers/watchdog/uniphier_wdt.c

diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 914518aeb9728..b3526365ea8e3 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -369,6 +369,12 @@ timeout: Watchdog timeout in seconds. (0<timeout<N, default=60)
 nowayout: Watchdog cannot be stopped once started
 	(default=kernel config parameter)
 -------------------------------------------------
+uniphier_wdt:
+timeout: Watchdog timeout in power of two seconds.
+	(1 <= timeout <= 128, default=64)
+nowayout: Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+-------------------------------------------------
 w83627hf_wdt:
 wdt_io: w83627hf/thf WDT io port (default 0x2E)
 timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 5ca9fb7c11802..07146b103ed24 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -775,6 +775,18 @@ config STM32_WATCHDOG
 	  To compile this driver as a module, choose M here: the
 	  module will be called stm32_iwdg.
 
+config UNIPHIER_WATCHDOG
+	tristate "UniPhier watchdog support"
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && MFD_SYSCON
+	select WATCHDOG_CORE
+	help
+	  Say Y here to include support watchdog timer embedded
+	  into the UniPhier system.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called uniphier_wdt.
+
 # AVR32 Architecture
 
 config AT32AP700X_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 3aafd997917a4..56adf9fa67d04 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_RENESAS_RZAWDT) += rza_wdt.o
 obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
 obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
 obj-$(CONFIG_STM32_WATCHDOG) += stm32_iwdg.o
+obj-$(CONFIG_UNIPHIER_WATCHDOG) += uniphier_wdt.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
diff --git a/drivers/watchdog/uniphier_wdt.c b/drivers/watchdog/uniphier_wdt.c
new file mode 100644
index 0000000000000..0ea2339d97028
--- /dev/null
+++ b/drivers/watchdog/uniphier_wdt.c
@@ -0,0 +1,268 @@
+/*
+ * Watchdog driver for the UniPhier watchdog timer
+ *
+ * (c) Copyright 2014 Panasonic Corporation
+ * (c) Copyright 2016 Socionext Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/watchdog.h>
+
+/* WDT timer setting register */
+#define WDTTIMSET			0x3004
+#define   WDTTIMSET_PERIOD_MASK		(0xf << 0)
+#define   WDTTIMSET_PERIOD_1_SEC	(0x3 << 0)
+
+/* WDT reset selection register */
+#define WDTRSTSEL			0x3008
+#define   WDTRSTSEL_RSTSEL_MASK		(0x3 << 0)
+#define   WDTRSTSEL_RSTSEL_BOTH		(0x0 << 0)
+#define   WDTRSTSEL_RSTSEL_IRQ_ONLY	(0x2 << 0)
+
+/* WDT control register */
+#define WDTCTRL				0x300c
+#define   WDTCTRL_STATUS		BIT(8)
+#define   WDTCTRL_CLEAR			BIT(1)
+#define   WDTCTRL_ENABLE		BIT(0)
+
+#define SEC_TO_WDTTIMSET_PRD(sec) \
+		(ilog2(sec) + WDTTIMSET_PERIOD_1_SEC)
+
+#define WDTST_TIMEOUT			1000 /* usec */
+
+#define WDT_DEFAULT_TIMEOUT		64   /* Default is 64 seconds */
+#define WDT_PERIOD_MIN			1
+#define WDT_PERIOD_MAX			128
+
+static unsigned int timeout = 0;
+static bool nowayout = WATCHDOG_NOWAYOUT;
+
+struct uniphier_wdt_dev {
+	struct watchdog_device wdt_dev;
+	struct regmap	*regmap;
+};
+
+/*
+ * UniPhier Watchdog operations
+ */
+static int uniphier_watchdog_ping(struct watchdog_device *w)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+	unsigned int val;
+	int ret;
+
+	/* Clear counter */
+	ret = regmap_write_bits(wdev->regmap, WDTCTRL,
+				WDTCTRL_CLEAR, WDTCTRL_CLEAR);
+	if (!ret)
+		/*
+		 * As SoC specification, after clear counter,
+		 * it needs to wait until counter status is 1.
+		 */
+		ret = regmap_read_poll_timeout(wdev->regmap, WDTCTRL, val,
+					       (val & WDTCTRL_STATUS),
+					       0, WDTST_TIMEOUT);
+
+	return ret;
+}
+
+static int __uniphier_watchdog_start(struct regmap *regmap, unsigned int sec)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read_poll_timeout(regmap, WDTCTRL, val,
+				       !(val & WDTCTRL_STATUS),
+				       0, WDTST_TIMEOUT);
+	if (ret)
+		return ret;
+
+	/* Setup period */
+	ret = regmap_write(regmap, WDTTIMSET,
+			   SEC_TO_WDTTIMSET_PRD(sec));
+	if (ret)
+		return ret;
+
+	/* Enable and clear watchdog */
+	ret = regmap_write(regmap, WDTCTRL, WDTCTRL_ENABLE | WDTCTRL_CLEAR);
+	if (!ret)
+		/*
+		 * As SoC specification, after clear counter,
+		 * it needs to wait until counter status is 1.
+		 */
+		ret = regmap_read_poll_timeout(regmap, WDTCTRL, val,
+					       (val & WDTCTRL_STATUS),
+					       0, WDTST_TIMEOUT);
+
+	return ret;
+}
+
+static int __uniphier_watchdog_stop(struct regmap *regmap)
+{
+	/* Disable and stop watchdog */
+	return regmap_write_bits(regmap, WDTCTRL, WDTCTRL_ENABLE, 0);
+}
+
+static int __uniphier_watchdog_restart(struct regmap *regmap, unsigned int sec)
+{
+	int ret;
+
+	ret = __uniphier_watchdog_stop(regmap);
+	if (ret)
+		return ret;
+
+	return __uniphier_watchdog_start(regmap, sec);
+}
+
+static int uniphier_watchdog_start(struct watchdog_device *w)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+	unsigned int tmp_timeout;
+
+	tmp_timeout = roundup_pow_of_two(w->timeout);
+
+	return __uniphier_watchdog_start(wdev->regmap, tmp_timeout);
+}
+
+static int uniphier_watchdog_stop(struct watchdog_device *w)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+
+	return __uniphier_watchdog_stop(wdev->regmap);
+}
+
+static int uniphier_watchdog_set_timeout(struct watchdog_device *w,
+					 unsigned int t)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+	unsigned int tmp_timeout;
+	int ret;
+
+	tmp_timeout = roundup_pow_of_two(t);
+	if (tmp_timeout == w->timeout)
+		return 0;
+
+	if (watchdog_active(w)) {
+		ret = __uniphier_watchdog_restart(wdev->regmap, tmp_timeout);
+		if (ret)
+			return ret;
+	}
+
+	w->timeout = tmp_timeout;
+
+	return 0;
+}
+
+/*
+ * Kernel Interfaces
+ */
+static const struct watchdog_info uniphier_wdt_info = {
+	.identity	= "uniphier-wdt",
+	.options	= WDIOF_SETTIMEOUT |
+			  WDIOF_KEEPALIVEPING |
+			  WDIOF_MAGICCLOSE |
+			  WDIOF_OVERHEAT,
+};
+
+static const struct watchdog_ops uniphier_wdt_ops = {
+	.owner		= THIS_MODULE,
+	.start		= uniphier_watchdog_start,
+	.stop		= uniphier_watchdog_stop,
+	.ping		= uniphier_watchdog_ping,
+	.set_timeout	= uniphier_watchdog_set_timeout,
+};
+
+static int uniphier_wdt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct uniphier_wdt_dev *wdev;
+	struct regmap *regmap;
+	struct device_node *parent;
+	int ret;
+
+	wdev = devm_kzalloc(dev, sizeof(*wdev), GFP_KERNEL);
+	if (!wdev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, wdev);
+
+	parent = of_get_parent(dev->of_node); /* parent should be syscon node */
+	regmap = syscon_node_to_regmap(parent);
+	of_node_put(parent);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	wdev->regmap = regmap;
+	wdev->wdt_dev.info = &uniphier_wdt_info;
+	wdev->wdt_dev.ops = &uniphier_wdt_ops;
+	wdev->wdt_dev.max_timeout = WDT_PERIOD_MAX;
+	wdev->wdt_dev.min_timeout = WDT_PERIOD_MIN;
+	wdev->wdt_dev.parent = dev;
+
+	if (watchdog_init_timeout(&wdev->wdt_dev, timeout, dev) < 0) {
+		wdev->wdt_dev.timeout = WDT_DEFAULT_TIMEOUT;
+	}
+	watchdog_set_nowayout(&wdev->wdt_dev, nowayout);
+	watchdog_stop_on_reboot(&wdev->wdt_dev);
+
+	watchdog_set_drvdata(&wdev->wdt_dev, wdev);
+
+	uniphier_watchdog_stop(&wdev->wdt_dev);
+	ret = regmap_write(wdev->regmap, WDTRSTSEL, WDTRSTSEL_RSTSEL_BOTH);
+	if (ret)
+		return ret;
+
+	ret = devm_watchdog_register_device(dev, &wdev->wdt_dev);
+	if (ret)
+		return ret;
+
+	dev_info(dev, "watchdog driver (timeout=%d sec, nowayout=%d)\n",
+		 wdev->wdt_dev.timeout, nowayout);
+
+	return 0;
+}
+
+static const struct of_device_id uniphier_wdt_dt_ids[] = {
+	{ .compatible = "socionext,uniphier-wdt" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, uniphier_wdt_dt_ids);
+
+static struct platform_driver uniphier_wdt_driver = {
+	.probe		= uniphier_wdt_probe,
+	.driver		= {
+		.name		= "uniphier-wdt",
+		.of_match_table	= uniphier_wdt_dt_ids,
+	},
+};
+
+module_platform_driver(uniphier_wdt_driver);
+
+module_param(timeout, uint, 0000);
+MODULE_PARM_DESC(timeout,
+	"Watchdog timeout seconds in power of 2. (0 < timeout < 128, default="
+				__MODULE_STRING(WDT_DEFAULT_TIMEOUT) ")");
+
+module_param(nowayout, bool, 0000);
+MODULE_PARM_DESC(nowayout,
+	"Watchdog cannot be stopped once started (default="
+				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+MODULE_AUTHOR("Keiji Hayashibara <hayashibara.keiji@socionext.com>");
+MODULE_DESCRIPTION("UniPhier Watchdog Device Driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From c013b65ad8a1e132f733404809afc72f7d00e768 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Tue, 30 May 2017 10:56:45 +0200
Subject: [PATCH 0586/1958] watchdog: introduce watchdog_worker_should_ping
 helper

This will be useful when the condition becomes slightly more
complicated in the next patch.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Reviewed-by: Esben Haabendal <esben@haabendal.dk>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/watchdog_dev.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 4bc7ab60b12c4..0826e663bd5a3 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -195,18 +195,23 @@ static int watchdog_ping(struct watchdog_device *wdd)
 	return __watchdog_ping(wdd);
 }
 
+static bool watchdog_worker_should_ping(struct watchdog_core_data *wd_data)
+{
+	struct watchdog_device *wdd = wd_data->wdd;
+
+	return wdd && (watchdog_active(wdd) || watchdog_hw_running(wdd));
+}
+
 static void watchdog_ping_work(struct work_struct *work)
 {
 	struct watchdog_core_data *wd_data;
-	struct watchdog_device *wdd;
 
 	wd_data = container_of(to_delayed_work(work), struct watchdog_core_data,
 			       work);
 
 	mutex_lock(&wd_data->lock);
-	wdd = wd_data->wdd;
-	if (wdd && (watchdog_active(wdd) || watchdog_hw_running(wdd)))
-		__watchdog_ping(wdd);
+	if (watchdog_worker_should_ping(wd_data))
+		__watchdog_ping(wd_data->wdd);
 	mutex_unlock(&wd_data->lock);
 }
 
-- 
GitLab


From ce3f7163e4ce8fd583dcb36b6ee6b81fd1b419ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 26 Jun 2017 23:30:51 +0300
Subject: [PATCH 0587/1958] drm/i915: Disable MSI for all pre-gen5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have pretty clear evidence that MSIs are getting lost on g4x and
somehow the interrupt logic doesn't seem to recover from that state
even if we try hard to clear the IIR.

Disabling IER around the normal IIR clearing in the irq handler isn't
sufficient to avoid this, so the problem really seems to be further
up the interrupt chain. This should guarantee that there's always
an edge if any IIR bits are set after the interrupt handler is done,
which should normally guarantee that the CPU interrupt is generated.
That approach seems to work perfectly on VLV/CHV, but apparently
not on g4x.

MSI is documented to be broken on 965gm at least. The chipset spec
says MSI is defeatured because interrupts can be delayed or lost,
which fits well with what we're seeing on g4x. Previously we've
already disabled GMBUS interrupts on g4x because somehow GMBUS
manages to raise legacy interrupts even when MSI is enabled.

Since there's such widespread MSI breakahge all over in the pre-gen5
land let's just give up on MSI on these platforms.

Seqno reporting might be negatively affected by this since the legcy
interrupts aren't guaranteed to be ordered with the seqno writes,
whereas MSI interrupts may be? But an occasioanlly missed seqno
seems like a small price to pay for generally working interrupts.

Cc: stable@vger.kernel.org
Cc: Diego Viola <diego.viola@gmail.com>
Tested-by: Diego Viola <diego.viola@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101261
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170626203051.28480-1-ville.syrjala@linux.intel.com
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit e38c2da01f76cca82b59ca612529b81df82a7cc7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ee2325b180e75..fc307e03943c8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1132,10 +1132,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	 * and the registers being closely associated.
 	 *
 	 * According to chipset errata, on the 965GM, MSI interrupts may
-	 * be lost or delayed, but we use them anyways to avoid
-	 * stuck interrupts on some machines.
+	 * be lost or delayed, and was defeatured. MSI interrupts seem to
+	 * get lost on g4x as well, and interrupt delivery seems to stay
+	 * properly dead afterwards. So we'll just disable them for all
+	 * pre-gen5 chipsets.
 	 */
-	if (!IS_I945G(dev_priv) && !IS_I945GM(dev_priv)) {
+	if (INTEL_GEN(dev_priv) >= 5) {
 		if (pci_enable_msi(pdev) < 0)
 			DRM_DEBUG_DRIVER("can't enable MSI");
 	}
-- 
GitLab


From 1a13a2ec3eb360c80e1ea88b9b7616fa64d4e278 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 27 Jun 2017 07:38:54 +0200
Subject: [PATCH 0588/1958] drm/i915: Fix an error checking test

'dma_buf_vmap' returns NULL on error, not an error pointer.

Fixes: 6cca22ede8a4 ("drm/i915: Add some mock tests for dmabuf interop")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: http://patchwork.freedesktop.org/patch/msgid/20170627053854.21152-1-christophe.jaillet@wanadoo.fr
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 7c3f5317b8c2828ab10e8cf87c8ab5232d1966d0)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
index d15cc9d3a5cd1..89dc25a5a53b8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
@@ -246,9 +246,9 @@ static int igt_dmabuf_export_vmap(void *arg)
 	i915_gem_object_put(obj);
 
 	ptr = dma_buf_vmap(dmabuf);
-	if (IS_ERR(ptr)) {
-		err = PTR_ERR(ptr);
-		pr_err("dma_buf_vmap failed with err=%d\n", err);
+	if (!ptr) {
+		pr_err("dma_buf_vmap failed\n");
+		err = -ENOMEM;
 		goto out;
 	}
 
-- 
GitLab


From 9c75b185274b7766fe69c2e73607c1ed780b284b Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Date: Wed, 28 Jun 2017 18:06:05 -0300
Subject: [PATCH 0589/1958] drm/i915: reintroduce VLV/CHV PFI programming power
 domain workaround
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are still cases on these platforms where an attempt is made to
configure the CDCLK while the power domain is off, like when coming back
from a suspend.  So the workaround below is still needed.

This effectively reverts commit 63ff30442519 ("drm/i915: Nuke the
VLV/CHV PFI programming power domain workaround").

Cc: stable@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101517
Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170628210605.4994-1-krisman@collabora.co.uk
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit 886015a0ad43c7fc034b23ea4614ba39162f9ddd)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_cdclk.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index b8914db7d2e1b..1241e5891b295 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -491,6 +491,14 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	int cdclk = cdclk_state->cdclk;
 	u32 val, cmd;
 
+	/* There are cases where we can end up here with power domains
+	 * off and a CDCLK frequency other than the minimum, like when
+	 * issuing a modeset without actually changing any display after
+	 * a system suspend.  So grab the PIPE-A domain, which covers
+	 * the HW blocks needed for the following programming.
+	 */
+	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+
 	if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */
 		cmd = 2;
 	else if (cdclk == 266667)
@@ -549,6 +557,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	intel_update_cdclk(dev_priv);
 
 	vlv_program_pfi_credits(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
 }
 
 static void chv_set_cdclk(struct drm_i915_private *dev_priv,
@@ -568,6 +578,14 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 		return;
 	}
 
+	/* There are cases where we can end up here with power domains
+	 * off and a CDCLK frequency other than the minimum, like when
+	 * issuing a modeset without actually changing any display after
+	 * a system suspend.  So grab the PIPE-A domain, which covers
+	 * the HW blocks needed for the following programming.
+	 */
+	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+
 	/*
 	 * Specs are full of misinformation, but testing on actual
 	 * hardware has shown that we just need to write the desired
@@ -590,6 +608,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	intel_update_cdclk(dev_priv);
 
 	vlv_program_pfi_credits(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
 }
 
 static int bdw_calc_cdclk(int max_pixclk)
-- 
GitLab


From 4ec654bf3a63d503e3c5336eade5c369ae17db56 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 29 Jun 2017 16:04:25 +0100
Subject: [PATCH 0590/1958] drm/i915: Avoid undefined behaviour of "u32 >> 32"

When computing a hash for looking up relocation target handles in an
execbuf, we start with a large size for the hashtable and proceed to
halve it until the allocation succeeds. The final attempt is with an
order of 0 (i.e. a single element). This means that we then pass bits=0
to hash_32() which then computes "hash >> (32 - 0)" to lookup the single
element. Right shifting a value by the width of the operand is
undefined, so limit the smallest hash table we use to order 1.

v2: Keep the retry allocation flag for the final pass

Fixes: 4ff4b44cbb70 ("drm/i915: Store a direct lookup from object handle to vma")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170629150425.27508-1-chris@chris-wilson.co.uk
(cherry picked from commit 4d470f7359c4bf22518baa30700ad45649371a22)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 38 ++++++++++++++--------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9337446f10682..054b2e54cdafd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -288,20 +288,26 @@ static int eb_create(struct i915_execbuffer *eb)
 		 * direct lookup.
 		 */
 		do {
+			unsigned int flags;
+
+			/* While we can still reduce the allocation size, don't
+			 * raise a warning and allow the allocation to fail.
+			 * On the last pass though, we want to try as hard
+			 * as possible to perform the allocation and warn
+			 * if it fails.
+			 */
+			flags = GFP_TEMPORARY;
+			if (size > 1)
+				flags |= __GFP_NORETRY | __GFP_NOWARN;
+
 			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
-					      GFP_TEMPORARY |
-					      __GFP_NORETRY |
-					      __GFP_NOWARN);
+					      flags);
 			if (eb->buckets)
 				break;
 		} while (--size);
 
-		if (unlikely(!eb->buckets)) {
-			eb->buckets = kzalloc(sizeof(struct hlist_head),
-					      GFP_TEMPORARY);
-			if (unlikely(!eb->buckets))
-				return -ENOMEM;
-		}
+		if (unlikely(!size))
+			return -ENOMEM;
 
 		eb->lut_size = size;
 	} else {
@@ -452,7 +458,7 @@ eb_add_vma(struct i915_execbuffer *eb,
 			return err;
 	}
 
-	if (eb->lut_size >= 0) {
+	if (eb->lut_size > 0) {
 		vma->exec_handle = entry->handle;
 		hlist_add_head(&vma->exec_node,
 			       &eb->buckets[hash_32(entry->handle,
@@ -894,7 +900,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 static void eb_reset_vmas(const struct i915_execbuffer *eb)
 {
 	eb_release_vmas(eb);
-	if (eb->lut_size >= 0)
+	if (eb->lut_size > 0)
 		memset(eb->buckets, 0,
 		       sizeof(struct hlist_head) << eb->lut_size);
 }
@@ -903,7 +909,7 @@ static void eb_destroy(const struct i915_execbuffer *eb)
 {
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
-	if (eb->lut_size >= 0)
+	if (eb->lut_size > 0)
 		kfree(eb->buckets);
 }
 
@@ -2180,8 +2186,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
-	if (eb_create(&eb))
-		return -ENOMEM;
+	err = eb_create(&eb);
+	if (err)
+		goto err_out_fence;
+
+	GEM_BUG_ON(!eb.lut_size);
 
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
@@ -2340,6 +2349,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_rpm:
 	intel_runtime_pm_put(eb.i915);
 	eb_destroy(&eb);
+err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
 err_in_fence:
-- 
GitLab


From 2347728934b501b07bea8b1b6e8fa27a56dc098f Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Mon, 19 Jun 2017 14:21:47 -0700
Subject: [PATCH 0591/1958] drm/i915/cfl: Fix Workarounds.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the review of Coffee Lake workarounds Mika pointed out
that WaDisableKillLogic and GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC
should be removed from CFL and with that I should carry the rv-b.

However when doing the v2 I removed another Workaround that should
remain because although not mentioned by spec the history of hangs
around it advocates on its favor.

On some follow-up patches I continued operating on the wrong
workardound, but Ville noticed that, so here is the fix for the
current CFL code that is upstream already.

Fixes: 46c26662d2f ("drm/i915/cfl: Introduce Coffee Lake workarounds.")
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
(cherry picked from commit 98eed3d1ade53596e1c8785e049f03da4480a820)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a4487c5b7e37d..5b4de719bec3d 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -821,9 +821,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
-	/* WaDisableKillLogic:bxt,skl,kbl,cfl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   ECOCHK_DIS_TLB);
+	/* WaDisableKillLogic:bxt,skl,kbl */
+	if (!IS_COFFEELAKE(dev_priv))
+		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+			   ECOCHK_DIS_TLB);
 
 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
@@ -894,10 +895,9 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
 			  HDC_FORCE_NON_COHERENT);
 
-	/* WaDisableHDCInvalidation:skl,bxt,kbl */
-	if (!IS_COFFEELAKE(dev_priv))
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   BDW_DISABLE_HDC_INVALIDATION);
+	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
+	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+		   BDW_DISABLE_HDC_INVALIDATION);
 
 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
 	if (IS_SKYLAKE(dev_priv) ||
-- 
GitLab


From c379b897ba1a7f786419e8c36a3438ce856f016a Mon Sep 17 00:00:00 2001
From: "Navare, Manasi D" <manasi.d.navare@intel.com>
Date: Thu, 29 Jun 2017 18:14:01 -0700
Subject: [PATCH 0592/1958] drm/i915/cnl: Fix the CURSOR_COEFF_MASK used in DDI
 Vswing Programming

The Cursor Coeff is lower 6 bits in the PORT_TX_DW4 register
and hence the CURSOR_COEFF_MASK should be (0x3F << 0)

Fixes: 04416108ccea ("drm/i915/cnl: Add registers related to voltage
swing sequences.")
Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1498785241-21138-1-git-send-email-manasi.d.navare@intel.com

(cherry picked from commit fcace3b9b727e25ffa3f7ad2c96e76b8584a9f3e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c8647cfa81baa..64cc674b652ac 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1802,7 +1802,7 @@ enum skl_disp_power_wells {
 #define   POST_CURSOR_2(x)		((x) << 6)
 #define   POST_CURSOR_2_MASK		(0x3F << 6)
 #define   CURSOR_COEFF(x)		((x) << 0)
-#define   CURSOR_COEFF_MASK		(0x3F << 6)
+#define   CURSOR_COEFF_MASK		(0x3F << 0)
 
 #define _CNL_PORT_TX_DW5_GRP_AE		0x162354
 #define _CNL_PORT_TX_DW5_GRP_B		0x1623D4
-- 
GitLab


From 04941829b0049d2446c7042ab9686dd057d809a6 Mon Sep 17 00:00:00 2001
From: "sagar.a.kamble@intel.com" <sagar.a.kamble@intel.com>
Date: Tue, 27 Jun 2017 23:09:41 +0530
Subject: [PATCH 0593/1958] drm/i915: Hold RPM wakelock while initializing OA
 buffer

OA buffer initialization involves access to HW registers to set
the OA base, head and tail. Ensure device is awake while setting
these. With this, all oa.ops are covered under RPM and forcewake
wakelock.

Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1498585181-23048-1-git-send-email-sagar.a.kamble@intel.com
Fixes: d79651522e89c ("drm/i915: Enable i915 perf stream for Haswell OA unit")
Cc: <stable@vger.kernel.org> # v4.11+
(cherry picked from commit 987f8c444aa2c33d98e7030d0c5f0a5325cc84ea)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 38c44407bafc5..9cd22f83b0cfa 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2067,10 +2067,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 			return ret;
 	}
 
-	ret = alloc_oa_buffer(dev_priv);
-	if (ret)
-		goto err_oa_buf_alloc;
-
 	/* PRM - observability performance counters:
 	 *
 	 *   OACONTROL, performance counter enable, note:
@@ -2086,6 +2082,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	intel_runtime_pm_get(dev_priv);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
+	ret = alloc_oa_buffer(dev_priv);
+	if (ret)
+		goto err_oa_buf_alloc;
+
 	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv);
 	if (ret)
 		goto err_enable;
@@ -2097,11 +2097,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	return 0;
 
 err_enable:
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
 	free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_runtime_pm_put(dev_priv);
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-- 
GitLab


From 3950c95e2c39b998abcc9702005b5ba7c7f70d32 Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Mon, 3 Jul 2017 18:37:10 +0200
Subject: [PATCH 0594/1958] power: supply: twl4030-charger: move irq allocation
 to just before irqs are enabled

And initialize workers and notifiers as soon as possible.

This avoids a potential race if irqs are enabled and triggered too early
before the worker is properly set up.

Suggested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/twl4030_charger.c | 29 +++++++++++++-------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 3de802f169a16..9fad387e50817 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -980,12 +980,27 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, bci);
 
+	INIT_WORK(&bci->work, twl4030_bci_usb_work);
+	INIT_DELAYED_WORK(&bci->current_worker, twl4030_current_worker);
+
 	bci->channel_vac = devm_iio_channel_get(&pdev->dev, "vac");
 	if (IS_ERR(bci->channel_vac)) {
 		bci->channel_vac = NULL;
 		dev_warn(&pdev->dev, "could not request vac iio channel");
 	}
 
+	if (bci->dev->of_node) {
+		struct device_node *phynode;
+
+		phynode = of_find_compatible_node(bci->dev->of_node->parent,
+						  NULL, "ti,twl4030-usb");
+		if (phynode) {
+			bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
+			bci->transceiver = devm_usb_get_phy_by_node(
+				bci->dev, phynode, &bci->usb_nb);
+		}
+	}
+
 	bci->ac = devm_power_supply_register(&pdev->dev, &twl4030_bci_ac_desc,
 					     NULL);
 	if (IS_ERR(bci->ac)) {
@@ -1019,20 +1034,6 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	INIT_WORK(&bci->work, twl4030_bci_usb_work);
-	INIT_DELAYED_WORK(&bci->current_worker, twl4030_current_worker);
-
-	bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
-	if (bci->dev->of_node) {
-		struct device_node *phynode;
-
-		phynode = of_find_compatible_node(bci->dev->of_node->parent,
-						  NULL, "ti,twl4030-usb");
-		if (phynode)
-			bci->transceiver = devm_usb_get_phy_by_node(
-				bci->dev, phynode, &bci->usb_nb);
-	}
-
 	/* Enable interrupts now. */
 	reg = ~(u32)(TWL4030_ICHGLOW | TWL4030_ICHGEOC | TWL4030_TBATOR2 |
 		TWL4030_TBATOR1 | TWL4030_BATSTS);
-- 
GitLab


From 7bb5a650f3471e4052e1692b29b0f3d3ca099c47 Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Mon, 3 Jul 2017 18:37:11 +0200
Subject: [PATCH 0595/1958] power: supply: twl4030-charger: add deferred
 probing for phy and iio

This fixes an issue if both this twl4030_charger driver and
phy-twl4030-usb are compiled as modules and loaded in random order.

It has been observed on GTA04 and OpenPandora devices that in worst
case the boot process hangs and in best case the AC detection fails
with a warning.

Therefore we add deferred probing checks for the usb_phy and the
iio channel for AC detection.

Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/twl4030_charger.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 9fad387e50817..9dff1b4b85fc3 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -985,8 +985,12 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 
 	bci->channel_vac = devm_iio_channel_get(&pdev->dev, "vac");
 	if (IS_ERR(bci->channel_vac)) {
+		ret = PTR_ERR(bci->channel_vac);
+		if (ret == -EPROBE_DEFER)
+			return ret;	/* iio not ready */
+		dev_warn(&pdev->dev, "could not request vac iio channel (%d)",
+			ret);
 		bci->channel_vac = NULL;
-		dev_warn(&pdev->dev, "could not request vac iio channel");
 	}
 
 	if (bci->dev->of_node) {
@@ -998,6 +1002,14 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 			bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
 			bci->transceiver = devm_usb_get_phy_by_node(
 				bci->dev, phynode, &bci->usb_nb);
+			if (IS_ERR(bci->transceiver)) {
+				ret = PTR_ERR(bci->transceiver);
+				if (ret == -EPROBE_DEFER)
+					return ret;	/* phy not ready */
+				dev_warn(&pdev->dev, "could not request transceiver (%d)",
+					ret);
+				bci->transceiver = NULL;
+			}
 		}
 	}
 
-- 
GitLab


From 468138d78510688fb5476f98d23f11ac6a63229a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 2 May 2017 19:52:17 -0400
Subject: [PATCH 0596/1958] binfmt_flat: flat_{get,put}_addr_from_rp() should
 be able to fail

on MMU targets EFAULT is possible here.  Make both return 0 or error,
passing what used to be the return value of flat_get_addr_from_rp()
by reference.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm/include/asm/flat.h        | 25 ++++++++++++++++---
 arch/blackfin/include/asm/flat.h   | 25 +++++++++++--------
 arch/blackfin/kernel/flat.c        | 13 +++++-----
 arch/c6x/include/asm/flat.h        | 15 ++++++++++--
 arch/h8300/include/asm/flat.h      | 24 +++++++++++++-----
 arch/m32r/include/asm/flat.h       | 19 ++++++---------
 arch/m68k/include/asm/flat.h       | 23 +++++++++++++++---
 arch/microblaze/include/asm/flat.h | 34 +++++++++++++-------------
 arch/sh/include/asm/flat.h         | 15 ++++++++++--
 arch/xtensa/include/asm/flat.h     | 15 ++++++++++--
 fs/binfmt_flat.c                   | 39 +++++++++++++++++-------------
 include/linux/flat.h               |  2 +-
 12 files changed, 168 insertions(+), 81 deletions(-)

diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
index acf1d14b89a65..29d3a1524bce8 100644
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -5,12 +5,31 @@
 #ifndef __ARM_FLAT_H__
 #define __ARM_FLAT_H__
 
+#include <linux/uaccess.h>
+
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, persistent) \
-	({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
-#define	flat_put_addr_at_rp(rp, val, relval)	__put_user_unaligned(val, rp)
+
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
+#else
+	return get_user(*addr, rp);
+#endif
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return copy_to_user(rp, &addr, 4) ? -EFAULT : 0;
+#else
+	return put_user(addr, rp);
+#endif
+}
+
 #define	flat_get_relocate_addr(rel)		(rel)
 #define	flat_set_persistent(relval, p)		0
 
diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h
index c1314c56dd189..296d7f56fbfd0 100644
--- a/arch/blackfin/include/asm/flat.h
+++ b/arch/blackfin/include/asm/flat.h
@@ -14,23 +14,28 @@
 #define	flat_argvp_envp_on_stack()		0
 #define	flat_old_ram_flag(flags)		(flags)
 
-extern unsigned long bfin_get_addr_from_rp (unsigned long *ptr,
-					unsigned long relval,
-					unsigned long flags,
-					unsigned long *persistent);
+extern unsigned long bfin_get_addr_from_rp (u32 *ptr, u32 relval,
+					u32 flags, u32 *persistent);
 
-extern void bfin_put_addr_at_rp(unsigned long *ptr, unsigned long addr,
-		                unsigned long relval);
+extern void bfin_put_addr_at_rp(u32 *ptr, u32 addr, u32 relval);
 
 /* The amount by which a relocation can exceed the program image limits
    without being regarded as an error.  */
 
 #define	flat_reloc_valid(reloc, size)	((reloc) <= (size))
 
-#define	flat_get_addr_from_rp(rp, relval, flags, persistent)	\
-	bfin_get_addr_from_rp(rp, relval, flags, persistent)
-#define	flat_put_addr_at_rp(rp, val, relval)	\
-	bfin_put_addr_at_rp(rp, val, relval)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = bfin_get_addr_from_rp(rp, relval, flags, persistent);
+	return 0;
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 val, u32 relval)
+{
+	bfin_put_addr_at_rp(rp, val, relval);
+	return 0;
+}
 
 /* Convert a relocation entry into an address.  */
 static inline unsigned long
diff --git a/arch/blackfin/kernel/flat.c b/arch/blackfin/kernel/flat.c
index b5b6584496164..d29ab6a2e909f 100644
--- a/arch/blackfin/kernel/flat.c
+++ b/arch/blackfin/kernel/flat.c
@@ -13,14 +13,14 @@
 #define FLAT_BFIN_RELOC_TYPE_16H_BIT 1
 #define FLAT_BFIN_RELOC_TYPE_32_BIT 2
 
-unsigned long bfin_get_addr_from_rp(unsigned long *ptr,
-		unsigned long relval,
-		unsigned long flags,
-		unsigned long *persistent)
+unsigned long bfin_get_addr_from_rp(u32 *ptr,
+		u32 relval,
+		u32 flags,
+		u32 *persistent)
 {
 	unsigned short *usptr = (unsigned short *)ptr;
 	int type = (relval >> 26) & 7;
-	unsigned long val;
+	u32 val;
 
 	switch (type) {
 	case FLAT_BFIN_RELOC_TYPE_16_BIT:
@@ -59,8 +59,7 @@ EXPORT_SYMBOL(bfin_get_addr_from_rp);
  * Insert the address ADDR into the symbol reference at RP;
  * RELVAL is the raw relocation-table entry from which RP is derived
  */
-void bfin_put_addr_at_rp(unsigned long *ptr, unsigned long addr,
-		unsigned long relval)
+void bfin_put_addr_at_rp(u32 *ptr, u32 addr, u32 relval)
 {
 	unsigned short *usptr = (unsigned short *)ptr;
 	int type = (relval >> 26) & 7;
diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h
index a1858bd5f6c8a..6f1feb00bd52b 100644
--- a/arch/c6x/include/asm/flat.h
+++ b/arch/c6x/include/asm/flat.h
@@ -1,11 +1,22 @@
 #ifndef __ASM_C6X_FLAT_H
 #define __ASM_C6X_FLAT_H
 
+#include <asm/unaligned.h>
+
 #define flat_argvp_envp_on_stack()			0
 #define flat_old_ram_flag(flags)			(flags)
 #define flat_reloc_valid(reloc, size)			((reloc) <= (size))
-#define flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
-#define flat_put_addr_at_rp(rp, val, relval)		put_unaligned(val, rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = get_unaligned((__force u32 *)rp);
+	return 0;
+}
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	put_unaligned(addr, (__force u32 *)rp);
+	return 0;
+}
 #define flat_get_relocate_addr(rel)			(rel)
 #define flat_set_persistent(relval, p)			0
 
diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
index a4898eccf2bf6..18d024251738d 100644
--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -5,6 +5,8 @@
 #ifndef __H8300_FLAT_H__
 #define __H8300_FLAT_H__
 
+#include <asm/unaligned.h>
+
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		1
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
@@ -18,11 +20,21 @@
  */
 
 #define	flat_get_relocate_addr(rel)		(rel & ~0x00000001)
-#define flat_get_addr_from_rp(rp, relval, flags, persistent) \
-	({(void)persistent; \
-		get_unaligned(rp) & (((flags) & FLAT_FLAG_GOTPIC) ?	\
-				     0xffffffff : 0x00ffffff); })
-#define flat_put_addr_at_rp(rp, addr, rel) \
-	put_unaligned(((*(char *)(rp)) << 24) | ((addr) & 0x00ffffff), (rp))
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	u32 val = get_unaligned((__force u32 *)rp);
+	if (!(flags & FLAT_FLAG_GOTPIC)
+		val &= 0x00ffffff;
+	*addr = val;
+	return 0;
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	u32 *p = (__force u32 *)rp;
+	put_unaligned((addr & 0x00ffffff) | (*(char *)p << 24), p);
+	return 0;
+}
 
 #endif /* __H8300_FLAT_H__ */
diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h
index 5d711c4688fb9..455ce7ddbf142 100644
--- a/arch/m32r/include/asm/flat.h
+++ b/arch/m32r/include/asm/flat.h
@@ -17,11 +17,6 @@
 #define	flat_set_persistent(relval, p)		0
 #define	flat_reloc_valid(reloc, size)		\
 	(((reloc) - textlen_for_m32r_lo16_data) <= (size))
-#define flat_get_addr_from_rp(rp, relval, flags, persistent) \
-	m32r_flat_get_addr_from_rp(rp, relval, (text_len) )
-
-#define flat_put_addr_at_rp(rp, addr, relval) \
-	m32r_flat_put_addr_at_rp(rp, addr, relval)
 
 /* Convert a relocation entry into an address.  */
 static inline unsigned long
@@ -57,9 +52,9 @@ flat_get_relocate_addr (unsigned long relval)
 
 static unsigned long textlen_for_m32r_lo16_data = 0;
 
-static inline unsigned long m32r_flat_get_addr_from_rp (unsigned long *rp,
-                                                        unsigned long relval,
-						        unsigned long textlen)
+static inline unsigned long m32r_flat_get_addr_from_rp (u32 *rp,
+                                                        u32 relval,
+						        u32 textlen)
 {
         unsigned int reloc = flat_m32r_get_reloc_type (relval);
 	textlen_for_m32r_lo16_data = 0;
@@ -100,9 +95,7 @@ static inline unsigned long m32r_flat_get_addr_from_rp (unsigned long *rp,
 	return ~0;      /* bogus value */
 }
 
-static inline void m32r_flat_put_addr_at_rp (unsigned long *rp,
-					     unsigned long addr,
-                                             unsigned long relval)
+static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
 {
         unsigned int reloc = flat_m32r_get_reloc_type (relval);
 	if (reloc & 0xf0) {
@@ -142,4 +135,8 @@ static inline void m32r_flat_put_addr_at_rp (unsigned long *rp,
 	}
 }
 
+// kludge - text_len is a local variable in the only user.
+#define flat_get_addr_from_rp(rp, relval, flags, addr, persistent) \
+	(m32r_flat_get_addr_from_rp(rp, relval, text_len), 0)
+
 #endif /* __ASM_M32R_FLAT_H */
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index 00c392b0cabdb..48b62790fe70f 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -5,12 +5,29 @@
 #ifndef __M68KNOMMU_FLAT_H__
 #define __M68KNOMMU_FLAT_H__
 
+#include <linux/uaccess.h>
+
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, p) \
-	({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
-#define	flat_put_addr_at_rp(rp, val, relval)	__put_user_unaligned(val, rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
+	return copy_from_user(addr, rp, 4) ? -EFAULT : 0;
+#else
+	return get_user(*addr, rp);
+#endif
+}
+
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
+	return copy_to_user(rp, &addr, 4) ? -EFAULT : 0;
+#else
+	return put_user(addr, rp);
+#endif
+}
 #define	flat_get_relocate_addr(rel)		(rel)
 
 static inline int flat_set_persistent(unsigned long relval,
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index 6847c1512c7ba..f23c3d266bae3 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -32,29 +32,27 @@
  * reference
  */
 
-static inline unsigned long
-flat_get_addr_from_rp(unsigned long *rp, unsigned long relval,
-			unsigned long flags, unsigned long *persistent)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
 {
-	unsigned long addr;
-	(void)flags;
+	u32 *p = (__force u32 *)rp;
 
 	/* Is it a split 64/32 reference? */
 	if (relval & 0x80000000) {
 		/* Grab the two halves of the reference */
-		unsigned long val_hi, val_lo;
+		u32 val_hi, val_lo;
 
-		val_hi = get_unaligned(rp);
-		val_lo = get_unaligned(rp+1);
+		val_hi = get_unaligned(p);
+		val_lo = get_unaligned(p+1);
 
 		/* Crack the address out */
-		addr = ((val_hi & 0xffff) << 16) + (val_lo & 0xffff);
+		*addr = ((val_hi & 0xffff) << 16) + (val_lo & 0xffff);
 	} else {
 		/* Get the address straight out */
-		addr = get_unaligned(rp);
+		*addr = get_unaligned(p);
 	}
 
-	return addr;
+	return 0;
 }
 
 /*
@@ -63,25 +61,27 @@ flat_get_addr_from_rp(unsigned long *rp, unsigned long relval,
  */
 
 static inline void
-flat_put_addr_at_rp(unsigned long *rp, unsigned long addr, unsigned long relval)
+flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 relval)
 {
+	u32 *p = (__force u32 *)rp;
 	/* Is this a split 64/32 reloc? */
 	if (relval & 0x80000000) {
 		/* Get the two "halves" */
-		unsigned long val_hi = get_unaligned(rp);
-		unsigned long val_lo = get_unaligned(rp + 1);
+		unsigned long val_hi = get_unaligned(p);
+		unsigned long val_lo = get_unaligned(p + 1);
 
 		/* insert the address */
 		val_hi = (val_hi & 0xffff0000) | addr >> 16;
 		val_lo = (val_lo & 0xffff0000) | (addr & 0xffff);
 
 		/* store the two halves back into memory */
-		put_unaligned(val_hi, rp);
-		put_unaligned(val_lo, rp+1);
+		put_unaligned(val_hi, p);
+		put_unaligned(val_lo, p+1);
 	} else {
 		/* Put it straight in, no messing around */
-		put_unaligned(addr, rp);
+		put_unaligned(addr, p);
 	}
+	return 0;
 }
 
 #define	flat_get_relocate_addr(rel)	(rel & 0x7fffffff)
diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h
index 5d84df5e27f65..275fcae235392 100644
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -12,11 +12,22 @@
 #ifndef __ASM_SH_FLAT_H
 #define __ASM_SH_FLAT_H
 
+#include <asm/unaligned.h>
+
 #define	flat_argvp_envp_on_stack()		0
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
-#define	flat_put_addr_at_rp(rp, val, relval)	put_unaligned(val,rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = get_unaligned((__force u32 *)rp);
+	return 0;
+}
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	put_unaligned(addr, (__force u32 *)rp);
+	return 0;
+}
 #define	flat_get_relocate_addr(rel)		(rel)
 #define	flat_set_persistent(relval, p)		({ (void)p; 0; })
 
diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h
index 94c44abf15e44..60e0d6a457956 100644
--- a/arch/xtensa/include/asm/flat.h
+++ b/arch/xtensa/include/asm/flat.h
@@ -1,11 +1,22 @@
 #ifndef __ASM_XTENSA_FLAT_H
 #define __ASM_XTENSA_FLAT_H
 
+#include <asm/unaligned.h>
+
 #define flat_argvp_envp_on_stack()			0
 #define flat_old_ram_flag(flags)			(flags)
 #define flat_reloc_valid(reloc, size)			((reloc) <= (size))
-#define flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
-#define flat_put_addr_at_rp(rp, val, relval	)	put_unaligned(val, rp)
+static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
+					u32 *addr, u32 *persistent)
+{
+	*addr = get_unaligned((__force u32 *)rp);
+	return 0;
+}
+static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
+{
+	put_unaligned(addr, (__force u32 *)rp);
+	return 0;
+}
 #define flat_get_relocate_addr(rel)			(rel)
 #define flat_set_persistent(relval, p)			0
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2edcefc0a2949..69ec23daa25e0 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -422,9 +422,9 @@ static int load_flat_file(struct linux_binprm *bprm,
 {
 	struct flat_hdr *hdr;
 	unsigned long textpos, datapos, realdatastart;
-	unsigned long text_len, data_len, bss_len, stack_len, full_data, flags;
+	u32 text_len, data_len, bss_len, stack_len, full_data, flags;
 	unsigned long len, memp, memp_size, extra, rlim;
-	unsigned long __user *reloc, *rp;
+	u32 __user *reloc, *rp;
 	struct inode *inode;
 	int i, rev, relocs;
 	loff_t fpos;
@@ -596,13 +596,13 @@ static int load_flat_file(struct linux_binprm *bprm,
 			goto err;
 		}
 
-		reloc = (unsigned long __user *)
+		reloc = (u32 __user *)
 			(datapos + (ntohl(hdr->reloc_start) - text_len));
 		memp = realdatastart;
 		memp_size = len;
 	} else {
 
-		len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
+		len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
 		len = PAGE_ALIGN(len);
 		textpos = vm_mmap(NULL, 0, len,
 			PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -618,10 +618,10 @@ static int load_flat_file(struct linux_binprm *bprm,
 
 		realdatastart = textpos + ntohl(hdr->data_start);
 		datapos = ALIGN(realdatastart +
-				MAX_SHARED_LIBS * sizeof(unsigned long),
+				MAX_SHARED_LIBS * sizeof(u32),
 				FLAT_DATA_ALIGN);
 
-		reloc = (unsigned long __user *)
+		reloc = (u32 __user *)
 			(datapos + (ntohl(hdr->reloc_start) - text_len));
 		memp = textpos;
 		memp_size = len;
@@ -694,7 +694,7 @@ static int load_flat_file(struct linux_binprm *bprm,
 			ret = result;
 			pr_err("Unable to read code+data+bss, errno %d\n", ret);
 			vm_munmap(textpos, text_len + data_len + extra +
-				MAX_SHARED_LIBS * sizeof(unsigned long));
+				MAX_SHARED_LIBS * sizeof(u32));
 			goto err;
 		}
 	}
@@ -754,8 +754,8 @@ static int load_flat_file(struct linux_binprm *bprm,
 	 * image.
 	 */
 	if (flags & FLAT_FLAG_GOTPIC) {
-		for (rp = (unsigned long __user *)datapos; ; rp++) {
-			unsigned long addr, rp_val;
+		for (rp = (u32 __user *)datapos; ; rp++) {
+			u32 addr, rp_val;
 			if (get_user(rp_val, rp))
 				return -EFAULT;
 			if (rp_val == 0xffffffff)
@@ -784,9 +784,9 @@ static int load_flat_file(struct linux_binprm *bprm,
 	 * __start to address 4 so that is okay).
 	 */
 	if (rev > OLD_FLAT_VERSION) {
-		unsigned long __maybe_unused persistent = 0;
+		u32 __maybe_unused persistent = 0;
 		for (i = 0; i < relocs; i++) {
-			unsigned long addr, relval;
+			u32 addr, relval;
 
 			/*
 			 * Get the address of the pointer to be
@@ -799,15 +799,18 @@ static int load_flat_file(struct linux_binprm *bprm,
 			if (flat_set_persistent(relval, &persistent))
 				continue;
 			addr = flat_get_relocate_addr(relval);
-			rp = (unsigned long __user *)calc_reloc(addr, libinfo, id, 1);
-			if (rp == (unsigned long __user *)RELOC_FAILED) {
+			rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1);
+			if (rp == (u32 __user *)RELOC_FAILED) {
 				ret = -ENOEXEC;
 				goto err;
 			}
 
 			/* Get the pointer's value.  */
-			addr = flat_get_addr_from_rp(rp, relval, flags,
-							&persistent);
+			ret = flat_get_addr_from_rp(rp, relval, flags,
+							&addr, &persistent);
+			if (unlikely(ret))
+				goto err;
+
 			if (addr != 0) {
 				/*
 				 * Do the relocation.  PIC relocs in the data section are
@@ -822,12 +825,14 @@ static int load_flat_file(struct linux_binprm *bprm,
 				}
 
 				/* Write back the relocated pointer.  */
-				flat_put_addr_at_rp(rp, addr, relval);
+				ret = flat_put_addr_at_rp(rp, addr, relval);
+				if (unlikely(ret))
+					goto err;
 			}
 		}
 	} else {
 		for (i = 0; i < relocs; i++) {
-			unsigned long relval;
+			u32 relval;
 			if (get_user(relval, reloc + i))
 				return -EFAULT;
 			relval = ntohl(relval);
diff --git a/include/linux/flat.h b/include/linux/flat.h
index 2c1eb15c4ba4d..7d542dfd0def5 100644
--- a/include/linux/flat.h
+++ b/include/linux/flat.h
@@ -9,8 +9,8 @@
 #ifndef _LINUX_FLAT_H
 #define _LINUX_FLAT_H
 
-#include <asm/flat.h>
 #include <uapi/linux/flat.h>
+#include <asm/flat.h>
 
 /*
  * While it would be nice to keep this header clean,  users of older
-- 
GitLab


From 3170d8d226c2053355f3946b4b5ded4c006fe6d4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 2 May 2017 20:06:33 -0400
Subject: [PATCH 0597/1958] kill {__,}{get,put}_user_unaligned()

no users left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm/include/asm/uaccess.h          |   7 -
 arch/arm64/include/asm/uaccess.h        |   4 -
 arch/ia64/include/asm/uaccess.h         |  36 ---
 arch/m68k/include/asm/uaccess.h         |   7 -
 arch/mips/include/asm/uaccess.h         | 277 ------------------------
 arch/parisc/include/asm/uaccess.h       |   1 -
 arch/powerpc/include/asm/uaccess.h      |   3 -
 arch/s390/include/asm/uaccess.h         |   3 -
 arch/sparc/include/asm/uaccess_64.h     |   1 -
 arch/tile/include/asm/uaccess.h         |   1 -
 arch/x86/include/asm/uaccess.h          |   3 -
 include/asm-generic/uaccess-unaligned.h |  26 ---
 12 files changed, 369 deletions(-)
 delete mode 100644 include/asm-generic/uaccess-unaligned.h

diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 2577405d082da..0726091a89646 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -17,13 +17,6 @@
 #include <asm/unified.h>
 #include <asm/compiler.h>
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#include <asm-generic/uaccess-unaligned.h>
-#else
-#define __get_user_unaligned __get_user
-#define __put_user_unaligned __put_user
-#endif
-
 #include <asm/extable.h>
 
 /*
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 7b8a04789cef6..f2b465e129dea 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -254,8 +254,6 @@ do {									\
 	(void)0;							\
 })
 
-#define __get_user_unaligned __get_user
-
 #define get_user(x, ptr)						\
 ({									\
 	__typeof__(*(ptr)) __user *__p = (ptr);				\
@@ -320,8 +318,6 @@ do {									\
 	(void)0;							\
 })
 
-#define __put_user_unaligned __put_user
-
 #define put_user(x, ptr)						\
 ({									\
 	__typeof__(*(ptr)) __user *__p = (ptr);				\
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
index 82a7646c4416a..a217bcfe6700a 100644
--- a/arch/ia64/include/asm/uaccess.h
+++ b/arch/ia64/include/asm/uaccess.h
@@ -87,42 +87,6 @@ static inline int __access_ok(const void __user *p, unsigned long size)
 #define __put_user(x, ptr)	__put_user_nocheck((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr)))
 #define __get_user(x, ptr)	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
 
-extern long __put_user_unaligned_unknown (void);
-
-#define __put_user_unaligned(x, ptr)								\
-({												\
-	long __ret;										\
-	switch (sizeof(*(ptr))) {								\
-		case 1: __ret = __put_user((x), (ptr)); break;					\
-		case 2: __ret = (__put_user((x), (u8 __user *)(ptr)))				\
-			| (__put_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break;		\
-		case 4: __ret = (__put_user((x), (u16 __user *)(ptr)))				\
-			| (__put_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break;		\
-		case 8: __ret = (__put_user((x), (u32 __user *)(ptr)))				\
-			| (__put_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break;		\
-		default: __ret = __put_user_unaligned_unknown();				\
-	}											\
-	__ret;											\
-})
-
-extern long __get_user_unaligned_unknown (void);
-
-#define __get_user_unaligned(x, ptr)								\
-({												\
-	long __ret;										\
-	switch (sizeof(*(ptr))) {								\
-		case 1: __ret = __get_user((x), (ptr)); break;					\
-		case 2: __ret = (__get_user((x), (u8 __user *)(ptr)))				\
-			| (__get_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break;		\
-		case 4: __ret = (__get_user((x), (u16 __user *)(ptr)))				\
-			| (__get_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break;		\
-		case 8: __ret = (__get_user((x), (u32 __user *)(ptr)))				\
-			| (__get_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break;		\
-		default: __ret = __get_user_unaligned_unknown();				\
-	}											\
-	__ret;											\
-})
-
 #ifdef ASM_SUPPORTED
   struct __large_struct { unsigned long buf[100]; };
 # define __m(x) (*(struct __large_struct __user *)(x))
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index 67b3481d6020f..63ba18e4c9a26 100644
--- a/arch/m68k/include/asm/uaccess.h
+++ b/arch/m68k/include/asm/uaccess.h
@@ -3,11 +3,4 @@
 #else
 #include <asm/uaccess_mm.h>
 #endif
-
 #include <asm/extable.h>
-#ifdef CONFIG_CPU_HAS_NO_UNALIGNED
-#include <asm-generic/uaccess-unaligned.h>
-#else
-#define __get_user_unaligned(x, ptr)	__get_user((x), (ptr))
-#define __put_user_unaligned(x, ptr)	__put_user((x), (ptr))
-#endif
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 99e629a590a56..c5fc42429ce87 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -496,283 +496,6 @@ do {									\
 
 extern void __put_user_unknown(void);
 
-/*
- * ul{b,h,w} are macros and there are no equivalent macros for EVA.
- * EVA unaligned access is handled in the ADE exception handler.
- */
-#ifndef CONFIG_EVA
-/*
- * put_user_unaligned: - Write a simple value into user space.
- * @x:	 Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * This macro copies a single simple value from kernel space to user
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Returns zero on success, or -EFAULT on error.
- */
-#define put_user_unaligned(x,ptr)	\
-	__put_user_unaligned_check((x),(ptr),sizeof(*(ptr)))
-
-/*
- * get_user_unaligned: - Get a simple variable from user space.
- * @x:	 Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * This macro copies a single simple variable from user space to kernel
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-#define get_user_unaligned(x,ptr) \
-	__get_user_unaligned_check((x),(ptr),sizeof(*(ptr)))
-
-/*
- * __put_user_unaligned: - Write a simple value into user space, with less checking.
- * @x:	 Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * This macro copies a single simple value from kernel space to user
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- */
-#define __put_user_unaligned(x,ptr) \
-	__put_user_unaligned_nocheck((x),(ptr),sizeof(*(ptr)))
-
-/*
- * __get_user_unaligned: - Get a simple variable from user space, with less checking.
- * @x:	 Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * This macro copies a single simple variable from user space to kernel
- * space.  It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-#define __get_user_unaligned(x,ptr) \
-	__get_user_unaligned_nocheck((x),(ptr),sizeof(*(ptr)))
-
-/*
- * Yuck.  We need two variants, one for 64bit operation and one
- * for 32 bit mode and old iron.
- */
-#ifdef CONFIG_32BIT
-#define __GET_USER_UNALIGNED_DW(val, ptr)				\
-	__get_user_unaligned_asm_ll32(val, ptr)
-#endif
-#ifdef CONFIG_64BIT
-#define __GET_USER_UNALIGNED_DW(val, ptr)				\
-	__get_user_unaligned_asm(val, "uld", ptr)
-#endif
-
-extern void __get_user_unaligned_unknown(void);
-
-#define __get_user_unaligned_common(val, size, ptr)			\
-do {									\
-	switch (size) {							\
-	case 1: __get_data_asm(val, "lb", ptr); break;			\
-	case 2: __get_data_unaligned_asm(val, "ulh", ptr); break;	\
-	case 4: __get_data_unaligned_asm(val, "ulw", ptr); break;	\
-	case 8: __GET_USER_UNALIGNED_DW(val, ptr); break;		\
-	default: __get_user_unaligned_unknown(); break;			\
-	}								\
-} while (0)
-
-#define __get_user_unaligned_nocheck(x,ptr,size)			\
-({									\
-	int __gu_err;							\
-									\
-	__get_user_unaligned_common((x), size, ptr);			\
-	__gu_err;							\
-})
-
-#define __get_user_unaligned_check(x,ptr,size)				\
-({									\
-	int __gu_err = -EFAULT;						\
-	const __typeof__(*(ptr)) __user * __gu_ptr = (ptr);		\
-									\
-	if (likely(access_ok(VERIFY_READ,  __gu_ptr, size)))		\
-		__get_user_unaligned_common((x), size, __gu_ptr);	\
-									\
-	__gu_err;							\
-})
-
-#define __get_data_unaligned_asm(val, insn, addr)			\
-{									\
-	long __gu_tmp;							\
-									\
-	__asm__ __volatile__(						\
-	"1:	" insn "	%1, %3				\n"	\
-	"2:							\n"	\
-	"	.insn						\n"	\
-	"	.section .fixup,\"ax\"				\n"	\
-	"3:	li	%0, %4					\n"	\
-	"	move	%1, $0					\n"	\
-	"	j	2b					\n"	\
-	"	.previous					\n"	\
-	"	.section __ex_table,\"a\"			\n"	\
-	"	"__UA_ADDR "\t1b, 3b				\n"	\
-	"	"__UA_ADDR "\t1b + 4, 3b			\n"	\
-	"	.previous					\n"	\
-	: "=r" (__gu_err), "=r" (__gu_tmp)				\
-	: "0" (0), "o" (__m(addr)), "i" (-EFAULT));			\
-									\
-	(val) = (__typeof__(*(addr))) __gu_tmp;				\
-}
-
-/*
- * Get a long long 64 using 32 bit registers.
- */
-#define __get_user_unaligned_asm_ll32(val, addr)			\
-{									\
-	unsigned long long __gu_tmp;					\
-									\
-	__asm__ __volatile__(						\
-	"1:	ulw	%1, (%3)				\n"	\
-	"2:	ulw	%D1, 4(%3)				\n"	\
-	"	move	%0, $0					\n"	\
-	"3:							\n"	\
-	"	.insn						\n"	\
-	"	.section	.fixup,\"ax\"			\n"	\
-	"4:	li	%0, %4					\n"	\
-	"	move	%1, $0					\n"	\
-	"	move	%D1, $0					\n"	\
-	"	j	3b					\n"	\
-	"	.previous					\n"	\
-	"	.section	__ex_table,\"a\"		\n"	\
-	"	" __UA_ADDR "	1b, 4b				\n"	\
-	"	" __UA_ADDR "	1b + 4, 4b			\n"	\
-	"	" __UA_ADDR "	2b, 4b				\n"	\
-	"	" __UA_ADDR "	2b + 4, 4b			\n"	\
-	"	.previous					\n"	\
-	: "=r" (__gu_err), "=&r" (__gu_tmp)				\
-	: "0" (0), "r" (addr), "i" (-EFAULT));				\
-	(val) = (__typeof__(*(addr))) __gu_tmp;				\
-}
-
-/*
- * Yuck.  We need two variants, one for 64bit operation and one
- * for 32 bit mode and old iron.
- */
-#ifdef CONFIG_32BIT
-#define __PUT_USER_UNALIGNED_DW(ptr) __put_user_unaligned_asm_ll32(ptr)
-#endif
-#ifdef CONFIG_64BIT
-#define __PUT_USER_UNALIGNED_DW(ptr) __put_user_unaligned_asm("usd", ptr)
-#endif
-
-#define __put_user_unaligned_common(ptr, size)				\
-do {									\
-	switch (size) {							\
-	case 1: __put_data_asm("sb", ptr); break;			\
-	case 2: __put_user_unaligned_asm("ush", ptr); break;		\
-	case 4: __put_user_unaligned_asm("usw", ptr); break;		\
-	case 8: __PUT_USER_UNALIGNED_DW(ptr); break;			\
-	default: __put_user_unaligned_unknown(); break;			\
-} while (0)
-
-#define __put_user_unaligned_nocheck(x,ptr,size)			\
-({									\
-	__typeof__(*(ptr)) __pu_val;					\
-	int __pu_err = 0;						\
-									\
-	__pu_val = (x);							\
-	__put_user_unaligned_common(ptr, size);				\
-	__pu_err;							\
-})
-
-#define __put_user_unaligned_check(x,ptr,size)				\
-({									\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	__typeof__(*(ptr)) __pu_val = (x);				\
-	int __pu_err = -EFAULT;						\
-									\
-	if (likely(access_ok(VERIFY_WRITE,  __pu_addr, size)))		\
-		__put_user_unaligned_common(__pu_addr, size);		\
-									\
-	__pu_err;							\
-})
-
-#define __put_user_unaligned_asm(insn, ptr)				\
-{									\
-	__asm__ __volatile__(						\
-	"1:	" insn "	%z2, %3		# __put_user_unaligned_asm\n" \
-	"2:							\n"	\
-	"	.insn						\n"	\
-	"	.section	.fixup,\"ax\"			\n"	\
-	"3:	li	%0, %4					\n"	\
-	"	j	2b					\n"	\
-	"	.previous					\n"	\
-	"	.section	__ex_table,\"a\"		\n"	\
-	"	" __UA_ADDR "	1b, 3b				\n"	\
-	"	.previous					\n"	\
-	: "=r" (__pu_err)						\
-	: "0" (0), "Jr" (__pu_val), "o" (__m(ptr)),			\
-	  "i" (-EFAULT));						\
-}
-
-#define __put_user_unaligned_asm_ll32(ptr)				\
-{									\
-	__asm__ __volatile__(						\
-	"1:	sw	%2, (%3)	# __put_user_unaligned_asm_ll32 \n" \
-	"2:	sw	%D2, 4(%3)				\n"	\
-	"3:							\n"	\
-	"	.insn						\n"	\
-	"	.section	.fixup,\"ax\"			\n"	\
-	"4:	li	%0, %4					\n"	\
-	"	j	3b					\n"	\
-	"	.previous					\n"	\
-	"	.section	__ex_table,\"a\"		\n"	\
-	"	" __UA_ADDR "	1b, 4b				\n"	\
-	"	" __UA_ADDR "	1b + 4, 4b			\n"	\
-	"	" __UA_ADDR "	2b, 4b				\n"	\
-	"	" __UA_ADDR "	2b + 4, 4b			\n"	\
-	"	.previous"						\
-	: "=r" (__pu_err)						\
-	: "0" (0), "r" (__pu_val), "r" (ptr),				\
-	  "i" (-EFAULT));						\
-}
-
-extern void __put_user_unaligned_unknown(void);
-#endif
-
 /*
  * We're generating jump to subroutines which will be outside the range of
  * jump instructions
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 6b113f39f30c9..d24091e1907a7 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -6,7 +6,6 @@
  */
 #include <asm/page.h>
 #include <asm/cache.h>
-#include <asm-generic/uaccess-unaligned.h>
 
 #include <linux/bug.h>
 #include <linux/string.h>
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 5c0d8a8cdae5b..9e7b08bbde5b2 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -90,9 +90,6 @@
 #define __put_user_inatomic(x, ptr) \
 	__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
-#define __get_user_unaligned __get_user
-#define __put_user_unaligned __put_user
-
 extern long __put_user_bad(void);
 
 /*
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 78f3f093d1437..fad9df965ec25 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -249,9 +249,6 @@ int __put_user_bad(void) __attribute__((noreturn));
 
 int __get_user_bad(void) __attribute__((noreturn));
 
-#define __put_user_unaligned __put_user
-#define __get_user_unaligned __get_user
-
 unsigned long __must_check
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n);
 
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 6096d671aa63a..b5f976ee75105 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -9,7 +9,6 @@
 #include <linux/string.h>
 #include <asm/asi.h>
 #include <asm/spitfire.h>
-#include <asm-generic/uaccess-unaligned.h>
 #include <asm/extable_64.h>
 
 #include <asm/processor.h>
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index a803f6bb4d929..7f777411d1d6f 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -19,7 +19,6 @@
  * User space memory access functions
  */
 #include <linux/mm.h>
-#include <asm-generic/uaccess-unaligned.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 68766b276d9ee..fd91722315c82 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -535,9 +535,6 @@ struct __large_struct { unsigned long buf[100]; };
 #define __put_user(x, ptr)						\
 	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
-#define __get_user_unaligned __get_user
-#define __put_user_unaligned __put_user
-
 /*
  * {get|put}_user_try and catch
  *
diff --git a/include/asm-generic/uaccess-unaligned.h b/include/asm-generic/uaccess-unaligned.h
deleted file mode 100644
index 67deb898f0c5d..0000000000000
--- a/include/asm-generic/uaccess-unaligned.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef __ASM_GENERIC_UACCESS_UNALIGNED_H
-#define __ASM_GENERIC_UACCESS_UNALIGNED_H
-
-/*
- * This macro should be used instead of __get_user() when accessing
- * values at locations that are not known to be aligned.
- */
-#define __get_user_unaligned(x, ptr)					\
-({									\
-	__typeof__ (*(ptr)) __x;					\
-	__copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0;	\
-	(x) = __x;							\
-})
-
-
-/*
- * This macro should be used instead of __put_user() when accessing
- * values at locations that are not known to be aligned.
- */
-#define __put_user_unaligned(x, ptr)					\
-({									\
-	__typeof__ (*(ptr)) __x = (x);					\
-	__copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0;	\
-})
-
-#endif /* __ASM_GENERIC_UACCESS_UNALIGNED_H */
-- 
GitLab


From 431b17f9d5453533cba7d73e7e40428e4f90b35d Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Mon, 3 Jul 2017 10:00:10 +0200
Subject: [PATCH 0598/1958] block, bfq: don't change ioprio class for a
 bfq_queue on a service tree

On each deactivation or re-scheduling (after being served) of a
bfq_queue, BFQ invokes the function __bfq_entity_update_weight_prio(),
to perform pending updates of ioprio, weight and ioprio class for the
bfq_queue. BFQ also invokes this function on I/O-request dispatches,
to raise or lower weights more quickly when needed, thereby improving
latency. However, the entity representing the bfq_queue may be on the
active (sub)tree of a service tree when this happens, and, although
with a very low probability, the bfq_queue may happen to also have a
pending change of its ioprio class. If both conditions hold when
__bfq_entity_update_weight_prio() is invoked, then the entity moves to
a sort of hybrid state: the new service tree for the entity, as
returned by bfq_entity_service_tree(), differs from service tree on
which the entity still is. The functions that handle activations and
deactivations of entities do not cope with such a hybrid state (and
would need to become more complex to cope).

This commit addresses this issue by just making
__bfq_entity_update_weight_prio() not perform also a possible pending
change of ioprio class, when invoked on an I/O-request dispatch for a
bfq_queue. Such a change is thus postponed to when
__bfq_entity_update_weight_prio() is invoked on deactivation or
re-scheduling of the bfq_queue.

Reported-by: Marco Piazza <mpiazza@gmail.com>
Reported-by: Laurentiu Nicola <lnicola@dend.ro>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Tested-by: Marco Piazza <mpiazza@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 14 ++++++++++----
 block/bfq-iosched.h |  3 ++-
 block/bfq-wf2q.c    | 39 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 12bbc6b8657dd..60a6835265fc3 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3483,11 +3483,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 			}
 		}
 	}
-	/* Update weight both if it must be raised and if it must be lowered */
+	/*
+	 * To improve latency (for this or other queues), immediately
+	 * update weight both if it must be raised and if it must be
+	 * lowered. Since, entity may be on some active tree here, and
+	 * might have a pending change of its ioprio class, invoke
+	 * next function with the last parameter unset (see the
+	 * comments on the function).
+	 */
 	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
-		__bfq_entity_update_weight_prio(
-			bfq_entity_service_tree(entity),
-			entity);
+		__bfq_entity_update_weight_prio(bfq_entity_service_tree(entity),
+						entity, false);
 }
 
 /*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 5c3bf98614921..8fd83b8857743 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -892,7 +892,8 @@ void bfq_put_idle_entity(struct bfq_service_tree *st,
 			 struct bfq_entity *entity);
 struct bfq_service_tree *
 __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-				struct bfq_entity *entity);
+				struct bfq_entity *entity,
+				bool update_class_too);
 void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
 void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			  unsigned long time_ms);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 8726ede19eef2..5ec05cd42b807 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -694,10 +694,28 @@ struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
 	return sched_data->service_tree + idx;
 }
 
-
+/*
+ * Update weight and priority of entity. If update_class_too is true,
+ * then update the ioprio_class of entity too.
+ *
+ * The reason why the update of ioprio_class is controlled through the
+ * last parameter is as follows. Changing the ioprio class of an
+ * entity implies changing the destination service trees for that
+ * entity. If such a change occurred when the entity is already on one
+ * of the service trees for its previous class, then the state of the
+ * entity would become more complex: none of the new possible service
+ * trees for the entity, according to bfq_entity_service_tree(), would
+ * match any of the possible service trees on which the entity
+ * is. Complex operations involving these trees, such as entity
+ * activations and deactivations, should take into account this
+ * additional complexity.  To avoid this issue, this function is
+ * invoked with update_class_too unset in the points in the code where
+ * entity may happen to be on some tree.
+ */
 struct bfq_service_tree *
 __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-				struct bfq_entity *entity)
+				struct bfq_entity *entity,
+				bool update_class_too)
 {
 	struct bfq_service_tree *new_st = old_st;
 
@@ -739,9 +757,15 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 				  bfq_weight_to_ioprio(entity->orig_weight);
 		}
 
-		if (bfqq)
+		if (bfqq && update_class_too)
 			bfqq->ioprio_class = bfqq->new_ioprio_class;
-		entity->prio_changed = 0;
+
+		/*
+		 * Reset prio_changed only if the ioprio_class change
+		 * is not pending any longer.
+		 */
+		if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
+			entity->prio_changed = 0;
 
 		/*
 		 * NOTE: here we may be changing the weight too early,
@@ -867,7 +891,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
 {
 	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
 
-	st = __bfq_entity_update_weight_prio(st, entity);
+	/*
+	 * When this function is invoked, entity is not in any service
+	 * tree, then it is safe to invoke next function with the last
+	 * parameter set (see the comments on the function).
+	 */
+	st = __bfq_entity_update_weight_prio(st, entity, true);
 	bfq_calc_finish(entity, entity->budget);
 
 	/*
-- 
GitLab


From 32825c45ff8f4cce937ab85b030dc693ceb1aa0a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 3 Jul 2017 20:37:14 +0800
Subject: [PATCH 0599/1958] blk-mq-sched: fix performance regression of
 mq-deadline

When mq-deadline is taken, IOPS of sequential read and
seqential write is observed more than 20% drop on sata(scsi-mq)
devices, compared with using 'none' scheduler.

The reason is that the default nr_requests for scheduler is
too big for small queuedepth devices, and latency is increased
much.

Since the principle of taking 256 requests for mq scheduler
is based on 128 queue depth, this patch changes into
double size of min(hw queue_depth, 128).

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 7f0dc48ffb408..4ab69435708c2 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -515,10 +515,12 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	}
 
 	/*
-	 * Default to 256, since we don't split into sync/async like the
-	 * old code did. Additionally, this is a per-hw queue depth.
+	 * Default to double of smaller one between hw queue_depth and 128,
+	 * since we don't split into sync/async like the old code did.
+	 * Additionally, this is a per-hw queue depth.
 	 */
-	q->nr_requests = 2 * BLKDEV_MAX_RQ;
+	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
+				   BLKDEV_MAX_RQ);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_sched_alloc_tags(q, hctx, i);
-- 
GitLab


From 376a78abf5cc721a86ed42a1a24044d35fb8d2a8 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:08 -0700
Subject: [PATCH 0600/1958] bio-integrity: bio_trim should truncate integrity
 vector accordingly

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index 1cfcd0df3f30f..5b4b32a2f8d0e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1900,6 +1900,10 @@ void bio_trim(struct bio *bio, int offset, int size)
 	bio_advance(bio, offset << 9);
 
 	bio->bi_iter.bi_size = size;
+
+	if (bio_integrity(bio))
+		bio_integrity_trim(bio, 0, size);
+
 }
 EXPORT_SYMBOL_GPL(bio_trim);
 
-- 
GitLab


From 309a62fa3a9e78cb37a620913151cbb47d83b81d Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:09 -0700
Subject: [PATCH 0601/1958] bio-integrity: bio_integrity_advance must update
 integrity seed

SCSI drivers do care about bip_seed so we must update it accordingly.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index b8a3a65f73641..8c2253c59edbc 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -425,6 +425,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
 
+	bip->bip_iter.bi_sector += bytes_done >> 9;
 	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
 }
 EXPORT_SYMBOL(bio_integrity_advance);
-- 
GitLab


From fbd08e7673f950854679e5d79a30bb25e77a9d08 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:10 -0700
Subject: [PATCH 0602/1958] bio-integrity: fix interface for bio_integrity_trim

bio_integrity_trim inherent it's interface from bio_trim and accept
offset and size, but this API is error prone because data offset
must always be insync with bio's data offset. That is why we have
integrity update hook in bio_advance()

So only meaningful values are: offset == 0, sectors == bio_sectors(bio)
Let's just remove them completely.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 11 ++---------
 block/bio.c           |  4 ++--
 drivers/md/dm.c       |  2 +-
 include/linux/bio.h   |  5 ++---
 4 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8c2253c59edbc..3a0d71199fb07 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -433,22 +433,15 @@ EXPORT_SYMBOL(bio_integrity_advance);
 /**
  * bio_integrity_trim - Trim integrity vector
  * @bio:	bio whose integrity vector to update
- * @offset:	offset to first data sector
- * @sectors:	number of data sectors
  *
  * Description: Used to trim the integrity vector in a cloned bio.
- * The ivec will be advanced corresponding to 'offset' data sectors
- * and the length will be truncated corresponding to 'len' data
- * sectors.
  */
-void bio_integrity_trim(struct bio *bio, unsigned int offset,
-			unsigned int sectors)
+void bio_integrity_trim(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	bio_integrity_advance(bio, offset << 9);
-	bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
 }
 EXPORT_SYMBOL(bio_integrity_trim);
 
diff --git a/block/bio.c b/block/bio.c
index 5b4b32a2f8d0e..a6b225324a61b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1868,7 +1868,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	split->bi_iter.bi_size = sectors << 9;
 
 	if (bio_integrity(split))
-		bio_integrity_trim(split, 0, sectors);
+		bio_integrity_trim(split);
 
 	bio_advance(bio, split->bi_iter.bi_size);
 
@@ -1902,7 +1902,7 @@ void bio_trim(struct bio *bio, int offset, int size)
 	bio->bi_iter.bi_size = size;
 
 	if (bio_integrity(bio))
-		bio_integrity_trim(bio, 0, size);
+		bio_integrity_trim(bio);
 
 }
 EXPORT_SYMBOL_GPL(bio_trim);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4029460353080..13e714ea7a421 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1153,7 +1153,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
 	clone->bi_iter.bi_size = to_bytes(len);
 
 	if (unlikely(bio_integrity(bio) != NULL))
-		bio_integrity_trim(clone, 0, len);
+		bio_integrity_trim(clone);
 
 	return 0;
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 664a27da276d6..1d74f5120369d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -728,7 +728,7 @@ extern bool bio_integrity_enabled(struct bio *bio);
 extern int bio_integrity_prep(struct bio *);
 extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
+extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 extern int bioset_integrity_create(struct bio_set *, int);
 extern void bioset_integrity_free(struct bio_set *);
@@ -778,8 +778,7 @@ static inline void bio_integrity_advance(struct bio *bio,
 	return;
 }
 
-static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
-				      unsigned int sectors)
+static inline void bio_integrity_trim(struct bio *bio)
 {
 	return;
 }
-- 
GitLab


From e23947bd76f00701f9407af23e671f4da96f5f25 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:11 -0700
Subject: [PATCH 0603/1958] bio-integrity: fold bio_integrity_enabled to
 bio_integrity_prep

Currently all integrity prep hooks are open-coded, and if prepare fails
we ignore it's code and fail bio with EIO. Let's return real error to
upper layer, so later caller may react accordingly.

In fact no one want to use bio_integrity_prep() w/o bio_integrity_enabled,
so it is reasonable to fold it in to one function.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
[hch: merged with the latest block tree,
	return bool from bio_integrity_prep]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/data-integrity.txt |  6 +-
 block/bio-integrity.c                  | 88 ++++++++++++--------------
 block/blk-core.c                       |  5 +-
 block/blk-mq.c                         |  4 +-
 drivers/nvdimm/blk.c                   | 13 +---
 drivers/nvdimm/btt.c                   | 13 +---
 include/linux/bio.h                    | 12 +---
 7 files changed, 50 insertions(+), 91 deletions(-)

diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index f56ec97f0d149..934c44ea0c57f 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt
@@ -192,7 +192,7 @@ will require extra work due to the application tag.
     supported by the block device.
 
 
-    int bio_integrity_prep(bio);
+    bool bio_integrity_prep(bio);
 
       To generate IMD for WRITE and to set up buffers for READ, the
       filesystem must call bio_integrity_prep(bio).
@@ -201,9 +201,7 @@ will require extra work due to the application tag.
       sector must be set, and the bio should have all data pages
       added.  It is up to the caller to ensure that the bio does not
       change while I/O is in progress.
-
-      bio_integrity_prep() should only be called if
-      bio_integrity_enabled() returned 1.
+      Complete bio with error if prepare failed for some reson.
 
 
 5.3 PASSING EXISTING INTEGRITY METADATA
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 3a0d71199fb07..44c4c52681c20 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -159,44 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
-/**
- * bio_integrity_enabled - Check whether integrity can be passed
- * @bio:	bio to check
- *
- * Description: Determines whether bio_integrity_prep() can be called
- * on this bio or not.	bio data direction and target device must be
- * set prior to calling.  The functions honors the write_generate and
- * read_verify flags in sysfs.
- */
-bool bio_integrity_enabled(struct bio *bio)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
-	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
-		return false;
-
-	if (!bio_sectors(bio))
-		return false;
-
-	/* Already protected? */
-	if (bio_integrity(bio))
-		return false;
-
-	if (bi == NULL)
-		return false;
-
-	if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_VERIFY))
-		return true;
-
-	if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_GENERATE))
-		return true;
-
-	return false;
-}
-EXPORT_SYMBOL(bio_integrity_enabled);
-
 /**
  * bio_integrity_intervals - Return number of integrity intervals for a bio
  * @bi:		blk_integrity profile for device
@@ -262,14 +224,15 @@ static blk_status_t bio_integrity_process(struct bio *bio,
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
  *
- * Description: Allocates a buffer for integrity metadata, maps the
- * pages and attaches them to a bio.  The bio must have data
- * direction, target device and start sector set priot to calling.  In
- * the WRITE case, integrity metadata will be generated using the
- * block device's integrity function.  In the READ case, the buffer
+ * Description:  Checks if the bio already has an integrity payload attached.
+ * If it does, the payload has been generated by another kernel subsystem,
+ * and we just pass it through. Otherwise allocates integrity payload.
+ * The bio must have data direction, target device and start sector set priot
+ * to calling.  In the WRITE case, integrity metadata will be generated using
+ * the block device's integrity function.  In the READ case, the buffer
  * will be prepared for DMA and a suitable end_io handler set up.
  */
-int bio_integrity_prep(struct bio *bio)
+bool bio_integrity_prep(struct bio *bio)
 {
 	struct bio_integrity_payload *bip;
 	struct blk_integrity *bi;
@@ -279,20 +242,41 @@ int bio_integrity_prep(struct bio *bio)
 	unsigned int len, nr_pages;
 	unsigned int bytes, offset, i;
 	unsigned int intervals;
+	blk_status_t status;
 
 	bi = bdev_get_integrity(bio->bi_bdev);
 	q = bdev_get_queue(bio->bi_bdev);
-	BUG_ON(bi == NULL);
-	BUG_ON(bio_integrity(bio));
+	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
+		return true;
+
+	if (!bio_sectors(bio))
+		return true;
 
+	/* Already protected? */
+	if (bio_integrity(bio))
+		return true;
+
+	if (bi == NULL)
+		return true;
+
+	if (bio_data_dir(bio) == READ) {
+		if (!bi->profile->verify_fn ||
+		    !(bi->flags & BLK_INTEGRITY_VERIFY))
+			return true;
+	} else {
+		if (!bi->profile->generate_fn ||
+		    !(bi->flags & BLK_INTEGRITY_GENERATE))
+			return true;
+	}
 	intervals = bio_integrity_intervals(bi, bio_sectors(bio));
 
 	/* Allocate kernel buffer for protection data */
 	len = intervals * bi->tuple_size;
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
+	status = BLK_STS_RESOURCE;
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
-		return -ENOMEM;
+		goto err_end_io;
 	}
 
 	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -304,7 +288,8 @@ int bio_integrity_prep(struct bio *bio)
 	if (IS_ERR(bip)) {
 		printk(KERN_ERR "could not allocate data integrity bioset\n");
 		kfree(buf);
-		return PTR_ERR(bip);
+		status = BLK_STS_RESOURCE;
+		goto err_end_io;
 	}
 
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
@@ -349,8 +334,13 @@ int bio_integrity_prep(struct bio *bio)
 	/* Auto-generate integrity metadata if this is a write */
 	if (bio_data_dir(bio) == WRITE)
 		bio_integrity_process(bio, bi->profile->generate_fn);
+	return true;
+
+err_end_io:
+	bio->bi_status = status;
+	bio_endio(bio);
+	return false;
 
-	return 0;
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index af393d5a96807..970b9c9638c55 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1787,11 +1787,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		bio_endio(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (op_is_flush(bio->bi_opf)) {
 		spin_lock_irq(q->queue_lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ced2b000ca028..77617fb126612 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1550,10 +1550,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio_io_error(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index f12d23c49771c..1a578b2a437b2 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -179,16 +179,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0, rw;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	bip = bio_integrity(bio);
 	nsblk = q->queuedata;
@@ -212,7 +204,6 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
- out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b6ba0618ea467..b5caaee78bbfa 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1203,16 +1203,8 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
@@ -1239,7 +1231,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
-out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1d74f5120369d..b3b5f5a89a9c6 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -724,8 +724,7 @@ struct biovec_slab {
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
 extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_prep(struct bio *);
+extern bool bio_integrity_prep(struct bio *);
 extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *);
@@ -741,11 +740,6 @@ static inline void *bio_integrity(struct bio *bio)
 	return NULL;
 }
 
-static inline bool bio_integrity_enabled(struct bio *bio)
-{
-	return false;
-}
-
 static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
 	return 0;
@@ -756,9 +750,9 @@ static inline void bioset_integrity_free (struct bio_set *bs)
 	return;
 }
 
-static inline int bio_integrity_prep(struct bio *bio)
+static inline bool bio_integrity_prep(struct bio *bio)
 {
-	return 0;
+	return true;
 }
 
 static inline void bio_integrity_free(struct bio *bio)
-- 
GitLab


From 128b6f9fdd9ace9e56cb3a263b4bc269658f9c40 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:12 -0700
Subject: [PATCH 0604/1958] t10-pi: Move opencoded contants to common header

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/t10-pi.c                   | 9 +++------
 drivers/scsi/lpfc/lpfc_scsi.c    | 5 +++--
 drivers/scsi/qla2xxx/qla_isr.c   | 8 ++++----
 drivers/target/target_core_sbc.c | 2 +-
 include/linux/t10-pi.h           | 2 ++
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/t10-pi.c b/block/t10-pi.c
index 3416dadf7b15b..a98db384048fa 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -28,9 +28,6 @@
 
 typedef __be16 (csum_fn) (void *, unsigned int);
 
-static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
-static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
-
 static __be16 t10_pi_crc_fn(void *data, unsigned int len)
 {
 	return cpu_to_be16(crc_t10dif(data, len));
@@ -82,7 +79,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 		switch (type) {
 		case 1:
 		case 2:
-			if (pi->app_tag == APP_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE)
 				goto next;
 
 			if (be32_to_cpu(pi->ref_tag) !=
@@ -95,8 +92,8 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 			}
 			break;
 		case 3:
-			if (pi->app_tag == APP_ESCAPE &&
-			    pi->ref_tag == REF_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE &&
+			    pi->ref_tag == T10_PI_REF_ESCAPE)
 				goto next;
 			break;
 		}
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 54fd0c81ceaf6..99d2e990b231b 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <asm/unaligned.h>
+#include <linux/t10-pi.h>
 #include <linux/crc-t10dif.h>
 #include <net/checksum.h>
 
@@ -2934,8 +2935,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 				 * First check to see if a protection data
 				 * check is valid
 				 */
-				if ((src->ref_tag == 0xffffffff) ||
-				    (src->app_tag == 0xffff)) {
+				if ((src->ref_tag == T10_PI_REF_ESCAPE) ||
+				    (src->app_tag == T10_PI_APP_ESCAPE)) {
 					start_ref_tag++;
 					goto skipit;
 				}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 2572121b765b4..de031aed94f65 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1950,9 +1950,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 	 * For type     3: ref & app tag is all 'f's
 	 * For type 0,1,2: app tag is all 'f's
 	 */
-	if ((a_app_tag == 0xffff) &&
+	if ((a_app_tag == T10_PI_APP_ESCAPE) &&
 	    ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
-	     (a_ref_tag == 0xffffffff))) {
+	     (a_ref_tag == T10_PI_REF_ESCAPE))) {
 		uint32_t blocks_done, resid;
 		sector_t lba_s = scsi_get_lba(cmd);
 
@@ -1994,9 +1994,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 			spt = page_address(sg_page(sg)) + sg->offset;
 			spt += j;
 
-			spt->app_tag = 0xffff;
+			spt->app_tag = T10_PI_APP_ESCAPE;
 			if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
-				spt->ref_tag = 0xffffffff;
+				spt->ref_tag = T10_PI_REF_ESCAPE;
 		}
 
 		return 0;
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4316f7b65fb76..dc9456e7dac98 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1450,7 +1450,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
 				 (unsigned long long)sector, sdt->guard_tag,
 				 sdt->app_tag, be32_to_cpu(sdt->ref_tag));
 
-			if (sdt->app_tag == cpu_to_be16(0xffff)) {
+			if (sdt->app_tag == T10_PI_APP_ESCAPE) {
 				dsg_off += block_size;
 				goto next;
 			}
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 9375d23a24e7a..635a3c5706bde 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -33,6 +33,8 @@ struct t10_pi_tuple {
 	__be32 ref_tag;		/* Target LBA or indirect LBA */
 };
 
+#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
+#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
-- 
GitLab


From b1fb2c52b2d85f51f36f1661409f9aeef94265ff Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:13 -0700
Subject: [PATCH 0605/1958] block: guard bvec iteration logic

Currently if some one try to advance bvec beyond it's size we simply
dump WARN_ONCE and continue to iterate beyond bvec array boundaries.
This simply means that we endup dereferencing/corrupting random memory
region.

Sane reaction would be to propagate error back to calling context
But bvec_iter_advance's calling context is not always good for error
handling. For safity reason let truncate iterator size to zero which
will break external iteration loop which prevent us from unpredictable
memory range corruption. And even it caller ignores an error, it will
corrupt it's own bvecs, not others.

This patch does:
- Return error back to caller with hope that it will react on this
- Truncate iterator size

Code was added long time ago here 4550dd6c, luckily no one hit it
in real life :)

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
[hch: switch to true/false returns instead of errno values]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/blk.c |  3 ++-
 drivers/nvdimm/btt.c |  3 ++-
 include/linux/bio.h  |  4 +++-
 include/linux/bvec.h | 14 +++++++++-----
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 1a578b2a437b2..345acca576b3c 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -106,7 +106,8 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
 
 		len -= cur_len;
 		dev_offset += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return err;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index b5caaee78bbfa..d00c10f382f0f 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -985,7 +985,8 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 
 		len -= cur_len;
 		meta_nsoff += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return ret;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b3b5f5a89a9c6..d5e8689f86b8c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -167,8 +167,10 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
-	else
+	else {
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		/* TODO: It is reasonable to complete bio with error here. */
+	}
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 89b65b82d98f5..de317b4c13c10 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -22,6 +22,7 @@
 
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/errno.h>
 
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -66,12 +67,14 @@ struct bvec_iter {
 	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
 })
 
-static inline void bvec_iter_advance(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned bytes)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
 {
-	WARN_ONCE(bytes > iter->bi_size,
-		  "Attempted to advance past end of bvec iter\n");
+	if (WARN_ONCE(bytes > iter->bi_size,
+		     "Attempted to advance past end of bvec iter\n")) {
+		iter->bi_size = 0;
+		return false;
+	}
 
 	while (bytes) {
 		unsigned iter_len = bvec_iter_len(bv, *iter);
@@ -86,6 +89,7 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
 			iter->bi_idx++;
 		}
 	}
+	return true;
 }
 
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
-- 
GitLab


From f9df1cd99ebd82f05e8f5e0aa7e38cb8d3c791d7 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:14 -0700
Subject: [PATCH 0606/1958] bio: add bvec_iter rewind API

Some ->bi_end_io handlers (for example: pi_verify or decrypt handlers)
need to know original data vector, but after bio traverse io-stack it may
be advanced, splited and relocated many times so it is hard to guess
original iterator. Let's add 'bi_done' conter which accounts number
of bytes iterator was advanced during it's evolution. Later end_io handler
may easily restore original iterator by rewinding iterator to
iter->bi_done.

Note: this change makes sizeof (struct bvec_iter) multiple to 8

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
[hch: switched to true/false return]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h  | 19 +++++++++++++++++--
 include/linux/bvec.h | 27 +++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/include/linux/bio.h b/include/linux/bio.h
index d5e8689f86b8c..1eba19580185e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -165,14 +165,29 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 {
 	iter->bi_sector += bytes >> 9;
 
-	if (bio_no_advance_iter(bio))
+	if (bio_no_advance_iter(bio)) {
 		iter->bi_size -= bytes;
-	else {
+		iter->bi_done += bytes;
+	} else {
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
 	}
 }
 
+static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
+		unsigned int bytes)
+{
+	iter->bi_sector -= bytes >> 9;
+
+	if (bio_no_advance_iter(bio)) {
+		iter->bi_size += bytes;
+		iter->bi_done -= bytes;
+		return true;
+	}
+
+	return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
+}
+
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index de317b4c13c10..ec8a4d7af6bda 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -40,6 +40,8 @@ struct bvec_iter {
 
 	unsigned int		bi_idx;		/* current index into bvl_vec */
 
+	unsigned int            bi_done;	/* number of bytes completed */
+
 	unsigned int            bi_bvec_done;	/* number of bytes completed in
 						   current bvec */
 };
@@ -83,6 +85,7 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
+		iter->bi_done += len;
 
 		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
 			iter->bi_bvec_done = 0;
@@ -92,6 +95,30 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	return true;
 }
 
+static inline bool bvec_iter_rewind(const struct bio_vec *bv,
+				     struct bvec_iter *iter,
+				     unsigned int bytes)
+{
+	while (bytes) {
+		unsigned len = min(bytes, iter->bi_bvec_done);
+
+		if (iter->bi_bvec_done == 0) {
+			if (WARN_ONCE(iter->bi_idx == 0,
+				      "Attempted to rewind iter beyond "
+				      "bvec's boundaries\n")) {
+				return false;
+			}
+			iter->bi_idx--;
+			iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len;
+			continue;
+		}
+		bytes -= len;
+		iter->bi_size += len;
+		iter->bi_bvec_done -= len;
+	}
+	return true;
+}
+
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
-- 
GitLab


From 63573e359d052e506d305c263576499f06355985 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 29 Jun 2017 11:31:15 -0700
Subject: [PATCH 0607/1958] bio-integrity: Restore original iterator on verify
 stage

Currently ->verify_fn not woks at all because at the moment it is called
bio->bi_iter.bi_size == 0, so we do not iterate integrity bvecs at all.

In order to perform verification we need to know original data vector,
with new bvec rewind API this is trivial.

testcase: https://github.com/dmonakhov/xfstests/commit/3c6509eaa83b9c17cd0bc95d73fcdd76e1c54a85

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
[hch: adopted for new status values]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 44c4c52681c20..8df4eb103ba9e 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -184,10 +184,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
 /**
  * bio_integrity_process - Process integrity metadata for a bio
  * @bio:	bio to generate/verify integrity metadata for
+ * @proc_iter:  iterator to process
  * @proc_fn:	Pointer to the relevant processing function
  */
 static blk_status_t bio_integrity_process(struct bio *bio,
-				 integrity_processing_fn *proc_fn)
+		struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_iter iter;
@@ -200,10 +201,10 @@ static blk_status_t bio_integrity_process(struct bio *bio,
 
 	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	iter.interval = 1 << bi->interval_exp;
-	iter.seed = bip_get_seed(bip);
+	iter.seed = proc_iter->bi_sector;
 	iter.prot_buf = prot_buf;
 
-	bio_for_each_segment(bv, bio, bviter) {
+	__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
 		void *kaddr = kmap_atomic(bv.bv_page);
 
 		iter.data_buf = kaddr + bv.bv_offset;
@@ -332,8 +333,10 @@ bool bio_integrity_prep(struct bio *bio)
 	}
 
 	/* Auto-generate integrity metadata if this is a write */
-	if (bio_data_dir(bio) == WRITE)
-		bio_integrity_process(bio, bi->profile->generate_fn);
+	if (bio_data_dir(bio) == WRITE) {
+		bio_integrity_process(bio, &bio->bi_iter,
+				      bi->profile->generate_fn);
+	}
 	return true;
 
 err_end_io:
@@ -358,8 +361,19 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 		container_of(work, struct bio_integrity_payload, bip_work);
 	struct bio *bio = bip->bip_bio;
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+	struct bvec_iter iter = bio->bi_iter;
 
-	bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
+	/*
+	 * At the moment verify is called bio's iterator was advanced
+	 * during split and completion, we need to rewind iterator to
+	 * it's original position.
+	 */
+	if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
+		bio->bi_status = bio_integrity_process(bio, &iter,
+						       bi->profile->verify_fn);
+	} else {
+		bio->bi_status = BLK_STS_IOERR;
+	}
 
 	/* Restore original bio completion handler */
 	bio->bi_end_io = bip->bip_end_io;
-- 
GitLab


From 7c20f11680a441df09de7235206f70115fbf6290 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 3 Jul 2017 16:58:43 -0600
Subject: [PATCH 0608/1958] bio-integrity: stop abusing bi_end_io

And instead call directly into the integrity code from bio_end_io.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 39 ++++++++++++---------------------------
 block/bio.c           |  5 ++---
 block/blk.h           | 11 +++++++++++
 include/linux/bio.h   |  9 ---------
 4 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8df4eb103ba9e..f733beab6ca2b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-void bio_integrity_free(struct bio *bio)
+static void bio_integrity_free(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_set *bs = bio->bi_pool;
@@ -120,8 +120,8 @@ void bio_integrity_free(struct bio *bio)
 	}
 
 	bio->bi_integrity = NULL;
+	bio->bi_opf &= ~REQ_INTEGRITY;
 }
-EXPORT_SYMBOL(bio_integrity_free);
 
 /**
  * bio_integrity_add_page - Attach integrity metadata
@@ -326,12 +326,6 @@ bool bio_integrity_prep(struct bio *bio)
 		offset = 0;
 	}
 
-	/* Install custom I/O completion handler if read verify is enabled */
-	if (bio_data_dir(bio) == READ) {
-		bip->bip_end_io = bio->bi_end_io;
-		bio->bi_end_io = bio_integrity_endio;
-	}
-
 	/* Auto-generate integrity metadata if this is a write */
 	if (bio_data_dir(bio) == WRITE) {
 		bio_integrity_process(bio, &bio->bi_iter,
@@ -375,13 +369,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 		bio->bi_status = BLK_STS_IOERR;
 	}
 
-	/* Restore original bio completion handler */
-	bio->bi_end_io = bip->bip_end_io;
+	bio_integrity_free(bio);
 	bio_endio(bio);
 }
 
 /**
- * bio_integrity_endio - Integrity I/O completion function
+ * __bio_integrity_endio - Integrity I/O completion function
  * @bio:	Protected bio
  * @error:	Pointer to errno
  *
@@ -392,27 +385,19 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  * in process context.	This function postpones completion
  * accordingly.
  */
-void bio_integrity_endio(struct bio *bio)
+bool __bio_integrity_endio(struct bio *bio)
 {
-	struct bio_integrity_payload *bip = bio_integrity(bio);
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
 
-	BUG_ON(bip->bip_bio != bio);
-
-	/* In case of an I/O error there is no point in verifying the
-	 * integrity metadata.  Restore original bio end_io handler
-	 * and run it.
-	 */
-	if (bio->bi_status) {
-		bio->bi_end_io = bip->bip_end_io;
-		bio_endio(bio);
-
-		return;
+		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
+		queue_work(kintegrityd_wq, &bip->bip_work);
+		return false;
 	}
 
-	INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
-	queue_work(kintegrityd_wq, &bip->bip_work);
+	bio_integrity_free(bio);
+	return true;
 }
-EXPORT_SYMBOL(bio_integrity_endio);
 
 /**
  * bio_integrity_advance - Advance integrity vector
diff --git a/block/bio.c b/block/bio.c
index a6b225324a61b..9cabf5d0be209 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -243,9 +243,6 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
 void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_task(bio);
-
-	if (bio_integrity(bio))
-		bio_integrity_free(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -1813,6 +1810,8 @@ void bio_endio(struct bio *bio)
 again:
 	if (!bio_remaining_done(bio))
 		return;
+	if (!bio_integrity_endio(bio))
+		return;
 
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
diff --git a/block/blk.h b/block/blk.h
index 01ebb8185f6bf..3a3d715bd7253 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -81,10 +81,21 @@ static inline void blk_queue_enter_live(struct request_queue *q)
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
+bool __bio_integrity_endio(struct bio *);
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+	if (bio_integrity(bio))
+		return __bio_integrity_endio(bio);
+	return true;
+}
 #else
 static inline void blk_flush_integrity(void)
 {
 }
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+	return true;
+}
 #endif
 
 void blk_timeout_work(struct work_struct *work);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1eba19580185e..7b1cf4ba09029 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -320,8 +320,6 @@ struct bio_integrity_payload {
 
 	struct bvec_iter	bip_iter;
 
-	bio_end_io_t		*bip_end_io;	/* saved I/O completion fn */
-
 	unsigned short		bip_slab;	/* slab the bip came from */
 	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
 	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
@@ -739,10 +737,8 @@ struct biovec_slab {
 		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
 
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
 extern bool bio_integrity_prep(struct bio *);
-extern void bio_integrity_endio(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
@@ -772,11 +768,6 @@ static inline bool bio_integrity_prep(struct bio *bio)
 	return true;
 }
 
-static inline void bio_integrity_free(struct bio *bio)
-{
-	return;
-}
-
 static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 				      gfp_t gfp_mask)
 {
-- 
GitLab


From 7314183d1d0c200def4d0f5a6d978d3b29d28813 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:16:49 +0300
Subject: [PATCH 0609/1958] nvme-fc: don't override opts->nr_io_queues

Its what the user passed, so its probably a better
idea to keep it intact. Also, limit the number of
I/O queues to max online cpus and the lport maximum
hw queues.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 2f990d9790379..996720043b3a0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2175,17 +2175,20 @@ static int
 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+				ctrl->lport->ops->max_hw_queues);
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
 			"set_queue_count failed: %d\n", ret);
 		return ret;
 	}
 
-	ctrl->ctrl.queue_count = opts->nr_io_queues + 1;
-	if (!opts->nr_io_queues)
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
+	if (!nr_io_queues)
 		return 0;
 
 	nvme_fc_init_io_queues(ctrl);
@@ -2245,15 +2248,19 @@ static int
 nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+				ctrl->lport->ops->max_hw_queues);
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
 			"set_queue_count failed: %d\n", ret);
 		return ret;
 	}
 
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
 	/* check for io queues existing */
 	if (ctrl->ctrl.queue_count == 1)
 		return 0;
@@ -2702,7 +2709,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->ctrl.queue_count = min_t(unsigned int,
 				opts->nr_io_queues,
 				lport->ops->max_hw_queues);
-	opts->nr_io_queues = ctrl->ctrl.queue_count;	/* so opts has valid value */
 	ctrl->ctrl.queue_count++;	/* +1 for admin queue */
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
-- 
GitLab


From 4c8b99f6b1ff086c1745fcb0511800fba5c4fb34 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:10:44 +0300
Subject: [PATCH 0610/1958] nvme-rdma: update tagset nr_hw_queues after
 reconnecting/resetting

We might have more/less queues once we reconnect/reset. For
example due to cpu going online/offline or controller constraints.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2ab0cdb4d881c..cfb22531fc161 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -741,6 +741,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
 			goto requeue;
+
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+				ctrl->ctrl.queue_count - 1);
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -1727,6 +1730,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
 			goto del_dead_ctrl;
+
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+				ctrl->ctrl.queue_count - 1);
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-- 
GitLab


From 4368c39bf6e6a2bfffb7a72d78912c68f156e8da Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:13:43 +0300
Subject: [PATCH 0611/1958] nvme-loop: update tagset nr_hw_queues after
 reconnecting/resetting

We might have more/less queues once we reconnect/reset. For
example due to cpu going online/offline

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/loop.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 568ed86256968..3d51341e62ee5 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -508,6 +508,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto out_destroy_io;
 
+	blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+			ctrl->ctrl.queue_count - 1);
+
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-- 
GitLab


From cda5fd1ac5c2c42d1b2e1847aa0438b229c5c068 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 29 Jun 2017 11:20:10 +0300
Subject: [PATCH 0612/1958] nvme-fc: update tagset nr_hw_queues after queues
 reinit

We might have more/less queues once we reconnect/reset. For
example due to cpu going online/offline or controller constraints.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 996720043b3a0..1e859ee2b5653 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2279,6 +2279,8 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_delete_hw_queues;
 
+	blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+
 	return 0;
 
 out_delete_hw_queues:
-- 
GitLab


From e5859d3a0ea07bf948f4538d47fe6547f6e115be Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 10:18:50 +0300
Subject: [PATCH 0613/1958] nvme-fc: use blk_mq_delay_run_hw_queue instead of
 open-coding it

Cc: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 1e859ee2b5653..50cc3b2b0e118 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1966,10 +1966,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		if (ret != -EBUSY)
 			return BLK_STS_IOERR;
 
-		if (op->rq) {
-			blk_mq_stop_hw_queues(op->rq->q);
-			blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
-		}
+		if (op->rq)
+			blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
+
 		return BLK_STS_RESOURCE;
 	}
 
-- 
GitLab


From acfd2810c75b0625897fc119a2d3a9c26cc0e405 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 26 May 2017 17:04:40 +0900
Subject: [PATCH 0614/1958] f2fs: Do not issue small discards in LFS mode

clear_prefree_segments() issues small discards after discarding full
segments. These small discards may not be section aligned, so not zone
aligned on a zoned block device, causing __f2fs_iissue_discard_zone() to fail.
Fix this by not issuing small discards for a volume mounted with the BLKZONED
feature enabled.

Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a8f4c81467141..de6738b82745e 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1337,7 +1337,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 					sbi->blocks_per_seg, cur_pos);
 			len = next_pos - cur_pos;
 
-			if (force && len < cpc->trim_minlen)
+			if (f2fs_sb_mounted_blkzoned(sbi->sb) ||
+			    (force && len < cpc->trim_minlen))
 				goto skip;
 
 			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
-- 
GitLab


From d8c4256c17dac3e9ec0c441b81292d5d2044c89f Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 1 Jun 2017 15:39:27 -0700
Subject: [PATCH 0615/1958] f2fs: remove false-positive bug_on

For example,

f2fs_create
 - new_node_page is failed
 - handle_failed_inode
  - skip to add it into orphan list, since ni.blk_addr == NULL_ADDR
   : set_inode_flag(inode, FI_FREE_NID)

f2fs_evict_inode
 - EIO due to fault injection
 - f2fs_bug_on() is triggered

So, we don't need to call f2fs_bug_on in this case.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index e53c784ab11e3..868d71436ebc2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -425,9 +425,10 @@ void f2fs_evict_inode(struct inode *inode)
 	if (is_inode_flag_set(inode, FI_FREE_NID)) {
 		alloc_nid_failed(sbi, inode->i_ino);
 		clear_inode_flag(inode, FI_FREE_NID);
+	} else {
+		f2fs_bug_on(sbi, err &&
+			!exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
 	}
-	f2fs_bug_on(sbi, err &&
-		!exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
 out_clear:
 	fscrypt_put_encryption_info(inode, NULL);
 	clear_inode(inode);
-- 
GitLab


From 68390dd9bd5b2ef7c3ec69c23010b39981e264a4 Mon Sep 17 00:00:00 2001
From: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Date: Thu, 1 Jun 2017 16:50:10 +0800
Subject: [PATCH 0616/1958] f2fs: remove the unnecessary cast for PTR_ERR

It's not necessary to specify 'int' casting for PTR_ERR.

Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 8f487692c21f4..a140c5e3dc54e 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -233,7 +233,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
 		value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size);
 		if (IS_ERR(value)) {
 			clear_inode_flag(inode, FI_ACL_MODE);
-			return (int)PTR_ERR(value);
+			return PTR_ERR(value);
 		}
 	}
 
-- 
GitLab


From d4fdf8ba0e5808ba9ad6b44337783bd9935e0982 Mon Sep 17 00:00:00 2001
From: Yunlei He <heyunlei@huawei.com>
Date: Thu, 1 Jun 2017 16:43:51 +0800
Subject: [PATCH 0617/1958] f2fs: fix a panic caused by NULL flush_cmd_control

Mount fs with option noflush_merge, boot failed for illegal address
fcc in function f2fs_issue_flush:

        if (!test_opt(sbi, FLUSH_MERGE)) {
                ret = submit_flush_wait(sbi);
                atomic_inc(&fcc->issued_flush);   ->  Here, fcc illegal
                return ret;
        }

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index de6738b82745e..c1026b78cdfc1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -566,6 +566,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	init_waitqueue_head(&fcc->flush_wait_queue);
 	init_llist_head(&fcc->issue_list);
 	SM_I(sbi)->fcc_info = fcc;
+	if (!test_opt(sbi, FLUSH_MERGE))
+		return err;
+
 init_thread:
 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
@@ -3240,7 +3243,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
 
 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
-	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+	if (!f2fs_readonly(sbi->sb)) {
 		err = create_flush_cmd_control(sbi);
 		if (err)
 			return err;
-- 
GitLab


From 21d3f8e1c3b7996ce239ab6fa82e9f7a8c47d84d Mon Sep 17 00:00:00 2001
From: Jin Qian <jinqian@android.com>
Date: Thu, 1 Jun 2017 11:18:30 -0700
Subject: [PATCH 0618/1958] f2fs: sanity check size of nat and sit cache

Make sure number of entires doesn't exceed max journal size.

Cc: stable@vger.kernel.org
Signed-off-by: Jin Qian <jinqian@android.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c1026b78cdfc1..a739803b65136 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2495,6 +2495,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 
 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 {
+	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
+	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
 	int type = CURSEG_HOT_DATA;
 	int err;
 
@@ -2521,6 +2523,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 			return err;
 	}
 
+	/* sanity check for summary blocks */
+	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
+			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+		return -EINVAL;
+
 	return 0;
 }
 
-- 
GitLab


From 72fdbe2efe3e42a54e268d2ee2a8c0828d3996e7 Mon Sep 17 00:00:00 2001
From: Fan Li <fanofcode.li@samsung.com>
Date: Fri, 2 Jun 2017 15:45:42 +0800
Subject: [PATCH 0619/1958] f2fs: simplify the way of calulating next nat
 address

The index of segment which the next nat block is in has only one different
bit than the current one, so to get the next nat address, we can simply
alter that one bit.

Signed-off-by: Fan Li <fanofcode.li@samsung.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 558048e33cf9a..bb53e9955ff2a 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -224,11 +224,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 
 	block_addr -= nm_i->nat_blkaddr;
-	if ((block_addr >> sbi->log_blocks_per_seg) % 2)
-		block_addr -= sbi->blocks_per_seg;
-	else
-		block_addr += sbi->blocks_per_seg;
-
+	block_addr ^= 1 << sbi->log_blocks_per_seg;
 	return block_addr + nm_i->nat_blkaddr;
 }
 
-- 
GitLab


From 5a3a2d83cda82df7f8c306df85647d2c368e829a Mon Sep 17 00:00:00 2001
From: Qiuyang Sun <sunqiuyang@huawei.com>
Date: Thu, 18 May 2017 11:06:45 +0800
Subject: [PATCH 0620/1958] f2fs: dax: fix races between page faults and
 truncating pages

Currently in F2FS, page faults and operations that truncate the pagecahe
or data blocks, are completely unsynchronized. This can result in page
fault faulting in a page into a range that we are changing after
truncating, and thus we can end up with a page mapped to disk blocks that
will be shortly freed. Filesystem corruption will shortly follow.

This patch fixes the problem by creating new rw semaphore i_mmap_sem in
f2fs_inode_info and grab it for functions removing blocks from extent tree
and for read over page faults. The mechanism is similar to that in ext4.

Signed-off-by: Qiuyang Sun <sunqiuyang@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c  |  2 ++
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/file.c  | 48 +++++++++++++++++++++++++++++++++++++++---------
 fs/f2fs/super.c |  1 +
 4 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2ed90f5db8320..7d3af48d34a98 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1801,8 +1801,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
 	loff_t i_size = i_size_read(inode);
 
 	if (to > i_size) {
+		down_write(&F2FS_I(inode)->i_mmap_sem);
 		truncate_pagecache(inode, i_size);
 		truncate_blocks(inode, i_size, true);
+		up_write(&F2FS_I(inode)->i_mmap_sem);
 	}
 }
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cd777cf30be2a..da70964cbd743 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -519,6 +519,7 @@ struct f2fs_inode_info {
 	struct mutex inmem_lock;	/* lock for inmemory pages */
 	struct extent_tree *extent_tree;	/* cached extent_tree entry */
 	struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
+	struct rw_semaphore i_mmap_sem;
 };
 
 static inline void get_extent_info(struct extent_info *ext,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 65915b4ce14be..ac8b943817e60 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -33,6 +33,18 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
+static int f2fs_filemap_fault(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	int err;
+
+	down_read(&F2FS_I(inode)->i_mmap_sem);
+	err = filemap_fault(vmf);
+	up_read(&F2FS_I(inode)->i_mmap_sem);
+
+	return err;
+}
+
 static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
@@ -59,13 +71,14 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 	f2fs_balance_fs(sbi, dn.node_changed);
 
 	file_update_time(vmf->vma->vm_file);
+	down_read(&F2FS_I(inode)->i_mmap_sem);
 	lock_page(page);
 	if (unlikely(page->mapping != inode->i_mapping ||
 			page_offset(page) > i_size_read(inode) ||
 			!PageUptodate(page))) {
 		unlock_page(page);
 		err = -EFAULT;
-		goto out;
+		goto out_sem;
 	}
 
 	/*
@@ -94,6 +107,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
 		f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
 
+out_sem:
+	up_read(&F2FS_I(inode)->i_mmap_sem);
 out:
 	sb_end_pagefault(inode->i_sb);
 	f2fs_update_time(sbi, REQ_TIME);
@@ -101,7 +116,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct f2fs_file_vm_ops = {
-	.fault		= filemap_fault,
+	.fault		= f2fs_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= f2fs_vm_page_mkwrite,
 };
@@ -700,8 +715,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 			return -EACCES;
 
 		if (attr->ia_size <= i_size_read(inode)) {
+			down_write(&F2FS_I(inode)->i_mmap_sem);
 			truncate_setsize(inode, attr->ia_size);
 			err = f2fs_truncate(inode);
+			up_write(&F2FS_I(inode)->i_mmap_sem);
 			if (err)
 				return err;
 		} else {
@@ -709,7 +726,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 			 * do not trim all blocks after i_size if target size is
 			 * larger than i_size.
 			 */
+			down_write(&F2FS_I(inode)->i_mmap_sem);
 			truncate_setsize(inode, attr->ia_size);
+			up_write(&F2FS_I(inode)->i_mmap_sem);
 
 			/* should convert inline inode here */
 			if (!f2fs_may_inline_data(inode)) {
@@ -852,12 +871,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
 			blk_start = (loff_t)pg_start << PAGE_SHIFT;
 			blk_end = (loff_t)pg_end << PAGE_SHIFT;
+			down_write(&F2FS_I(inode)->i_mmap_sem);
 			truncate_inode_pages_range(mapping, blk_start,
 					blk_end - 1);
 
 			f2fs_lock_op(sbi);
 			ret = truncate_hole(inode, pg_start, pg_end);
 			f2fs_unlock_op(sbi);
+			up_write(&F2FS_I(inode)->i_mmap_sem);
 		}
 	}
 
@@ -1096,16 +1117,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	pg_start = offset >> PAGE_SHIFT;
 	pg_end = (offset + len) >> PAGE_SHIFT;
 
+	down_write(&F2FS_I(inode)->i_mmap_sem);
 	/* write out all dirty pages from offset */
 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
 	if (ret)
-		return ret;
+		goto out;
 
 	truncate_pagecache(inode, offset);
 
 	ret = f2fs_do_collapse(inode, pg_start, pg_end);
 	if (ret)
-		return ret;
+		goto out;
 
 	/* write out all moved pages, if possible */
 	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1118,6 +1140,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	if (!ret)
 		f2fs_i_size_write(inode, new_size);
 
+out:
+	up_write(&F2FS_I(inode)->i_mmap_sem);
 	return ret;
 }
 
@@ -1182,9 +1206,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 	if (ret)
 		return ret;
 
+	down_write(&F2FS_I(inode)->i_mmap_sem);
 	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
 	if (ret)
-		return ret;
+		goto out_sem;
 
 	truncate_pagecache_range(inode, offset, offset + len - 1);
 
@@ -1198,7 +1223,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 		ret = fill_zero(inode, pg_start, off_start,
 						off_end - off_start);
 		if (ret)
-			return ret;
+			goto out_sem;
 
 		new_size = max_t(loff_t, new_size, offset + len);
 	} else {
@@ -1206,7 +1231,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 			ret = fill_zero(inode, pg_start++, off_start,
 						PAGE_SIZE - off_start);
 			if (ret)
-				return ret;
+				goto out_sem;
 
 			new_size = max_t(loff_t, new_size,
 					(loff_t)pg_start << PAGE_SHIFT);
@@ -1255,6 +1280,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 out:
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
 		f2fs_i_size_write(inode, new_size);
+out_sem:
+	up_write(&F2FS_I(inode)->i_mmap_sem);
 
 	return ret;
 }
@@ -1284,14 +1311,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 	f2fs_balance_fs(sbi, true);
 
+	down_write(&F2FS_I(inode)->i_mmap_sem);
 	ret = truncate_blocks(inode, i_size_read(inode), true);
 	if (ret)
-		return ret;
+		goto out;
 
 	/* write out all dirty pages from offset */
 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
 	if (ret)
-		return ret;
+		goto out;
 
 	truncate_pagecache(inode, offset);
 
@@ -1320,6 +1348,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 	if (!ret)
 		f2fs_i_size_write(inode, new_size);
+out:
+	up_write(&F2FS_I(inode)->i_mmap_sem);
 	return ret;
 }
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index d6af34d1e6a83..ddd2973ffcbf0 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -624,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	mutex_init(&fi->inmem_lock);
 	init_rwsem(&fi->dio_rwsem[READ]);
 	init_rwsem(&fi->dio_rwsem[WRITE]);
+	init_rwsem(&fi->i_mmap_sem);
 
 	/* Will be used by directory only */
 	fi->i_dir_level = F2FS_SB(sb)->dir_level;
-- 
GitLab


From 2a510c005c9d3fafbaae1a6d27da8256c95542f9 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 5 Jun 2017 18:29:06 +0800
Subject: [PATCH 0621/1958] f2fs: introduce __wait_one_discard_bio

In order to avoid copied codes.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a739803b65136..0fa717a473948 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -995,6 +995,20 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
 	mutex_unlock(&dcc->cmd_lock);
 }
 
+static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
+							struct discard_cmd *dc)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+	wait_for_completion_io(&dc->wait);
+	mutex_lock(&dcc->cmd_lock);
+	f2fs_bug_on(sbi, dc->state != D_DONE);
+	dc->ref--;
+	if (!dc->ref)
+		__remove_discard_cmd(sbi, dc);
+	mutex_unlock(&dcc->cmd_lock);
+}
+
 static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
@@ -1019,13 +1033,7 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
 	mutex_unlock(&dcc->cmd_lock);
 
 	if (need_wait) {
-		wait_for_completion_io(&dc->wait);
-		mutex_lock(&dcc->cmd_lock);
-		f2fs_bug_on(sbi, dc->state != D_DONE);
-		dc->ref--;
-		if (!dc->ref)
-			__remove_discard_cmd(sbi, dc);
-		mutex_unlock(&dcc->cmd_lock);
+		__wait_one_discard_bio(sbi, dc);
 		goto next;
 	}
 }
@@ -1049,15 +1057,8 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 	}
 	mutex_unlock(&dcc->cmd_lock);
 
-	if (need_wait) {
-		wait_for_completion_io(&dc->wait);
-		mutex_lock(&dcc->cmd_lock);
-		f2fs_bug_on(sbi, dc->state != D_DONE);
-		dc->ref--;
-		if (!dc->ref)
-			__remove_discard_cmd(sbi, dc);
-		mutex_unlock(&dcc->cmd_lock);
-	}
+	if (need_wait)
+		__wait_one_discard_bio(sbi, dc);
 }
 
 /* This comes from f2fs_put_super */
-- 
GitLab


From d9703d9097d7c97f735a3b7870c52735a4dfa051 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 5 Jun 2017 18:29:07 +0800
Subject: [PATCH 0622/1958] f2fs: add f2fs_bug_on in __remove_discard_cmd

Recently, discard related codes have changed a lot, so add f2fs_bug_on to
detect potential bug.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0fa717a473948..86a0c1095939c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -739,6 +739,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
+	f2fs_bug_on(sbi, dc->ref);
+
 	if (dc->error == -EOPNOTSUPP)
 		dc->error = 0;
 
-- 
GitLab


From febeca6d375531f2d3244b47bf0eb396180689e0 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 5 Jun 2017 18:29:08 +0800
Subject: [PATCH 0623/1958] f2fs: don't track newly allocated nat entry in list

We will never persist newly allocated nat entries during checkpoint(), so
we don't need to track such nat entries in nat dirty list in order to
avoid:
- more latency during traversing dirty list;
- sorting nat sets incorrectly due to recording wrong entry_cnt in nat
entry set.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d22db8ce0a692..05700e54f91e7 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -158,9 +158,6 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
 	struct nat_entry_set *head;
 
-	if (get_nat_flag(ne, IS_DIRTY))
-		return;
-
 	head = radix_tree_lookup(&nm_i->nat_set_root, set);
 	if (!head) {
 		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
@@ -171,10 +168,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 		head->entry_cnt = 0;
 		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
 	}
-	list_move_tail(&ne->list, &head->entry_list);
+
+	if (get_nat_flag(ne, IS_DIRTY))
+		goto refresh_list;
+
 	nm_i->dirty_nat_cnt++;
 	head->entry_cnt++;
 	set_nat_flag(ne, IS_DIRTY, true);
+refresh_list:
+	if (nat_get_blkaddr(ne) == NEW_ADDR)
+		list_del_init(&ne->list);
+	else
+		list_move_tail(&ne->list, &head->entry_list);
 }
 
 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
@@ -2423,8 +2428,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		nid_t nid = nat_get_nid(ne);
 		int offset;
 
-		if (nat_get_blkaddr(ne) == NEW_ADDR)
-			continue;
+		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
 
 		if (to_journal) {
 			offset = lookup_journal_in_cursum(journal,
-- 
GitLab


From 1f258ec13b82d3d947b515a007a748ffcbe29f9a Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 7 Jun 2017 11:17:35 +0800
Subject: [PATCH 0624/1958] f2fs: fix to avoid panic when encountering corrupt
 node

With fault_injection option, generic/361 of fstests will complain us
with below message:

Call Trace:
 get_node_page+0x12/0x20 [f2fs]
 f2fs_iget+0x92/0x7d0 [f2fs]
 f2fs_fill_super+0x10fb/0x15e0 [f2fs]
 mount_bdev+0x184/0x1c0
 f2fs_mount+0x15/0x20 [f2fs]
 mount_fs+0x39/0x150
 vfs_kern_mount+0x67/0x110
 do_mount+0x1bb/0xc70
 SyS_mount+0x83/0xd0
 do_syscall_64+0x6e/0x160
 entry_SYSCALL64_slow_path+0x25/0x25

Since mkfs loop device in f2fs partition can be failed silently due to
checkpoint error injection, so root inode page can be corrupted, in order
to avoid needless panic, in get_node_page, it's better to leave message
and return error to caller, and let fsck repaire it later.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 05700e54f91e7..f522378224aae 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1157,6 +1157,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
 		f2fs_put_page(page, 1);
 		return ERR_PTR(err);
 	} else if (err == LOCKED_PAGE) {
+		err = 0;
 		goto page_hit;
 	}
 
@@ -1170,15 +1171,22 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
 		goto repeat;
 	}
 
-	if (unlikely(!PageUptodate(page)))
+	if (unlikely(!PageUptodate(page))) {
+		err = -EIO;
 		goto out_err;
+	}
 page_hit:
 	if(unlikely(nid != nid_of_node(page))) {
-		f2fs_bug_on(sbi, 1);
+		f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
+			"nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+			nid, nid_of_node(page), ino_of_node(page),
+			ofs_of_node(page), cpver_of_node(page),
+			next_blkaddr_of_node(page));
 		ClearPageUptodate(page);
+		err = -EINVAL;
 out_err:
 		f2fs_put_page(page, 1);
-		return ERR_PTR(-EIO);
+		return ERR_PTR(err);
 	}
 	return page;
 }
-- 
GitLab


From a005774c8dab68d1c3963aee7d9464c3ec400eba Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <kernelpatch@126.com>
Date: Fri, 9 Jun 2017 06:32:54 +0800
Subject: [PATCH 0625/1958] f2fs: use proper variable name

It is better to use variable name "inline_dentry" instead of "dentry_blk"
when data type is "struct f2fs_inline_dentry". This patch has no functional
changes, just to make code more readable especially when call the function
make_dentry_ptr_inline() and f2fs_convert_inline_dir().

Signed-off-by: Tiezhu Yang <kernelpatch@126.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/inline.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index e4c527c4e7d0b..e0fd4376e6fb0 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -316,12 +316,12 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
 int make_empty_inline_dir(struct inode *inode, struct inode *parent,
 							struct page *ipage)
 {
-	struct f2fs_inline_dentry *dentry_blk;
+	struct f2fs_inline_dentry *inline_dentry;
 	struct f2fs_dentry_ptr d;
 
-	dentry_blk = inline_data_addr(ipage);
+	inline_dentry = inline_data_addr(ipage);
 
-	make_dentry_ptr_inline(NULL, &d, dentry_blk);
+	make_dentry_ptr_inline(NULL, &d, inline_dentry);
 	do_make_empty_dir(inode, parent, &d);
 
 	set_page_dirty(ipage);
@@ -500,7 +500,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
 	struct page *ipage;
 	unsigned int bit_pos;
 	f2fs_hash_t name_hash;
-	struct f2fs_inline_dentry *dentry_blk = NULL;
+	struct f2fs_inline_dentry *inline_dentry = NULL;
 	struct f2fs_dentry_ptr d;
 	int slots = GET_DENTRY_SLOTS(new_name->len);
 	struct page *page = NULL;
@@ -510,11 +510,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
 	if (IS_ERR(ipage))
 		return PTR_ERR(ipage);
 
-	dentry_blk = inline_data_addr(ipage);
-	bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+	inline_dentry = inline_data_addr(ipage);
+	bit_pos = room_for_filename(&inline_dentry->dentry_bitmap,
 						slots, NR_INLINE_DENTRY);
 	if (bit_pos >= NR_INLINE_DENTRY) {
-		err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
+		err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
 		if (err)
 			return err;
 		err = -EAGAIN;
@@ -534,7 +534,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
 	f2fs_wait_on_page_writeback(ipage, NODE, true);
 
 	name_hash = f2fs_dentry_hash(new_name, NULL);
-	make_dentry_ptr_inline(NULL, &d, dentry_blk);
+	make_dentry_ptr_inline(NULL, &d, inline_dentry);
 	f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
 
 	set_page_dirty(ipage);
@@ -586,14 +586,14 @@ bool f2fs_empty_inline_dir(struct inode *dir)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 	struct page *ipage;
 	unsigned int bit_pos = 2;
-	struct f2fs_inline_dentry *dentry_blk;
+	struct f2fs_inline_dentry *inline_dentry;
 
 	ipage = get_node_page(sbi, dir->i_ino);
 	if (IS_ERR(ipage))
 		return false;
 
-	dentry_blk = inline_data_addr(ipage);
-	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+	inline_dentry = inline_data_addr(ipage);
+	bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap,
 					NR_INLINE_DENTRY,
 					bit_pos);
 
-- 
GitLab


From b63def9112cd8b91477a06ba5318c8a01ac474f1 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 11 Jun 2017 09:21:11 +0200
Subject: [PATCH 0626/1958] f2fs: Fix a return value in case of error in
 'f2fs_fill_super'

err must be set to -ENOMEM, otherwise we return 0.

Fixes: a912b54d3aaa0 ("f2fs: split bio cache")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ddd2973ffcbf0..fe86a7edfa60d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1977,8 +1977,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 
 		sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
 								GFP_KERNEL);
-		if (!sbi->write_io[i])
+		if (!sbi->write_io[i]) {
+			err = -ENOMEM;
 			goto free_options;
+		}
 
 		for (j = HOT; j < n; j++) {
 			init_rwsem(&sbi->write_io[i][j].io_rwsem);
-- 
GitLab


From 44529f8975b7b93709b1b92be7d027a1d406de8a Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 12 Jun 2017 09:44:24 +0800
Subject: [PATCH 0627/1958] f2fs: fix to show injection rate in ->show_options

If fault injection functionality is enabled, show additional injection
rate in ->show_options.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fe86a7edfa60d..2aa864eae5229 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -984,7 +984,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (test_opt(sbi, FAULT_INJECTION))
-		seq_puts(seq, ",fault_injection");
+		seq_printf(seq, ",fault_injection=%u",
+				sbi->fault_info.inject_rate);
 #endif
 
 	return 0;
-- 
GitLab


From 6f6d9fe2ab3fc68d194b18f4d120443326ec524a Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 12 Jun 2017 09:44:26 +0800
Subject: [PATCH 0628/1958] f2fs: fix incorrect document of
 batched_trim_sections

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index a809f6005f146..3cf9320f492c3 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -75,7 +75,7 @@ Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
 Description:
 		 Controls the memory footprint used by f2fs.
 
-What:		/sys/fs/f2fs/<disk>/trim_sections
+What:		/sys/fs/f2fs/<disk>/batched_trim_sections
 Date:		February 2015
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
-- 
GitLab


From 1727f317219bfc60a3e50306d67938ffedb17f8a Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 12 Jun 2017 09:44:27 +0800
Subject: [PATCH 0629/1958] f2fs: fix wrong error number of fill_super

This patch fixes incorrect error number in error path of fill_super.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2aa864eae5229..227498064a8fb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1930,6 +1930,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (f2fs_sb_mounted_blkzoned(sb)) {
 		f2fs_msg(sb, KERN_ERR,
 			 "Zoned block device support is not enabled\n");
+		err = -EOPNOTSUPP;
 		goto free_sb_buf;
 	}
 #endif
@@ -2003,8 +2004,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (F2FS_IO_SIZE(sbi) > 1) {
 		sbi->write_io_dummy =
 			mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
-		if (!sbi->write_io_dummy)
+		if (!sbi->write_io_dummy) {
+			err = -ENOMEM;
 			goto free_options;
+		}
 	}
 
 	/* get an inode for meta space */
-- 
GitLab


From 56412894b3cee24805e48f380ffa9a5f32cff183 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 12 Jun 2017 22:30:44 +0800
Subject: [PATCH 0630/1958] f2fs: fix to document fault injection option and
 sysfs file

Commit 73faec4d9935 ("f2fs: add mount option to select fault injection
ratio") and Commit 087968974fcd ("f2fs: add fault injection to sysfs")
forget to document mount option and sysfs file.

This patch fixes to document them.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 12 ++++++++++++
 Documentation/filesystems/f2fs.txt      |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 3cf9320f492c3..b09108811ff1f 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -112,3 +112,15 @@ Date:		January 2016
 Contact:	"Shuoran Liu" <liushuoran@huawei.com>
 Description:
 		 Shows total written kbytes issued to disk.
+
+What:		/sys/fs/f2fs/<disk>/inject_rate
+Date:		May 2016
+Contact:	"Sheng Yong" <shengyong1@huawei.com>
+Description:
+		 Controls the injection rate.
+
+What:		/sys/fs/f2fs/<disk>/inject_type
+Date:		May 2016
+Contact:	"Sheng Yong" <shengyong1@huawei.com>
+Description:
+		 Controls the injection type.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 4f6531a4701be..8b04a6359530b 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -155,6 +155,8 @@ noinline_data          Disable the inline data feature, inline data feature is
                        enabled by default.
 data_flush             Enable data flushing before checkpoint in order to
                        persist data of regular and symlink.
+fault_injection=%d     Enable fault injection in all supported types with
+                       specified injection rate.
 mode=%s                Control block allocation mode which supports "adaptive"
                        and "lfs". In "lfs" mode, there should be no random
                        writes towards main area.
-- 
GitLab


From a398101aa113351ec973e9e6a3208c7160b7b1fc Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 14 Jun 2017 17:39:46 +0800
Subject: [PATCH 0631/1958] f2fs: clean up sysfs codes

Just cleanup.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 121 +++++++++++++++++++++++++++++-------------------
 1 file changed, 74 insertions(+), 47 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 227498064a8fb..6c04a5af455ac 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -349,6 +349,22 @@ static struct kobj_type f2fs_ktype = {
 	.release	= f2fs_sb_release,
 };
 
+int __init f2fs_register_sysfs(void)
+{
+	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+
+	f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
+	if (!f2fs_kset)
+		return -ENOMEM;
+	return 0;
+}
+
+void f2fs_unregister_sysfs(void)
+{
+	kset_unregister(f2fs_kset);
+	remove_proc_entry("fs/f2fs", NULL);
+}
+
 void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
 {
 	struct va_format vaf;
@@ -766,17 +782,23 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 	kfree(sbi->devs);
 }
 
-static void f2fs_put_super(struct super_block *sb)
+void f2fs_exit_sysfs(struct f2fs_sb_info *sbi)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
-	int i;
+	kobject_del(&sbi->s_kobj);
+	kobject_put(&sbi->s_kobj);
+	wait_for_completion(&sbi->s_kobj_unregister);
 
 	if (sbi->s_proc) {
 		remove_proc_entry("segment_info", sbi->s_proc);
 		remove_proc_entry("segment_bits", sbi->s_proc);
-		remove_proc_entry(sb->s_id, f2fs_proc_root);
+		remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
 	}
-	kobject_del(&sbi->s_kobj);
+}
+
+static void f2fs_put_super(struct super_block *sb)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int i;
 
 	stop_gc_thread(sbi);
 
@@ -829,8 +851,8 @@ static void f2fs_put_super(struct super_block *sb)
 	destroy_segment_manager(sbi);
 
 	kfree(sbi->ckpt);
-	kobject_put(&sbi->s_kobj);
-	wait_for_completion(&sbi->s_kobj_unregister);
+
+	f2fs_exit_sysfs(sbi);
 
 	sb->s_fs_info = NULL;
 	if (sbi->s_chksum_driver)
@@ -1058,6 +1080,37 @@ static const struct file_operations f2fs_seq_##_name##_fops = {		\
 F2FS_PROC_FILE_DEF(segment_info);
 F2FS_PROC_FILE_DEF(segment_bits);
 
+int f2fs_init_sysfs(struct f2fs_sb_info *sbi)
+{
+	struct super_block *sb = sbi->sb;
+	int err;
+
+	if (f2fs_proc_root)
+		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+
+	if (sbi->s_proc) {
+		proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
+				 &f2fs_seq_segment_info_fops, sb);
+		proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
+				 &f2fs_seq_segment_bits_fops, sb);
+	}
+
+	sbi->s_kobj.kset = f2fs_kset;
+	init_completion(&sbi->s_kobj_unregister);
+	err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
+							"%s", sb->s_id);
+	if (err)
+		goto err_out;
+	return 0;
+err_out:
+	if (sbi->s_proc) {
+		remove_proc_entry("segment_info", sbi->s_proc);
+		remove_proc_entry("segment_bits", sbi->s_proc);
+		remove_proc_entry(sb->s_id, f2fs_proc_root);
+	}
+	return err;
+}
+
 static void default_options(struct f2fs_sb_info *sbi)
 {
 	/* init some FS parameters */
@@ -2114,22 +2167,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_root_inode;
 	}
 
-	if (f2fs_proc_root)
-		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
-
-	if (sbi->s_proc) {
-		proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_info_fops, sb);
-		proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_bits_fops, sb);
-	}
-
-	sbi->s_kobj.kset = f2fs_kset;
-	init_completion(&sbi->s_kobj_unregister);
-	err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
-							"%s", sb->s_id);
+	err = f2fs_init_sysfs(sbi);
 	if (err)
-		goto free_proc;
+		goto free_root_inode;
 
 	/* recover fsynced data */
 	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
@@ -2140,7 +2180,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		if (bdev_read_only(sb->s_bdev) &&
 				!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
 			err = -EROFS;
-			goto free_kobj;
+			goto free_sysfs;
 		}
 
 		if (need_fsck)
@@ -2154,7 +2194,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 			need_fsck = true;
 			f2fs_msg(sb, KERN_ERR,
 				"Cannot recover all fsync data errno=%d", err);
-			goto free_kobj;
+			goto free_sysfs;
 		}
 	} else {
 		err = recover_fsync_data(sbi, true);
@@ -2163,7 +2203,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 			err = -EINVAL;
 			f2fs_msg(sb, KERN_ERR,
 				"Need to recover fsync data");
-			goto free_kobj;
+			goto free_sysfs;
 		}
 	}
 skip_recovery:
@@ -2178,7 +2218,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		/* After POR, we can run background GC thread.*/
 		err = start_gc_thread(sbi);
 		if (err)
-			goto free_kobj;
+			goto free_sysfs;
 	}
 	kfree(options);
 
@@ -2196,17 +2236,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	f2fs_update_time(sbi, REQ_TIME);
 	return 0;
 
-free_kobj:
+free_sysfs:
 	f2fs_sync_inode_meta(sbi);
-	kobject_del(&sbi->s_kobj);
-	kobject_put(&sbi->s_kobj);
-	wait_for_completion(&sbi->s_kobj_unregister);
-free_proc:
-	if (sbi->s_proc) {
-		remove_proc_entry("segment_info", sbi->s_proc);
-		remove_proc_entry("segment_bits", sbi->s_proc);
-		remove_proc_entry(sb->s_id, f2fs_proc_root);
-	}
+	f2fs_exit_sysfs(sbi);
 free_root_inode:
 	dput(sb->s_root);
 	sb->s_root = NULL;
@@ -2321,30 +2353,26 @@ static int __init init_f2fs_fs(void)
 	err = create_extent_cache();
 	if (err)
 		goto free_checkpoint_caches;
-	f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
-	if (!f2fs_kset) {
-		err = -ENOMEM;
+	err = f2fs_register_sysfs();
+	if (err)
 		goto free_extent_cache;
-	}
 	err = register_shrinker(&f2fs_shrinker_info);
 	if (err)
-		goto free_kset;
-
+		goto free_sysfs;
 	err = register_filesystem(&f2fs_fs_type);
 	if (err)
 		goto free_shrinker;
 	err = f2fs_create_root_stats();
 	if (err)
 		goto free_filesystem;
-	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
 	return 0;
 
 free_filesystem:
 	unregister_filesystem(&f2fs_fs_type);
 free_shrinker:
 	unregister_shrinker(&f2fs_shrinker_info);
-free_kset:
-	kset_unregister(f2fs_kset);
+free_sysfs:
+	f2fs_unregister_sysfs();
 free_extent_cache:
 	destroy_extent_cache();
 free_checkpoint_caches:
@@ -2361,11 +2389,10 @@ static int __init init_f2fs_fs(void)
 
 static void __exit exit_f2fs_fs(void)
 {
-	remove_proc_entry("fs/f2fs", NULL);
 	f2fs_destroy_root_stats();
 	unregister_filesystem(&f2fs_fs_type);
 	unregister_shrinker(&f2fs_shrinker_info);
-	kset_unregister(f2fs_kset);
+	f2fs_unregister_sysfs();
 	destroy_extent_cache();
 	destroy_checkpoint_caches();
 	destroy_segment_manager_caches();
-- 
GitLab


From 8ceffcb29e61ba882a011b1e4d73ca03691fdc2e Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 14 Jun 2017 17:39:47 +0800
Subject: [PATCH 0632/1958] f2fs: move sysfs code from super.c to
 fs/f2fs/sysfs.c

Codes related to sysfs and procfs are dispersive and mixed with sb
related codes, but actually these codes are independent from others,
so split them from super.c, and reorgnize and manger them in sysfs.c.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/Makefile |   2 +-
 fs/f2fs/f2fs.h   |   8 ++
 fs/f2fs/super.c  | 332 --------------------------------------------
 fs/f2fs/sysfs.c  | 350 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 359 insertions(+), 333 deletions(-)
 create mode 100644 fs/f2fs/sysfs.c

diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index ca949ea7c02fd..a0dc559b1b47f 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o
 
 f2fs-y		:= dir.o file.o inode.o namei.o hash.o super.o inline.o
 f2fs-y		+= checkpoint.o gc.o data.o node.o segment.o recovery.o
-f2fs-y		+= shrinker.o extent_cache.o
+f2fs-y		+= shrinker.o extent_cache.o sysfs.o
 f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
 f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
 f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index da70964cbd743..dd5449423fd22 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2673,6 +2673,14 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi);
 int __init create_extent_cache(void);
 void destroy_extent_cache(void);
 
+/*
+ * sysfs.c
+ */
+int __init f2fs_register_sysfs(void);
+void f2fs_unregister_sysfs(void);
+int f2fs_init_sysfs(struct f2fs_sb_info *sbi);
+void f2fs_exit_sysfs(struct f2fs_sb_info *sbi);
+
 /*
  * crypto support
  */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 6c04a5af455ac..9081570bc616b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -35,9 +35,7 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/f2fs.h>
 
-static struct proc_dir_entry *f2fs_proc_root;
 static struct kmem_cache *f2fs_inode_cachep;
-static struct kset *f2fs_kset;
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 
@@ -146,225 +144,6 @@ static match_table_t f2fs_tokens = {
 	{Opt_err, NULL},
 };
 
-/* Sysfs support for f2fs */
-enum {
-	GC_THREAD,	/* struct f2fs_gc_thread */
-	SM_INFO,	/* struct f2fs_sm_info */
-	DCC_INFO,	/* struct discard_cmd_control */
-	NM_INFO,	/* struct f2fs_nm_info */
-	F2FS_SBI,	/* struct f2fs_sb_info */
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	FAULT_INFO_RATE,	/* struct f2fs_fault_info */
-	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
-#endif
-};
-
-struct f2fs_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
-	ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
-			 const char *, size_t);
-	int struct_type;
-	int offset;
-};
-
-static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
-{
-	if (struct_type == GC_THREAD)
-		return (unsigned char *)sbi->gc_thread;
-	else if (struct_type == SM_INFO)
-		return (unsigned char *)SM_I(sbi);
-	else if (struct_type == DCC_INFO)
-		return (unsigned char *)SM_I(sbi)->dcc_info;
-	else if (struct_type == NM_INFO)
-		return (unsigned char *)NM_I(sbi);
-	else if (struct_type == F2FS_SBI)
-		return (unsigned char *)sbi;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	else if (struct_type == FAULT_INFO_RATE ||
-					struct_type == FAULT_INFO_TYPE)
-		return (unsigned char *)&sbi->fault_info;
-#endif
-	return NULL;
-}
-
-static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
-		struct f2fs_sb_info *sbi, char *buf)
-{
-	struct super_block *sb = sbi->sb;
-
-	if (!sb->s_bdev->bd_part)
-		return snprintf(buf, PAGE_SIZE, "0\n");
-
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)(sbi->kbytes_written +
-			BD_PART_WRITTEN(sbi)));
-}
-
-static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
-			struct f2fs_sb_info *sbi, char *buf)
-{
-	unsigned char *ptr = NULL;
-	unsigned int *ui;
-
-	ptr = __struct_ptr(sbi, a->struct_type);
-	if (!ptr)
-		return -EINVAL;
-
-	ui = (unsigned int *)(ptr + a->offset);
-
-	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
-}
-
-static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
-			struct f2fs_sb_info *sbi,
-			const char *buf, size_t count)
-{
-	unsigned char *ptr;
-	unsigned long t;
-	unsigned int *ui;
-	ssize_t ret;
-
-	ptr = __struct_ptr(sbi, a->struct_type);
-	if (!ptr)
-		return -EINVAL;
-
-	ui = (unsigned int *)(ptr + a->offset);
-
-	ret = kstrtoul(skip_spaces(buf), 0, &t);
-	if (ret < 0)
-		return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
-		return -EINVAL;
-#endif
-	*ui = t;
-	return count;
-}
-
-static ssize_t f2fs_attr_show(struct kobject *kobj,
-				struct attribute *attr, char *buf)
-{
-	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
-								s_kobj);
-	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
-
-	return a->show ? a->show(a, sbi, buf) : 0;
-}
-
-static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
-						const char *buf, size_t len)
-{
-	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
-									s_kobj);
-	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
-
-	return a->store ? a->store(a, sbi, buf, len) : 0;
-}
-
-static void f2fs_sb_release(struct kobject *kobj)
-{
-	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
-								s_kobj);
-	complete(&sbi->s_kobj_unregister);
-}
-
-#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
-static struct f2fs_attr f2fs_attr_##_name = {			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-	.struct_type = _struct_type,				\
-	.offset = _offset					\
-}
-
-#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)	\
-	F2FS_ATTR_OFFSET(struct_type, name, 0644,		\
-		f2fs_sbi_show, f2fs_sbi_store,			\
-		offsetof(struct struct_name, elname))
-
-#define F2FS_GENERAL_RO_ATTR(name) \
-static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
-
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
-F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
-#endif
-F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
-
-#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
-static struct attribute *f2fs_attrs[] = {
-	ATTR_LIST(gc_min_sleep_time),
-	ATTR_LIST(gc_max_sleep_time),
-	ATTR_LIST(gc_no_gc_sleep_time),
-	ATTR_LIST(gc_idle),
-	ATTR_LIST(reclaim_segments),
-	ATTR_LIST(max_small_discards),
-	ATTR_LIST(batched_trim_sections),
-	ATTR_LIST(ipu_policy),
-	ATTR_LIST(min_ipu_util),
-	ATTR_LIST(min_fsync_blocks),
-	ATTR_LIST(min_hot_blocks),
-	ATTR_LIST(max_victim_search),
-	ATTR_LIST(dir_level),
-	ATTR_LIST(ram_thresh),
-	ATTR_LIST(ra_nid_pages),
-	ATTR_LIST(dirty_nats_ratio),
-	ATTR_LIST(cp_interval),
-	ATTR_LIST(idle_interval),
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	ATTR_LIST(inject_rate),
-	ATTR_LIST(inject_type),
-#endif
-	ATTR_LIST(lifetime_write_kbytes),
-	NULL,
-};
-
-static const struct sysfs_ops f2fs_attr_ops = {
-	.show	= f2fs_attr_show,
-	.store	= f2fs_attr_store,
-};
-
-static struct kobj_type f2fs_ktype = {
-	.default_attrs	= f2fs_attrs,
-	.sysfs_ops	= &f2fs_attr_ops,
-	.release	= f2fs_sb_release,
-};
-
-int __init f2fs_register_sysfs(void)
-{
-	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
-
-	f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
-	if (!f2fs_kset)
-		return -ENOMEM;
-	return 0;
-}
-
-void f2fs_unregister_sysfs(void)
-{
-	kset_unregister(f2fs_kset);
-	remove_proc_entry("fs/f2fs", NULL);
-}
-
 void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
 {
 	struct va_format vaf;
@@ -782,19 +561,6 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 	kfree(sbi->devs);
 }
 
-void f2fs_exit_sysfs(struct f2fs_sb_info *sbi)
-{
-	kobject_del(&sbi->s_kobj);
-	kobject_put(&sbi->s_kobj);
-	wait_for_completion(&sbi->s_kobj_unregister);
-
-	if (sbi->s_proc) {
-		remove_proc_entry("segment_info", sbi->s_proc);
-		remove_proc_entry("segment_bits", sbi->s_proc);
-		remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
-	}
-}
-
 static void f2fs_put_super(struct super_block *sb)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1013,104 +779,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	return 0;
 }
 
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
-{
-	struct super_block *sb = seq->private;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
-	unsigned int total_segs =
-			le32_to_cpu(sbi->raw_super->segment_count_main);
-	int i;
-
-	seq_puts(seq, "format: segment_type|valid_blocks\n"
-		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
-
-	for (i = 0; i < total_segs; i++) {
-		struct seg_entry *se = get_seg_entry(sbi, i);
-
-		if ((i % 10) == 0)
-			seq_printf(seq, "%-10d", i);
-		seq_printf(seq, "%d|%-3u", se->type,
-					get_valid_blocks(sbi, i, false));
-		if ((i % 10) == 9 || i == (total_segs - 1))
-			seq_putc(seq, '\n');
-		else
-			seq_putc(seq, ' ');
-	}
-
-	return 0;
-}
-
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
-{
-	struct super_block *sb = seq->private;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
-	unsigned int total_segs =
-			le32_to_cpu(sbi->raw_super->segment_count_main);
-	int i, j;
-
-	seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
-		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
-
-	for (i = 0; i < total_segs; i++) {
-		struct seg_entry *se = get_seg_entry(sbi, i);
-
-		seq_printf(seq, "%-10d", i);
-		seq_printf(seq, "%d|%-3u|", se->type,
-					get_valid_blocks(sbi, i, false));
-		for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
-			seq_printf(seq, " %.2x", se->cur_valid_map[j]);
-		seq_putc(seq, '\n');
-	}
-	return 0;
-}
-
-#define F2FS_PROC_FILE_DEF(_name)					\
-static int _name##_open_fs(struct inode *inode, struct file *file)	\
-{									\
-	return single_open(file, _name##_seq_show, PDE_DATA(inode));	\
-}									\
-									\
-static const struct file_operations f2fs_seq_##_name##_fops = {		\
-	.open = _name##_open_fs,					\
-	.read = seq_read,						\
-	.llseek = seq_lseek,						\
-	.release = single_release,					\
-};
-
-F2FS_PROC_FILE_DEF(segment_info);
-F2FS_PROC_FILE_DEF(segment_bits);
-
-int f2fs_init_sysfs(struct f2fs_sb_info *sbi)
-{
-	struct super_block *sb = sbi->sb;
-	int err;
-
-	if (f2fs_proc_root)
-		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
-
-	if (sbi->s_proc) {
-		proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_info_fops, sb);
-		proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_bits_fops, sb);
-	}
-
-	sbi->s_kobj.kset = f2fs_kset;
-	init_completion(&sbi->s_kobj_unregister);
-	err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
-							"%s", sb->s_id);
-	if (err)
-		goto err_out;
-	return 0;
-err_out:
-	if (sbi->s_proc) {
-		remove_proc_entry("segment_info", sbi->s_proc);
-		remove_proc_entry("segment_bits", sbi->s_proc);
-		remove_proc_entry(sb->s_id, f2fs_proc_root);
-	}
-	return err;
-}
-
 static void default_options(struct f2fs_sb_info *sbi)
 {
 	/* init some FS parameters */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
new file mode 100644
index 0000000000000..714a3e47bbe8d
--- /dev/null
+++ b/fs/f2fs/sysfs.c
@@ -0,0 +1,350 @@
+/*
+ * f2fs sysfs interface
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ * Copyright (c) 2017 Chao Yu <chao@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/proc_fs.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "segment.h"
+#include "gc.h"
+
+static struct proc_dir_entry *f2fs_proc_root;
+static struct kset *f2fs_kset;
+
+/* Sysfs support for f2fs */
+enum {
+	GC_THREAD,	/* struct f2fs_gc_thread */
+	SM_INFO,	/* struct f2fs_sm_info */
+	DCC_INFO,	/* struct discard_cmd_control */
+	NM_INFO,	/* struct f2fs_nm_info */
+	F2FS_SBI,	/* struct f2fs_sb_info */
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	FAULT_INFO_RATE,	/* struct f2fs_fault_info */
+	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
+#endif
+};
+
+struct f2fs_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
+	ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
+			 const char *, size_t);
+	int struct_type;
+	int offset;
+};
+
+static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
+{
+	if (struct_type == GC_THREAD)
+		return (unsigned char *)sbi->gc_thread;
+	else if (struct_type == SM_INFO)
+		return (unsigned char *)SM_I(sbi);
+	else if (struct_type == DCC_INFO)
+		return (unsigned char *)SM_I(sbi)->dcc_info;
+	else if (struct_type == NM_INFO)
+		return (unsigned char *)NM_I(sbi);
+	else if (struct_type == F2FS_SBI)
+		return (unsigned char *)sbi;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	else if (struct_type == FAULT_INFO_RATE ||
+					struct_type == FAULT_INFO_TYPE)
+		return (unsigned char *)&sbi->fault_info;
+#endif
+	return NULL;
+}
+
+static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+		struct f2fs_sb_info *sbi, char *buf)
+{
+	struct super_block *sb = sbi->sb;
+
+	if (!sb->s_bdev->bd_part)
+		return snprintf(buf, PAGE_SIZE, "0\n");
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n",
+		(unsigned long long)(sbi->kbytes_written +
+			BD_PART_WRITTEN(sbi)));
+}
+
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
+			struct f2fs_sb_info *sbi, char *buf)
+{
+	unsigned char *ptr = NULL;
+	unsigned int *ui;
+
+	ptr = __struct_ptr(sbi, a->struct_type);
+	if (!ptr)
+		return -EINVAL;
+
+	ui = (unsigned int *)(ptr + a->offset);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+			struct f2fs_sb_info *sbi,
+			const char *buf, size_t count)
+{
+	unsigned char *ptr;
+	unsigned long t;
+	unsigned int *ui;
+	ssize_t ret;
+
+	ptr = __struct_ptr(sbi, a->struct_type);
+	if (!ptr)
+		return -EINVAL;
+
+	ui = (unsigned int *)(ptr + a->offset);
+
+	ret = kstrtoul(skip_spaces(buf), 0, &t);
+	if (ret < 0)
+		return ret;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
+		return -EINVAL;
+#endif
+	*ui = t;
+	return count;
+}
+
+static ssize_t f2fs_attr_show(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+								s_kobj);
+	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+	return a->show ? a->show(a, sbi, buf) : 0;
+}
+
+static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
+						const char *buf, size_t len)
+{
+	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+									s_kobj);
+	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+	return a->store ? a->store(a, sbi, buf, len) : 0;
+}
+
+static void f2fs_sb_release(struct kobject *kobj)
+{
+	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+								s_kobj);
+	complete(&sbi->s_kobj_unregister);
+}
+
+#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
+static struct f2fs_attr f2fs_attr_##_name = {			\
+	.attr = {.name = __stringify(_name), .mode = _mode },	\
+	.show	= _show,					\
+	.store	= _store,					\
+	.struct_type = _struct_type,				\
+	.offset = _offset					\
+}
+
+#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)	\
+	F2FS_ATTR_OFFSET(struct_type, name, 0644,		\
+		f2fs_sbi_show, f2fs_sbi_store,			\
+		offsetof(struct struct_name, elname))
+
+#define F2FS_GENERAL_RO_ATTR(name) \
+static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
+
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
+F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
+#endif
+F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
+
+#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
+static struct attribute *f2fs_attrs[] = {
+	ATTR_LIST(gc_min_sleep_time),
+	ATTR_LIST(gc_max_sleep_time),
+	ATTR_LIST(gc_no_gc_sleep_time),
+	ATTR_LIST(gc_idle),
+	ATTR_LIST(reclaim_segments),
+	ATTR_LIST(max_small_discards),
+	ATTR_LIST(batched_trim_sections),
+	ATTR_LIST(ipu_policy),
+	ATTR_LIST(min_ipu_util),
+	ATTR_LIST(min_fsync_blocks),
+	ATTR_LIST(min_hot_blocks),
+	ATTR_LIST(max_victim_search),
+	ATTR_LIST(dir_level),
+	ATTR_LIST(ram_thresh),
+	ATTR_LIST(ra_nid_pages),
+	ATTR_LIST(dirty_nats_ratio),
+	ATTR_LIST(cp_interval),
+	ATTR_LIST(idle_interval),
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	ATTR_LIST(inject_rate),
+	ATTR_LIST(inject_type),
+#endif
+	ATTR_LIST(lifetime_write_kbytes),
+	NULL,
+};
+
+static const struct sysfs_ops f2fs_attr_ops = {
+	.show	= f2fs_attr_show,
+	.store	= f2fs_attr_store,
+};
+
+static struct kobj_type f2fs_ktype = {
+	.default_attrs	= f2fs_attrs,
+	.sysfs_ops	= &f2fs_attr_ops,
+	.release	= f2fs_sb_release,
+};
+
+static int segment_info_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	unsigned int total_segs =
+			le32_to_cpu(sbi->raw_super->segment_count_main);
+	int i;
+
+	seq_puts(seq, "format: segment_type|valid_blocks\n"
+		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+	for (i = 0; i < total_segs; i++) {
+		struct seg_entry *se = get_seg_entry(sbi, i);
+
+		if ((i % 10) == 0)
+			seq_printf(seq, "%-10d", i);
+		seq_printf(seq, "%d|%-3u", se->type,
+					get_valid_blocks(sbi, i, false));
+		if ((i % 10) == 9 || i == (total_segs - 1))
+			seq_putc(seq, '\n');
+		else
+			seq_putc(seq, ' ');
+	}
+
+	return 0;
+}
+
+static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	unsigned int total_segs =
+			le32_to_cpu(sbi->raw_super->segment_count_main);
+	int i, j;
+
+	seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
+		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+	for (i = 0; i < total_segs; i++) {
+		struct seg_entry *se = get_seg_entry(sbi, i);
+
+		seq_printf(seq, "%-10d", i);
+		seq_printf(seq, "%d|%-3u|", se->type,
+					get_valid_blocks(sbi, i, false));
+		for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
+			seq_printf(seq, " %.2x", se->cur_valid_map[j]);
+		seq_putc(seq, '\n');
+	}
+	return 0;
+}
+
+#define F2FS_PROC_FILE_DEF(_name)					\
+static int _name##_open_fs(struct inode *inode, struct file *file)	\
+{									\
+	return single_open(file, _name##_seq_show, PDE_DATA(inode));	\
+}									\
+									\
+static const struct file_operations f2fs_seq_##_name##_fops = {		\
+	.open = _name##_open_fs,					\
+	.read = seq_read,						\
+	.llseek = seq_lseek,						\
+	.release = single_release,					\
+};
+
+F2FS_PROC_FILE_DEF(segment_info);
+F2FS_PROC_FILE_DEF(segment_bits);
+
+int __init f2fs_register_sysfs(void)
+{
+	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+
+	f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
+	if (!f2fs_kset)
+		return -ENOMEM;
+	return 0;
+}
+
+void f2fs_unregister_sysfs(void)
+{
+	kset_unregister(f2fs_kset);
+	remove_proc_entry("fs/f2fs", NULL);
+}
+
+int f2fs_init_sysfs(struct f2fs_sb_info *sbi)
+{
+	struct super_block *sb = sbi->sb;
+	int err;
+
+	if (f2fs_proc_root)
+		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+
+	if (sbi->s_proc) {
+		proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
+				 &f2fs_seq_segment_info_fops, sb);
+		proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
+				 &f2fs_seq_segment_bits_fops, sb);
+	}
+
+	sbi->s_kobj.kset = f2fs_kset;
+	init_completion(&sbi->s_kobj_unregister);
+	err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
+							"%s", sb->s_id);
+	if (err)
+		goto err_out;
+	return 0;
+err_out:
+	if (sbi->s_proc) {
+		remove_proc_entry("segment_info", sbi->s_proc);
+		remove_proc_entry("segment_bits", sbi->s_proc);
+		remove_proc_entry(sb->s_id, f2fs_proc_root);
+	}
+	return err;
+}
+
+void f2fs_exit_sysfs(struct f2fs_sb_info *sbi)
+{
+	kobject_del(&sbi->s_kobj);
+	kobject_put(&sbi->s_kobj);
+	wait_for_completion(&sbi->s_kobj_unregister);
+
+	if (sbi->s_proc) {
+		remove_proc_entry("segment_info", sbi->s_proc);
+		remove_proc_entry("segment_bits", sbi->s_proc);
+		remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
+	}
+}
-- 
GitLab


From 67773a1fbdcb5be4a0490b1dd2a5975784ef40df Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 13 Jun 2017 16:47:54 -0700
Subject: [PATCH 0633/1958] f2fs: require key for truncate(2) of encrypted file

Currently, filesystems allow truncate(2) on an encrypted file without
the encryption key.  However, it's impossible to correctly handle the
case where the size being truncated to is not a multiple of the
filesystem block size, because that would require decrypting the final
block, zeroing the part beyond i_size, then encrypting the block.

As other modifications to encrypted file contents are prohibited without
the key, just prohibit truncate(2) as well, making it fail with ENOKEY.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ac8b943817e60..61ee029d7e48e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -710,9 +710,13 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 		return err;
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		if (f2fs_encrypted_inode(inode) &&
-				fscrypt_get_encryption_info(inode))
-			return -EACCES;
+		if (f2fs_encrypted_inode(inode)) {
+			err = fscrypt_get_encryption_info(inode);
+			if (err)
+				return err;
+			if (!fscrypt_has_encryption_key(inode))
+				return -ENOKEY;
+		}
 
 		if (attr->ia_size <= i_size_read(inode)) {
 			down_write(&F2FS_I(inode)->i_mmap_sem);
-- 
GitLab


From 663f387b713089463e37761bfa2561972c7f45ff Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 14 Jun 2017 23:00:55 +0800
Subject: [PATCH 0634/1958] f2fs: set CP_TRIMMED_FLAG correctly

Don't set CP_TRIMMED_FLAG for non-zoned block device or discard
unsupported device, it can avoid to trigger unneeded checkpoint for
that kind of device.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 9081570bc616b..8e39b850bfc09 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -587,7 +587,7 @@ static void f2fs_put_super(struct super_block *sb)
 	/* be sure to wait for any on-going discard commands */
 	f2fs_wait_discard_bios(sbi);
 
-	if (!sbi->discard_blks) {
+	if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT | CP_TRIMMED,
 		};
-- 
GitLab


From 0eb0adadf2e49d82bc4ecd65ec3bb69251f7564c Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 14 Jun 2017 23:00:56 +0800
Subject: [PATCH 0635/1958] f2fs: measure inode.i_blocks as generic filesystem

Both in memory or on disk, generic filesystems record i_blocks with
512bytes sized sector count, also VFS sub module such as disk quota
follows this rule, but f2fs records it with 4096bytes sized block
count, this difference leads to that once we use dquota's function
which inc/dec iblocks, it will make i_blocks of f2fs being inconsistent
between in memory and on disk.

In order to resolve this issue, this patch changes to make in-memory
i_blocks of f2fs recording sector count instead of block count,
meanwhile leaving on-disk i_blocks recording block count.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h  | 23 +++++++++++++----------
 fs/f2fs/file.c  |  1 -
 fs/f2fs/inode.c |  5 +++--
 fs/f2fs/node.c  |  2 +-
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index dd5449423fd22..91db1d07f9f8b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1352,10 +1352,10 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
  */
 static inline int F2FS_HAS_BLOCKS(struct inode *inode)
 {
-	if (F2FS_I(inode)->i_xattr_nid)
-		return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
-	else
-		return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
+	block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0;
+
+	return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) >
+			(F2FS_DEFAULT_ALLOCATED_BLOCKS + xattr_block);
 }
 
 static inline bool f2fs_has_xattr_block(unsigned int ofs)
@@ -1363,7 +1363,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
 	return ofs == XATTR_NODE_OFFSET;
 }
 
-static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool);
+static inline void f2fs_i_blocks_write(struct inode *, block_t, bool);
 static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
 				 struct inode *inode, blkcnt_t *count)
 {
@@ -1401,11 +1401,13 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
 						struct inode *inode,
-						blkcnt_t count)
+						block_t count)
 {
+	blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK;
+
 	spin_lock(&sbi->stat_lock);
 	f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
-	f2fs_bug_on(sbi, inode->i_blocks < count);
+	f2fs_bug_on(sbi, inode->i_blocks < sectors);
 	sbi->total_valid_block_count -= (block_t)count;
 	spin_unlock(&sbi->stat_lock);
 	f2fs_i_blocks_write(inode, count, false);
@@ -1856,13 +1858,14 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
 }
 
 static inline void f2fs_i_blocks_write(struct inode *inode,
-					blkcnt_t diff, bool add)
+					block_t diff, bool add)
 {
 	bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE);
 	bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER);
+	blkcnt_t sectors = diff << F2FS_LOG_SECTORS_PER_BLOCK;
 
-	inode->i_blocks = add ? inode->i_blocks + diff :
-				inode->i_blocks - diff;
+	inode->i_blocks = add ? inode->i_blocks + sectors :
+				inode->i_blocks - sectors;
 	f2fs_mark_inode_dirty_sync(inode, true);
 	if (clean || recover)
 		set_inode_flag(inode, FI_AUTO_RECOVER);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 61ee029d7e48e..7ea63d84a699a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -665,7 +665,6 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
 				  STATX_ATTR_NODUMP);
 
 	generic_fillattr(inode, stat);
-	stat->blocks <<= 3;
 	return 0;
 }
 
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 868d71436ebc2..1ff5bd418d87b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -16,6 +16,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 
 #include <trace/events/f2fs.h>
 
@@ -129,7 +130,7 @@ static int do_read_inode(struct inode *inode)
 	i_gid_write(inode, le32_to_cpu(ri->i_gid));
 	set_nlink(inode, le32_to_cpu(ri->i_links));
 	inode->i_size = le64_to_cpu(ri->i_size);
-	inode->i_blocks = le64_to_cpu(ri->i_blocks);
+	inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks));
 
 	inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
 	inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
@@ -267,7 +268,7 @@ int update_inode(struct inode *inode, struct page *node_page)
 	ri->i_gid = cpu_to_le32(i_gid_read(inode));
 	ri->i_links = cpu_to_le32(inode->i_nlink);
 	ri->i_size = cpu_to_le64(i_size_read(inode));
-	ri->i_blocks = cpu_to_le64(inode->i_blocks);
+	ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks));
 
 	if (et) {
 		read_lock(&et->lock);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f522378224aae..f6f46be139f46 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1011,7 +1011,7 @@ int remove_inode_page(struct inode *inode)
 
 	/* 0 is possible, after f2fs_new_inode() has failed */
 	f2fs_bug_on(F2FS_I_SB(inode),
-			inode->i_blocks != 0 && inode->i_blocks != 1);
+			inode->i_blocks != 0 && inode->i_blocks != 8);
 
 	/* will put inode & node pages */
 	truncate_node(&dn);
-- 
GitLab


From a9bcf9bcd01499001834273ac1114ec76668f048 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 14 Jun 2017 08:05:32 -0700
Subject: [PATCH 0636/1958] f2fs: don't need to check encrypted inode for
 partial truncation

The cache_only is always false, if inode is encrypted.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7ea63d84a699a..6a201c61eef54 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -534,8 +534,10 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
 truncate_out:
 	f2fs_wait_on_page_writeback(page, DATA, true);
 	zero_user(page, offset, PAGE_SIZE - offset);
-	if (!cache_only || !f2fs_encrypted_inode(inode) ||
-					!S_ISREG(inode->i_mode))
+
+	/* An encrypted inode should have a key and truncate the last page. */
+	f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode));
+	if (!cache_only)
 		set_page_dirty(page);
 	f2fs_put_page(page, 1);
 	return 0;
-- 
GitLab


From 34dc77ad74368707f0f51f42536e38e6ef30ff22 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 15 Jun 2017 16:44:42 -0700
Subject: [PATCH 0637/1958] f2fs: add ioctl to do gc with target block address

This patch adds f2fs_ioc_gc_range() to move blocks located in the given
range.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h |  8 ++++++++
 fs/f2fs/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 91db1d07f9f8b..c27a6264d9bf2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -303,6 +303,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 						struct f2fs_move_range)
 #define F2FS_IOC_FLUSH_DEVICE		_IOW(F2FS_IOCTL_MAGIC, 10,	\
 						struct f2fs_flush_device)
+#define F2FS_IOC_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,	\
+						struct f2fs_gc_range)
 
 #define F2FS_IOC_SET_ENCRYPTION_POLICY	FS_IOC_SET_ENCRYPTION_POLICY
 #define F2FS_IOC_GET_ENCRYPTION_POLICY	FS_IOC_GET_ENCRYPTION_POLICY
@@ -327,6 +329,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 #define F2FS_IOC32_GETVERSION		FS_IOC32_GETVERSION
 #endif
 
+struct f2fs_gc_range {
+	u32 sync;
+	u64 start;
+	u64 len;
+};
+
 struct f2fs_defragment {
 	u64 start;
 	u64 len;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6a201c61eef54..3f56b27e761b8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1911,6 +1911,50 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
 	return ret;
 }
 
+static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_gc_range range;
+	u64 end;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+
+	if (f2fs_readonly(sbi->sb))
+		return -EROFS;
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
+	end = range.start + range.len;
+	if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi))
+		return -EINVAL;
+do_more:
+	if (!range.sync) {
+		if (!mutex_trylock(&sbi->gc_mutex)) {
+			ret = -EBUSY;
+			goto out;
+		}
+	} else {
+		mutex_lock(&sbi->gc_mutex);
+	}
+
+	ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
+	range.start += sbi->blocks_per_seg;
+	if (range.start <= end)
+		goto do_more;
+out:
+	mnt_drop_write_file(filp);
+	return ret;
+}
+
 static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -2355,6 +2399,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_ioc_get_encryption_pwsalt(filp, arg);
 	case F2FS_IOC_GARBAGE_COLLECT:
 		return f2fs_ioc_gc(filp, arg);
+	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
+		return f2fs_ioc_gc_range(filp, arg);
 	case F2FS_IOC_WRITE_CHECKPOINT:
 		return f2fs_ioc_write_checkpoint(filp, arg);
 	case F2FS_IOC_DEFRAGMENT:
@@ -2423,6 +2469,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_GET_ENCRYPTION_PWSALT:
 	case F2FS_IOC_GET_ENCRYPTION_POLICY:
 	case F2FS_IOC_GARBAGE_COLLECT:
+	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
 	case F2FS_IOC_WRITE_CHECKPOINT:
 	case F2FS_IOC_DEFRAGMENT:
 	case F2FS_IOC_MOVE_RANGE:
-- 
GitLab


From 3a4991a9864cb6b98bb9c7cf47854ffe2a79805a Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 4 Jul 2017 15:04:48 +0200
Subject: [PATCH 0638/1958] i2c: acpi: Do not create i2c-clients for LNXVIDEO
 ACPI devices

ACPI video devices get tagged by the kernel with the custom LNXVIDEO
HID so that normal pnp-id matching can be used and are handled by the
acpi-video driver.

Sometimes the ACPI nodes describing these contain a SERIAL_TYPE_I2C ACPI
resource. Before this commit the presence of this resource would cause the
i2c-core to create a /sys/bus/i2c/devices/i2c-LNXVIDEO:00 device for this
with a modalias of: "i2c:LNXVIDEO:00".

There is no i2c driver for this custom HID, the acpi-video driver binds
directly to the ACPI device /sys/bus/acpi/devices/LNXVIDEO\:00 which has
a modalias of "acpi:LNXVIDEO:" .

Not only is the creation of an i2c-client for this undesirable, it is
actually causing problems. This weird pseudo-resource claims an i2c
speed of 100KHz and typically points to the i2c bus which is used by the
touchscreen controller. Some touchscreen controllers only work properly at
400KHz, at 100KHz they cause errors like these:

i2c_designware 80860F41:03: i2c_dw_handle_tx_abort: lost arbitration
i2c_designware 80860F41:03: i2c_dw_handle_tx_abort: lost arbitration
i2c_designware 80860F41:03: i2c_dw_handle_tx_abort: lost arbitration
i2c_designware 80860F41:03: i2c_dw_handle_tx_abort: lost arbitration
silead_ts i2c-MSSL1680:00: Registers clear error -11

This commit makes the i2c-core ignore LNXVIDEO compatible ACPI devices
which has 2 positive results:

1) The bogus i2c-client for these is no longer created.
2) i2c_acpi_lookup_speed now ignores the 100KHz speed from the pseudo
i2c-resouce and properly returns 400KHz as speed for the touchscreen
i2c bus, fixing the touchscreen not working on various devies.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-acpi.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 052005579ed62..4842ec3a5451e 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -76,6 +76,15 @@ static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data)
 	return 1;
 }
 
+static const struct acpi_device_id i2c_acpi_ignored_device_ids[] = {
+	/*
+	 * ACPI video acpi_devices, which are handled by the acpi-video driver
+	 * sometimes contain a SERIAL_TYPE_I2C ACPI resource, ignore these.
+	 */
+	{ ACPI_VIDEO_HID, 0 },
+	{}
+};
+
 static int i2c_acpi_do_lookup(struct acpi_device *adev,
 			      struct i2c_acpi_lookup *lookup)
 {
@@ -87,6 +96,9 @@ static int i2c_acpi_do_lookup(struct acpi_device *adev,
 	    acpi_device_enumerated(adev))
 		return -EINVAL;
 
+	if (acpi_match_device_ids(adev, i2c_acpi_ignored_device_ids) == 0)
+		return -ENODEV;
+
 	memset(info, 0, sizeof(*info));
 	lookup->device_handle = acpi_device_handle(adev);
 
-- 
GitLab


From 2ec4d8831b9644a080302ce10868afff8d135fc3 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Fri, 30 Jun 2017 12:54:04 +1200
Subject: [PATCH 0639/1958] i2c: pca-platform: correctly set
 algo_data.reset_chip

When device tree support was added the setting of algo_data.reset_chip
was moved. There were two problems with this. The first being that
i2c_pca_pf_resetchip was only used if platform data was provided. The
second that it was unconditionally overridden with
i2c_pca_pf_dummyreset. Ensure that however the reset gpio is defined the
correct reset_chip function is used.

Fixes: commit 4cc7229daa46 ("i2c: pca-platform: switch to struct gpio_desc")
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-pca-platform.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 395eca0cbb1fc..daccef6865e8a 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -184,7 +184,6 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 			if (ret == 0) {
 				i2c->gpio = gpio_to_desc(platform_data->gpio);
 				gpiod_direction_output(i2c->gpio, 0);
-				i2c->algo_data.reset_chip = i2c_pca_pf_resetchip;
 			} else {
 				dev_warn(&pdev->dev, "Registering gpio failed!\n");
 				i2c->gpio = NULL;
@@ -205,7 +204,10 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 
 	i2c->algo_data.data = i2c;
 	i2c->algo_data.wait_for_completion = i2c_pca_pf_waitforcompletion;
-	i2c->algo_data.reset_chip = i2c_pca_pf_dummyreset;
+	if (i2c->gpio)
+		i2c->algo_data.reset_chip = i2c_pca_pf_resetchip;
+	else
+		i2c->algo_data.reset_chip = i2c_pca_pf_dummyreset;
 
 	switch (res->flags & IORESOURCE_MEM_TYPE_MASK) {
 	case IORESOURCE_MEM_32BIT:
-- 
GitLab


From 78e6c5abeb4ad895867fc0ff7f61c858d4deb7c0 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Fri, 30 Jun 2017 12:54:05 +1200
Subject: [PATCH 0640/1958] i2c: pca-platform: propagate error from
 i2c_pca_add_numbered_bus

Rather than returning -ENODEV if i2c_pca_add_numbered_bus() fails,
propagate the error to aid debugging.

Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-pca-platform.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index daccef6865e8a..853a2abedb05e 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -232,9 +232,9 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 			return ret;
 	}
 
-	if (i2c_pca_add_numbered_bus(&i2c->adap) < 0) {
-		return -ENODEV;
-	}
+	ret = i2c_pca_add_numbered_bus(&i2c->adap);
+	if (ret)
+		return ret;
 
 	platform_set_drvdata(pdev, i2c);
 
-- 
GitLab


From 9809cb831c9645d61ee9501c308045bb3d8afd31 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 29 Jun 2017 09:22:15 +0100
Subject: [PATCH 0641/1958] i2c: designware: fix spelling mistakes

Trivial fixes to spelling mistakes in dev_dbg message

"STAUTS" -> "STATUS"
"SLAVE_ACTTVITY" -> "SLAVE_ACTIVITY"

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-slave.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 4012c74e9785a..100643f472444 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -277,7 +277,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
 		return 0;
 
 	dev_dbg(dev->dev,
-		"%#x STAUTS SLAVE_ACTTVITY=%#x : RAW_INTR_STAT=%#x : INTR_STAT=%#x\n",
+		"%#x STATUS SLAVE_ACTIVITY=%#x : RAW_INTR_STAT=%#x : INTR_STAT=%#x\n",
 		enabled, slave_activity, raw_stat, stat);
 
 	if ((stat & DW_IC_INTR_RX_FULL) && (stat & DW_IC_INTR_STOP_DET))
-- 
GitLab


From 21bf440ce18e49b24601c3d1d25f691ef0334c4b Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Wed, 28 Jun 2017 17:23:28 +0300
Subject: [PATCH 0642/1958] i2c: designware: Make HW init functions static

Recent i2c-designware slave support patches use master or slave HW init
functions through the function pointer so we can declare them static.

While at it, rename i2c_dw_init() as i2c_dw_init_master().

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Luis Oliveira <lolivei@synopsys.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-core.h   | 2 --
 drivers/i2c/busses/i2c-designware-master.c | 7 +++----
 drivers/i2c/busses/i2c-designware-slave.c  | 3 +--
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 7bfbcc62f6c51..a403dcdfca4bc 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -306,8 +306,6 @@ int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev);
 u32 i2c_dw_func(struct i2c_adapter *adap);
 void i2c_dw_disable(struct dw_i2c_dev *dev);
 void i2c_dw_disable_int(struct dw_i2c_dev *dev);
-int i2c_dw_init(struct dw_i2c_dev *dev);
-int i2c_dw_init_slave(struct dw_i2c_dev *dev);
 
 extern u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev);
 extern int i2c_dw_probe(struct dw_i2c_dev *dev);
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index eefc4db1ee3e2..418c233075d3d 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -51,7 +51,7 @@ static void i2c_dw_configure_fifo_master(struct dw_i2c_dev *dev)
  * This function is called during I2C init function, and in case of timeout at
  * run time.
  */
-int i2c_dw_init(struct dw_i2c_dev *dev)
+static int i2c_dw_init_master(struct dw_i2c_dev *dev)
 {
 	u32 hcnt, lcnt;
 	u32 reg, comp_param1;
@@ -171,7 +171,6 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(i2c_dw_init);
 
 static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 {
@@ -444,7 +443,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	if (!wait_for_completion_timeout(&dev->cmd_complete, adap->timeout)) {
 		dev_err(dev->dev, "controller timed out\n");
 		/* i2c_dw_init implicitly disables the adapter */
-		i2c_dw_init(dev);
+		i2c_dw_init_master(dev);
 		ret = -ETIMEDOUT;
 		goto done;
 	}
@@ -622,7 +621,7 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 
 	init_completion(&dev->cmd_complete);
 
-	dev->init = i2c_dw_init;
+	dev->init = i2c_dw_init_master;
 	dev->disable = i2c_dw_disable;
 	dev->disable_int = i2c_dw_disable_int;
 
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 100643f472444..0548c7ea578c9 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -49,7 +49,7 @@ static void i2c_dw_configure_fifo_slave(struct dw_i2c_dev *dev)
  * This function is called during I2C init function, and in case of timeout at
  * run time.
  */
-int i2c_dw_init_slave(struct dw_i2c_dev *dev)
+static int i2c_dw_init_slave(struct dw_i2c_dev *dev)
 {
 	u32 sda_falling_time, scl_falling_time;
 	u32 reg, comp_param1;
@@ -168,7 +168,6 @@ int i2c_dw_init_slave(struct dw_i2c_dev *dev)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(i2c_dw_init_slave);
 
 static int i2c_dw_reg_slave(struct i2c_client *slave)
 {
-- 
GitLab


From 6e38cf3b442120a84e69faf1ac9a7b4cd79d1ad6 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Wed, 28 Jun 2017 17:23:29 +0300
Subject: [PATCH 0643/1958] i2c: designware: Let slave adapter support be
 optional

Only certain system configurations may use the I2C slave mode so let the
support be optional. This allow reducing module size if needed:

   text    data     bss     dec     hex filename
  10328    1336      16   11680    2da0 drivers/i2c/busses/i2c-designware-core.ko
   7222    1136       8    8366    20ae drivers/i2c/busses/i2c-designware-core.ko

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Luis Oliveira <lolivei@synopsys.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig               | 1 -
 drivers/i2c/busses/i2c-designware-core.h | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index c98c8e51c4482..1006b230b236f 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -483,7 +483,6 @@ config I2C_DESIGNWARE_CORE
 config I2C_DESIGNWARE_PLATFORM
 	tristate "Synopsys DesignWare Platform"
 	select I2C_DESIGNWARE_CORE
-	select I2C_DESIGNWARE_SLAVE
 	depends on (ACPI && COMMON_CLK) || !ACPI
 	help
 	  If you say yes to this option, support will be included for the
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index a403dcdfca4bc..9fee4c054d3df 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -309,7 +309,11 @@ void i2c_dw_disable_int(struct dw_i2c_dev *dev);
 
 extern u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev);
 extern int i2c_dw_probe(struct dw_i2c_dev *dev);
+#if IS_ENABLED(CONFIG_I2C_DESIGNWARE_SLAVE)
 extern int i2c_dw_probe_slave(struct dw_i2c_dev *dev);
+#else
+static inline int i2c_dw_probe_slave(struct dw_i2c_dev *dev) { return -EINVAL; }
+#endif
 
 #if IS_ENABLED(CONFIG_I2C_DESIGNWARE_BAYTRAIL)
 extern int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev);
-- 
GitLab


From 526c9e971f4fdec4cab41b531405e211afb1d82a Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 4 Jul 2017 17:47:22 +0200
Subject: [PATCH 0644/1958] video: fbdev: fsl-diu-fb: constify mfb_template and
 fsl_diu_match.

File size before:
   text	   data	    bss	    dec	    hex	filename
   2469	   1552	     24	   4045	    fcd	drivers/video/fbdev/fsl-diu-fb.o

File size after adding 'const':
   text	   data	    bss	    dec	    hex	filename
   3821	    200	     24	   4045	    fcd	drivers/video/fbdev/fsl-diu-fb.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Cc: Timur Tabi <timur@tabi.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/fsl-diu-fb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c
index ca3d6b3664715..25abbcf389134 100644
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c
@@ -388,7 +388,7 @@ struct fsl_diu_data {
 /* Determine the DMA address of a member of the fsl_diu_data structure */
 #define DMA_ADDR(p, f) ((p)->dma_addr + offsetof(struct fsl_diu_data, f))
 
-static struct mfb_info mfb_template[] = {
+static const struct mfb_info mfb_template[] = {
 	{
 		.index = PLANE0,
 		.id = "Panel0",
@@ -1868,7 +1868,7 @@ static int __init fsl_diu_setup(char *options)
 }
 #endif
 
-static struct of_device_id fsl_diu_match[] = {
+static const struct of_device_id fsl_diu_match[] = {
 #ifdef CONFIG_PPC_MPC512x
 	{
 		.compatible = "fsl,mpc5121-diu",
-- 
GitLab


From 6af574e826740bf17663b48ba3f8fadb81d2113f Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Tue, 4 Jul 2017 17:47:23 +0200
Subject: [PATCH 0645/1958] vmlfb: Fix error handling in cr_pll_init()

There is an error path, where iomemory is left mapped.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Cc: Alan Hourihane <alanh@fairlite.demon.co.uk>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/vermilion/cr_pll.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/vermilion/cr_pll.c b/drivers/video/fbdev/vermilion/cr_pll.c
index ebc6e6e0dd0f4..ba105c876bed5 100644
--- a/drivers/video/fbdev/vermilion/cr_pll.c
+++ b/drivers/video/fbdev/vermilion/cr_pll.c
@@ -185,6 +185,7 @@ static int __init cr_pll_init(void)
 	if (err) {
 		printk(KERN_ERR
 		       "Carillo Ranch failed to initialize vml_sys.\n");
+		iounmap(mch_regs_base);
 		pci_dev_put(mch_dev);
 		return err;
 	}
-- 
GitLab


From 5ffe80c1fa1ac5b0b5c47503452d2f155fcc2702 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 4 Jul 2017 17:47:23 +0200
Subject: [PATCH 0646/1958] omapfb: panel-dsi-cm: constify dsicm_attr_group

File size before:
   text	   data	    bss	    dec	    hex	filename
   4657	    464	      0	   5121	   1401	drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.o

File size after adding 'const':
   text	   data	    bss	    dec	    hex	filename
   4713	    400	      0	   5113	   13f9	drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
index 7f20bc267234c..bef4315300905 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
@@ -559,7 +559,7 @@ static struct attribute *dsicm_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group dsicm_attr_group = {
+static const struct attribute_group dsicm_attr_group = {
 	.attrs = dsicm_attrs,
 };
 
-- 
GitLab


From 62bffe325f08517eb57963bf0907701fffaab57c Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 4 Jul 2017 17:47:23 +0200
Subject: [PATCH 0647/1958] fbdev: omapfb: constify ctrl_caps, color_caps,
 panel_attr_grp and ctrl_attr_grp

File size before:
   text	   data	    bss	    dec	    hex	filename
  13007	   1536	    156	  14699	   396b	drivers/video/fbdev/omap/omapfb_main.o

File size after adding 'const':
   text	   data	    bss	    dec	    hex	filename
  13135	   1408	    156	  14699	   396b	drivers/video/fbdev/omap/omapfb_main.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/omap/omapfb_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index f4cbfb3b8a098..3479a47a30820 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -62,7 +62,7 @@ struct caps_table_struct {
 	const char *name;
 };
 
-static struct caps_table_struct ctrl_caps[] = {
+static const struct caps_table_struct ctrl_caps[] = {
 	{ OMAPFB_CAPS_MANUAL_UPDATE,  "manual update" },
 	{ OMAPFB_CAPS_TEARSYNC,       "tearing synchronization" },
 	{ OMAPFB_CAPS_PLANE_RELOCATE_MEM, "relocate plane memory" },
@@ -74,7 +74,7 @@ static struct caps_table_struct ctrl_caps[] = {
 	{ OMAPFB_CAPS_SET_BACKLIGHT,  "backlight setting" },
 };
 
-static struct caps_table_struct color_caps[] = {
+static const struct caps_table_struct color_caps[] = {
 	{ 1 << OMAPFB_COLOR_RGB565,	"RGB565", },
 	{ 1 << OMAPFB_COLOR_YUV422,	"YUV422", },
 	{ 1 << OMAPFB_COLOR_YUV420,	"YUV420", },
@@ -1384,7 +1384,7 @@ static struct attribute *panel_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group panel_attr_grp = {
+static const struct attribute_group panel_attr_grp = {
 	.name  = "panel",
 	.attrs = panel_attrs,
 };
@@ -1406,7 +1406,7 @@ static struct attribute *ctrl_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group ctrl_attr_grp = {
+static const struct attribute_group ctrl_attr_grp = {
 	.name  = "ctrl",
 	.attrs = ctrl_attrs,
 };
-- 
GitLab


From dd7d958ae9126b2d4a96e41340579049afebd2bf Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Tue, 4 Jul 2017 17:47:23 +0200
Subject: [PATCH 0648/1958] video: fbdev: aty: remove useless variable
 assignments in aty_var_to_crtc()

Value assigned to variable vdisplay at line 990 is overwritten
at line 1039 before it can be used. Also, variable assignment
at line 987 is the same as at line 1039. This makes such
variable assignments useless.

Remove these variable assignments and the code related.

Addresses-Covertity-ID: 1226900
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/aty/atyfb_base.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index 11026e726b684..3142d7f7020f0 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c
@@ -984,12 +984,6 @@ static int aty_var_to_crtc(const struct fb_info *info,
 		v_total <<= 1;
 	}
 
-	vdisplay = yres;
-#ifdef CONFIG_FB_ATY_GENERIC_LCD
-	if ((par->lcd_table != 0) && (crtc->lcd_gen_cntl & LCD_ON))
-		vdisplay  = par->lcd_height;
-#endif
-
 	v_disp--;
 	v_sync_strt--;
 	v_sync_end--;
-- 
GitLab


From 360772f26407b759ec3a91dda81fdbc74b7f7f2c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 4 Jul 2017 17:47:23 +0200
Subject: [PATCH 0649/1958] video: fbdev: matrox: the list iterator can't be
 NULL

My static checker is complaining because we check "drv" for NULL and
then we dereference it to get the next item in the list.  It can't be
NULL so we can remove this check.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/matrox/matroxfb_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c
index 11eb094396aea..f6a0b9af97a90 100644
--- a/drivers/video/fbdev/matrox/matroxfb_base.c
+++ b/drivers/video/fbdev/matrox/matroxfb_base.c
@@ -2001,7 +2001,7 @@ static void matroxfb_register_device(struct matrox_fb_info* minfo) {
 	for (drv = matroxfb_driver_l(matroxfb_driver_list.next);
 	     drv != matroxfb_driver_l(&matroxfb_driver_list);
 	     drv = matroxfb_driver_l(drv->node.next)) {
-		if (drv && drv->probe) {
+		if (drv->probe) {
 			void *p = drv->probe(minfo);
 			if (p) {
 				minfo->drivers_data[i] = p;
-- 
GitLab


From ffb07550c76f70b8fa5e57352e7e299f4811f0a0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 27 Jun 2017 19:32:04 -0400
Subject: [PATCH 0650/1958] copy_msghdr_from_user(): get rid of field-by-field
 copyin

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/socket.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index c2564eb25c6b8..af33d929135a5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1870,22 +1870,18 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 				 struct sockaddr __user **save_addr,
 				 struct iovec **iov)
 {
-	struct sockaddr __user *uaddr;
-	struct iovec __user *uiov;
-	size_t nr_segs;
+	struct user_msghdr msg;
 	ssize_t err;
 
-	if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
-	    __get_user(uaddr, &umsg->msg_name) ||
-	    __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
-	    __get_user(uiov, &umsg->msg_iov) ||
-	    __get_user(nr_segs, &umsg->msg_iovlen) ||
-	    __get_user(kmsg->msg_control, &umsg->msg_control) ||
-	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
-	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
+	if (copy_from_user(&msg, umsg, sizeof(*umsg)))
 		return -EFAULT;
 
-	if (!uaddr)
+	kmsg->msg_control = msg.msg_control;
+	kmsg->msg_controllen = msg.msg_controllen;
+	kmsg->msg_flags = msg.msg_flags;
+
+	kmsg->msg_namelen = msg.msg_namelen;
+	if (!msg.msg_name)
 		kmsg->msg_namelen = 0;
 
 	if (kmsg->msg_namelen < 0)
@@ -1895,11 +1891,11 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 		kmsg->msg_namelen = sizeof(struct sockaddr_storage);
 
 	if (save_addr)
-		*save_addr = uaddr;
+		*save_addr = msg.msg_name;
 
-	if (uaddr && kmsg->msg_namelen) {
+	if (msg.msg_name && kmsg->msg_namelen) {
 		if (!save_addr) {
-			err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
+			err = move_addr_to_kernel(msg.msg_name, kmsg->msg_namelen,
 						  kmsg->msg_name);
 			if (err < 0)
 				return err;
@@ -1909,12 +1905,13 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 		kmsg->msg_namelen = 0;
 	}
 
-	if (nr_segs > UIO_MAXIOV)
+	if (msg.msg_iovlen > UIO_MAXIOV)
 		return -EMSGSIZE;
 
 	kmsg->msg_iocb = NULL;
 
-	return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
+	return import_iovec(save_addr ? READ : WRITE,
+			    msg.msg_iov, msg.msg_iovlen,
 			    UIO_FASTIOV, iov, &kmsg->msg_iter);
 }
 
-- 
GitLab


From 5da028a8af38eced330332d5ae51c212e9e86242 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 27 Jun 2017 18:24:21 -0400
Subject: [PATCH 0651/1958] get_compat_msghdr(): get rid of field-by-field
 copyin

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/compat.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/net/compat.c b/net/compat.c
index aba929e5250f1..dba5e222a0e5b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -37,21 +37,16 @@ int get_compat_msghdr(struct msghdr *kmsg,
 		      struct sockaddr __user **save_addr,
 		      struct iovec **iov)
 {
-	compat_uptr_t uaddr, uiov, tmp3;
-	compat_size_t nr_segs;
+	struct compat_msghdr msg;
 	ssize_t err;
 
-	if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
-	    __get_user(uaddr, &umsg->msg_name) ||
-	    __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
-	    __get_user(uiov, &umsg->msg_iov) ||
-	    __get_user(nr_segs, &umsg->msg_iovlen) ||
-	    __get_user(tmp3, &umsg->msg_control) ||
-	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
-	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
+	if (copy_from_user(&msg, umsg, sizeof(*umsg)))
 		return -EFAULT;
 
-	if (!uaddr)
+	kmsg->msg_flags = msg.msg_flags;
+	kmsg->msg_namelen = msg.msg_namelen;
+
+	if (!msg.msg_name)
 		kmsg->msg_namelen = 0;
 
 	if (kmsg->msg_namelen < 0)
@@ -59,14 +54,16 @@ int get_compat_msghdr(struct msghdr *kmsg,
 
 	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
 		kmsg->msg_namelen = sizeof(struct sockaddr_storage);
-	kmsg->msg_control = compat_ptr(tmp3);
+
+	kmsg->msg_control = compat_ptr(msg.msg_control);
+	kmsg->msg_controllen = msg.msg_controllen;
 
 	if (save_addr)
-		*save_addr = compat_ptr(uaddr);
+		*save_addr = compat_ptr(msg.msg_name);
 
-	if (uaddr && kmsg->msg_namelen) {
+	if (msg.msg_name && kmsg->msg_namelen) {
 		if (!save_addr) {
-			err = move_addr_to_kernel(compat_ptr(uaddr),
+			err = move_addr_to_kernel(compat_ptr(msg.msg_name),
 						  kmsg->msg_namelen,
 						  kmsg->msg_name);
 			if (err < 0)
@@ -77,13 +74,13 @@ int get_compat_msghdr(struct msghdr *kmsg,
 		kmsg->msg_namelen = 0;
 	}
 
-	if (nr_segs > UIO_MAXIOV)
+	if (msg.msg_iovlen > UIO_MAXIOV)
 		return -EMSGSIZE;
 
 	kmsg->msg_iocb = NULL;
 
 	return compat_import_iovec(save_addr ? READ : WRITE,
-				   compat_ptr(uiov), nr_segs,
+				   compat_ptr(msg.msg_iov), msg.msg_iovlen,
 				   UIO_FASTIOV, iov, &kmsg->msg_iter);
 }
 
-- 
GitLab


From f8f8a727eab1c5b78c3703a461565b042979cc79 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 27 Jun 2017 18:34:53 -0400
Subject: [PATCH 0652/1958] get_compat_bpf_fprog(): don't copyin field-by-field

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/compat.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/compat.c b/net/compat.c
index dba5e222a0e5b..6ded6c821d7a2 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -313,15 +313,15 @@ struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval)
 {
 	struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval;
 	struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog));
-	compat_uptr_t ptr;
-	u16 len;
-
-	if (!access_ok(VERIFY_READ, fprog32, sizeof(*fprog32)) ||
-	    !access_ok(VERIFY_WRITE, kfprog, sizeof(struct sock_fprog)) ||
-	    __get_user(len, &fprog32->len) ||
-	    __get_user(ptr, &fprog32->filter) ||
-	    __put_user(len, &kfprog->len) ||
-	    __put_user(compat_ptr(ptr), &kfprog->filter))
+	struct compat_sock_fprog f32;
+	struct sock_fprog f;
+
+	if (copy_from_user(&f32, fprog32, sizeof(*fprog32)))
+		return NULL;
+	memset(&f, 0, sizeof(f));
+	f.len = f32.len;
+	f.filter = compat_ptr(f32.filter);
+	if (copy_to_user(kfprog, &f, sizeof(struct sock_fprog)))
 		return NULL;
 
 	return kfprog;
-- 
GitLab


From ecebcd4da6cde7bfcde62d06488faba164b70b37 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 4 Jul 2017 13:16:41 -0600
Subject: [PATCH 0653/1958] docs: Do not include from kernel/rcu/srcu.c

That file went away with commit bd8cc5a062f4 (srcu: Remove Classic SRCU)
during the 4.13 merge window, leading to errors like:

  Error: Cannot open file ./kernel/rcu/srcu.c

during the docs build.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/basics.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index 472e7a664d13c..ab82250c77273 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst
@@ -106,9 +106,6 @@ Kernel utility functions
 .. kernel-doc:: kernel/sys.c
    :export:
 
-.. kernel-doc:: kernel/rcu/srcu.c
-   :export:
-
 .. kernel-doc:: kernel/rcu/tree.c
    :export:
 
-- 
GitLab


From 2bece49394872d36bbc5767fd643deac05920c55 Mon Sep 17 00:00:00 2001
From: Loc Ho <lho@apm.com>
Date: Mon, 3 Jul 2017 14:33:08 -0700
Subject: [PATCH 0654/1958] ACPI: SPCR: Use access width to determine mmio
 usage

The current SPCR code does not check the access width of the MMIO, and
uses a default of 8bit register accesses.  This prevents devices that
only do 16 or 32bit register accesses from working.  By simply checking
this field and setting the MMIO string appropriately, this issue can be
corrected.  To prevent any legacy issues, the code will default to 8bit
accesses if the value is anything but 16 or 32.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Signed-off-by: Loc Ho <lho@apm.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/spcr.c     | 18 ++++++++++++++++--
 include/acpi/acrestyp.h |  7 +++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 3afa8c1fa1270..29050630f3da2 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -74,8 +74,22 @@ int __init parse_spcr(bool earlycon)
 		goto done;
 	}
 
-	iotype = table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY ?
-			"mmio" : "io";
+	if (table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		switch (table->serial_port.access_width) {
+		default:
+			pr_err("Unexpected SPCR Access Width.  Defaulting to byte size\n");
+		case ACPI_ACCESS_SIZE_BYTE:
+			iotype = "mmio";
+			break;
+		case ACPI_ACCESS_SIZE_WORD:
+			iotype = "mmio16";
+			break;
+		case ACPI_ACCESS_SIZE_DWORD:
+			iotype = "mmio32";
+			break;
+		}
+	} else
+		iotype = "io";
 
 	switch (table->interface_type) {
 	case ACPI_DBG2_ARM_SBSA_32BIT:
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index f0f7403d2000d..781cb555c960e 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h
@@ -372,6 +372,13 @@ struct acpi_resource_generic_register {
 	u64 address;
 };
 
+/* Generic Address Space Access Sizes */
+#define ACPI_ACCESS_SIZE_UNDEFINED		0
+#define ACPI_ACCESS_SIZE_BYTE			1
+#define ACPI_ACCESS_SIZE_WORD			2
+#define ACPI_ACCESS_SIZE_DWORD			3
+#define ACPI_ACCESS_SIZE_QWORD			4
+
 struct acpi_resource_gpio {
 	u8 revision_id;
 	u8 connection_type;
-- 
GitLab


From 79a648328d2a604524a30523ca763fbeca0f70e3 Mon Sep 17 00:00:00 2001
From: Loc Ho <lho@apm.com>
Date: Mon, 3 Jul 2017 14:33:09 -0700
Subject: [PATCH 0655/1958] ACPI: SPCR: Workaround for APM X-Gene 8250 UART
 32-alignment errata

APM X-Gene verion 1 and 2 have an 8250 UART with its register
aligned to 32-bit. In addition, the latest released BIOS
encodes the access field as 8-bit access instead 32-bit access.
This causes no console with ACPI boot as the console
will not match X-Gene UART port due to the lack of mmio32
option.

Signed-off-by: Loc Ho <lho@apm.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/spcr.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 29050630f3da2..4ac3e06b41d84 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -36,6 +36,26 @@ static bool qdf2400_erratum_44_present(struct acpi_table_header *h)
 	return false;
 }
 
+/*
+ * APM X-Gene v1 and v2 UART hardware is an 16550 like device but has its
+ * register aligned to 32-bit. In addition, the BIOS also encoded the
+ * access width to be 8 bits. This function detects this errata condition.
+ */
+static bool xgene_8250_erratum_present(struct acpi_table_spcr *tb)
+{
+	if (tb->interface_type != ACPI_DBG2_16550_COMPATIBLE)
+		return false;
+
+	if (memcmp(tb->header.oem_id, "APMC0D", ACPI_OEM_ID_SIZE))
+		return false;
+
+	if (!memcmp(tb->header.oem_table_id, "XGENESPC",
+	    ACPI_OEM_TABLE_ID_SIZE) && tb->header.oem_revision == 0)
+		return true;
+
+	return false;
+}
+
 /**
  * parse_spcr() - parse ACPI SPCR table and add preferred console
  *
@@ -129,6 +149,8 @@ int __init parse_spcr(bool earlycon)
 
 	if (qdf2400_erratum_44_present(&table->header))
 		uart = "qdf2400_e44";
+	if (xgene_8250_erratum_present(table))
+		iotype = "mmio32";
 
 	snprintf(opts, sizeof(opts), "%s,%s,0x%llx,%d", uart, iotype,
 		 table->serial_port.address, baud_rate);
-- 
GitLab


From 5438bc573a2cd93f9e28acc71db50d89ce72eb64 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 29 Jun 2017 10:49:25 +0100
Subject: [PATCH 0656/1958] ACPI / osi: Make local function
 acpi_osi_dmi_linux() static

The function acpi_osi_dmi_linux() is local and does not need to be in
global scope, so make it static.

Cleans up sparse warning:
"symbol 'acpi_osi_dmi_linux' was not declared. Should it be static?"

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/osi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c
index 849f9d2245cac..723bee58bbcf5 100644
--- a/drivers/acpi/osi.c
+++ b/drivers/acpi/osi.c
@@ -265,7 +265,8 @@ static void __init acpi_osi_dmi_darwin(bool enable,
 	__acpi_osi_setup_darwin(enable);
 }
 
-void __init acpi_osi_dmi_linux(bool enable, const struct dmi_system_id *d)
+static void __init acpi_osi_dmi_linux(bool enable,
+				      const struct dmi_system_id *d)
 {
 	pr_notice("DMI detected to setup _OSI(\"Linux\"): %s\n", d->ident);
 	osi_config.linux_dmi = 1;
-- 
GitLab


From 9a4d8d60df3bc26236fa9cc3bc806ff99e5e5625 Mon Sep 17 00:00:00 2001
From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com>
Date: Fri, 30 Jun 2017 15:25:38 +0800
Subject: [PATCH 0657/1958] ACPI / bus: handle ACPI hotplug schedule errors
 completely

Kernel should decrements the reference count of ACPI device
when the scheduling of ACPI hotplug work failed, and evaluates
_OST to notify BIOS the failure.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 784bda663d162..9d4fea6433f51 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -432,11 +432,15 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 	    (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
 		driver->ops.notify(adev, type);
 
-	if (hotplug_event && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
+	if (!hotplug_event) {
+		acpi_bus_put_acpi_device(adev);
+		return;
+	}
+
+	if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
 		return;
 
 	acpi_bus_put_acpi_device(adev);
-	return;
 
  err:
 	acpi_evaluate_ost(handle, type, ost_code, NULL);
-- 
GitLab


From d429e5c12269a930b81d8b57b788bbe3cf12e815 Mon Sep 17 00:00:00 2001
From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com>
Date: Mon, 3 Jul 2017 21:26:10 +0800
Subject: [PATCH 0658/1958] ACPI / scan: Indicate to platform when hot remove
 returns busy

In hotplug logic, it always indicates non-specific failure to
platform through _OST when handing ACPI hot-remove event failed. Then
platform terminates the hot-remove process but it can not identify
the reason.

Base on current hot-remove code, there have two situations that it
returns busy:

 - OSPM try to offline an individual device, but the device offline
   function returns "busy".

 - When the ejection event is applied to an "not offlined yet"
   container.  OSPM sends a kobject change event to userspace and
   returns "busy".

Both of them will returns -EBUSY to ACPI device hotplug function.
Then, the hotplug function indicates non-specific failure to platform
just like for any other error, e.g. -ENODEV or -EIO.

The benefit to the platform for identifying the OS "busy" state is
that it can use a different approach to handle the "busy" instead of
simply terminating the hot-remove operation for an unknown reason.
For example, the platform can wait for a while and then re-trigger
hot-remove.

Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
[ rjw: Changelog massage ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index d53162997f320..ce88175032629 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -404,10 +404,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 		error = dock_notify(adev, src);
 	} else if (adev->flags.hotplug_notify) {
 		error = acpi_generic_hotplug_event(adev, src);
-		if (error == -EPERM) {
-			ost_code = ACPI_OST_SC_EJECT_NOT_SUPPORTED;
-			goto err_out;
-		}
 	} else {
 		int (*notify)(struct acpi_device *, u32);
 
@@ -423,8 +419,20 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 		else
 			goto out;
 	}
-	if (!error)
+	switch (error) {
+	case 0:
 		ost_code = ACPI_OST_SC_SUCCESS;
+		break;
+	case -EPERM:
+		ost_code = ACPI_OST_SC_EJECT_NOT_SUPPORTED;
+		break;
+	case -EBUSY:
+		ost_code = ACPI_OST_SC_DEVICE_BUSY;
+		break;
+	default:
+		ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
+		break;
+	}
 
  err_out:
 	acpi_evaluate_ost(adev->handle, src, ost_code, NULL);
-- 
GitLab


From 23c6d2c73f4c7b1074e21fb3be8220fd677b1f7e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 4 Jul 2017 21:55:13 +0200
Subject: [PATCH 0659/1958] Revert "PM / Domains: Handle safely
 genpd_syscore_switch() call on non-genpd device"

Revert commit 8b55e55ee443 (PM / Domains: Handle safely
genpd_syscore_switch() call on non-genpd device) which was misguided
(the change made by it was not necessary) and it introduced a call to
a function that may sleep into an atomic context code path.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e342408cfb8d2..b463859f185f7 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1148,8 +1148,8 @@ static void genpd_syscore_switch(struct device *dev, bool suspend)
 {
 	struct generic_pm_domain *genpd;
 
-	genpd = genpd_lookup_dev(dev);
-	if (!genpd)
+	genpd = dev_to_genpd(dev);
+	if (!pm_genpd_present(genpd))
 		return;
 
 	if (suspend) {
-- 
GitLab


From af3eb27433356ed4b6e124fd446a425a0ccf4e49 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 30 Jun 2017 18:06:57 +0200
Subject: [PATCH 0660/1958] PM / Domains: provide pm_genpd_poweroff_noirq()
 stub

When CONFIG_PM_SLEEP is disabled, we don't have a pm_genpd_poweroff_noirq
function definition:

drivers/base/power/domain.c: In function 'pm_genpd_init':
drivers/base/power/domain.c:1549:37: error: 'pm_genpd_poweroff_noirq' undeclared (first use in this function); did you mean 'genpd_power_off_unused'?

This adds another NULL definition for it, just like we already have
for the other _noirq handlers.

Fixes: 10da65423fdb (PM / Domains: Call driver's noirq callbacks)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index b463859f185f7..2ac906288a6fe 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1180,6 +1180,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron);
 #define pm_genpd_resume_noirq		NULL
 #define pm_genpd_freeze_noirq		NULL
 #define pm_genpd_thaw_noirq		NULL
+#define pm_genpd_poweroff_noirq		NULL
 #define pm_genpd_restore_noirq		NULL
 #define pm_genpd_complete		NULL
 
-- 
GitLab


From 1d0c6e593023ac5dafc2ea2b3f23d96f1c1f2fa2 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 30 Jun 2017 10:22:14 +0530
Subject: [PATCH 0661/1958] PM / sleep: constify attribute_group structures

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   3802	    624	     32	   4458	   116a	kernel/power/main.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   3866	    560	     32	   4458	   116a	kernel/power/main.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/main.c b/kernel/power/main.c
index d401c21136d1c..42bd800a6755b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -705,7 +705,7 @@ static struct attribute * g[] = {
 	NULL,
 };
 
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
 	.attrs = g,
 };
 
-- 
GitLab


From 402202e8deac760aec1b54d96183f37047a51a45 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 3 Jul 2017 13:29:04 +0530
Subject: [PATCH 0662/1958] cpufreq: cpufreq_stats: constify attribute_group
 structures

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   1655	    256	      4	   1915	    77b	drivers/cpufreq/cpufreq_stats.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   1695	    192	      4	   1891	    763	drivers/cpufreq/cpufreq_stats.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index f570ead624547..d362739a71f37 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -135,7 +135,7 @@ static struct attribute *default_attrs[] = {
 	&trans_table.attr,
 	NULL
 };
-static struct attribute_group stats_attr_group = {
+static const struct attribute_group stats_attr_group = {
 	.attrs = default_attrs,
 	.name = "stats"
 };
-- 
GitLab


From 106c9c77edfba86812def8cbd12cf5f60b94ec30 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 3 Jul 2017 13:40:33 +0530
Subject: [PATCH 0663/1958] cpufreq: intel_pstate: constify attribute_group
 structures

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  15197	   2552	     40	  17789	   457d	drivers/cpufreq/intel_pstate.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  15261	   2488	     40	  17789	   457d	drivers/cpufreq/intel_pstate.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index eb1158532de31..9aa4fa128bfa5 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1213,7 +1213,7 @@ static struct attribute *intel_pstate_attributes[] = {
 	NULL
 };
 
-static struct attribute_group intel_pstate_attr_group = {
+static const struct attribute_group intel_pstate_attr_group = {
 	.attrs = intel_pstate_attributes,
 };
 
-- 
GitLab


From f681eb1d5c02c9e79775e10363057d034c720efc Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 5 Jun 2017 22:44:49 +0300
Subject: [PATCH 0664/1958] ovl: fix nlink leak in ovl_rename()

This patch fixes an overlay inode nlink leak in the case where
ovl_rename() renames over a non-dir.

This is not so critical, because overlay inode doesn't rely on
nlink dropping to zero for inode deletion.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a63a71656e9bd..fcfa7de12ad57 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1046,6 +1046,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	if (cleanup_whiteout)
 		ovl_cleanup(old_upperdir->d_inode, newdentry);
 
+	if (overwrite && d_inode(new)) {
+		if (new_is_dir)
+			clear_nlink(d_inode(new));
+		else
+			drop_nlink(d_inode(new));
+	}
+
 	ovl_dentry_version_inc(old->d_parent);
 	ovl_dentry_version_inc(new->d_parent);
 
-- 
GitLab


From 13cf199d0088b77ab08a9594df2e73e775317ed2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 12 Jun 2017 09:54:40 +0300
Subject: [PATCH 0665/1958] ovl: allocate an ovl_inode struct

We need some more space to store overlay inode data in memory,
so allocate overlay inodes from a slab of struct ovl_inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/ovl_entry.h |  9 +++++++
 fs/overlayfs/super.c     | 56 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 34bc4a9f5c61d..553727df886c5 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -58,3 +58,12 @@ static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
 {
 	return lockless_dereference(oe->__upperdentry);
 }
+
+struct ovl_inode {
+	struct inode vfs_inode;
+};
+
+static inline struct ovl_inode *OVL_I(struct inode *inode)
+{
+	return container_of(inode, struct ovl_inode, vfs_inode);
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4882ffb37baea..ed916018fe1a7 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -165,6 +165,27 @@ static const struct dentry_operations ovl_reval_dentry_operations = {
 	.d_weak_revalidate = ovl_dentry_weak_revalidate,
 };
 
+static struct kmem_cache *ovl_inode_cachep;
+
+static struct inode *ovl_alloc_inode(struct super_block *sb)
+{
+	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+
+	return &oi->vfs_inode;
+}
+
+static void ovl_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
+}
+
+static void ovl_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ovl_i_callback);
+}
+
 static void ovl_put_super(struct super_block *sb)
 {
 	struct ovl_fs *ufs = sb->s_fs_info;
@@ -263,12 +284,14 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 }
 
 static const struct super_operations ovl_super_operations = {
+	.alloc_inode	= ovl_alloc_inode,
+	.destroy_inode	= ovl_destroy_inode,
+	.drop_inode	= generic_delete_inode,
 	.put_super	= ovl_put_super,
 	.sync_fs	= ovl_sync_fs,
 	.statfs		= ovl_statfs,
 	.show_options	= ovl_show_options,
 	.remount_fs	= ovl_remount,
-	.drop_inode	= generic_delete_inode,
 };
 
 enum {
@@ -1038,14 +1061,43 @@ static struct file_system_type ovl_fs_type = {
 };
 MODULE_ALIAS_FS("overlay");
 
+static void ovl_inode_init_once(void *foo)
+{
+	struct ovl_inode *oi = foo;
+
+	inode_init_once(&oi->vfs_inode);
+}
+
 static int __init ovl_init(void)
 {
-	return register_filesystem(&ovl_fs_type);
+	int err;
+
+	ovl_inode_cachep = kmem_cache_create("ovl_inode",
+					     sizeof(struct ovl_inode), 0,
+					     (SLAB_RECLAIM_ACCOUNT|
+					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+					     ovl_inode_init_once);
+	if (ovl_inode_cachep == NULL)
+		return -ENOMEM;
+
+	err = register_filesystem(&ovl_fs_type);
+	if (err)
+		kmem_cache_destroy(ovl_inode_cachep);
+
+	return err;
 }
 
 static void __exit ovl_exit(void)
 {
 	unregister_filesystem(&ovl_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(ovl_inode_cachep);
+
 }
 
 module_init(ovl_init);
-- 
GitLab


From e6d2ebddbc5205635a021a910f2f0e93bc2aa534 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:16 +0200
Subject: [PATCH 0666/1958] ovl: simplify getting inode

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     | 26 ++++++++++++++++++--------
 fs/overlayfs/namei.c     | 30 +++++++++---------------------
 fs/overlayfs/overlayfs.h |  5 ++---
 fs/overlayfs/super.c     |  5 +----
 fs/overlayfs/util.c      |  7 ++++++-
 5 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index d613e2c41242a..22c677040b359 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -462,18 +462,28 @@ static int ovl_inode_set(struct inode *inode, void *data)
 	return 0;
 }
 
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode)
-
+struct inode *ovl_get_inode(struct dentry *dentry)
 {
+	struct dentry *upperdentry = ovl_dentry_upper(dentry);
+	struct inode *realinode = d_inode(ovl_dentry_real(dentry));
 	struct inode *inode;
 
-	inode = iget5_locked(sb, (unsigned long) realinode,
-			     ovl_inode_test, ovl_inode_set, realinode);
-	if (inode && inode->i_state & I_NEW) {
-		ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+	if (upperdentry && !d_is_dir(upperdentry)) {
+		inode = iget5_locked(dentry->d_sb, (unsigned long) realinode,
+				     ovl_inode_test, ovl_inode_set, realinode);
+		if (!inode || !(inode->i_state & I_NEW))
+			goto out;
+
 		set_nlink(inode, realinode->i_nlink);
-		unlock_new_inode(inode);
+	} else {
+		inode = new_inode(dentry->d_sb);
+		if (!inode)
+			goto out;
 	}
-
+	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+	ovl_inode_init(inode, dentry);
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+out:
 	return inode;
 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index de0d4f742f36e..0072ca5d5dace 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -433,41 +433,29 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	if (!oe)
 		goto out_put;
 
-	if (upperdentry || ctr) {
-		struct dentry *realdentry;
-		struct inode *realinode;
-
-		realdentry = upperdentry ? upperdentry : stack[0].dentry;
-		realinode = d_inode(realdentry);
+	oe->opaque = upperopaque;
+	oe->impure = upperimpure;
+	oe->redirect = upperredirect;
+	oe->__upperdentry = upperdentry;
+	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+	dentry->d_fsdata = oe;
 
+	if (upperdentry || ctr) {
 		err = -ENOMEM;
-		if (upperdentry && !d_is_dir(upperdentry)) {
-			inode = ovl_get_inode(dentry->d_sb, realinode);
-		} else {
-			inode = ovl_new_inode(dentry->d_sb, realinode->i_mode,
-					      realinode->i_rdev);
-			if (inode)
-				ovl_inode_init(inode, realinode, !!upperdentry);
-		}
+		inode = ovl_get_inode(dentry);
 		if (!inode)
 			goto out_free_oe;
-		ovl_copyattr(realdentry->d_inode, inode);
 	}
 
 	revert_creds(old_cred);
-	oe->opaque = upperopaque;
-	oe->impure = upperimpure;
-	oe->redirect = upperredirect;
-	oe->__upperdentry = upperdentry;
-	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
 	kfree(stack);
 	kfree(d.redirect);
-	dentry->d_fsdata = oe;
 	d_add(dentry, inode);
 
 	return NULL;
 
 out_free_oe:
+	dentry->d_fsdata = NULL;
 	kfree(oe);
 out_put:
 	for (i = 0; i < ctr; i++)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 10863b4105fa2..3af33d3166e24 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -211,8 +211,7 @@ bool ovl_redirect_dir(struct super_block *sb);
 const char *ovl_dentry_get_redirect(struct dentry *dentry);
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
-void ovl_inode_init(struct inode *inode, struct inode *realinode,
-		    bool is_upper);
+void ovl_inode_init(struct inode *inode, struct dentry *dentry);
 void ovl_inode_update(struct inode *inode, struct inode *upperinode);
 void ovl_dentry_version_inc(struct dentry *dentry);
 u64 ovl_dentry_version_get(struct dentry *dentry);
@@ -262,7 +261,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
 bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
+struct inode *ovl_get_inode(struct dentry *dentry);
 static inline void ovl_copyattr(struct inode *from, struct inode *to)
 {
 	to->i_uid = from->i_uid;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ed916018fe1a7..ec1b40816483c 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -757,7 +757,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	struct path upperpath = { };
 	struct path workpath = { };
 	struct dentry *root_dentry;
-	struct inode *realinode;
 	struct ovl_entry *oe;
 	struct ovl_fs *ufs;
 	struct path *stack = NULL;
@@ -1009,9 +1008,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	root_dentry->d_fsdata = oe;
 
-	realinode = d_inode(ovl_dentry_real(root_dentry));
-	ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
-	ovl_copyattr(realinode, d_inode(root_dentry));
+	ovl_inode_init(d_inode(root_dentry), root_dentry);
 
 	sb->s_root = root_dentry;
 
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 8090489138891..f4847eff3284a 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -230,10 +230,15 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
 	oe->__upperdentry = upperdentry;
 }
 
-void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper)
+void ovl_inode_init(struct inode *inode, struct dentry *dentry)
 {
+	struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+	bool is_upper = ovl_dentry_upper(dentry);
+
 	WRITE_ONCE(inode->i_private, (unsigned long) realinode |
 		   (is_upper ? OVL_ISUPPER_MASK : 0));
+
+	ovl_copyattr(realinode, inode);
 }
 
 void ovl_inode_update(struct inode *inode, struct inode *upperinode)
-- 
GitLab


From 25b7713afe50963e70f98c1c964f60baf1e7e373 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:16 +0200
Subject: [PATCH 0667/1958] ovl: use i_private only as a key

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     |  4 ++--
 fs/overlayfs/overlayfs.h | 13 +------------
 fs/overlayfs/ovl_entry.h |  2 ++
 fs/overlayfs/super.c     |  3 +++
 fs/overlayfs/util.c      | 35 +++++++++++++++++++++++++++++------
 5 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 22c677040b359..4c30d44905efa 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -453,12 +453,12 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 
 static int ovl_inode_test(struct inode *inode, void *data)
 {
-	return ovl_inode_real(inode, NULL) == data;
+	return inode->i_private == data;
 }
 
 static int ovl_inode_set(struct inode *inode, void *data)
 {
-	inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK);
+	inode->i_private = data;
 	return 0;
 }
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 3af33d3166e24..6e6600ae1d542 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -60,8 +60,6 @@ struct ovl_fh {
 	u8 fid[0];	/* file identifier */
 } __packed;
 
-#define OVL_ISUPPER_MASK 1UL
-
 static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int err = vfs_rmdir(dir, dentry);
@@ -175,16 +173,6 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
 	return ret;
 }
 
-static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
-{
-	unsigned long x = (unsigned long) READ_ONCE(inode->i_private);
-
-	if (is_upper)
-		*is_upper = x & OVL_ISUPPER_MASK;
-
-	return (struct inode *) (x & ~OVL_ISUPPER_MASK);
-}
-
 /* util.c */
 int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
@@ -201,6 +189,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
 struct dentry *ovl_dentry_upper(struct dentry *dentry);
 struct dentry *ovl_dentry_lower(struct dentry *dentry);
 struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct inode *ovl_inode_real(struct inode *inode, bool *is_upper);
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 553727df886c5..b8c213891e84a 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -61,6 +61,8 @@ static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
 
 struct ovl_inode {
 	struct inode vfs_inode;
+	struct inode *upper;
+	struct inode *lower;
 };
 
 static inline struct ovl_inode *OVL_I(struct inode *inode)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ec1b40816483c..c166c1d76890b 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -171,6 +171,9 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 {
 	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 
+	oi->upper = NULL;
+	oi->lower = NULL;
+
 	return &oi->vfs_inode;
 }
 
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index f4847eff3284a..fc7b1447435bb 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -155,6 +155,22 @@ struct dentry *ovl_dentry_real(struct dentry *dentry)
 	return realdentry;
 }
 
+struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
+{
+	struct inode *realinode = lockless_dereference(OVL_I(inode)->upper);
+	bool isup = false;
+
+	if (!realinode)
+		realinode = OVL_I(inode)->lower;
+	else
+		isup = true;
+
+	if (is_upper)
+		*is_upper = isup;
+
+	return realinode;
+}
+
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
@@ -233,10 +249,11 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
 void ovl_inode_init(struct inode *inode, struct dentry *dentry)
 {
 	struct inode *realinode = d_inode(ovl_dentry_real(dentry));
-	bool is_upper = ovl_dentry_upper(dentry);
 
-	WRITE_ONCE(inode->i_private, (unsigned long) realinode |
-		   (is_upper ? OVL_ISUPPER_MASK : 0));
+	if (ovl_dentry_upper(dentry))
+		OVL_I(inode)->upper = realinode;
+	else
+		OVL_I(inode)->lower = realinode;
 
 	ovl_copyattr(realinode, inode);
 }
@@ -245,10 +262,16 @@ void ovl_inode_update(struct inode *inode, struct inode *upperinode)
 {
 	WARN_ON(!upperinode);
 	WARN_ON(!inode_unhashed(inode));
-	WRITE_ONCE(inode->i_private,
-		   (unsigned long) upperinode | OVL_ISUPPER_MASK);
-	if (!S_ISDIR(upperinode->i_mode))
+	/*
+	 * Make sure upperinode is consistent before making it visible to
+	 * ovl_inode_real();
+	 */
+	smp_wmb();
+	OVL_I(inode)->upper = upperinode;
+	if (!S_ISDIR(upperinode->i_mode)) {
+		inode->i_private = upperinode;
 		__insert_inode_hash(inode, (unsigned long) upperinode);
+	}
 }
 
 void ovl_dentry_version_inc(struct dentry *dentry)
-- 
GitLab


From 9020df37207867272e590a416c2fb3da0e5383c6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:16 +0200
Subject: [PATCH 0668/1958] ovl: compare inodes

When checking for consistency in directory operations (unlink, rename,
etc.) match inodes not dentries.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index fcfa7de12ad57..59e0dc9897fbb 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -611,6 +611,11 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	return err;
 }
 
+static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
+{
+	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
+}
+
 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
@@ -646,7 +651,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
 	err = -ESTALE;
 	if ((opaquedir && upper != opaquedir) ||
 	    (!opaquedir && ovl_dentry_upper(dentry) &&
-	     upper != ovl_dentry_upper(dentry))) {
+	     !ovl_matches_upper(dentry, upper))) {
 		goto out_dput_upper;
 	}
 
@@ -707,7 +712,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
 
 	err = -ESTALE;
 	if ((opaquedir && upper != opaquedir) ||
-	    (!opaquedir && upper != ovl_dentry_upper(dentry)))
+	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
 		goto out_dput_upper;
 
 	if (is_dir)
@@ -985,7 +990,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		goto out_unlock;
 
 	err = -ESTALE;
-	if (olddentry != ovl_dentry_upper(old))
+	if (!ovl_matches_upper(old, olddentry))
 		goto out_dput_old;
 
 	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
@@ -1003,7 +1008,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 			if (newdentry != opaquedir)
 				goto out_dput;
 		} else {
-			if (newdentry != ovl_dentry_upper(new))
+			if (!ovl_matches_upper(new, newdentry))
 				goto out_dput;
 		}
 	} else {
-- 
GitLab


From 09d8b586731bf589655c2ac971532c14cf272b63 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:16 +0200
Subject: [PATCH 0669/1958] ovl: move __upperdentry to ovl_inode

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   |  3 +-
 fs/overlayfs/dir.c       |  8 ++--
 fs/overlayfs/inode.c     | 26 ++++++++----
 fs/overlayfs/namei.c     |  7 ++--
 fs/overlayfs/overlayfs.h | 12 +++---
 fs/overlayfs/ovl_entry.h | 13 +++---
 fs/overlayfs/super.c     | 15 ++++---
 fs/overlayfs/util.c      | 89 +++++++++++++++-------------------------
 8 files changed, 79 insertions(+), 94 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index e5869f91b3ab2..87289b9a152c8 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -417,8 +417,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 		goto out_cleanup;
 
 	newdentry = dget(tmpfile ? upper : temp);
-	ovl_dentry_update(dentry, newdentry);
-	ovl_inode_update(d_inode(dentry), d_inode(newdentry));
+	ovl_inode_update(d_inode(dentry), newdentry);
 
 	/* Restore timestamps on parent (best effort) */
 	ovl_set_timestamps(upperdir, pstat);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 59e0dc9897fbb..d0d6292e069a6 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -154,12 +154,12 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
 			    struct dentry *newdentry, bool hardlink)
 {
 	ovl_dentry_version_inc(dentry->d_parent);
-	ovl_dentry_update(dentry, newdentry);
 	if (!hardlink) {
-		ovl_inode_update(inode, d_inode(newdentry));
+		ovl_inode_update(inode, newdentry);
 		ovl_copyattr(newdentry->d_inode, inode);
 	} else {
-		WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
+		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
+		dput(newdentry);
 		inc_nlink(inode);
 	}
 	d_instantiate(dentry, inode);
@@ -1003,7 +1003,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	new_opaque = ovl_dentry_is_opaque(new);
 
 	err = -ESTALE;
-	if (ovl_dentry_upper(new)) {
+	if (d_inode(new) && ovl_dentry_upper(new)) {
 		if (opaquedir) {
 			if (newdentry != opaquedir)
 				goto out_dput;
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 4c30d44905efa..4654c03dd508d 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -134,8 +134,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 
 int ovl_permission(struct inode *inode, int mask)
 {
-	bool is_upper;
-	struct inode *realinode = ovl_inode_real(inode, &is_upper);
+	struct inode *upperinode = ovl_inode_upper(inode);
+	struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
 	const struct cred *old_cred;
 	int err;
 
@@ -154,7 +154,8 @@ int ovl_permission(struct inode *inode, int mask)
 		return err;
 
 	old_cred = ovl_override_creds(inode->i_sb);
-	if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+	if (!upperinode &&
+	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
 		mask &= ~(MAY_WRITE | MAY_APPEND);
 		/* Make sure mounter can read file for copy up later */
 		mask |= MAY_READ;
@@ -286,7 +287,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 
 struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 {
-	struct inode *realinode = ovl_inode_real(inode, NULL);
+	struct inode *realinode = ovl_inode_real(inode);
 	const struct cred *old_cred;
 	struct posix_acl *acl;
 
@@ -462,17 +463,24 @@ static int ovl_inode_set(struct inode *inode, void *data)
 	return 0;
 }
 
-struct inode *ovl_get_inode(struct dentry *dentry)
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 {
-	struct dentry *upperdentry = ovl_dentry_upper(dentry);
-	struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
 	struct inode *inode;
 
+	if (!realinode)
+		realinode = d_inode(lowerdentry);
+
 	if (upperdentry && !d_is_dir(upperdentry)) {
 		inode = iget5_locked(dentry->d_sb, (unsigned long) realinode,
 				     ovl_inode_test, ovl_inode_set, realinode);
-		if (!inode || !(inode->i_state & I_NEW))
+		if (!inode)
 			goto out;
+		if (!(inode->i_state & I_NEW)) {
+			dput(upperdentry);
+			goto out;
+		}
 
 		set_nlink(inode, realinode->i_nlink);
 	} else {
@@ -481,7 +489,7 @@ struct inode *ovl_get_inode(struct dentry *dentry)
 			goto out;
 	}
 	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
-	ovl_inode_init(inode, dentry);
+	ovl_inode_init(inode, upperdentry, lowerdentry);
 	if (inode->i_state & I_NEW)
 		unlock_new_inode(inode);
 out:
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 0072ca5d5dace..1f873e2e8a793 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -359,7 +359,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(-ENAMETOOLONG);
 
 	old_cred = ovl_override_creds(dentry->d_sb);
-	upperdir = ovl_upperdentry_dereference(poe);
+	upperdir = ovl_dentry_upper(dentry->d_parent);
 	if (upperdir) {
 		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
 		if (err)
@@ -436,13 +436,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	oe->opaque = upperopaque;
 	oe->impure = upperimpure;
 	oe->redirect = upperredirect;
-	oe->__upperdentry = upperdentry;
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
 	dentry->d_fsdata = oe;
 
 	if (upperdentry || ctr) {
 		err = -ENOMEM;
-		inode = ovl_get_inode(dentry);
+		inode = ovl_get_inode(dentry, upperdentry);
 		if (!inode)
 			goto out_free_oe;
 	}
@@ -487,7 +486,7 @@ bool ovl_lower_positive(struct dentry *dentry)
 		return oe->opaque;
 
 	/* Negative upper -> positive lower */
-	if (!oe->__upperdentry)
+	if (!ovl_dentry_upper(dentry))
 		return true;
 
 	/* Positive upper -> have to look up lower to see whether it exists */
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6e6600ae1d542..83607f883ace8 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -189,7 +189,9 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
 struct dentry *ovl_dentry_upper(struct dentry *dentry);
 struct dentry *ovl_dentry_lower(struct dentry *dentry);
 struct dentry *ovl_dentry_real(struct dentry *dentry);
-struct inode *ovl_inode_real(struct inode *inode, bool *is_upper);
+struct inode *ovl_inode_upper(struct inode *inode);
+struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_real(struct inode *inode);
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
@@ -199,9 +201,9 @@ void ovl_dentry_set_opaque(struct dentry *dentry);
 bool ovl_redirect_dir(struct super_block *sb);
 const char *ovl_dentry_get_redirect(struct dentry *dentry);
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
-void ovl_inode_init(struct inode *inode, struct dentry *dentry);
-void ovl_inode_update(struct inode *inode, struct inode *upperinode);
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+		    struct dentry *lowerdentry);
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
 void ovl_dentry_version_inc(struct dentry *dentry);
 u64 ovl_dentry_version_get(struct dentry *dentry);
 bool ovl_is_whiteout(struct dentry *dentry);
@@ -250,7 +252,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
 bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct dentry *dentry);
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry);
 static inline void ovl_copyattr(struct inode *from, struct inode *to)
 {
 	to->i_uid = from->i_uid;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index b8c213891e84a..ddd937490f0dc 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -36,7 +36,6 @@ struct ovl_fs {
 
 /* private information held for every overlayfs dentry */
 struct ovl_entry {
-	struct dentry *__upperdentry;
 	struct ovl_dir_cache *cache;
 	union {
 		struct {
@@ -54,14 +53,9 @@ struct ovl_entry {
 
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 
-static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
-{
-	return lockless_dereference(oe->__upperdentry);
-}
-
 struct ovl_inode {
 	struct inode vfs_inode;
-	struct inode *upper;
+	struct dentry *__upperdentry;
 	struct inode *lower;
 };
 
@@ -69,3 +63,8 @@ static inline struct ovl_inode *OVL_I(struct inode *inode)
 {
 	return container_of(inode, struct ovl_inode, vfs_inode);
 }
+
+static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
+{
+	return lockless_dereference(oi->__upperdentry);
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c166c1d76890b..1b865716110a1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -41,7 +41,6 @@ static void ovl_dentry_release(struct dentry *dentry)
 	if (oe) {
 		unsigned int i;
 
-		dput(oe->__upperdentry);
 		kfree(oe->redirect);
 		for (i = 0; i < oe->numlower; i++)
 			dput(oe->lowerstack[i].dentry);
@@ -171,7 +170,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 {
 	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 
-	oi->upper = NULL;
+	oi->__upperdentry = NULL;
 	oi->lower = NULL;
 
 	return &oi->vfs_inode;
@@ -186,6 +185,10 @@ static void ovl_i_callback(struct rcu_head *head)
 
 static void ovl_destroy_inode(struct inode *inode)
 {
+	struct ovl_inode *oi = OVL_I(inode);
+
+	dput(oi->__upperdentry);
+
 	call_rcu(&inode->i_rcu, ovl_i_callback);
 }
 
@@ -636,7 +639,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 			size_t size, int flags)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
-	struct inode *realinode = ovl_inode_real(inode, NULL);
+	struct inode *realinode = ovl_inode_real(inode);
 	struct posix_acl *acl = NULL;
 	int err;
 
@@ -678,7 +681,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 
 	err = ovl_xattr_set(dentry, handler->name, value, size, flags);
 	if (!err)
-		ovl_copyattr(ovl_inode_real(inode, NULL), inode);
+		ovl_copyattr(ovl_inode_real(inode), inode);
 
 	return err;
 
@@ -1000,7 +1003,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(lowertmp);
 
 	if (upperpath.dentry) {
-		oe->__upperdentry = upperpath.dentry;
 		oe->impure = ovl_is_impuredir(upperpath.dentry);
 	}
 	for (i = 0; i < numlower; i++) {
@@ -1011,7 +1013,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	root_dentry->d_fsdata = oe;
 
-	ovl_inode_init(d_inode(root_dentry), root_dentry);
+	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
+		       ovl_dentry_lower(root_dentry));
 
 	sb->s_root = root_dentry;
 
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index fc7b1447435bb..e5d9e8daa55cb 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -78,7 +78,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
 	struct ovl_entry *oe = dentry->d_fsdata;
 	enum ovl_path_type type = 0;
 
-	if (oe->__upperdentry) {
+	if (ovl_dentry_upper(dentry)) {
 		type = __OVL_PATH_UPPER;
 
 		/*
@@ -99,10 +99,9 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
 void ovl_path_upper(struct dentry *dentry, struct path *path)
 {
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	struct ovl_entry *oe = dentry->d_fsdata;
 
 	path->mnt = ofs->upper_mnt;
-	path->dentry = ovl_upperdentry_dereference(oe);
+	path->dentry = ovl_dentry_upper(dentry);
 }
 
 void ovl_path_lower(struct dentry *dentry, struct path *path)
@@ -126,51 +125,39 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
 
 struct dentry *ovl_dentry_upper(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return ovl_upperdentry_dereference(oe);
-}
-
-static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
-{
-	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
+	return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
 }
 
 struct dentry *ovl_dentry_lower(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
 
-	return __ovl_dentry_lower(oe);
+	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
 }
 
 struct dentry *ovl_dentry_real(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-	struct dentry *realdentry;
-
-	realdentry = ovl_upperdentry_dereference(oe);
-	if (!realdentry)
-		realdentry = __ovl_dentry_lower(oe);
-
-	return realdentry;
+	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
 }
 
-struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
+struct inode *ovl_inode_upper(struct inode *inode)
 {
-	struct inode *realinode = lockless_dereference(OVL_I(inode)->upper);
-	bool isup = false;
+	struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode));
 
-	if (!realinode)
-		realinode = OVL_I(inode)->lower;
-	else
-		isup = true;
+	return upperdentry ? d_inode(upperdentry) : NULL;
+}
 
-	if (is_upper)
-		*is_upper = isup;
+struct inode *ovl_inode_lower(struct inode *inode)
+{
+	return OVL_I(inode)->lower;
+}
 
-	return realinode;
+struct inode *ovl_inode_real(struct inode *inode)
+{
+	return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
 }
 
+
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
@@ -232,42 +219,30 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
 	oe->redirect = redirect;
 }
 
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+		    struct dentry *lowerdentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	if (upperdentry)
+		OVL_I(inode)->__upperdentry = upperdentry;
+	if (lowerdentry)
+		OVL_I(inode)->lower = d_inode(lowerdentry);
 
-	WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
-	WARN_ON(oe->__upperdentry);
-	/*
-	 * Make sure upperdentry is consistent before making it visible to
-	 * ovl_upperdentry_dereference().
-	 */
-	smp_wmb();
-	oe->__upperdentry = upperdentry;
+	ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
 }
 
-void ovl_inode_init(struct inode *inode, struct dentry *dentry)
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 {
-	struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+	struct inode *upperinode = d_inode(upperdentry);
 
-	if (ovl_dentry_upper(dentry))
-		OVL_I(inode)->upper = realinode;
-	else
-		OVL_I(inode)->lower = realinode;
-
-	ovl_copyattr(realinode, inode);
-}
-
-void ovl_inode_update(struct inode *inode, struct inode *upperinode)
-{
-	WARN_ON(!upperinode);
 	WARN_ON(!inode_unhashed(inode));
+	WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
+	WARN_ON(OVL_I(inode)->__upperdentry);
+
 	/*
-	 * Make sure upperinode is consistent before making it visible to
-	 * ovl_inode_real();
+	 * Make sure upperdentry is consistent before making it visible
 	 */
 	smp_wmb();
-	OVL_I(inode)->upper = upperinode;
+	OVL_I(inode)->__upperdentry = upperdentry;
 	if (!S_ISDIR(upperinode->i_mode)) {
 		inode->i_private = upperinode;
 		__insert_inode_hash(inode, (unsigned long) upperinode);
@@ -311,7 +286,7 @@ int ovl_copy_up_start(struct dentry *dentry)
 	spin_lock(&ofs->copyup_wq.lock);
 	err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
 	if (!err) {
-		if (oe->__upperdentry)
+		if (ovl_dentry_upper(dentry))
 			err = 1; /* Already copied up */
 		else
 			oe->copying = true;
-- 
GitLab


From cf31c46347e8e54cb53d66255ae3eea045b0a60c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:16 +0200
Subject: [PATCH 0670/1958] ovl: move redirect to ovl_inode

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c     |  3 ++-
 fs/overlayfs/ovl_entry.h |  2 +-
 fs/overlayfs/super.c     |  3 ++-
 fs/overlayfs/util.c      | 10 ++++------
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 1f873e2e8a793..1b6092b80330c 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -435,7 +435,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 	oe->opaque = upperopaque;
 	oe->impure = upperimpure;
-	oe->redirect = upperredirect;
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
 	dentry->d_fsdata = oe;
 
@@ -444,6 +443,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		inode = ovl_get_inode(dentry, upperdentry);
 		if (!inode)
 			goto out_free_oe;
+
+		OVL_I(inode)->redirect = upperredirect;
 	}
 
 	revert_creds(old_cred);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index ddd937490f0dc..477d21738656f 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -40,7 +40,6 @@ struct ovl_entry {
 	union {
 		struct {
 			u64 version;
-			const char *redirect;
 			bool opaque;
 			bool impure;
 			bool copying;
@@ -54,6 +53,7 @@ struct ovl_entry {
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 
 struct ovl_inode {
+	const char *redirect;
 	struct inode vfs_inode;
 	struct dentry *__upperdentry;
 	struct inode *lower;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 1b865716110a1..7346518846c5a 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -41,7 +41,6 @@ static void ovl_dentry_release(struct dentry *dentry)
 	if (oe) {
 		unsigned int i;
 
-		kfree(oe->redirect);
 		for (i = 0; i < oe->numlower; i++)
 			dput(oe->lowerstack[i].dentry);
 		kfree_rcu(oe, rcu);
@@ -170,6 +169,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 {
 	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 
+	oi->redirect = NULL;
 	oi->__upperdentry = NULL;
 	oi->lower = NULL;
 
@@ -188,6 +188,7 @@ static void ovl_destroy_inode(struct inode *inode)
 	struct ovl_inode *oi = OVL_I(inode);
 
 	dput(oi->__upperdentry);
+	kfree(oi->redirect);
 
 	call_rcu(&inode->i_rcu, ovl_i_callback);
 }
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index e5d9e8daa55cb..be0670f1addfc 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -206,17 +206,15 @@ bool ovl_redirect_dir(struct super_block *sb)
 
 const char *ovl_dentry_get_redirect(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->redirect;
+	return OVL_I(d_inode(dentry))->redirect;
 }
 
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
 
-	kfree(oe->redirect);
-	oe->redirect = redirect;
+	kfree(oi->redirect);
+	oi->redirect = redirect;
 }
 
 void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
-- 
GitLab


From 13c72075ac9f5a5cf3f61c85adaafffe48a6f797 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:16 +0200
Subject: [PATCH 0671/1958] ovl: move impure to ovl_inode

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     |  4 ++++
 fs/overlayfs/namei.c     |  4 ----
 fs/overlayfs/overlayfs.h |  7 ++++++-
 fs/overlayfs/ovl_entry.h |  2 +-
 fs/overlayfs/super.c     |  4 +++-
 fs/overlayfs/util.c      | 22 ++++++++++++----------
 6 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 4654c03dd508d..23d64d51f331b 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -490,6 +490,10 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 	}
 	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
 	ovl_inode_init(inode, upperdentry, lowerdentry);
+
+	if (upperdentry && ovl_is_impuredir(upperdentry))
+		ovl_set_flag(OVL_IMPURE, inode);
+
 	if (inode->i_state & I_NEW)
 		unlock_new_inode(inode);
 out:
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 1b6092b80330c..277a55c8299a2 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -341,7 +341,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	unsigned int ctr = 0;
 	struct inode *inode = NULL;
 	bool upperopaque = false;
-	bool upperimpure = false;
 	char *upperredirect = NULL;
 	struct dentry *this;
 	unsigned int i;
@@ -386,8 +385,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 				poe = roe;
 		}
 		upperopaque = d.opaque;
-		if (upperdentry && d.is_dir)
-			upperimpure = ovl_is_impuredir(upperdentry);
 	}
 
 	if (!d.stop && poe->numlower) {
@@ -434,7 +431,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		goto out_put;
 
 	oe->opaque = upperopaque;
-	oe->impure = upperimpure;
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
 	dentry->d_fsdata = oe;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 83607f883ace8..b1be3d39ac9da 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -26,6 +26,10 @@ enum ovl_path_type {
 #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
 #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
 
+enum ovl_flag {
+	OVL_IMPURE,
+};
+
 /*
  * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
  * where:
@@ -195,7 +199,6 @@ struct inode *ovl_inode_real(struct inode *inode);
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
-bool ovl_dentry_is_impure(struct dentry *dentry);
 bool ovl_dentry_is_whiteout(struct dentry *dentry);
 void ovl_dentry_set_opaque(struct dentry *dentry);
 bool ovl_redirect_dir(struct super_block *sb);
@@ -215,6 +218,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 		       const char *name, const void *value, size_t size,
 		       int xerr);
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+void ovl_set_flag(unsigned long flag, struct inode *inode);
+bool ovl_test_flag(unsigned long flag, struct inode *inode);
 
 static inline bool ovl_is_impuredir(struct dentry *dentry)
 {
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 477d21738656f..50dfa4826152e 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -41,7 +41,6 @@ struct ovl_entry {
 		struct {
 			u64 version;
 			bool opaque;
-			bool impure;
 			bool copying;
 		};
 		struct rcu_head rcu;
@@ -54,6 +53,7 @@ struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 
 struct ovl_inode {
 	const char *redirect;
+	unsigned long flags;
 	struct inode vfs_inode;
 	struct dentry *__upperdentry;
 	struct inode *lower;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7346518846c5a..7c7b946b063f9 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -170,6 +170,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 
 	oi->redirect = NULL;
+	oi->flags = 0;
 	oi->__upperdentry = NULL;
 	oi->lower = NULL;
 
@@ -1004,7 +1005,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(lowertmp);
 
 	if (upperpath.dentry) {
-		oe->impure = ovl_is_impuredir(upperpath.dentry);
+		if (ovl_is_impuredir(upperpath.dentry))
+			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
 	}
 	for (i = 0; i < numlower; i++) {
 		oe->lowerstack[i].dentry = stack[i].dentry;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index be0670f1addfc..2fc4c22707aad 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -178,13 +178,6 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
 	return oe->opaque;
 }
 
-bool ovl_dentry_is_impure(struct dentry *dentry)
-{
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->impure;
-}
-
 bool ovl_dentry_is_whiteout(struct dentry *dentry)
 {
 	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
@@ -344,9 +337,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 {
 	int err;
-	struct ovl_entry *oe = dentry->d_fsdata;
 
-	if (oe->impure)
+	if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
 		return 0;
 
 	/*
@@ -356,7 +348,17 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 	err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
 				 "y", 1, 0);
 	if (!err)
-		oe->impure = true;
+		ovl_set_flag(OVL_IMPURE, d_inode(dentry));
 
 	return err;
 }
+
+void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+	set_bit(flag, &OVL_I(inode)->flags);
+}
+
+bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+	return test_bit(flag, &OVL_I(inode)->flags);
+}
-- 
GitLab


From a015dafcaf5b0316654a39bc598a76804595af90 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:51 +0300
Subject: [PATCH 0672/1958] ovl: use ovl_inode mutex to synchronize concurrent
 copy up

Use the new ovl_inode mutex to synchonize concurrent copy up
instead of the super block copy up workqueue.

Moving the synchronization object from the overlay dentry to
the overlay inode is needed for synchonizing concurrent copy up
of lower hardlinks to the same upper inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/ovl_entry.h |  5 +++--
 fs/overlayfs/super.c     |  3 ++-
 fs/overlayfs/util.c      | 23 ++++++-----------------
 3 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 50dfa4826152e..d8f514a474ca0 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -29,7 +29,6 @@ struct ovl_fs {
 	const struct cred *creator_cred;
 	bool tmpfile;
 	bool noxattr;
-	wait_queue_head_t copyup_wq;
 	/* sb common to all layers */
 	struct super_block *same_sb;
 };
@@ -41,7 +40,6 @@ struct ovl_entry {
 		struct {
 			u64 version;
 			bool opaque;
-			bool copying;
 		};
 		struct rcu_head rcu;
 	};
@@ -57,6 +55,9 @@ struct ovl_inode {
 	struct inode vfs_inode;
 	struct dentry *__upperdentry;
 	struct inode *lower;
+
+	/* synchronize copy up and more */
+	struct mutex lock;
 };
 
 static inline struct ovl_inode *OVL_I(struct inode *inode)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7c7b946b063f9..b0d539af1fadc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -173,6 +173,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 	oi->flags = 0;
 	oi->__upperdentry = NULL;
 	oi->lower = NULL;
+	mutex_init(&oi->lock);
 
 	return &oi->vfs_inode;
 }
@@ -190,6 +191,7 @@ static void ovl_destroy_inode(struct inode *inode)
 
 	dput(oi->__upperdentry);
 	kfree(oi->redirect);
+	mutex_destroy(&oi->lock);
 
 	call_rcu(&inode->i_rcu, ovl_i_callback);
 }
@@ -782,7 +784,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!ufs)
 		goto out;
 
-	init_waitqueue_head(&ufs->copyup_wq);
 	ufs->config.redirect_dir = ovl_redirect_dir_def;
 	err = ovl_parse_opt((char *) data, &ufs->config);
 	if (err)
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 2fc4c22707aad..a0baaa7e224c2 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -270,32 +270,21 @@ struct file *ovl_path_open(struct path *path, int flags)
 
 int ovl_copy_up_start(struct dentry *dentry)
 {
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
 	int err;
 
-	spin_lock(&ofs->copyup_wq.lock);
-	err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
-	if (!err) {
-		if (ovl_dentry_upper(dentry))
-			err = 1; /* Already copied up */
-		else
-			oe->copying = true;
+	err = mutex_lock_interruptible(&oi->lock);
+	if (!err && ovl_dentry_upper(dentry)) {
+		err = 1; /* Already copied up */
+		mutex_unlock(&oi->lock);
 	}
-	spin_unlock(&ofs->copyup_wq.lock);
 
 	return err;
 }
 
 void ovl_copy_up_end(struct dentry *dentry)
 {
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	spin_lock(&ofs->copyup_wq.lock);
-	oe->copying = false;
-	wake_up_locked(&ofs->copyup_wq);
-	spin_unlock(&ofs->copyup_wq.lock);
+	mutex_unlock(&OVL_I(d_inode(dentry))->lock);
 }
 
 bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
-- 
GitLab


From 04a01ac7ed3c5cd718713ef6341249a143c96b10 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:16 +0200
Subject: [PATCH 0673/1958] ovl: move cache and version to ovl_inode

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/ovl_entry.h |  8 +++-----
 fs/overlayfs/super.c     |  2 ++
 fs/overlayfs/util.c      | 20 ++++++++------------
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index d8f514a474ca0..6a90a48c35897 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -35,12 +35,8 @@ struct ovl_fs {
 
 /* private information held for every overlayfs dentry */
 struct ovl_entry {
-	struct ovl_dir_cache *cache;
 	union {
-		struct {
-			u64 version;
-			bool opaque;
-		};
+		bool opaque;
 		struct rcu_head rcu;
 	};
 	unsigned numlower;
@@ -50,7 +46,9 @@ struct ovl_entry {
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 
 struct ovl_inode {
+	struct ovl_dir_cache *cache;
 	const char *redirect;
+	u64 version;
 	unsigned long flags;
 	struct inode vfs_inode;
 	struct dentry *__upperdentry;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b0d539af1fadc..e0a51ea773eca 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -169,7 +169,9 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 {
 	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 
+	oi->cache = NULL;
 	oi->redirect = NULL;
+	oi->version = 0;
 	oi->flags = 0;
 	oi->__upperdentry = NULL;
 	oi->lower = NULL;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index a0baaa7e224c2..f093fcf2b4bd7 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -160,16 +160,12 @@ struct inode *ovl_inode_real(struct inode *inode)
 
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->cache;
+	return OVL_I(d_inode(dentry))->cache;
 }
 
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	oe->cache = cache;
+	OVL_I(d_inode(dentry))->cache = cache;
 }
 
 bool ovl_dentry_is_opaque(struct dentry *dentry)
@@ -242,18 +238,18 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 
 void ovl_dentry_version_inc(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *inode = d_inode(dentry);
 
-	WARN_ON(!inode_is_locked(dentry->d_inode));
-	oe->version++;
+	WARN_ON(!inode_is_locked(inode));
+	OVL_I(inode)->version++;
 }
 
 u64 ovl_dentry_version_get(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *inode = d_inode(dentry);
 
-	WARN_ON(!inode_is_locked(dentry->d_inode));
-	return oe->version;
+	WARN_ON(!inode_is_locked(inode));
+	return OVL_I(inode)->version;
 }
 
 bool ovl_is_whiteout(struct dentry *dentry)
-- 
GitLab


From ad0af7104dadccd55cd2b390271677fac142650f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:32 +0300
Subject: [PATCH 0674/1958] vfs: introduce inode 'inuse' lock

Added an i_state flag I_INUSE and helpers to set/clear/test the bit.

The 'inuse' lock is an 'advisory' inode lock, that can be used to extend
exclusive create protection beyond parent->i_mutex lock among cooperating
users.

This is going to be used by overlayfs to get exclusive ownership on upper
and work dirs among overlayfs mounts.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/overlayfs.h |  2 ++
 fs/overlayfs/util.c      | 31 +++++++++++++++++++++++++++++++
 include/linux/fs.h       |  4 ++++
 3 files changed, 37 insertions(+)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b1be3d39ac9da..5e958427463d1 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -220,6 +220,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
 void ovl_set_flag(unsigned long flag, struct inode *inode);
 bool ovl_test_flag(unsigned long flag, struct inode *inode);
+bool ovl_inuse_trylock(struct dentry *dentry);
+void ovl_inuse_unlock(struct dentry *dentry);
 
 static inline bool ovl_is_impuredir(struct dentry *dentry)
 {
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index f093fcf2b4bd7..adccd74162d63 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -347,3 +347,34 @@ bool ovl_test_flag(unsigned long flag, struct inode *inode)
 {
 	return test_bit(flag, &OVL_I(inode)->flags);
 }
+
+/**
+ * Caller must hold a reference to inode to prevent it from being freed while
+ * it is marked inuse.
+ */
+bool ovl_inuse_trylock(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	bool locked = false;
+
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & I_OVL_INUSE)) {
+		inode->i_state |= I_OVL_INUSE;
+		locked = true;
+	}
+	spin_unlock(&inode->i_lock);
+
+	return locked;
+}
+
+void ovl_inuse_unlock(struct dentry *dentry)
+{
+	if (dentry) {
+		struct inode *inode = d_inode(dentry);
+
+		spin_lock(&inode->i_lock);
+		WARN_ON(!(inode->i_state & I_OVL_INUSE));
+		inode->i_state &= ~I_OVL_INUSE;
+		spin_unlock(&inode->i_lock);
+	}
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3e68cabb8457e..75a5fafaf096a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1930,6 +1930,9 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
  *			wb stat updates to grab mapping->tree_lock.  See
  *			inode_switch_wb_work_fn() for details.
  *
+ * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
+ *			and work dirs among overlayfs mounts.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -1950,6 +1953,7 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
 #define __I_DIRTY_TIME_EXPIRED	12
 #define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
 #define I_WB_SWITCH		(1 << 13)
+#define I_OVL_INUSE			(1 << 14)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
-- 
GitLab


From 2cac0c00a6cdcc9121de150ed531f652396d1544 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:33 +0300
Subject: [PATCH 0675/1958] ovl: get exclusive ownership on upper/work dirs

Bad things can happen if several concurrent overlay mounts try to
use the same upperdir/workdir path.

Try to get the 'inuse' advisory lock on upperdir and workdir.
Fail mount if another overlay mount instance or another user
holds the 'inuse' lock on these directories.

Note that this provides no protection for concurrent overlay
mount that use overlapping (i.e. descendant) upper/work dirs.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/ovl_entry.h |  3 +++
 fs/overlayfs/super.c     | 29 ++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 6a90a48c35897..5b5a321164242 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -21,6 +21,9 @@ struct ovl_fs {
 	struct vfsmount *upper_mnt;
 	unsigned numlower;
 	struct vfsmount **lower_mnt;
+	/* workbasedir is the path at workdir= mount option */
+	struct dentry *workbasedir;
+	/* workdir is the 'work' directory under workbasedir */
 	struct dentry *workdir;
 	long namelen;
 	/* pathnames of lower and upper dirs, for show_options */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index e0a51ea773eca..b316377270219 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -204,6 +204,10 @@ static void ovl_put_super(struct super_block *sb)
 	unsigned i;
 
 	dput(ufs->workdir);
+	ovl_inuse_unlock(ufs->workbasedir);
+	dput(ufs->workbasedir);
+	if (ufs->upper_mnt)
+		ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
 	mntput(ufs->upper_mnt);
 	for (i = 0; i < ufs->numlower; i++)
 		mntput(ufs->lower_mnt[i]);
@@ -821,9 +825,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (err)
 			goto out_put_upperpath;
 
+		err = -EBUSY;
+		if (!ovl_inuse_trylock(upperpath.dentry)) {
+			pr_err("overlayfs: upperdir is in-use by another mount\n");
+			goto out_put_upperpath;
+		}
+
 		err = ovl_mount_dir(ufs->config.workdir, &workpath);
 		if (err)
-			goto out_put_upperpath;
+			goto out_unlock_upperdentry;
 
 		err = -EINVAL;
 		if (upperpath.mnt != workpath.mnt) {
@@ -834,12 +844,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
 			goto out_put_workpath;
 		}
+
+		err = -EBUSY;
+		if (!ovl_inuse_trylock(workpath.dentry)) {
+			pr_err("overlayfs: workdir is in-use by another mount\n");
+			goto out_put_workpath;
+		}
+
+		ufs->workbasedir = workpath.dentry;
 		sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
 	}
 	err = -ENOMEM;
 	lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
 	if (!lowertmp)
-		goto out_put_workpath;
+		goto out_unlock_workdentry;
 
 	err = -EINVAL;
 	stacklen = ovl_split_lowerdirs(lowertmp);
@@ -882,6 +900,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			pr_err("overlayfs: failed to clone upperpath\n");
 			goto out_put_lowerpath;
 		}
+
 		/* Don't inherit atime flags */
 		ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
 
@@ -1004,7 +1023,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	mntput(upperpath.mnt);
 	for (i = 0; i < numlower; i++)
 		mntput(stack[i].mnt);
-	path_put(&workpath);
+	mntput(workpath.mnt);
 	kfree(lowertmp);
 
 	if (upperpath.dentry) {
@@ -1043,8 +1062,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(stack);
 out_free_lowertmp:
 	kfree(lowertmp);
+out_unlock_workdentry:
+	ovl_inuse_unlock(workpath.dentry);
 out_put_workpath:
 	path_put(&workpath);
+out_unlock_upperdentry:
+	ovl_inuse_unlock(upperpath.dentry);
 out_put_upperpath:
 	path_put(&upperpath);
 out_free_config:
-- 
GitLab


From f7d3daca7c79d9b77e61f50f718b257b71d07498 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:34 +0300
Subject: [PATCH 0676/1958] ovl: relax same fs constrain for ovl_check_origin()

For the case of all layers not on the same fs, try to decode the copy up
origin file handle on any of the lower layers.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 277a55c8299a2..6485beddaa1fb 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -272,28 +272,24 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
 			    struct path **stackp, unsigned int *ctrp)
 {
-	struct super_block *same_sb = ovl_same_sb(dentry->d_sb);
 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct vfsmount *mnt;
-	struct dentry *origin;
+	struct dentry *origin = NULL;
+	int i;
 
-	if (!same_sb || !roe->numlower)
-		return 0;
 
-       /*
-	* Since all layers are on the same fs, we use the first layer for
-	* decoding the file handle.  We may get a disconnected dentry,
-	* which is fine, because we only need to hold the origin inode in
-	* cache and use its inode number.  We may even get a connected dentry,
-	* that is not under the first layer's root.  That is also fine for
-	* using it's inode number - it's the same as if we held a reference
-	* to a dentry in first layer that was moved under us.
-	*/
-	mnt = roe->lowerstack[0].mnt;
-
-	origin = ovl_get_origin(upperdentry, mnt);
-	if (IS_ERR_OR_NULL(origin))
-		return PTR_ERR(origin);
+	for (i = 0; i < roe->numlower; i++) {
+		mnt = roe->lowerstack[i].mnt;
+		origin = ovl_get_origin(upperdentry, mnt);
+		if (IS_ERR(origin))
+			return PTR_ERR(origin);
+
+		if (origin)
+			break;
+	}
+
+	if (!origin)
+		return 0;
 
 	BUG_ON(*stackp || *ctrp);
 	*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
@@ -371,6 +367,16 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		}
 		if (upperdentry && !d.is_dir) {
 			BUG_ON(!d.stop || d.redirect);
+			/*
+			 * Lookup copy up origin by decoding origin file handle.
+			 * We may get a disconnected dentry, which is fine,
+			 * because we only need to hold the origin inode in
+			 * cache and use its inode number.  We may even get a
+			 * connected dentry, that is not under any of the lower
+			 * layers root.  That is also fine for using it's inode
+			 * number - it's the same as if we held a reference
+			 * to a dentry in lower layer that was moved under us.
+			 */
 			err = ovl_check_origin(dentry, upperdentry,
 					       &stack, &ctr);
 			if (err)
-- 
GitLab


From 6b8aa129dcbe0e9825109b35c4b967f984e8fb13 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:35 +0300
Subject: [PATCH 0677/1958] ovl: generalize ovl_create_workdir()

Pass in the subdir name to create and specify if subdir is persistent
or if it should be cleaned up on every mount.

Move fallback to readonly mount on failure to create dir and print of error
message into the helper.

This function is going to be used for creating the persistent 'index' dir
under workbasedir.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b316377270219..fea7bd496f2e5 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -418,22 +418,27 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 
 #define OVL_WORKDIR_NAME "work"
 
-static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
-					 struct dentry *dentry)
+static struct dentry *ovl_workdir_create(struct super_block *sb,
+					 struct ovl_fs *ufs,
+					 struct dentry *dentry,
+					 const char *name, bool persist)
 {
 	struct inode *dir = dentry->d_inode;
+	struct vfsmount *mnt = ufs->upper_mnt;
 	struct dentry *work;
 	int err;
 	bool retried = false;
+	bool locked = false;
 
 	err = mnt_want_write(mnt);
 	if (err)
-		return ERR_PTR(err);
+		goto out_err;
 
 	inode_lock_nested(dir, I_MUTEX_PARENT);
+	locked = true;
+
 retry:
-	work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
-			      strlen(OVL_WORKDIR_NAME));
+	work = lookup_one_len(name, dentry, strlen(name));
 
 	if (!IS_ERR(work)) {
 		struct iattr attr = {
@@ -446,6 +451,9 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
 			if (retried)
 				goto out_dput;
 
+			if (persist)
+				goto out_unlock;
+
 			retried = true;
 			ovl_workdir_cleanup(dir, mnt, work, 0);
 			dput(work);
@@ -485,16 +493,24 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
 		inode_unlock(work->d_inode);
 		if (err)
 			goto out_dput;
+	} else {
+		err = PTR_ERR(work);
+		goto out_err;
 	}
 out_unlock:
-	inode_unlock(dir);
 	mnt_drop_write(mnt);
+	if (locked)
+		inode_unlock(dir);
 
 	return work;
 
 out_dput:
 	dput(work);
-	work = ERR_PTR(err);
+out_err:
+	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+		ufs->config.workdir, name, -err);
+	sb->s_flags |= MS_RDONLY;
+	work = NULL;
 	goto out_unlock;
 }
 
@@ -906,15 +922,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 		sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
 
-		ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
-		err = PTR_ERR(ufs->workdir);
-		if (IS_ERR(ufs->workdir)) {
-			pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
-				ufs->config.workdir, OVL_WORKDIR_NAME, -err);
-			sb->s_flags |= MS_RDONLY;
-			ufs->workdir = NULL;
-		}
-
+		ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+						  OVL_WORKDIR_NAME, false);
 		/*
 		 * Upper should support d_type, else whiteouts are visible.
 		 * Given workdir and upper are on same fs, we can do
-- 
GitLab


From 02bcd1577400b0b2eab806ccb9f72d6b5ec7bcca Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:36 +0300
Subject: [PATCH 0678/1958] ovl: introduce the inodes index dir feature

Create the index dir on mount. The index dir will contain hardlinks to
upper inodes, named after the hex representation of their origin lower
inodes.

The index dir is going to be used to prevent breaking lower hardlinks
on copy up and to implement overlayfs NFS export.

Because the feature is not fully backward compat, enabling the feature
is opt-in by config/module/mount option.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/Kconfig     | 20 ++++++++++++
 fs/overlayfs/copy_up.c   |  9 +++---
 fs/overlayfs/overlayfs.h |  2 ++
 fs/overlayfs/ovl_entry.h |  3 ++
 fs/overlayfs/super.c     | 66 ++++++++++++++++++++++++++++++++++++++--
 fs/overlayfs/util.c      | 15 +++++++++
 6 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index c0c9683934b7a..cbfc196e5dc53 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -23,3 +23,23 @@ config OVERLAY_FS_REDIRECT_DIR
 	  Note, that redirects are not backward compatible.  That is, mounting
 	  an overlay which has redirects on a kernel that doesn't support this
 	  feature will have unexpected results.
+
+config OVERLAY_FS_INDEX
+	bool "Overlayfs: turn on inodes index feature by default"
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  the inodes index dir to map lower inodes to upper inodes by default.
+	  In this case it is still possible to turn off index globally with the
+	  "index=off" module option or on a filesystem instance basis with the
+	  "index=off" mount option.
+
+	  The inodes index feature prevents breaking of lower hardlinks on copy
+	  up.
+
+	  Note, that the inodes index feature is read-only backward compatible.
+	  That is, mounting an overlay which has an index dir on a kernel that
+	  doesn't support this feature read-only, will not have any negative
+	  outcomes.  However, mounting the same overlay with an old kernel
+	  read-write and then mounting it again with a new kernel, will have
+	  unexpected results.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 87289b9a152c8..f9f51cce3c181 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -233,12 +233,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower)
 {
 	struct ovl_fh *fh;
 	int fh_type, fh_len, dwords;
 	void *buf;
 	int buflen = MAX_HANDLE_SZ;
+	uuid_t *uuid = &lower->d_sb->s_uuid;
 
 	buf = kmalloc(buflen, GFP_TEMPORARY);
 	if (!buf)
@@ -283,7 +284,6 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
 static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 			  struct dentry *upper)
 {
-	struct super_block *sb = lower->d_sb;
 	const struct ovl_fh *fh = NULL;
 	int err;
 
@@ -292,9 +292,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	 * so we can use the overlay.origin xattr to distignuish between a copy
 	 * up and a pure upper inode.
 	 */
-	if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
-	    !uuid_is_null(&sb->s_uuid)) {
-		fh = ovl_encode_fh(lower, &sb->s_uuid);
+	if (ovl_can_decode_fh(lower->d_sb)) {
+		fh = ovl_encode_fh(lower);
 		if (IS_ERR(fh))
 			return PTR_ERR(fh);
 	}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 5e958427463d1..4e7a74e99d3cb 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -183,6 +183,8 @@ void ovl_drop_write(struct dentry *dentry);
 struct dentry *ovl_workdir(struct dentry *dentry);
 const struct cred *ovl_override_creds(struct super_block *sb);
 struct super_block *ovl_same_sb(struct super_block *sb);
+bool ovl_can_decode_fh(struct super_block *sb);
+struct dentry *ovl_indexdir(struct super_block *sb);
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 bool ovl_dentry_remote(struct dentry *dentry);
 bool ovl_dentry_weird(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 5b5a321164242..9642ec64467b5 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,7 @@ struct ovl_config {
 	char *workdir;
 	bool default_permissions;
 	bool redirect_dir;
+	bool index;
 };
 
 /* private information held for overlayfs's superblock */
@@ -25,6 +26,8 @@ struct ovl_fs {
 	struct dentry *workbasedir;
 	/* workdir is the 'work' directory under workbasedir */
 	struct dentry *workdir;
+	/* index directory listing overlay inodes by origin file handle */
+	struct dentry *indexdir;
 	long namelen;
 	/* pathnames of lower and upper dirs, for show_options */
 	struct ovl_config config;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fea7bd496f2e5..fa83b3245124c 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -34,6 +34,11 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(ovl_redirect_dir_def,
 		 "Default to on or off for the redirect_dir feature");
 
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(ovl_index_def,
+		 "Default to on or off for the inodes index feature");
+
 static void ovl_dentry_release(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
@@ -203,6 +208,7 @@ static void ovl_put_super(struct super_block *sb)
 	struct ovl_fs *ufs = sb->s_fs_info;
 	unsigned i;
 
+	dput(ufs->indexdir);
 	dput(ufs->workdir);
 	ovl_inuse_unlock(ufs->workbasedir);
 	dput(ufs->workbasedir);
@@ -265,6 +271,12 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return err;
 }
 
+/* Will this overlay be forced to mount/remount ro? */
+static bool ovl_force_readonly(struct ovl_fs *ufs)
+{
+	return (!ufs->upper_mnt || !ufs->workdir);
+}
+
 /**
  * ovl_show_options
  *
@@ -286,6 +298,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	if (ufs->config.redirect_dir != ovl_redirect_dir_def)
 		seq_printf(m, ",redirect_dir=%s",
 			   ufs->config.redirect_dir ? "on" : "off");
+	if (ufs->config.index != ovl_index_def)
+		seq_printf(m, ",index=%s",
+			   ufs->config.index ? "on" : "off");
 	return 0;
 }
 
@@ -293,7 +308,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct ovl_fs *ufs = sb->s_fs_info;
 
-	if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
+	if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs))
 		return -EROFS;
 
 	return 0;
@@ -317,6 +332,8 @@ enum {
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_REDIRECT_DIR_ON,
 	OPT_REDIRECT_DIR_OFF,
+	OPT_INDEX_ON,
+	OPT_INDEX_OFF,
 	OPT_ERR,
 };
 
@@ -327,6 +344,8 @@ static const match_table_t ovl_tokens = {
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_REDIRECT_DIR_ON,		"redirect_dir=on"},
 	{OPT_REDIRECT_DIR_OFF,		"redirect_dir=off"},
+	{OPT_INDEX_ON,			"index=on"},
+	{OPT_INDEX_OFF,			"index=off"},
 	{OPT_ERR,			NULL}
 };
 
@@ -399,6 +418,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->redirect_dir = false;
 			break;
 
+		case OPT_INDEX_ON:
+			config->index = true;
+			break;
+
+		case OPT_INDEX_OFF:
+			config->index = false;
+			break;
+
 		default:
 			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 			return -EINVAL;
@@ -417,6 +444,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 }
 
 #define OVL_WORKDIR_NAME "work"
+#define OVL_INDEXDIR_NAME "index"
 
 static struct dentry *ovl_workdir_create(struct super_block *sb,
 					 struct ovl_fs *ufs,
@@ -610,6 +638,15 @@ static int ovl_lower_dir(const char *name, struct path *path,
 	if (ovl_dentry_remote(path->dentry))
 		*remote = true;
 
+	/*
+	 * The inodes index feature needs to encode and decode file
+	 * handles, so it requires that all layers support them.
+	 */
+	if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
+		ofs->config.index = false;
+		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
+	}
+
 	return 0;
 
 out_put:
@@ -807,6 +844,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		goto out;
 
 	ufs->config.redirect_dir = ovl_redirect_dir_def;
+	ufs->config.index = ovl_index_def;
 	err = ovl_parse_opt((char *) data, &ufs->config);
 	if (err)
 		goto out_free_config;
@@ -965,6 +1003,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			} else {
 				vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
 			}
+
+			/* Check if upper/work fs supports file handles */
+			if (ufs->config.index &&
+			    !ovl_can_decode_fh(ufs->workdir->d_sb)) {
+				ufs->config.index = false;
+				pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+			}
 		}
 	}
 
@@ -1002,6 +1047,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
 		ufs->same_sb = NULL;
 
+	if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
+		ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+						   OVL_INDEXDIR_NAME, true);
+		err = PTR_ERR(ufs->indexdir);
+		if (IS_ERR(ufs->indexdir))
+			goto out_put_lower_mnt;
+
+		if (!ufs->indexdir)
+			pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+	}
+
+	/* Show index=off/on in /proc/mounts for any of the reasons above */
+	if (!ufs->indexdir)
+		ufs->config.index = false;
+
 	if (remote)
 		sb->s_d_op = &ovl_reval_dentry_operations;
 	else
@@ -1009,7 +1069,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	ufs->creator_cred = cred = prepare_creds();
 	if (!cred)
-		goto out_put_lower_mnt;
+		goto out_put_indexdir;
 
 	/* Never override disk quota limits or use reserved space */
 	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
@@ -1058,6 +1118,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(oe);
 out_put_cred:
 	put_cred(ufs->creator_cred);
+out_put_indexdir:
+	dput(ufs->indexdir);
 out_put_lower_mnt:
 	for (i = 0; i < ufs->numlower; i++)
 		mntput(ufs->lower_mnt[i]);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index adccd74162d63..90b50b8e75abc 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -12,6 +12,8 @@
 #include <linux/slab.h>
 #include <linux/cred.h>
 #include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/uuid.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -47,6 +49,19 @@ struct super_block *ovl_same_sb(struct super_block *sb)
 	return ofs->same_sb;
 }
 
+bool ovl_can_decode_fh(struct super_block *sb)
+{
+	return (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
+		!uuid_is_null(&sb->s_uuid));
+}
+
+struct dentry *ovl_indexdir(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->indexdir;
+}
+
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 {
 	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
-- 
GitLab


From 8b88a2e6403638b56556ed5b1c60d9318eefea9c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:37 +0300
Subject: [PATCH 0679/1958] ovl: verify upper root dir matches lower root dir

When inodes index feature is enabled, verify that the file handle stored
in upper root dir matches the lower root dir or fail to mount.

If upper root dir has no stored file handle, encode and store the lower
root dir file handle in overlay.origin xattr.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   |   2 +-
 fs/overlayfs/namei.c     | 103 +++++++++++++++++++++++++++++++++------
 fs/overlayfs/overlayfs.h |   3 ++
 fs/overlayfs/super.c     |   8 +++
 4 files changed, 101 insertions(+), 15 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index f9f51cce3c181..42807cb57da07 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -233,7 +233,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower)
+struct ovl_fh *ovl_encode_fh(struct dentry *lower)
 {
 	struct ovl_fh *fh;
 	int fh_type, fh_len, dwords;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 6485beddaa1fb..197b53d34861c 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -88,13 +88,10 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
 	return 1;
 }
 
-static struct dentry *ovl_get_origin(struct dentry *dentry,
-				     struct vfsmount *mnt)
+static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
 {
 	int res;
 	struct ovl_fh *fh = NULL;
-	struct dentry *origin = NULL;
-	int bytes;
 
 	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
 	if (res < 0) {
@@ -106,7 +103,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	if (res == 0)
 		return NULL;
 
-	fh  = kzalloc(res, GFP_TEMPORARY);
+	fh = kzalloc(res, GFP_TEMPORARY);
 	if (!fh)
 		return ERR_PTR(-ENOMEM);
 
@@ -129,7 +126,29 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
 		goto out;
 
-	bytes = (fh->len - offsetof(struct ovl_fh, fid));
+	return fh;
+
+out:
+	kfree(fh);
+	return NULL;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+	goto out;
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	goto out;
+}
+
+static struct dentry *ovl_get_origin(struct dentry *dentry,
+				     struct vfsmount *mnt)
+{
+	struct dentry *origin = NULL;
+	struct ovl_fh *fh = ovl_get_origin_fh(dentry);
+	int bytes;
+
+	if (IS_ERR_OR_NULL(fh))
+		return (struct dentry *)fh;
 
 	/*
 	 * Make sure that the stored uuid matches the uuid of the lower
@@ -138,6 +157,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
 		goto out;
 
+	bytes = (fh->len - offsetof(struct ovl_fh, fid));
 	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
 				    bytes >> 2, (int)fh->type,
 				    ovl_acceptable, NULL);
@@ -149,21 +169,17 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	}
 
 	if (ovl_dentry_weird(origin) ||
-	    ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) {
-		dput(origin);
-		origin = NULL;
+	    ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
 		goto invalid;
-	}
 
 out:
 	kfree(fh);
 	return origin;
 
-fail:
-	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
-	goto out;
 invalid:
-	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
+	dput(origin);
+	origin = NULL;
 	goto out;
 }
 
@@ -303,6 +319,65 @@ static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
 	return 0;
 }
 
+/*
+ * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
+{
+	struct ovl_fh *ofh = ovl_get_origin_fh(dentry);
+	int err = 0;
+
+	if (!ofh)
+		return -ENODATA;
+
+	if (IS_ERR(ofh))
+		return PTR_ERR(ofh);
+
+	if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+		err = -ESTALE;
+
+	kfree(ofh);
+	return err;
+}
+
+/*
+ * Verify that an inode matches the origin file handle stored in upper inode.
+ *
+ * If @set is true and there is no stored file handle, encode and store origin
+ * file handle in OVL_XATTR_ORIGIN.
+ *
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+		      struct dentry *origin, bool set)
+{
+	struct inode *inode;
+	struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_fh(origin);
+	err = PTR_ERR(fh);
+	if (IS_ERR(fh))
+		goto fail;
+
+	err = ovl_verify_origin_fh(dentry, fh);
+	if (set && err == -ENODATA)
+		err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
+	if (err)
+		goto fail;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	inode = d_inode(origin);
+	pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
+			    origin, inode ? inode->i_ino : 0, err);
+	goto out;
+}
+
 /*
  * Returns next layer in stack starting from top.
  * Returns -1 if this is the last layer.
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4e7a74e99d3cb..38ac84cba6ea7 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -232,6 +232,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 
 
 /* namei.c */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+		      struct dentry *origin, bool set);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
 bool ovl_lower_positive(struct dentry *dentry);
@@ -290,3 +292,4 @@ int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_flags(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_fh(struct dentry *lower);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fa83b3245124c..bfdcff0f31689 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1048,6 +1048,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		ufs->same_sb = NULL;
 
 	if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
+		/* Verify lower root is upper root origin */
+		err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
+					stack[0].dentry, true);
+		if (err) {
+			pr_err("overlayfs: failed to verify upper root origin\n");
+			goto out_put_lower_mnt;
+		}
+
 		ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
 						   OVL_INDEXDIR_NAME, true);
 		err = PTR_ERR(ufs->indexdir);
-- 
GitLab


From 54fb347e836faadaed2a5617fb4dd4a4597d0490 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:38 +0300
Subject: [PATCH 0680/1958] ovl: verify index dir matches upper dir

An index dir contains persistent hardlinks to files in upper dir.
Therefore, we must never mount an existing index dir with a differnt
upper dir.

Store the upper root dir file handle in index dir inode when index
dir is created and verify the file handle before using an existing
index dir on mount.

Add an 'is_upper' flag to the overlay file handle encoding and set it
when encoding the upper root file handle. This is not critical for index
dir verification, but it is good practice towards a standard overlayfs
file handle format for NFS export.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   | 12 ++++++++++--
 fs/overlayfs/namei.c     |  4 ++--
 fs/overlayfs/overlayfs.h |  6 ++++--
 fs/overlayfs/super.c     | 13 +++++++++++--
 4 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 42807cb57da07..5e8fd99557e10 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -233,7 +233,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-struct ovl_fh *ovl_encode_fh(struct dentry *lower)
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
 {
 	struct ovl_fh *fh;
 	int fh_type, fh_len, dwords;
@@ -272,6 +272,14 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower)
 	fh->magic = OVL_FH_MAGIC;
 	fh->type = fh_type;
 	fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+	/*
+	 * When we will want to decode an overlay dentry from this handle
+	 * and all layers are on the same fs, if we get a disconncted real
+	 * dentry when we decode fid, the only way to tell if we should assign
+	 * it to upperdentry or to lowerstack is by checking this flag.
+	 */
+	if (is_upper)
+		fh->flags |= OVL_FH_FLAG_PATH_UPPER;
 	fh->len = fh_len;
 	fh->uuid = *uuid;
 	memcpy(fh->fid, buf, buflen);
@@ -293,7 +301,7 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	 * up and a pure upper inode.
 	 */
 	if (ovl_can_decode_fh(lower->d_sb)) {
-		fh = ovl_encode_fh(lower);
+		fh = ovl_encode_fh(lower, false);
 		if (IS_ERR(fh))
 			return PTR_ERR(fh);
 	}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 197b53d34861c..0c816e9aa50c0 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -350,13 +350,13 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
  */
 int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
-		      struct dentry *origin, bool set)
+		      struct dentry *origin, bool is_upper, bool set)
 {
 	struct inode *inode;
 	struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_fh(origin);
+	fh = ovl_encode_fh(origin, is_upper);
 	err = PTR_ERR(fh);
 	if (IS_ERR(fh))
 		goto fail;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 38ac84cba6ea7..58bbd135a7b3a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -42,6 +42,8 @@ enum ovl_flag {
 /* CPU byte order required for fid decoding:  */
 #define OVL_FH_FLAG_BIG_ENDIAN	(1 << 0)
 #define OVL_FH_FLAG_ANY_ENDIAN	(1 << 1)
+/* Is the real inode encoded in fid an upper inode? */
+#define OVL_FH_FLAG_PATH_UPPER	(1 << 2)
 
 #define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
 
@@ -233,7 +235,7 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 
 /* namei.c */
 int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
-		      struct dentry *origin, bool set);
+		      struct dentry *origin, bool is_upper, bool set);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
 bool ovl_lower_positive(struct dentry *dentry);
@@ -292,4 +294,4 @@ int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_flags(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_fh(struct dentry *lower);
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index bfdcff0f31689..a313af25dac26 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1050,7 +1050,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
 		/* Verify lower root is upper root origin */
 		err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
-					stack[0].dentry, true);
+					stack[0].dentry, false, true);
 		if (err) {
 			pr_err("overlayfs: failed to verify upper root origin\n");
 			goto out_put_lower_mnt;
@@ -1062,8 +1062,17 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (IS_ERR(ufs->indexdir))
 			goto out_put_lower_mnt;
 
-		if (!ufs->indexdir)
+		if (ufs->indexdir) {
+			/* Verify upper root is index dir origin */
+			err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
+						upperpath.dentry, true, true);
+			if (err)
+				pr_err("overlayfs: failed to verify index dir origin\n");
+		}
+		if (err || !ufs->indexdir)
 			pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+		if (err)
+			goto out_put_indexdir;
 	}
 
 	/* Show index=off/on in /proc/mounts for any of the reasons above */
-- 
GitLab


From 359f392ca53e9122cafa5fc103545558b0b85d54 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:41 +0300
Subject: [PATCH 0681/1958] ovl: lookup index entry for copy up origin

When inodes index feature is enabled, lookup in indexdir for the index
entry of lower real inode or copy up origin inode. The index entry name
is the hex representation of the lower inode file handle.

If the index dentry in negative, then either no lower aliases have been
copied up yet, or aliases have been copied up in older kernels and are
not indexed.

If the index dentry for a copy up origin inode is positive, but points
to an inode different than the upper inode, then either the upper inode
has been copied up and not indexed or it was indexed, but since then
index dir was cleared. Either way, that index cannot be used to indentify
the overlay inode.

If a positive dentry that matches the upper inode was found, then it is
safe to use the copy up origin st_ino for upper hardlinks, because all
indexed upper hardlinks are represented by the same overlay inode as the
copy up origin.

Set the INDEX type flag on an indexed upper dentry. A non-upper dentry
may also have a positive index from copy up of another lower hardlink.
This situation will be handled by following patches.

Index lookup is going to be used to prevent breaking hardlinks on copy up.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     |   8 ++-
 fs/overlayfs/namei.c     | 108 +++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/overlayfs.h |   2 +
 3 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 23d64d51f331b..35bb956af8e8a 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -96,11 +96,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 
 			WARN_ON_ONCE(stat->dev != lowerstat.dev);
 			/*
-			 * Lower hardlinks are broken on copy up to different
+			 * Lower hardlinks may be broken on copy up to different
 			 * upper files, so we cannot use the lower origin st_ino
 			 * for those different files, even for the same fs case.
+			 * With inodes index enabled, it is safe to use st_ino
+			 * of an indexed hardlinked origin. The index validates
+			 * that the upper hardlink is not broken.
 			 */
-			if (is_dir || lowerstat.nlink == 1)
+			if (is_dir || lowerstat.nlink == 1 ||
+			    ovl_test_flag(OVL_INDEX, d_inode(dentry)))
 				stat->ino = lowerstat.ino;
 		}
 		stat->dev = dentry->d_sb->s_dev;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 0c816e9aa50c0..3bec4cb399678 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -378,6 +378,94 @@ int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
 	goto out;
 }
 
+/*
+ * Lookup in indexdir for the index entry of a lower real inode or a copy up
+ * origin inode. The index entry name is the hex representation of the lower
+ * inode file handle.
+ *
+ * If the index dentry in negative, then either no lower aliases have been
+ * copied up yet, or aliases have been copied up in older kernels and are
+ * not indexed.
+ *
+ * If the index dentry for a copy up origin inode is positive, but points
+ * to an inode different than the upper inode, then either the upper inode
+ * has been copied up and not indexed or it was indexed, but since then
+ * index dir was cleared. Either way, that index cannot be used to indentify
+ * the overlay inode.
+ */
+int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+{
+	int err;
+	struct ovl_fh *fh;
+	char *n, *s;
+
+	fh = ovl_encode_fh(origin, false);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	err = -ENOMEM;
+	n = kzalloc(fh->len * 2, GFP_TEMPORARY);
+	if (n) {
+		s  = bin2hex(n, fh, fh->len);
+		*name = (struct qstr) QSTR_INIT(n, s - n);
+		err = 0;
+	}
+	kfree(fh);
+
+	return err;
+
+}
+
+static struct dentry *ovl_lookup_index(struct dentry *dentry,
+				       struct dentry *upper,
+				       struct dentry *origin)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct dentry *index;
+	struct inode *inode;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name(origin, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+	if (IS_ERR(index)) {
+		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
+				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
+				    d_inode(origin)->i_ino, name.len, name.name,
+				    err);
+		goto out;
+	}
+
+	if (d_is_negative(index)) {
+		if (upper && d_inode(origin)->i_nlink > 1) {
+			pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
+					    d_inode(origin)->i_ino);
+			goto fail;
+		}
+
+		dput(index);
+		index = NULL;
+	} else if (upper && d_inode(index) != d_inode(upper)) {
+		inode = d_inode(index);
+		pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n",
+				    d_inode(index)->i_ino,
+				    d_inode(upper)->i_ino);
+		goto fail;
+	}
+
+out:
+	kfree(name.name);
+	return index;
+
+fail:
+	dput(index);
+	index = ERR_PTR(-EIO);
+	goto out;
+}
+
 /*
  * Returns next layer in stack starting from top.
  * Returns -1 if this is the last layer.
@@ -409,6 +497,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct path *stack = NULL;
 	struct dentry *upperdir, *upperdentry = NULL;
+	struct dentry *index = NULL;
 	unsigned int ctr = 0;
 	struct inode *inode = NULL;
 	bool upperopaque = false;
@@ -506,6 +595,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		}
 	}
 
+	/* Lookup index by lower inode and verify it matches upper inode */
+	if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
+		struct dentry *origin = stack[0].dentry;
+
+		index = ovl_lookup_index(dentry, upperdentry, origin);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			index = NULL;
+			goto out_put;
+		}
+	}
+
 	oe = ovl_alloc_entry(ctr);
 	err = -ENOMEM;
 	if (!oe)
@@ -515,6 +616,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
 	dentry->d_fsdata = oe;
 
+	if (index && !upperdentry)
+		upperdentry = dget(index);
+
 	if (upperdentry || ctr) {
 		err = -ENOMEM;
 		inode = ovl_get_inode(dentry, upperdentry);
@@ -522,9 +626,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			goto out_free_oe;
 
 		OVL_I(inode)->redirect = upperredirect;
+		if (index)
+			ovl_set_flag(OVL_INDEX, inode);
 	}
 
 	revert_creds(old_cred);
+	dput(index);
 	kfree(stack);
 	kfree(d.redirect);
 	d_add(dentry, inode);
@@ -535,6 +642,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	dentry->d_fsdata = NULL;
 	kfree(oe);
 out_put:
+	dput(index);
 	for (i = 0; i < ctr; i++)
 		dput(stack[i].dentry);
 	kfree(stack);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 58bbd135a7b3a..437a0301e1b6d 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -28,6 +28,7 @@ enum ovl_path_type {
 
 enum ovl_flag {
 	OVL_IMPURE,
+	OVL_INDEX,
 };
 
 /*
@@ -236,6 +237,7 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 /* namei.c */
 int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
 		      struct dentry *origin, bool is_upper, bool set);
+int ovl_get_index_name(struct dentry *origin, struct qstr *name);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
 bool ovl_lower_positive(struct dentry *dentry);
-- 
GitLab


From 415543d5c64fe490b4b6a7e21c3ea2f1310c442f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 15:28:42 +0300
Subject: [PATCH 0682/1958] ovl: cleanup bad and stale index entries on mount

Bad index entries are entries whose name does not match the
origin file handle stored in trusted.overlay.origin xattr.
Bad index entries could be a result of a system power off in
the middle of copy up.

Stale index entries are entries whose origin file handle is
stale. Stale index entries could be a result of copying layers
or removing lower entries while the overlay is not mounted.
The case of copying layers should be detected earlier by the
verification of upper root dir origin and index dir origin.

Both bad and stale index entries are detected and removed
on mount.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c       |  4 ++-
 fs/overlayfs/namei.c     | 74 +++++++++++++++++++++++++++++++++++-----
 fs/overlayfs/overlayfs.h |  6 +++-
 fs/overlayfs/readdir.c   | 50 +++++++++++++++++++++++++++
 fs/overlayfs/super.c     |  6 ++++
 5 files changed, 130 insertions(+), 10 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index d0d6292e069a6..a072c27e03bc2 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
 MODULE_PARM_DESC(ovl_redirect_max,
 		 "Maximum length of absolute redirect xattr value");
 
-void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
 {
 	int err;
 
@@ -39,6 +39,8 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
 		pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
 		       wdentry, err);
 	}
+
+	return err;
 }
 
 struct dentry *ovl_lookup_temp(struct dentry *workdir)
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 3bec4cb399678..4df37e805eb73 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -285,17 +285,17 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 }
 
 
-static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
+static int ovl_check_origin(struct dentry *upperdentry,
+			    struct path *lowerstack, unsigned int numlower,
 			    struct path **stackp, unsigned int *ctrp)
 {
-	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct vfsmount *mnt;
 	struct dentry *origin = NULL;
 	int i;
 
 
-	for (i = 0; i < roe->numlower; i++) {
-		mnt = roe->lowerstack[i].mnt;
+	for (i = 0; i < numlower; i++) {
+		mnt = lowerstack[i].mnt;
 		origin = ovl_get_origin(upperdentry, mnt);
 		if (IS_ERR(origin))
 			return PTR_ERR(origin);
@@ -307,8 +307,9 @@ static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
 	if (!origin)
 		return 0;
 
-	BUG_ON(*stackp || *ctrp);
-	*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+	BUG_ON(*ctrp);
+	if (!*stackp)
+		*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
 	if (!*stackp) {
 		dput(origin);
 		return -ENOMEM;
@@ -378,6 +379,63 @@ int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
 	goto out;
 }
 
+/*
+ * Verify that an index entry name matches the origin file handle stored in
+ * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
+ * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
+ */
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+		     unsigned int numlower)
+{
+	struct ovl_fh *fh = NULL;
+	size_t len;
+	struct path origin = { };
+	struct path *stack = &origin;
+	unsigned int ctr = 0;
+	int err;
+
+	if (!d_inode(index))
+		return 0;
+
+	err = -EISDIR;
+	if (d_is_dir(index))
+		goto fail;
+
+	err = -EINVAL;
+	if (index->d_name.len < sizeof(struct ovl_fh)*2)
+		goto fail;
+
+	err = -ENOMEM;
+	len = index->d_name.len / 2;
+	fh = kzalloc(len, GFP_TEMPORARY);
+	if (!fh)
+		goto fail;
+
+	err = -EINVAL;
+	if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
+		goto fail;
+
+	err = ovl_verify_origin_fh(index, fh);
+	if (err)
+		goto fail;
+
+	err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
+	if (!err && !ctr)
+		err = -ESTALE;
+	if (err)
+		goto fail;
+
+	dput(origin.dentry);
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n",
+			    index, err);
+	goto out;
+}
+
 /*
  * Lookup in indexdir for the index entry of a lower real inode or a copy up
  * origin inode. The index entry name is the hex representation of the lower
@@ -541,8 +599,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			 * number - it's the same as if we held a reference
 			 * to a dentry in lower layer that was moved under us.
 			 */
-			err = ovl_check_origin(dentry, upperdentry,
-					       &stack, &ctr);
+			err = ovl_check_origin(upperdentry, roe->lowerstack,
+					       roe->numlower, &stack, &ctr);
 			if (err)
 				goto out;
 		}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 437a0301e1b6d..f3e49cf345178 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -237,6 +237,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 /* namei.c */
 int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
 		      struct dentry *origin, bool is_upper, bool set);
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+		     unsigned int numlower);
 int ovl_get_index_name(struct dentry *origin, struct qstr *name);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
@@ -250,6 +252,8 @@ void ovl_cache_free(struct list_head *list);
 int ovl_check_d_type_supported(struct path *realpath);
 void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 			 struct dentry *dentry, int level);
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+			 struct path *lowerstack, unsigned int numlower);
 
 /* inode.c */
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
@@ -289,7 +293,7 @@ struct cattr {
 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
 		    struct cattr *attr,
 		    struct dentry *hardlink, bool debug);
-void ovl_cleanup(struct inode *dir, struct dentry *dentry);
+int ovl_cleanup(struct inode *dir, struct dentry *dentry);
 
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index f241b4ee3d8a5..0298463cf9c3f 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -667,3 +667,53 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 		ovl_cleanup(dir, dentry);
 	}
 }
+
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+			 struct path *lowerstack, unsigned int numlower)
+{
+	int err;
+	struct inode *dir = dentry->d_inode;
+	struct path path = { .mnt = mnt, .dentry = dentry };
+	LIST_HEAD(list);
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = RB_ROOT,
+		.is_lowest = false,
+	};
+
+	err = ovl_dir_read(&path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		struct dentry *index;
+
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		index = lookup_one_len(p->name, dentry, p->len);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			break;
+		}
+		if (ovl_verify_index(index, lowerstack, numlower)) {
+			err = ovl_cleanup(dir, index);
+			if (err)
+				break;
+		}
+		dput(index);
+	}
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+	if (err)
+		pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+	return err;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index a313af25dac26..791581c370f51 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1068,6 +1068,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 						upperpath.dentry, true, true);
 			if (err)
 				pr_err("overlayfs: failed to verify index dir origin\n");
+
+			/* Cleanup bad/stale index entries */
+			if (!err)
+				err = ovl_indexdir_cleanup(ufs->indexdir,
+							   ufs->upper_mnt,
+							   stack, numlower);
 		}
 		if (err || !ufs->indexdir)
 			pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
-- 
GitLab


From b9ac5c274b8c9d642567022c0e319bca4db31956 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:17 +0200
Subject: [PATCH 0683/1958] ovl: hash overlay non-dir inodes by copy up origin

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c | 46 +++++++++++++++++++++++++++++++++++++++-----
 fs/overlayfs/namei.c |  4 ++--
 fs/overlayfs/util.c  |  3 +--
 3 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 35bb956af8e8a..d9fe07defca33 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -467,6 +467,25 @@ static int ovl_inode_set(struct inode *inode, void *data)
 	return 0;
 }
 
+static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
+			     struct dentry *upperdentry)
+{
+	struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL;
+
+	/* Lower (origin) inode must match, even if NULL */
+	if (ovl_inode_lower(inode) != lowerinode)
+		return false;
+
+	/*
+	 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
+	 * This happens when finding a lower alias for a copied up hard link.
+	 */
+	if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
+		return false;
+
+	return true;
+}
+
 struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 {
 	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
@@ -476,12 +495,25 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 	if (!realinode)
 		realinode = d_inode(lowerdentry);
 
-	if (upperdentry && !d_is_dir(upperdentry)) {
-		inode = iget5_locked(dentry->d_sb, (unsigned long) realinode,
-				     ovl_inode_test, ovl_inode_set, realinode);
+	if (!S_ISDIR(realinode->i_mode) &&
+	    (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) {
+		struct inode *key = d_inode(lowerdentry ?: upperdentry);
+
+		inode = iget5_locked(dentry->d_sb, (unsigned long) key,
+				     ovl_inode_test, ovl_inode_set, key);
 		if (!inode)
-			goto out;
+			goto out_nomem;
 		if (!(inode->i_state & I_NEW)) {
+			/*
+			 * Verify that the underlying files stored in the inode
+			 * match those in the dentry.
+			 */
+			if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) {
+				iput(inode);
+				inode = ERR_PTR(-ESTALE);
+				goto out;
+			}
+
 			dput(upperdentry);
 			goto out;
 		}
@@ -490,7 +522,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 	} else {
 		inode = new_inode(dentry->d_sb);
 		if (!inode)
-			goto out;
+			goto out_nomem;
 	}
 	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
 	ovl_inode_init(inode, upperdentry, lowerdentry);
@@ -502,4 +534,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 		unlock_new_inode(inode);
 out:
 	return inode;
+
+out_nomem:
+	inode = ERR_PTR(-ENOMEM);
+	goto out;
 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 4df37e805eb73..f7fb0c9194197 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -678,9 +678,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		upperdentry = dget(index);
 
 	if (upperdentry || ctr) {
-		err = -ENOMEM;
 		inode = ovl_get_inode(dentry, upperdentry);
-		if (!inode)
+		err = PTR_ERR(inode);
+		if (IS_ERR(inode))
 			goto out_free_oe;
 
 		OVL_I(inode)->redirect = upperredirect;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 90b50b8e75abc..22ed51f80e589 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -236,7 +236,6 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 {
 	struct inode *upperinode = d_inode(upperdentry);
 
-	WARN_ON(!inode_unhashed(inode));
 	WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
 	WARN_ON(OVL_I(inode)->__upperdentry);
 
@@ -245,7 +244,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 	 */
 	smp_wmb();
 	OVL_I(inode)->__upperdentry = upperdentry;
-	if (!S_ISDIR(upperinode->i_mode)) {
+	if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
 		inode->i_private = upperinode;
 		__insert_inode_hash(inode, (unsigned long) upperinode);
 	}
-- 
GitLab


From 15932c415b3ed20bd1c1e05d071b4ad498656280 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 16 May 2017 01:26:49 +0300
Subject: [PATCH 0684/1958] ovl: defer upper dir lock to tempfile link

On copy up of regular file using an O_TMPFILE, lock upper dir only
before linking the tempfile in place.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 67 ++++++++++++++++++++++++------------------
 fs/overlayfs/util.c    |  1 -
 2 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 5e8fd99557e10..28711af7f9db2 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -316,6 +316,35 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	return err;
 }
 
+static int ovl_install_temp(struct dentry *workdir, struct dentry *upperdir,
+			    struct dentry *dentry,
+			    struct dentry *temp, struct kstat *pstat,
+			    bool tmpfile, struct dentry **newdentry)
+{
+	int err;
+	struct dentry *upper;
+	struct inode *udir = d_inode(upperdir);
+
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	if (IS_ERR(upper))
+		return PTR_ERR(upper);
+
+	if (tmpfile)
+		err = ovl_do_link(temp, udir, upper, true);
+	else
+		err = ovl_do_rename(d_inode(workdir), temp, udir, upper, 0);
+
+	/* Restore timestamps on parent (best effort) */
+	if (!err) {
+		ovl_set_timestamps(upperdir, pstat);
+		*newdentry = dget(tmpfile ? upper : temp);
+	}
+	dput(upper);
+
+	return err;
+}
+
 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 			      struct dentry *dentry, struct path *lowerpath,
 			      struct kstat *stat, const char *link,
@@ -324,7 +353,6 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	struct inode *wdir = workdir->d_inode;
 	struct inode *udir = upperdir->d_inode;
 	struct dentry *newdentry = NULL;
-	struct dentry *upper = NULL;
 	struct dentry *temp = NULL;
 	int err;
 	const struct cred *old_creds = NULL;
@@ -371,16 +399,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 		BUG_ON(upperpath.dentry != NULL);
 		upperpath.dentry = temp;
 
-		if (tmpfile) {
-			inode_unlock(udir);
-			err = ovl_copy_up_data(lowerpath, &upperpath,
-					       stat->size);
-			inode_lock_nested(udir, I_MUTEX_PARENT);
-		} else {
-			err = ovl_copy_up_data(lowerpath, &upperpath,
-					       stat->size);
-		}
-
+		err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
 		if (err)
 			goto out_cleanup;
 	}
@@ -408,29 +427,21 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 			goto out_cleanup;
 	}
 
-	upper = lookup_one_len(dentry->d_name.name, upperdir,
-			       dentry->d_name.len);
-	if (IS_ERR(upper)) {
-		err = PTR_ERR(upper);
-		upper = NULL;
-		goto out_cleanup;
+	if (tmpfile) {
+		inode_lock_nested(udir, I_MUTEX_PARENT);
+		err = ovl_install_temp(workdir, upperdir, dentry, temp, pstat,
+				       tmpfile, &newdentry);
+		inode_unlock(udir);
+	} else {
+		err = ovl_install_temp(workdir, upperdir, dentry, temp, pstat,
+				       tmpfile, &newdentry);
 	}
-
-	if (tmpfile)
-		err = ovl_do_link(temp, udir, upper, true);
-	else
-		err = ovl_do_rename(wdir, temp, udir, upper, 0);
 	if (err)
 		goto out_cleanup;
 
-	newdentry = dget(tmpfile ? upper : temp);
 	ovl_inode_update(d_inode(dentry), newdentry);
-
-	/* Restore timestamps on parent (best effort) */
-	ovl_set_timestamps(upperdir, pstat);
 out:
 	dput(temp);
-	dput(upper);
 	return err;
 
 out_cleanup:
@@ -496,10 +507,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 			goto out_done;
 		}
 
-		inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
 		err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
 					 stat, link, &pstat, true);
-		inode_unlock(upperdir->d_inode);
 		ovl_copy_up_end(dentry);
 		goto out_done;
 	}
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 22ed51f80e589..c80b4bf1e64f1 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -236,7 +236,6 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 {
 	struct inode *upperinode = d_inode(upperdentry);
 
-	WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
 	WARN_ON(OVL_I(inode)->__upperdentry);
 
 	/*
-- 
GitLab


From 7d90b853f932874f0b348858fddbd41f022179ee Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:18 +0200
Subject: [PATCH 0685/1958] ovl: extract helper to get temp file in copy up

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 59 +++++++++++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 18 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 28711af7f9db2..a7941ab80c9b3 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -345,16 +345,13 @@ static int ovl_install_temp(struct dentry *workdir, struct dentry *upperdir,
 	return err;
 }
 
-static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
-			      struct dentry *dentry, struct path *lowerpath,
-			      struct kstat *stat, const char *link,
-			      struct kstat *pstat, bool tmpfile)
+static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *upperdir,
+			   struct dentry *dentry,
+			   struct kstat *stat, const char *link, bool tmpfile,
+			   struct dentry **tempp)
 {
-	struct inode *wdir = workdir->d_inode;
-	struct inode *udir = upperdir->d_inode;
-	struct dentry *newdentry = NULL;
-	struct dentry *temp = NULL;
 	int err;
+	struct dentry *temp;
 	const struct cred *old_creds = NULL;
 	struct cred *new_creds = NULL;
 	struct cattr cattr = {
@@ -371,24 +368,50 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	if (new_creds)
 		old_creds = override_creds(new_creds);
 
-	if (tmpfile)
+	if (tmpfile) {
 		temp = ovl_do_tmpfile(upperdir, stat->mode);
-	else
+		if (IS_ERR(temp))
+			goto temp_err;
+	} else {
 		temp = ovl_lookup_temp(workdir);
-	err = 0;
-	if (IS_ERR(temp)) {
-		err = PTR_ERR(temp);
-		temp = NULL;
+		if (IS_ERR(temp))
+			goto temp_err;
+
+		err = ovl_create_real(d_inode(workdir), temp, &cattr,
+				      NULL, true);
+		if (err) {
+			dput(temp);
+			goto out;
+		}
 	}
-
-	if (!err && !tmpfile)
-		err = ovl_create_real(wdir, temp, &cattr, NULL, true);
-
+	err = 0;
+	*tempp = temp;
+out:
 	if (new_creds) {
 		revert_creds(old_creds);
 		put_cred(new_creds);
 	}
 
+	return err;
+
+temp_err:
+	err = PTR_ERR(temp);
+	goto out;
+}
+
+static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
+			      struct dentry *dentry, struct path *lowerpath,
+			      struct kstat *stat, const char *link,
+			      struct kstat *pstat, bool tmpfile)
+{
+	struct inode *wdir = workdir->d_inode;
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *newdentry = NULL;
+	struct dentry *temp = NULL;
+	int err;
+
+	err = ovl_get_tmpfile(workdir, upperdir, dentry, stat, link, tmpfile,
+			      &temp);
 	if (err)
 		goto out;
 
-- 
GitLab


From 02209d10709c18d552c2494df74117db09a18e05 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 19 May 2017 15:16:21 +0300
Subject: [PATCH 0686/1958] ovl: factor out ovl_copy_up_inode() helper

Factor out helper for copying lower inode data and metadata to temp
upper inode, that is common to copy up using O_TMPFILE and workdir.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 46 ++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index a7941ab80c9b3..81b9a44916a06 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -399,22 +399,11 @@ static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *upperdir,
 	goto out;
 }
 
-static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
-			      struct dentry *dentry, struct path *lowerpath,
-			      struct kstat *stat, const char *link,
-			      struct kstat *pstat, bool tmpfile)
+static int ovl_copy_up_inode(struct dentry *dentry, struct dentry *temp,
+			     struct path *lowerpath, struct kstat *stat)
 {
-	struct inode *wdir = workdir->d_inode;
-	struct inode *udir = upperdir->d_inode;
-	struct dentry *newdentry = NULL;
-	struct dentry *temp = NULL;
 	int err;
 
-	err = ovl_get_tmpfile(workdir, upperdir, dentry, stat, link, tmpfile,
-			      &temp);
-	if (err)
-		goto out;
-
 	if (S_ISREG(stat->mode)) {
 		struct path upperpath;
 
@@ -424,18 +413,18 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 
 		err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
 		if (err)
-			goto out_cleanup;
+			return err;
 	}
 
 	err = ovl_copy_xattr(lowerpath->dentry, temp);
 	if (err)
-		goto out_cleanup;
+		return err;
 
 	inode_lock(temp->d_inode);
 	err = ovl_set_attr(temp, stat);
 	inode_unlock(temp->d_inode);
 	if (err)
-		goto out_cleanup;
+		return err;
 
 	/*
 	 * Store identifier of lower inode in upper inode xattr to
@@ -447,9 +436,32 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	if (S_ISDIR(stat->mode) || stat->nlink == 1) {
 		err = ovl_set_origin(dentry, lowerpath->dentry, temp);
 		if (err)
-			goto out_cleanup;
+			return err;
 	}
 
+	return 0;
+}
+
+static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
+			      struct dentry *dentry, struct path *lowerpath,
+			      struct kstat *stat, const char *link,
+			      struct kstat *pstat, bool tmpfile)
+{
+	struct inode *wdir = workdir->d_inode;
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *newdentry = NULL;
+	struct dentry *temp = NULL;
+	int err;
+
+	err = ovl_get_tmpfile(workdir, upperdir, dentry, stat, link, tmpfile,
+			      &temp);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up_inode(dentry, temp, lowerpath, stat);
+	if (err)
+		goto out_cleanup;
+
 	if (tmpfile) {
 		inode_lock_nested(udir, I_MUTEX_PARENT);
 		err = ovl_install_temp(workdir, upperdir, dentry, temp, pstat,
-- 
GitLab


From 7ab8b1763fd84ff4e7263ed7f5c728e4cb3f364a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:18 +0200
Subject: [PATCH 0687/1958] ovl: base tmpfile in workdir too

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 81b9a44916a06..1264f24340472 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -345,8 +345,7 @@ static int ovl_install_temp(struct dentry *workdir, struct dentry *upperdir,
 	return err;
 }
 
-static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *upperdir,
-			   struct dentry *dentry,
+static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *dentry,
 			   struct kstat *stat, const char *link, bool tmpfile,
 			   struct dentry **tempp)
 {
@@ -369,7 +368,7 @@ static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *upperdir,
 		old_creds = override_creds(new_creds);
 
 	if (tmpfile) {
-		temp = ovl_do_tmpfile(upperdir, stat->mode);
+		temp = ovl_do_tmpfile(workdir, stat->mode);
 		if (IS_ERR(temp))
 			goto temp_err;
 	} else {
@@ -453,8 +452,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	struct dentry *temp = NULL;
 	int err;
 
-	err = ovl_get_tmpfile(workdir, upperdir, dentry, stat, link, tmpfile,
-			      &temp);
+	err = ovl_get_tmpfile(workdir, dentry, stat, link, tmpfile, &temp);
 	if (err)
 		goto out;
 
-- 
GitLab


From 23f0ab13eaa69b4a351184cbec448be2aad3a3a9 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:18 +0200
Subject: [PATCH 0688/1958] ovl: use struct copy_up_ctx as function argument

This cleans up functions with too many arguments.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 160 ++++++++++++++++++++---------------------
 1 file changed, 78 insertions(+), 82 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 1264f24340472..8f9e26e913863 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -316,38 +316,45 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	return err;
 }
 
-static int ovl_install_temp(struct dentry *workdir, struct dentry *upperdir,
-			    struct dentry *dentry,
-			    struct dentry *temp, struct kstat *pstat,
-			    bool tmpfile, struct dentry **newdentry)
+struct ovl_copy_up_ctx {
+	struct dentry *dentry;
+	struct path lowerpath;
+	struct kstat stat;
+	struct kstat pstat;
+	const char *link;
+	struct dentry *upperdir;
+	struct dentry *workdir;
+	bool tmpfile;
+};
+
+static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
+			    struct dentry **newdentry)
 {
 	int err;
 	struct dentry *upper;
-	struct inode *udir = d_inode(upperdir);
+	struct inode *udir = d_inode(c->upperdir);
 
-	upper = lookup_one_len(dentry->d_name.name, upperdir,
-			       dentry->d_name.len);
+	upper = lookup_one_len(c->dentry->d_name.name, c->upperdir,
+			       c->dentry->d_name.len);
 	if (IS_ERR(upper))
 		return PTR_ERR(upper);
 
-	if (tmpfile)
+	if (c->tmpfile)
 		err = ovl_do_link(temp, udir, upper, true);
 	else
-		err = ovl_do_rename(d_inode(workdir), temp, udir, upper, 0);
+		err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
 
 	/* Restore timestamps on parent (best effort) */
 	if (!err) {
-		ovl_set_timestamps(upperdir, pstat);
-		*newdentry = dget(tmpfile ? upper : temp);
+		ovl_set_timestamps(c->upperdir, &c->pstat);
+		*newdentry = dget(c->tmpfile ? upper : temp);
 	}
 	dput(upper);
 
 	return err;
 }
 
-static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *dentry,
-			   struct kstat *stat, const char *link, bool tmpfile,
-			   struct dentry **tempp)
+static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
 {
 	int err;
 	struct dentry *temp;
@@ -355,28 +362,28 @@ static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *dentry,
 	struct cred *new_creds = NULL;
 	struct cattr cattr = {
 		/* Can't properly set mode on creation because of the umask */
-		.mode = stat->mode & S_IFMT,
-		.rdev = stat->rdev,
-		.link = link
+		.mode = c->stat.mode & S_IFMT,
+		.rdev = c->stat.rdev,
+		.link = c->link
 	};
 
-	err = security_inode_copy_up(dentry, &new_creds);
+	err = security_inode_copy_up(c->dentry, &new_creds);
 	if (err < 0)
 		goto out;
 
 	if (new_creds)
 		old_creds = override_creds(new_creds);
 
-	if (tmpfile) {
-		temp = ovl_do_tmpfile(workdir, stat->mode);
+	if (c->tmpfile) {
+		temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
 		if (IS_ERR(temp))
 			goto temp_err;
 	} else {
-		temp = ovl_lookup_temp(workdir);
+		temp = ovl_lookup_temp(c->workdir);
 		if (IS_ERR(temp))
 			goto temp_err;
 
-		err = ovl_create_real(d_inode(workdir), temp, &cattr,
+		err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
 				      NULL, true);
 		if (err) {
 			dput(temp);
@@ -398,29 +405,28 @@ static int ovl_get_tmpfile(struct dentry *workdir, struct dentry *dentry,
 	goto out;
 }
 
-static int ovl_copy_up_inode(struct dentry *dentry, struct dentry *temp,
-			     struct path *lowerpath, struct kstat *stat)
+static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 {
 	int err;
 
-	if (S_ISREG(stat->mode)) {
+	if (S_ISREG(c->stat.mode)) {
 		struct path upperpath;
 
-		ovl_path_upper(dentry, &upperpath);
+		ovl_path_upper(c->dentry, &upperpath);
 		BUG_ON(upperpath.dentry != NULL);
 		upperpath.dentry = temp;
 
-		err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
+		err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
 		if (err)
 			return err;
 	}
 
-	err = ovl_copy_xattr(lowerpath->dentry, temp);
+	err = ovl_copy_xattr(c->lowerpath.dentry, temp);
 	if (err)
 		return err;
 
 	inode_lock(temp->d_inode);
-	err = ovl_set_attr(temp, stat);
+	err = ovl_set_attr(temp, &c->stat);
 	inode_unlock(temp->d_inode);
 	if (err)
 		return err;
@@ -432,8 +438,8 @@ static int ovl_copy_up_inode(struct dentry *dentry, struct dentry *temp,
 	 * Don't set origin when we are breaking the association with a lower
 	 * hard link.
 	 */
-	if (S_ISDIR(stat->mode) || stat->nlink == 1) {
-		err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1) {
+		err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
 		if (err)
 			return err;
 	}
@@ -441,45 +447,39 @@ static int ovl_copy_up_inode(struct dentry *dentry, struct dentry *temp,
 	return 0;
 }
 
-static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
-			      struct dentry *dentry, struct path *lowerpath,
-			      struct kstat *stat, const char *link,
-			      struct kstat *pstat, bool tmpfile)
+static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
 {
-	struct inode *wdir = workdir->d_inode;
-	struct inode *udir = upperdir->d_inode;
+	struct inode *udir = c->upperdir->d_inode;
 	struct dentry *newdentry = NULL;
 	struct dentry *temp = NULL;
 	int err;
 
-	err = ovl_get_tmpfile(workdir, dentry, stat, link, tmpfile, &temp);
+	err = ovl_get_tmpfile(c, &temp);
 	if (err)
 		goto out;
 
-	err = ovl_copy_up_inode(dentry, temp, lowerpath, stat);
+	err = ovl_copy_up_inode(c, temp);
 	if (err)
 		goto out_cleanup;
 
-	if (tmpfile) {
+	if (c->tmpfile) {
 		inode_lock_nested(udir, I_MUTEX_PARENT);
-		err = ovl_install_temp(workdir, upperdir, dentry, temp, pstat,
-				       tmpfile, &newdentry);
+		err = ovl_install_temp(c, temp, &newdentry);
 		inode_unlock(udir);
 	} else {
-		err = ovl_install_temp(workdir, upperdir, dentry, temp, pstat,
-				       tmpfile, &newdentry);
+		err = ovl_install_temp(c, temp, &newdentry);
 	}
 	if (err)
 		goto out_cleanup;
 
-	ovl_inode_update(d_inode(dentry), newdentry);
+	ovl_inode_update(d_inode(c->dentry), newdentry);
 out:
 	dput(temp);
 	return err;
 
 out_cleanup:
-	if (!tmpfile)
-		ovl_cleanup(wdir, temp);
+	if (!c->tmpfile)
+		ovl_cleanup(d_inode(c->workdir), temp);
 	goto out;
 }
 
@@ -492,75 +492,70 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
  * is possible that the copy up will lock the old parent.  At that point
  * the file will have already been copied up anyway.
  */
-static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
-			   struct path *lowerpath, struct kstat *stat)
+static int ovl_copy_up_one(struct dentry *parent, struct ovl_copy_up_ctx *c)
 {
 	DEFINE_DELAYED_CALL(done);
-	struct dentry *workdir = ovl_workdir(dentry);
 	int err;
-	struct kstat pstat;
 	struct path parentpath;
-	struct dentry *lowerdentry = lowerpath->dentry;
-	struct dentry *upperdir;
-	const char *link = NULL;
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct dentry *lowerdentry = c->lowerpath.dentry;
+	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
 
-	if (WARN_ON(!workdir))
+	c->workdir = ovl_workdir(c->dentry);
+	if (WARN_ON(!c->workdir))
 		return -EROFS;
 
 	ovl_do_check_copy_up(lowerdentry);
 
 	ovl_path_upper(parent, &parentpath);
-	upperdir = parentpath.dentry;
+	c->upperdir = parentpath.dentry;
 
 	/* Mark parent "impure" because it may now contain non-pure upper */
-	err = ovl_set_impure(parent, upperdir);
+	err = ovl_set_impure(parent, c->upperdir);
 	if (err)
 		return err;
 
-	err = vfs_getattr(&parentpath, &pstat,
+	err = vfs_getattr(&parentpath, &c->pstat,
 			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
 	if (err)
 		return err;
 
-	if (S_ISLNK(stat->mode)) {
-		link = vfs_get_link(lowerdentry, &done);
-		if (IS_ERR(link))
-			return PTR_ERR(link);
+	if (S_ISLNK(c->stat.mode)) {
+		c->link = vfs_get_link(lowerdentry, &done);
+		if (IS_ERR(c->link))
+			return PTR_ERR(c->link);
 	}
 
 	/* Should we copyup with O_TMPFILE or with workdir? */
-	if (S_ISREG(stat->mode) && ofs->tmpfile) {
-		err = ovl_copy_up_start(dentry);
+	if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
+		err = ovl_copy_up_start(c->dentry);
 		/* err < 0: interrupted, err > 0: raced with another copy-up */
 		if (unlikely(err)) {
-			pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
+			pr_debug("ovl_copy_up_start(%pd2) = %i\n", c->dentry,
+				 err);
 			if (err > 0)
 				err = 0;
 			goto out_done;
 		}
-
-		err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
-					 stat, link, &pstat, true);
-		ovl_copy_up_end(dentry);
+		c->tmpfile = true;
+		err = ovl_copy_up_locked(c);
+		ovl_copy_up_end(c->dentry);
 		goto out_done;
 	}
 
 	err = -EIO;
-	if (lock_rename(workdir, upperdir) != NULL) {
+	if (lock_rename(c->workdir, c->upperdir) != NULL) {
 		pr_err("overlayfs: failed to lock workdir+upperdir\n");
 		goto out_unlock;
 	}
-	if (ovl_dentry_upper(dentry)) {
+	if (ovl_dentry_upper(c->dentry)) {
 		/* Raced with another copy-up?  Nothing to do, then... */
 		err = 0;
 		goto out_unlock;
 	}
 
-	err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
-				 stat, link, &pstat, false);
+	err = ovl_copy_up_locked(c);
 out_unlock:
-	unlock_rename(workdir, upperdir);
+	unlock_rename(c->workdir, c->upperdir);
 out_done:
 	do_delayed_call(&done);
 
@@ -575,8 +570,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 	while (!err) {
 		struct dentry *next;
 		struct dentry *parent;
-		struct path lowerpath;
-		struct kstat stat;
+		struct ovl_copy_up_ctx ctx = { };
 		enum ovl_path_type type = ovl_path_type(dentry);
 
 		if (OVL_TYPE_UPPER(type))
@@ -595,14 +589,16 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 			next = parent;
 		}
 
-		ovl_path_lower(next, &lowerpath);
-		err = vfs_getattr(&lowerpath, &stat,
+		ovl_path_lower(next, &ctx.lowerpath);
+		err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
 				  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 		/* maybe truncate regular file. this has no effect on dirs */
 		if (flags & O_TRUNC)
-			stat.size = 0;
-		if (!err)
-			err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
+			ctx.stat.size = 0;
+		if (!err) {
+			ctx.dentry = next;
+			err = ovl_copy_up_one(parent, &ctx);
+		}
 
 		dput(parent);
 		dput(next);
-- 
GitLab


From 55acc6618259c8ff0a400a131f0f4b613e96010a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:18 +0200
Subject: [PATCH 0689/1958] ovl: add flag for upper in ovl_entry

For rename, we need to ensure that an upper alias exists for hard links
before attempting the operation.  Introduce a flag in ovl_entry to track
the state of the upper alias.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   |  1 +
 fs/overlayfs/dir.c       |  1 +
 fs/overlayfs/namei.c     |  4 +++-
 fs/overlayfs/overlayfs.h |  2 ++
 fs/overlayfs/ovl_entry.h |  5 ++++-
 fs/overlayfs/super.c     |  1 +
 fs/overlayfs/util.c      | 19 +++++++++++++++++++
 7 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 8f9e26e913863..58c06bd58a96d 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -472,6 +472,7 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
 	if (err)
 		goto out_cleanup;
 
+	ovl_dentry_set_upper_alias(c->dentry);
 	ovl_inode_update(d_inode(c->dentry), newdentry);
 out:
 	dput(temp);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a072c27e03bc2..8b2b23181b19c 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -156,6 +156,7 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
 			    struct dentry *newdentry, bool hardlink)
 {
 	ovl_dentry_version_inc(dentry->d_parent);
+	ovl_dentry_set_upper_alias(dentry);
 	if (!hardlink) {
 		ovl_inode_update(inode, newdentry);
 		ovl_copyattr(newdentry->d_inode, inode);
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index f7fb0c9194197..2d8b6292fe21e 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -674,7 +674,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
 	dentry->d_fsdata = oe;
 
-	if (index && !upperdentry)
+	if (upperdentry)
+		ovl_dentry_set_upper_alias(dentry);
+	else if (index)
 		upperdentry = dget(index);
 
 	if (upperdentry || ctr) {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f3e49cf345178..751b36a5c22f2 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -206,6 +206,8 @@ void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
 bool ovl_dentry_is_whiteout(struct dentry *dentry);
 void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_upper_alias(struct dentry *dentry);
+void ovl_dentry_set_upper_alias(struct dentry *dentry);
 bool ovl_redirect_dir(struct super_block *sb);
 const char *ovl_dentry_get_redirect(struct dentry *dentry);
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 9642ec64467b5..878a750986dd7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -42,7 +42,10 @@ struct ovl_fs {
 /* private information held for every overlayfs dentry */
 struct ovl_entry {
 	union {
-		bool opaque;
+		struct {
+			unsigned long has_upper;
+			bool opaque;
+		};
 		struct rcu_head rcu;
 	};
 	unsigned numlower;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 791581c370f51..f29ee08cf99f8 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1119,6 +1119,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(lowertmp);
 
 	if (upperpath.dentry) {
+		oe->has_upper = true;
 		if (ovl_is_impuredir(upperpath.dentry))
 			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
 	}
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index c80b4bf1e64f1..38fa75228c66e 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -201,6 +201,25 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
 	oe->opaque = true;
 }
 
+/*
+ * For hard links it's possible for ovl_dentry_upper() to return positive, while
+ * there's no actual upper alias for the inode.  Copy up code needs to know
+ * about the existence of the upper alias, so it can't use ovl_dentry_upper().
+ */
+bool ovl_dentry_has_upper_alias(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->has_upper;
+}
+
+void ovl_dentry_set_upper_alias(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	oe->has_upper = true;
+}
+
 bool ovl_redirect_dir(struct super_block *sb)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
-- 
GitLab


From a6fb235a448b8eb731fd6d4de2c5c6269677cf5b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:18 +0200
Subject: [PATCH 0690/1958] ovl: rearrange copy up

Split up and rearrange copy up functions to make them better readable.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 86 ++++++++++++++++++++++++------------------
 1 file changed, 50 insertions(+), 36 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 58c06bd58a96d..15668d3bbbc4b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -317,6 +317,7 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 }
 
 struct ovl_copy_up_ctx {
+	struct dentry *parent;
 	struct dentry *dentry;
 	struct path lowerpath;
 	struct kstat stat;
@@ -493,39 +494,16 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
  * is possible that the copy up will lock the old parent.  At that point
  * the file will have already been copied up anyway.
  */
-static int ovl_copy_up_one(struct dentry *parent, struct ovl_copy_up_ctx *c)
+static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 {
-	DEFINE_DELAYED_CALL(done);
 	int err;
-	struct path parentpath;
-	struct dentry *lowerdentry = c->lowerpath.dentry;
 	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
 
-	c->workdir = ovl_workdir(c->dentry);
-	if (WARN_ON(!c->workdir))
-		return -EROFS;
-
-	ovl_do_check_copy_up(lowerdentry);
-
-	ovl_path_upper(parent, &parentpath);
-	c->upperdir = parentpath.dentry;
-
 	/* Mark parent "impure" because it may now contain non-pure upper */
-	err = ovl_set_impure(parent, c->upperdir);
-	if (err)
-		return err;
-
-	err = vfs_getattr(&parentpath, &c->pstat,
-			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
+	err = ovl_set_impure(c->parent, c->upperdir);
 	if (err)
 		return err;
 
-	if (S_ISLNK(c->stat.mode)) {
-		c->link = vfs_get_link(lowerdentry, &done);
-		if (IS_ERR(c->link))
-			return PTR_ERR(c->link);
-	}
-
 	/* Should we copyup with O_TMPFILE or with workdir? */
 	if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
 		err = ovl_copy_up_start(c->dentry);
@@ -558,6 +536,52 @@ static int ovl_copy_up_one(struct dentry *parent, struct ovl_copy_up_ctx *c)
 out_unlock:
 	unlock_rename(c->workdir, c->upperdir);
 out_done:
+
+	return err;
+}
+
+static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+			   int flags)
+{
+	int err;
+	DEFINE_DELAYED_CALL(done);
+	struct path parentpath;
+	struct ovl_copy_up_ctx ctx = {
+		.parent = parent,
+		.dentry = dentry,
+		.workdir = ovl_workdir(dentry),
+	};
+
+	if (WARN_ON(!ctx.workdir))
+		return -EROFS;
+
+	ovl_path_lower(dentry, &ctx.lowerpath);
+	err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
+			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+	if (err)
+		return err;
+
+	ovl_path_upper(parent, &parentpath);
+	ctx.upperdir = parentpath.dentry;
+
+	err = vfs_getattr(&parentpath, &ctx.pstat,
+			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
+	if (err)
+		return err;
+
+	/* maybe truncate regular file. this has no effect on dirs */
+	if (flags & O_TRUNC)
+		ctx.stat.size = 0;
+
+	if (S_ISLNK(ctx.stat.mode)) {
+		ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
+		if (IS_ERR(ctx.link))
+			return PTR_ERR(ctx.link);
+	}
+	ovl_do_check_copy_up(ctx.lowerpath.dentry);
+
+	err = ovl_do_copy_up(&ctx);
+
 	do_delayed_call(&done);
 
 	return err;
@@ -571,7 +595,6 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 	while (!err) {
 		struct dentry *next;
 		struct dentry *parent;
-		struct ovl_copy_up_ctx ctx = { };
 		enum ovl_path_type type = ovl_path_type(dentry);
 
 		if (OVL_TYPE_UPPER(type))
@@ -590,16 +613,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 			next = parent;
 		}
 
-		ovl_path_lower(next, &ctx.lowerpath);
-		err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
-				  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
-		/* maybe truncate regular file. this has no effect on dirs */
-		if (flags & O_TRUNC)
-			ctx.stat.size = 0;
-		if (!err) {
-			ctx.dentry = next;
-			err = ovl_copy_up_one(parent, &ctx);
-		}
+		err = ovl_copy_up_one(parent, next, flags);
 
 		dput(parent);
 		dput(next);
-- 
GitLab


From fd210b7d67ee3768bf1ad3e07d55797d4b45fcc1 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 4 Jul 2017 22:03:18 +0200
Subject: [PATCH 0691/1958] ovl: move copy up lock out

Move ovl_copy_up_start()/ovl_copy_up_end() out so that it's used for both
tempfile and workdir copy ups.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 38 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 15668d3bbbc4b..0d9de353f42bc 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -506,37 +506,18 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 
 	/* Should we copyup with O_TMPFILE or with workdir? */
 	if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
-		err = ovl_copy_up_start(c->dentry);
-		/* err < 0: interrupted, err > 0: raced with another copy-up */
-		if (unlikely(err)) {
-			pr_debug("ovl_copy_up_start(%pd2) = %i\n", c->dentry,
-				 err);
-			if (err > 0)
-				err = 0;
-			goto out_done;
-		}
 		c->tmpfile = true;
-		err = ovl_copy_up_locked(c);
-		ovl_copy_up_end(c->dentry);
-		goto out_done;
+		return  ovl_copy_up_locked(c);
 	}
 
 	err = -EIO;
 	if (lock_rename(c->workdir, c->upperdir) != NULL) {
 		pr_err("overlayfs: failed to lock workdir+upperdir\n");
-		goto out_unlock;
-	}
-	if (ovl_dentry_upper(c->dentry)) {
-		/* Raced with another copy-up?  Nothing to do, then... */
-		err = 0;
-		goto out_unlock;
+	} else {
+		err = ovl_copy_up_locked(c);
+		unlock_rename(c->workdir, c->upperdir);
 	}
 
-	err = ovl_copy_up_locked(c);
-out_unlock:
-	unlock_rename(c->workdir, c->upperdir);
-out_done:
-
 	return err;
 }
 
@@ -580,8 +561,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	}
 	ovl_do_check_copy_up(ctx.lowerpath.dentry);
 
-	err = ovl_do_copy_up(&ctx);
-
+	err = ovl_copy_up_start(dentry);
+	/* err < 0: interrupted, err > 0: raced with another copy-up */
+	if (unlikely(err)) {
+		if (err > 0)
+			err = 0;
+	} else {
+		err = ovl_do_copy_up(&ctx);
+		ovl_copy_up_end(dentry);
+	}
 	do_delayed_call(&done);
 
 	return err;
-- 
GitLab


From 59be09712ab98a3060f13e31343c7abb9bc4583d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 20 Jun 2017 15:25:46 +0300
Subject: [PATCH 0692/1958] ovl: implement index dir copy up

Implement a copy up method for non-dir objects using index dir to
prevent breaking lower hardlinks on copy up.

This method requires that the inodes index dir feature was enabled and
that all underlying fs support file handle encoding/decoding.

On the first lower hardlink copy up, upper file is created in index dir,
named after the hex representation of the lower origin inode file handle.
On the second lower hardlink copy up, upper file is found in index dir,
by the same lower handle key.
On either case, the upper indexed inode is then linked to the copy up
upper path.

The index entry remains linked for future lower hardlink copy up and for
lower to upper inode map, that is needed for exporting overlayfs to NFS.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 125 ++++++++++++++++++++++++++++++++---------
 fs/overlayfs/inode.c   |  13 ++---
 fs/overlayfs/util.c    |   2 +-
 3 files changed, 103 insertions(+), 37 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 0d9de353f42bc..9f5a47338e59b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -316,6 +316,29 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	return err;
 }
 
+static int ovl_link_up(struct dentry *parent, struct dentry *dentry)
+{
+	int err;
+	struct dentry *upper;
+	struct dentry *upperdir = ovl_dentry_upper(parent);
+	struct inode *udir = d_inode(upperdir);
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (!IS_ERR(upper)) {
+		err = ovl_do_link(ovl_dentry_upper(dentry), udir, upper, true);
+		dput(upper);
+
+		if (!err)
+			ovl_dentry_set_upper_alias(dentry);
+	}
+	inode_unlock(udir);
+
+	return err;
+}
+
 struct ovl_copy_up_ctx {
 	struct dentry *parent;
 	struct dentry *dentry;
@@ -323,9 +346,11 @@ struct ovl_copy_up_ctx {
 	struct kstat stat;
 	struct kstat pstat;
 	const char *link;
-	struct dentry *upperdir;
+	struct dentry *destdir;
+	struct qstr destname;
 	struct dentry *workdir;
 	bool tmpfile;
+	bool origin;
 };
 
 static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
@@ -333,10 +358,9 @@ static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
 {
 	int err;
 	struct dentry *upper;
-	struct inode *udir = d_inode(c->upperdir);
+	struct inode *udir = d_inode(c->destdir);
 
-	upper = lookup_one_len(c->dentry->d_name.name, c->upperdir,
-			       c->dentry->d_name.len);
+	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
 	if (IS_ERR(upper))
 		return PTR_ERR(upper);
 
@@ -345,11 +369,8 @@ static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
 	else
 		err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
 
-	/* Restore timestamps on parent (best effort) */
-	if (!err) {
-		ovl_set_timestamps(c->upperdir, &c->pstat);
+	if (!err)
 		*newdentry = dget(c->tmpfile ? upper : temp);
-	}
 	dput(upper);
 
 	return err;
@@ -439,7 +460,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 	 * Don't set origin when we are breaking the association with a lower
 	 * hard link.
 	 */
-	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1) {
+	if (c->origin) {
 		err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
 		if (err)
 			return err;
@@ -450,7 +471,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 
 static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
 {
-	struct inode *udir = c->upperdir->d_inode;
+	struct inode *udir = c->destdir->d_inode;
 	struct dentry *newdentry = NULL;
 	struct dentry *temp = NULL;
 	int err;
@@ -473,7 +494,6 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
 	if (err)
 		goto out_cleanup;
 
-	ovl_dentry_set_upper_alias(c->dentry);
 	ovl_inode_update(d_inode(c->dentry), newdentry);
 out:
 	dput(temp);
@@ -498,24 +518,57 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 {
 	int err;
 	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+	bool indexed = false;
 
-	/* Mark parent "impure" because it may now contain non-pure upper */
-	err = ovl_set_impure(c->parent, c->upperdir);
-	if (err)
-		return err;
+	if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
+	    c->stat.nlink > 1)
+		indexed = true;
+
+	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
+		c->origin = true;
+
+	if (indexed) {
+		c->destdir = ovl_indexdir(c->dentry->d_sb);
+		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+		if (err)
+			return err;
+	} else {
+		/*
+		 * Mark parent "impure" because it may now contain non-pure
+		 * upper
+		 */
+		err = ovl_set_impure(c->parent, c->destdir);
+		if (err)
+			return err;
+	}
 
 	/* Should we copyup with O_TMPFILE or with workdir? */
 	if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
 		c->tmpfile = true;
-		return  ovl_copy_up_locked(c);
+		err = ovl_copy_up_locked(c);
+	} else {
+		err = -EIO;
+		if (lock_rename(c->workdir, c->destdir) != NULL) {
+			pr_err("overlayfs: failed to lock workdir+upperdir\n");
+		} else {
+			err = ovl_copy_up_locked(c);
+			unlock_rename(c->workdir, c->destdir);
+		}
 	}
 
-	err = -EIO;
-	if (lock_rename(c->workdir, c->upperdir) != NULL) {
-		pr_err("overlayfs: failed to lock workdir+upperdir\n");
-	} else {
-		err = ovl_copy_up_locked(c);
-		unlock_rename(c->workdir, c->upperdir);
+	if (indexed) {
+		if (!err)
+			ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+		kfree(c->destname.name);
+	} else if (!err) {
+		struct inode *udir = d_inode(c->destdir);
+
+		/* Restore timestamps on parent (best effort) */
+		inode_lock(udir);
+		ovl_set_timestamps(c->destdir, &c->pstat);
+		inode_unlock(udir);
+
+		ovl_dentry_set_upper_alias(c->dentry);
 	}
 
 	return err;
@@ -543,7 +596,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 		return err;
 
 	ovl_path_upper(parent, &parentpath);
-	ctx.upperdir = parentpath.dentry;
+	ctx.destdir = parentpath.dentry;
+	ctx.destname = dentry->d_name;
 
 	err = vfs_getattr(&parentpath, &ctx.pstat,
 			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
@@ -567,7 +621,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 		if (err > 0)
 			err = 0;
 	} else {
-		err = ovl_do_copy_up(&ctx);
+		if (!ovl_dentry_upper(dentry))
+			err = ovl_do_copy_up(&ctx);
+		if (!err && !ovl_dentry_has_upper_alias(dentry))
+			err = ovl_link_up(parent, dentry);
 		ovl_copy_up_end(dentry);
 	}
 	do_delayed_call(&done);
@@ -583,9 +640,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 	while (!err) {
 		struct dentry *next;
 		struct dentry *parent;
-		enum ovl_path_type type = ovl_path_type(dentry);
 
-		if (OVL_TYPE_UPPER(type))
+		/*
+		 * Check if copy-up has happened as well as for upper alias (in
+		 * case of hard links) is there.
+		 *
+		 * Both checks are lockless:
+		 *  - false negatives: will recheck under oi->lock
+		 *  - false positives:
+		 *    + ovl_dentry_upper() uses memory barriers to ensure the
+		 *      upper dentry is up-to-date
+		 *    + ovl_dentry_has_upper_alias() relies on locking of
+		 *      upper parent i_rwsem to prevent reordering copy-up
+		 *      with rename.
+		 */
+		if (ovl_dentry_upper(dentry) &&
+		    ovl_dentry_has_upper_alias(dentry))
 			break;
 
 		next = dget(dentry);
@@ -593,8 +663,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 		for (;;) {
 			parent = dget_parent(next);
 
-			type = ovl_path_type(parent);
-			if (OVL_TYPE_UPPER(type))
+			if (ovl_dentry_upper(parent))
 				break;
 
 			dput(next);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index d9fe07defca33..44d262a0a77ed 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -305,13 +305,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
-				  struct dentry *realdentry)
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
 {
-	if (OVL_TYPE_UPPER(type))
+	if (ovl_dentry_upper(dentry) &&
+	    ovl_dentry_has_upper_alias(dentry))
 		return false;
 
-	if (special_file(realdentry->d_inode->i_mode))
+	if (special_file(d_inode(dentry)->i_mode))
 		return false;
 
 	if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
@@ -323,11 +323,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
 int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
 {
 	int err = 0;
-	struct path realpath;
-	enum ovl_path_type type;
 
-	type = ovl_path_real(dentry, &realpath);
-	if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
+	if (ovl_open_need_copy_up(dentry, file_flags)) {
 		err = ovl_want_write(dentry);
 		if (!err) {
 			err = ovl_copy_up_flags(dentry, file_flags);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 38fa75228c66e..a290be449b8b6 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -302,7 +302,7 @@ int ovl_copy_up_start(struct dentry *dentry)
 	int err;
 
 	err = mutex_lock_interruptible(&oi->lock);
-	if (!err && ovl_dentry_upper(dentry)) {
+	if (!err && ovl_dentry_has_upper_alias(dentry)) {
 		err = 1; /* Already copied up */
 		mutex_unlock(&oi->lock);
 	}
-- 
GitLab


From 5f8415d6b87ecb4ebf1bbd02c538694ebb7fb57c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 20 Jun 2017 15:35:14 +0300
Subject: [PATCH 0693/1958] ovl: persistent overlay inode nlink for indexed
 inodes

With inodes index enabled, an overlay inode nlink counts the union of upper
and non-covered lower hardlinks. During the lifetime of a non-pure upper
inode, the following nlink modifying operations can happen:

1. Lower hardlink copy up
2. Upper hardlink created, unlinked or renamed over
3. Lower hardlink whiteout or renamed over

For the first, copy up case, the union nlink does not change, whether the
operation succeeds or fails, but the upper inode nlink may change.
Therefore, before copy up, we store the union nlink value relative to the
lower inode nlink in the index inode xattr trusted.overlay.nlink.

For the second, upper hardlink case, the union nlink should be incremented
or decremented IFF the operation succeeds, aligned with nlink change of the
upper inode. Therefore, before link/unlink/rename, we store the union nlink
value relative to the upper inode nlink in the index inode.

For the last, lower cover up case, we simplify things by preceding the
whiteout or cover up with copy up. This makes sure that there is an index
upper inode where the nlink xattr can be stored before the copied up upper
entry is unlink.

Return the overlay inode nlinks for indexed upper inodes on stat(2).

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   |   5 ++
 fs/overlayfs/dir.c       |  19 ++++++-
 fs/overlayfs/inode.c     | 112 ++++++++++++++++++++++++++++++++++++++-
 fs/overlayfs/overlayfs.h |   5 ++
 fs/overlayfs/util.c      |  66 +++++++++++++++++++++++
 5 files changed, 204 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 9f5a47338e59b..f193976560dee 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -323,6 +323,10 @@ static int ovl_link_up(struct dentry *parent, struct dentry *dentry)
 	struct dentry *upperdir = ovl_dentry_upper(parent);
 	struct inode *udir = d_inode(upperdir);
 
+	err = ovl_set_nlink_lower(dentry);
+	if (err)
+		return err;
+
 	inode_lock_nested(udir, I_MUTEX_PARENT);
 	upper = lookup_one_len(dentry->d_name.name, upperdir,
 			       dentry->d_name.len);
@@ -335,6 +339,7 @@ static int ovl_link_up(struct dentry *parent, struct dentry *dentry)
 			ovl_dentry_set_upper_alias(dentry);
 	}
 	inode_unlock(udir);
+	ovl_set_nlink_upper(dentry);
 
 	return err;
 }
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 8b2b23181b19c..641d9ee97f91f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -591,6 +591,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 		    struct dentry *new)
 {
 	int err;
+	bool locked = false;
 	struct inode *inode;
 
 	err = ovl_want_write(old);
@@ -601,6 +602,10 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	if (err)
 		goto out_drop_write;
 
+	err = ovl_nlink_start(old, &locked);
+	if (err)
+		goto out_drop_write;
+
 	inode = d_inode(old);
 	ihold(inode);
 
@@ -608,6 +613,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	if (err)
 		iput(inode);
 
+	ovl_nlink_end(old, locked);
 out_drop_write:
 	ovl_drop_write(old);
 out:
@@ -743,8 +749,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
 
 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 {
-	enum ovl_path_type type;
 	int err;
+	bool locked = false;
 	const struct cred *old_cred;
 
 	err = ovl_want_write(dentry);
@@ -755,7 +761,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 	if (err)
 		goto out_drop_write;
 
-	type = ovl_path_type(dentry);
+	err = ovl_nlink_start(dentry, &locked);
+	if (err)
+		goto out_drop_write;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	if (!ovl_lower_positive(dentry))
@@ -769,6 +777,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 		else
 			drop_nlink(dentry->d_inode);
 	}
+	ovl_nlink_end(dentry, locked);
 out_drop_write:
 	ovl_drop_write(dentry);
 out:
@@ -891,6 +900,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		      unsigned int flags)
 {
 	int err;
+	bool locked = false;
 	struct dentry *old_upperdir;
 	struct dentry *new_upperdir;
 	struct dentry *olddentry;
@@ -934,6 +944,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		err = ovl_copy_up(new);
 		if (err)
 			goto out_drop_write;
+	} else {
+		err = ovl_nlink_start(new, &locked);
+		if (err)
+			goto out_drop_write;
 	}
 
 	old_cred = ovl_override_creds(old->d_sb);
@@ -1072,6 +1086,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	unlock_rename(new_upperdir, old_upperdir);
 out_revert_creds:
 	revert_creds(old_cred);
+	ovl_nlink_end(new, locked);
 out_drop_write:
 	ovl_drop_write(old);
 out:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 44d262a0a77ed..196a4e5450c07 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -12,6 +12,7 @@
 #include <linux/cred.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
+#include <linux/ratelimit.h>
 #include "overlayfs.h"
 
 int ovl_setattr(struct dentry *dentry, struct iattr *attr)
@@ -130,6 +131,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	if (is_dir && OVL_TYPE_MERGE(type))
 		stat->nlink = 1;
 
+	/*
+	 * Return the overlay inode nlinks for indexed upper inodes.
+	 * Overlay inode nlink counts the union of the upper hardlinks
+	 * and non-covered lower hardlinks. It does not include the upper
+	 * index hardlink.
+	 */
+	if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		stat->nlink = dentry->d_inode->i_nlink;
+
 out:
 	revert_creds(old_cred);
 
@@ -442,6 +452,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
 	}
 }
 
+/*
+ * With inodes index enabled, an overlay inode nlink counts the union of upper
+ * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
+ * upper inode, the following nlink modifying operations can happen:
+ *
+ * 1. Lower hardlink copy up
+ * 2. Upper hardlink created, unlinked or renamed over
+ * 3. Lower hardlink whiteout or renamed over
+ *
+ * For the first, copy up case, the union nlink does not change, whether the
+ * operation succeeds or fails, but the upper inode nlink may change.
+ * Therefore, before copy up, we store the union nlink value relative to the
+ * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ *
+ * For the second, upper hardlink case, the union nlink should be incremented
+ * or decremented IFF the operation succeeds, aligned with nlink change of the
+ * upper inode. Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in the index inode.
+ *
+ * For the last, lower cover up case, we simplify things by preceding the
+ * whiteout or cover up with copy up. This makes sure that there is an index
+ * upper inode where the nlink xattr can be stored before the copied up upper
+ * entry is unlink.
+ */
+#define OVL_NLINK_ADD_UPPER	(1 << 0)
+
+/*
+ * On-disk format for indexed nlink:
+ *
+ * nlink relative to the upper inode - "U[+-]NUM"
+ * nlink relative to the lower inode - "L[+-]NUM"
+ */
+
+static int ovl_set_nlink_common(struct dentry *dentry,
+				struct dentry *realdentry, const char *format)
+{
+	struct inode *inode = d_inode(dentry);
+	struct inode *realinode = d_inode(realdentry);
+	char buf[13];
+	int len;
+
+	len = snprintf(buf, sizeof(buf), format,
+		       (int) (inode->i_nlink - realinode->i_nlink));
+
+	return ovl_do_setxattr(ovl_dentry_upper(dentry),
+			       OVL_XATTR_NLINK, buf, len, 0);
+}
+
+int ovl_set_nlink_upper(struct dentry *dentry)
+{
+	return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
+}
+
+int ovl_set_nlink_lower(struct dentry *dentry)
+{
+	return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
+}
+
+static unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+				  struct dentry *upperdentry,
+				  unsigned int fallback)
+{
+	int nlink_diff;
+	int nlink;
+	char buf[13];
+	int err;
+
+	if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
+		return fallback;
+
+	err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
+	if (err < 0)
+		goto fail;
+
+	buf[err] = '\0';
+	if ((buf[0] != 'L' && buf[0] != 'U') ||
+	    (buf[1] != '+' && buf[1] != '-'))
+		goto fail;
+
+	err = kstrtoint(buf + 1, 10, &nlink_diff);
+	if (err < 0)
+		goto fail;
+
+	nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
+	nlink += nlink_diff;
+
+	if (nlink <= 0)
+		goto fail;
+
+	return nlink;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+			    upperdentry, err);
+	return fallback;
+}
+
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 {
 	struct inode *inode;
@@ -495,6 +602,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 	if (!S_ISDIR(realinode->i_mode) &&
 	    (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) {
 		struct inode *key = d_inode(lowerdentry ?: upperdentry);
+		unsigned int nlink;
 
 		inode = iget5_locked(dentry->d_sb, (unsigned long) key,
 				     ovl_inode_test, ovl_inode_set, key);
@@ -515,7 +623,9 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
 			goto out;
 		}
 
-		set_nlink(inode, realinode->i_nlink);
+		nlink = ovl_get_nlink(lowerdentry, upperdentry,
+				      realinode->i_nlink);
+		set_nlink(inode, nlink);
 	} else {
 		inode = new_inode(dentry->d_sb);
 		if (!inode)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 751b36a5c22f2..c1321ab382247 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -25,6 +25,7 @@ enum ovl_path_type {
 #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
 #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
 #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
+#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
 
 enum ovl_flag {
 	OVL_IMPURE,
@@ -229,6 +230,8 @@ void ovl_set_flag(unsigned long flag, struct inode *inode);
 bool ovl_test_flag(unsigned long flag, struct inode *inode);
 bool ovl_inuse_trylock(struct dentry *dentry);
 void ovl_inuse_unlock(struct dentry *dentry);
+int ovl_nlink_start(struct dentry *dentry, bool *locked);
+void ovl_nlink_end(struct dentry *dentry, bool locked);
 
 static inline bool ovl_is_impuredir(struct dentry *dentry)
 {
@@ -258,6 +261,8 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
 			 struct path *lowerstack, unsigned int numlower);
 
 /* inode.c */
+int ovl_set_nlink_upper(struct dentry *dentry);
+int ovl_set_nlink_lower(struct dentry *dentry);
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
 int ovl_getattr(const struct path *path, struct kstat *stat,
 		u32 request_mask, unsigned int flags);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index a290be449b8b6..04d5018e728ea 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -410,3 +410,69 @@ void ovl_inuse_unlock(struct dentry *dentry)
 		spin_unlock(&inode->i_lock);
 	}
 }
+
+/*
+ * Operations that change overlay inode and upper inode nlink need to be
+ * synchronized with copy up for persistent nlink accounting.
+ */
+int ovl_nlink_start(struct dentry *dentry, bool *locked)
+{
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+	const struct cred *old_cred;
+	int err;
+
+	if (!d_inode(dentry) || d_is_dir(dentry))
+		return 0;
+
+	/*
+	 * With inodes index is enabled, we store the union overlay nlink
+	 * in an xattr on the index inode. When whiting out lower hardlinks
+	 * we need to decrement the overlay persistent nlink, but before the
+	 * first copy up, we have no upper index inode to store the xattr.
+	 *
+	 * As a workaround, before whiteout/rename over of a lower hardlink,
+	 * copy up to create the upper index. Creating the upper index will
+	 * initialize the overlay nlink, so it could be dropped if unlink
+	 * or rename succeeds.
+	 *
+	 * TODO: implement metadata only index copy up when called with
+	 *       ovl_copy_up_flags(dentry, O_PATH).
+	 */
+	if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) &&
+	    d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			return err;
+	}
+
+	err = mutex_lock_interruptible(&oi->lock);
+	if (err)
+		return err;
+
+	if (!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		goto out;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	/*
+	 * The overlay inode nlink should be incremented/decremented IFF the
+	 * upper operation succeeds, along with nlink change of upper inode.
+	 * Therefore, before link/unlink/rename, we store the union nlink
+	 * value relative to the upper inode nlink in an upper inode xattr.
+	 */
+	err = ovl_set_nlink_upper(dentry);
+	revert_creds(old_cred);
+
+out:
+	if (err)
+		mutex_unlock(&oi->lock);
+	else
+		*locked = true;
+
+	return err;
+}
+
+void ovl_nlink_end(struct dentry *dentry, bool locked)
+{
+	if (locked)
+		mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+}
-- 
GitLab


From caf70cb2ba5dff85ea90f494a30075af92df13b0 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 21 Jun 2017 13:46:12 +0300
Subject: [PATCH 0694/1958] ovl: cleanup orphan index entries

index entry should live only as long as there are upper or lower
hardlinks.

Cleanup orphan index entries on mount and when dropping the last
overlay inode nlink.

When about to cleanup or link up to orphan index and the index inode
nlink > 1, admit that something went wrong and adjust overlay nlink
to index inode nlink - 1 to prevent it from dropping below zero.
This could happen when adding lower hardlinks underneath a mounted
overlay and then trying to unlink them.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     |  6 ++--
 fs/overlayfs/namei.c     |  5 +++
 fs/overlayfs/overlayfs.h |  3 ++
 fs/overlayfs/super.c     |  2 +-
 fs/overlayfs/util.c      | 66 +++++++++++++++++++++++++++++++++++++++-
 5 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 196a4e5450c07..69f4fc26ee398 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -510,9 +510,9 @@ int ovl_set_nlink_lower(struct dentry *dentry)
 	return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
 }
 
-static unsigned int ovl_get_nlink(struct dentry *lowerdentry,
-				  struct dentry *upperdentry,
-				  unsigned int fallback)
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback)
 {
 	int nlink_diff;
 	int nlink;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 2d8b6292fe21e..9bc0e580a5b3f 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -425,6 +425,11 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack,
 	if (err)
 		goto fail;
 
+	/* Check if index is orphan and don't warn before cleaning it */
+	if (d_inode(index)->i_nlink == 1 &&
+	    ovl_get_nlink(index, origin.dentry, 0) == 0)
+		err = -ENOENT;
+
 	dput(origin.dentry);
 out:
 	kfree(fh);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index c1321ab382247..60d26605e039e 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -263,6 +263,9 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
 /* inode.c */
 int ovl_set_nlink_upper(struct dentry *dentry);
 int ovl_set_nlink_lower(struct dentry *dentry);
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback);
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
 int ovl_getattr(const struct path *path, struct kstat *stat,
 		u32 request_mask, unsigned int flags);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f29ee08cf99f8..44dc2d6ffe0f0 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1069,7 +1069,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			if (err)
 				pr_err("overlayfs: failed to verify index dir origin\n");
 
-			/* Cleanup bad/stale index entries */
+			/* Cleanup bad/stale/orphan index entries */
 			if (!err)
 				err = ovl_indexdir_cleanup(ufs->indexdir,
 							   ufs->upper_mnt,
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 04d5018e728ea..c492ba75c6595 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -14,6 +14,8 @@
 #include <linux/xattr.h>
 #include <linux/exportfs.h>
 #include <linux/uuid.h>
+#include <linux/namei.h>
+#include <linux/ratelimit.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -411,6 +413,58 @@ void ovl_inuse_unlock(struct dentry *dentry)
 	}
 }
 
+/* Called must hold OVL_I(inode)->oi_lock */
+static void ovl_cleanup_index(struct dentry *dentry)
+{
+	struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
+	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+	struct dentry *upperdentry = ovl_dentry_upper(dentry);
+	struct dentry *index = NULL;
+	struct inode *inode;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name(lowerdentry, &name);
+	if (err)
+		goto fail;
+
+	inode = d_inode(upperdentry);
+	if (inode->i_nlink != 1) {
+		pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+				    upperdentry, inode->i_ino, inode->i_nlink);
+		/*
+		 * We either have a bug with persistent union nlink or a lower
+		 * hardlink was added while overlay is mounted. Adding a lower
+		 * hardlink and then unlinking all overlay hardlinks would drop
+		 * overlay nlink to zero before all upper inodes are unlinked.
+		 * As a safety measure, when that situation is detected, set
+		 * the overlay nlink to the index inode nlink minus one for the
+		 * index entry itself.
+		 */
+		set_nlink(d_inode(dentry), inode->i_nlink - 1);
+		ovl_set_nlink_upper(dentry);
+		goto out;
+	}
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	/* TODO: whiteout instead of cleanup to block future open by handle */
+	index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
+	err = PTR_ERR(index);
+	if (!IS_ERR(index))
+		err = ovl_cleanup(dir, index);
+	inode_unlock(dir);
+	if (err)
+		goto fail;
+
+out:
+	dput(index);
+	return;
+
+fail:
+	pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+	goto out;
+}
+
 /*
  * Operations that change overlay inode and upper inode nlink need to be
  * synchronized with copy up for persistent nlink accounting.
@@ -473,6 +527,16 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
 
 void ovl_nlink_end(struct dentry *dentry, bool locked)
 {
-	if (locked)
+	if (locked) {
+		if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
+		    d_inode(dentry)->i_nlink == 0) {
+			const struct cred *old_cred;
+
+			old_cred = ovl_override_creds(dentry->d_sb);
+			ovl_cleanup_index(dentry);
+			revert_creds(old_cred);
+		}
+
 		mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+	}
 }
-- 
GitLab


From 9412812ef54861081904f24ddaf176b957b98d40 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 25 May 2017 15:08:24 +0300
Subject: [PATCH 0695/1958] ovl: document copying layers restrictions with
 inodes index

The inodes index feature introduces a behavior change - on mount,
upper root origin file handle is verified to match the lower root dir.
This implies that copied layers cannot be mounted with the inodes index
feature enabled, without explicitly removing the upper dir origin xattr
and the index dir.

The inodes index feature is required to support:
- Prevent breaking hardlinks on copy up
- NFS export support (upcoming)
- Overlayfs snapshots (POC)

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/overlayfs.txt | 34 +++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index c9e884b526980..36f528a7fdd64 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -201,6 +201,40 @@ rightmost one and going left.  In the above example lower1 will be the
 top, lower2 the middle and lower3 the bottom layer.
 
 
+Sharing and copying layers
+--------------------------
+
+Lower layers may be shared among several overlay mounts and that is indeed
+a very common practice.  An overlay mount may use the same lower layer
+path as another overlay mount and it may use a lower layer path that is
+beneath or above the path of another overlay lower layer path.
+
+Using an upper layer path and/or a workdir path that are already used by
+another overlay mount is not allowed and will fail with EBUSY.  Using
+partially overlapping paths is not allowed but will not fail with EBUSY.
+
+Mounting an overlay using an upper layer path, where the upper layer path
+was previously used by another mounted overlay in combination with a
+different lower layer path, is allowed, unless the "inodes index" feature
+is enabled.
+
+With the "inodes index" feature, on the first time mount, an NFS file
+handle of the lower layer root directory, along with the UUID of the lower
+filesystem, are encoded and stored in the "trusted.overlay.origin" extended
+attribute on the upper layer root directory.  On subsequent mount attempts,
+the lower root directory file handle and lower filesystem UUID are compared
+to the stored origin in upper root directory.  On failure to verify the
+lower root origin, mount will fail with ESTALE.  An overlayfs mount with
+"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem
+does not support NFS export, lower filesystem does not have a valid UUID or
+if the upper filesystem does not support extended attributes.
+
+It is quite a common practice to copy overlay layers to a different
+directory tree on the same or different underlying filesystem, and even
+to a different machine.  With the "inodes index" feature, trying to mount
+the copied layers will fail the verification of the lower root file handle.
+
+
 Non-standard behavior
 ---------------------
 
-- 
GitLab


From f4439de118283159ff165e52036134a278ebf990 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 4 Jul 2017 22:04:06 +0300
Subject: [PATCH 0696/1958] ovl: mark parent impure and restore timestamp on
 ovl_link_up()

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/overlayfs/copy_up.c | 57 ++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index f193976560dee..acb6f97deb97c 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -316,48 +316,57 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	return err;
 }
 
-static int ovl_link_up(struct dentry *parent, struct dentry *dentry)
+struct ovl_copy_up_ctx {
+	struct dentry *parent;
+	struct dentry *dentry;
+	struct path lowerpath;
+	struct kstat stat;
+	struct kstat pstat;
+	const char *link;
+	struct dentry *destdir;
+	struct qstr destname;
+	struct dentry *workdir;
+	bool tmpfile;
+	bool origin;
+};
+
+static int ovl_link_up(struct ovl_copy_up_ctx *c)
 {
 	int err;
 	struct dentry *upper;
-	struct dentry *upperdir = ovl_dentry_upper(parent);
+	struct dentry *upperdir = ovl_dentry_upper(c->parent);
 	struct inode *udir = d_inode(upperdir);
 
-	err = ovl_set_nlink_lower(dentry);
+	/* Mark parent "impure" because it may now contain non-pure upper */
+	err = ovl_set_impure(c->parent, upperdir);
+	if (err)
+		return err;
+
+	err = ovl_set_nlink_lower(c->dentry);
 	if (err)
 		return err;
 
 	inode_lock_nested(udir, I_MUTEX_PARENT);
-	upper = lookup_one_len(dentry->d_name.name, upperdir,
-			       dentry->d_name.len);
+	upper = lookup_one_len(c->dentry->d_name.name, upperdir,
+			       c->dentry->d_name.len);
 	err = PTR_ERR(upper);
 	if (!IS_ERR(upper)) {
-		err = ovl_do_link(ovl_dentry_upper(dentry), udir, upper, true);
+		err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
+				  true);
 		dput(upper);
 
-		if (!err)
-			ovl_dentry_set_upper_alias(dentry);
+		if (!err) {
+			/* Restore timestamps on parent (best effort) */
+			ovl_set_timestamps(upperdir, &c->pstat);
+			ovl_dentry_set_upper_alias(c->dentry);
+		}
 	}
 	inode_unlock(udir);
-	ovl_set_nlink_upper(dentry);
+	ovl_set_nlink_upper(c->dentry);
 
 	return err;
 }
 
-struct ovl_copy_up_ctx {
-	struct dentry *parent;
-	struct dentry *dentry;
-	struct path lowerpath;
-	struct kstat stat;
-	struct kstat pstat;
-	const char *link;
-	struct dentry *destdir;
-	struct qstr destname;
-	struct dentry *workdir;
-	bool tmpfile;
-	bool origin;
-};
-
 static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
 			    struct dentry **newdentry)
 {
@@ -629,7 +638,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 		if (!ovl_dentry_upper(dentry))
 			err = ovl_do_copy_up(&ctx);
 		if (!err && !ovl_dentry_has_upper_alias(dentry))
-			err = ovl_link_up(parent, dentry);
+			err = ovl_link_up(&ctx);
 		ovl_copy_up_end(dentry);
 	}
 	do_delayed_call(&done);
-- 
GitLab


From 26408b24345b1afbc08727e7a899d59db0d6f21d Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 30 Jun 2017 17:39:05 +0530
Subject: [PATCH 0697/1958] ACPI / power: constify attribute_group structures

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   4622	    304	      8	   4934	   1346	drivers/acpi/power.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   4846	     80	      8	   4934	   1346	drivers/acpi/power.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/power.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 3a6c9b741b233..1b475bc1ae169 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -352,7 +352,7 @@ static struct attribute *attrs[] = {
 	NULL,
 };
 
-static struct attribute_group attr_groups[] = {
+static const struct attribute_group attr_groups[] = {
 	[ACPI_STATE_D0] = {
 		.name = "power_resources_D0",
 		.attrs = attrs,
@@ -371,14 +371,14 @@ static struct attribute_group attr_groups[] = {
 	},
 };
 
-static struct attribute_group wakeup_attr_group = {
+static const struct attribute_group wakeup_attr_group = {
 	.name = "power_resources_wakeup",
 	.attrs = attrs,
 };
 
 static void acpi_power_hide_list(struct acpi_device *adev,
 				 struct list_head *resources,
-				 struct attribute_group *attr_group)
+				 const struct attribute_group *attr_group)
 {
 	struct acpi_power_resource_entry *entry;
 
@@ -397,7 +397,7 @@ static void acpi_power_hide_list(struct acpi_device *adev,
 
 static void acpi_power_expose_list(struct acpi_device *adev,
 				   struct list_head *resources,
-				   struct attribute_group *attr_group)
+				   const struct attribute_group *attr_group)
 {
 	struct acpi_power_resource_entry *entry;
 	int ret;
@@ -425,7 +425,7 @@ static void acpi_power_expose_list(struct acpi_device *adev,
 
 static void acpi_power_expose_hide(struct acpi_device *adev,
 				   struct list_head *resources,
-				   struct attribute_group *attr_group,
+				   const struct attribute_group *attr_group,
 				   bool expose)
 {
 	if (expose)
-- 
GitLab


From 7e53626995d44c58ca148bf56e792d1bdd982e0b Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 30 Jun 2017 18:06:34 +0530
Subject: [PATCH 0698/1958] ACPI: BGRT: constify attribute_group structures

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bgrt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c
index df1c629205e7b..75af78361ce5b 100644
--- a/drivers/acpi/bgrt.c
+++ b/drivers/acpi/bgrt.c
@@ -76,7 +76,7 @@ static struct bin_attribute *bgrt_bin_attributes[] = {
 	NULL,
 };
 
-static struct attribute_group bgrt_attribute_group = {
+static const struct attribute_group bgrt_attribute_group = {
 	.attrs = bgrt_attributes,
 	.bin_attrs = bgrt_bin_attributes,
 };
-- 
GitLab


From 31945d0eb400f793feff61423db71cb6d866715a Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 30 Jun 2017 18:23:50 +0530
Subject: [PATCH 0699/1958] ACPI / LPSS: constify attribute_group structures

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 10347e3d73ad0..e51a1e98e62f4 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -576,7 +576,7 @@ static struct attribute *lpss_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group lpss_attr_group = {
+static const struct attribute_group lpss_attr_group = {
 	.attrs = lpss_attrs,
 	.name = "lpss_ltr",
 };
-- 
GitLab


From 9e4de6a8eee204af51d9e6a4d87f54c7b8b94f11 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 30 Jun 2017 18:32:49 +0530
Subject: [PATCH 0700/1958] ACPI / DPTF: constify attribute_group structures

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
    904	    496	      0	   1400	    578	drivers/acpi/dptf/dptf_power.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
    944	    432	      0	   1376	    560	drivers/acpi/dptf/dptf_power.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/dptf/dptf_power.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c
index 734642dc5008a..e1c242568341d 100644
--- a/drivers/acpi/dptf/dptf_power.c
+++ b/drivers/acpi/dptf/dptf_power.c
@@ -65,7 +65,7 @@ static struct attribute *dptf_power_attrs[] = {
 	NULL
 };
 
-static struct attribute_group dptf_power_attribute_group = {
+static const struct attribute_group dptf_power_attribute_group = {
 	.attrs = dptf_power_attrs,
 	.name = "dptf_power"
 };
-- 
GitLab


From ea4d12dabf872b496218cdc8e7874feef8676cdd Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Wed, 5 Jul 2017 02:14:10 +0800
Subject: [PATCH 0701/1958] bio-integrity: fix boolreturn.cocci warnings

block/bio-integrity.c:318:10-11: WARNING: return of 0/1 in function 'bio_integrity_prep' with return type bool

 Return statements in functions returning bool should use
 true/false instead of 1/0.
Generated by: scripts/coccinelle/misc/boolreturn.cocci

Fixes: e23947bd76f0 ("bio-integrity: fold bio_integrity_enabled to bio_integrity_prep")
CC: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index f733beab6ca2b..83e92beb3c9fe 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -316,7 +316,7 @@ bool bio_integrity_prep(struct bio *bio)
 					     bytes, offset);
 
 		if (ret == 0)
-			return 0;
+			return false;
 
 		if (ret < bytes)
 			break;
-- 
GitLab


From 8d1630f13754f1435d3ea7078829121c52f38d15 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Tue, 23 May 2017 13:37:05 +0100
Subject: [PATCH 0702/1958] MIPS: MIPS16e2: Identify ASE presence

Identify the presence of the MIPS16e2 ASE as per the architecture
specification[1], by checking for CP0 Config5.CA2 bit being 1[2].

References:

[1] "MIPS32 Architecture for Programmers: MIPS16e2 Application-Specific
    Extension Technical Reference Manual", Imagination Technologies
    Ltd., Document Number: MD01172, Revision 01.00, April 26, 2016,
    Section 1.2 "Software Detection of the ASE", p. 5

[2] "MIPS32 interAptiv Multiprocessing System Software User's Manual",
    Imagination Technologies Ltd., Document Number: MD00904, Revision
    02.01, June 15, 2016, Section 2.2.1.6 "Device Configuration 5 --
    Config5 (CP0 Register 16, Select 5)", pp. 71-72

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16094/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu-features.h | 3 +++
 arch/mips/include/asm/cpu.h          | 1 +
 arch/mips/include/asm/mipsregs.h     | 1 +
 arch/mips/kernel/cpu-probe.c         | 2 ++
 4 files changed, 7 insertions(+)

diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index d6ea8e7c51079..8baa9033b181d 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -138,6 +138,9 @@
 #ifndef cpu_has_mips16
 #define cpu_has_mips16		(cpu_data[0].ases & MIPS_ASE_MIPS16)
 #endif
+#ifndef cpu_has_mips16e2
+#define cpu_has_mips16e2	(cpu_data[0].ases & MIPS_ASE_MIPS16E2)
+#endif
 #ifndef cpu_has_mdmx
 #define cpu_has_mdmx		(cpu_data[0].ases & MIPS_ASE_MDMX)
 #endif
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index ce798594c868a..d0c152b989f8e 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -436,5 +436,6 @@ enum cpu_type_enum {
 #define MIPS_ASE_VZ		0x00000080 /* Virtualization ASE */
 #define MIPS_ASE_MSA		0x00000100 /* MIPS SIMD Architecture */
 #define MIPS_ASE_DSP3		0x00000200 /* Signal Processing ASE Rev 3*/
+#define MIPS_ASE_MIPS16E2	0x00000400 /* MIPS16e2 */
 
 #endif /* _ASM_CPU_H */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 6875b69f59f76..dbb0eceda2c6d 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -652,6 +652,7 @@
 #define MIPS_CONF5_SBRI		(_ULCAST_(1) << 6)
 #define MIPS_CONF5_FRE		(_ULCAST_(1) << 8)
 #define MIPS_CONF5_UFE		(_ULCAST_(1) << 9)
+#define MIPS_CONF5_CA2		(_ULCAST_(1) << 14)
 #define MIPS_CONF5_MSAEN	(_ULCAST_(1) << 27)
 #define MIPS_CONF5_EVA		(_ULCAST_(1) << 28)
 #define MIPS_CONF5_CV		(_ULCAST_(1) << 29)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 3f0d43ce994ab..d08afc7dc5072 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -845,6 +845,8 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c)
 		c->options |= MIPS_CPU_MVH;
 	if (cpu_has_mips_r6 && (config5 & MIPS_CONF5_VP))
 		c->options |= MIPS_CPU_VP;
+	if (config5 & MIPS_CONF5_CA2)
+		c->ases |= MIPS_ASE_MIPS16E2;
 
 	return config5 & MIPS_CONF_M;
 }
-- 
GitLab


From f3235d32075137e277a2e4ea0d7cef2b59480f4a Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Tue, 23 May 2017 13:38:19 +0100
Subject: [PATCH 0703/1958] MIPS: MIPS16e2: Subdecode extended LWSP/SWSP
 instructions

Implement extended LWSP/SWSP instruction subdecoding for the purpose of
unaligned GP-relative memory access emulation.

With the introduction of the MIPS16e2 ASE[1] the previously must-be-zero
3-bit field at bits 7..5 of the extended encodings of the instructions
selected with the LWSP and SWSP major opcodes has become a `sel' field,
acting as an opcode extension for additional operations.  In both cases
the `sel' value of 0 has retained the original operation, that is:

	LW	rx, offset(sp)

and:

	SW	rx, offset(sp)

for LWSP and SWSP respectively.  In hardware predating the MIPS16e2 ASE
other values may or may not have been decoded, architecturally yielding
unpredictable results, and in our unaligned memory access emulation we
have treated the 3-bit field as a don't-care, that is effectively making
all the possible encodings of the field alias to the architecturally
defined encoding of 0.

For the non-zero values of the `sel' field the MIPS16e2 ASE has in
particular defined these GP-relative operations:

	LW	rx, offset(gp)		# sel = 1
	LH	rx, offset(gp)		# sel = 2
	LHU	rx, offset(gp)		# sel = 4

and

	SW	rx, offset(gp)		# sel = 1
	SH	rx, offset(gp)		# sel = 2

for LWSP and SWSP respectively, which will trap with an Address Error
exception if the effective address calculated is not naturally-aligned
for the operation requested.  These operations have been selected for
unaligned access emulation, for consistency with the corresponding
regular MIPS and microMIPS operations.

For other non-zero values of the `sel' field the MIPS16e2 ASE has
defined further operations, which however either never trap with an
Address Error exception, such as LWL or GP-relative SB, or are not
supposed to be emulated, such as LL or SC.  These operations have been
selected to exclude from unaligned access emulation, should an Address
Error exception ever happen with them.

Subdecode the `sel' field in unaligned access emulation then for the
extended encodings of the instructions selected with the LWSP and SWSP
major opcodes, whenever support for the MIPS16e2 ASE has been detected
in hardware, and either emulate the operation requested or send SIGBUS
to the originating process, according to the selection described above.
For hardware implementing the MIPS16 ASE, however lacking MIPS16e2 ASE
support retain the original interpretation of the `sel' field.

The effects of this change are illustrated with the following user
program:

$ cat mips16e2-test.c
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	int64_t scratch[16] = { 0 };
	int32_t *tmp0, *tmp1, *tmp2;
	int i;

	scratch[0] = 0xc8c7c6c5c4c3c2c1;
	scratch[1] = 0xd0cfcecdcccbcac9;

	asm volatile(
		"move	%0, $sp\n\t"
		"move	%1, $gp\n\t"
		"move	$sp, %4\n\t"
		"addiu	%2, %4, 8\n\t"
		"move	$gp, %2\n\t"

		"lw	%2, 2($sp)\n\t"
		"sw	%2, 16(%4)\n\t"
		"lw	%2, 2($gp)\n\t"
		"sw	%2, 24(%4)\n\t"

		"lw	%2, 1($sp)\n\t"
		"sw	%2, 32(%4)\n\t"
		"lh	%2, 1($gp)\n\t"
		"sw	%2, 40(%4)\n\t"

		"lw	%2, 3($sp)\n\t"
		"sw	%2, 48(%4)\n\t"
		"lhu	%2, 3($gp)\n\t"
		"sw	%2, 56(%4)\n\t"

		"lw	%2, 0(%4)\n\t"
		"sw	%2, 66($sp)\n\t"
		"lw	%2, 8(%4)\n\t"
		"sw	%2, 82($gp)\n\t"

		"lw	%2, 0(%4)\n\t"
		"sw	%2, 97($sp)\n\t"
		"lw	%2, 8(%4)\n\t"
		"sh	%2, 113($gp)\n\t"

		"move	$gp, %1\n\t"
		"move	$sp, %0"
		: "=&d" (tmp0), "=&d" (tmp1), "=&d" (tmp2), "=m" (scratch)
		: "d" (scratch));

	for (i = 0; i < sizeof(scratch) / sizeof(*scratch); i += 2)
		printf("%016" PRIx64 "\t%016" PRIx64 "\n",
		       scratch[i], scratch[i + 1]);

	return 0;
}
$

to be compiled with:

$ gcc -mips16 -mips32r2 -Wa,-mmips16e2 -o mips16e2-test mips16e2-test.c
$

With 74Kf hardware, which does not implement the MIPS16e2 ASE, this
program produces the following output:

$ ./mips16e2-test
c8c7c6c5c4c3c2c1        d0cfcecdcccbcac9
00000000c6c5c4c3        00000000c6c5c4c3
00000000c5c4c3c2        00000000c5c4c3c2
00000000c7c6c5c4        00000000c7c6c5c4
0000c4c3c2c10000        0000000000000000
0000cccbcac90000        0000000000000000
000000c4c3c2c100        0000000000000000
000000cccbcac900        0000000000000000
$

regardless of whether the change has been applied or not.

With the change not applied and interAptive MR2 hardware[2], which does
implement the MIPS16e2 ASE, it produces the following output:

$ ./mips16e2-test
c8c7c6c5c4c3c2c1        d0cfcecdcccbcac9
00000000c6c5c4c3        00000000cecdcccb
00000000c5c4c3c2        00000000cdcccbca
00000000c7c6c5c4        00000000cfcecdcc
0000c4c3c2c10000        0000000000000000
0000000000000000        0000cccbcac90000
000000c4c3c2c100        0000000000000000
0000000000000000        000000cccbcac900
$

which shows that for GP-relative operations the correct trapping address
calculated from $gp has been obtained from the CP0 BadVAddr register and
so has data from the source operand, however masking and extension has
not been applied for halfword operations.

With the change applied and interAptive MR2 hardware the program
produces the following output:

$ ./mips16e2-test
c8c7c6c5c4c3c2c1        d0cfcecdcccbcac9
00000000c6c5c4c3        00000000cecdcccb
00000000c5c4c3c2        00000000ffffcbca
00000000c7c6c5c4        000000000000cdcc
0000c4c3c2c10000        0000000000000000
0000000000000000        0000cccbcac90000
000000c4c3c2c100        0000000000000000
0000000000000000        0000000000cac900
$

as expected.

References:

[1] "MIPS32 Architecture for Programmers: MIPS16e2 Application-Specific
    Extension Technical Reference Manual", Imagination Technologies
    Ltd., Document Number: MD01172, Revision 01.00, April 26, 2016

[2] "MIPS32 interAptiv Multiprocessing System Software User's Manual",
    Imagination Technologies Ltd., Document Number: MD00904, Revision
    02.01, June 15, 2016, Chapter 24 "MIPS16e Application-Specific
    Extension to the MIPS32 Instruction Set", pp. 871-883

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16095/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/unaligned.c | 39 ++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 67946bb98dd02..5eaf2578ac040 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -2010,6 +2010,8 @@ static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr)
 	u16 __user *pc16;
 	unsigned long origpc;
 	union mips16e_instruction mips16inst, oldinst;
+	unsigned int opcode;
+	int extended = 0;
 
 	origpc = regs->cp0_epc;
 	orig31 = regs->regs[31];
@@ -2022,6 +2024,7 @@ static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr)
 
 	/* skip EXTEND instruction */
 	if (mips16inst.ri.opcode == MIPS16e_extend_op) {
+		extended = 1;
 		pc16++;
 		__get_user(mips16inst.full, pc16);
 	} else if (delay_slot(regs)) {
@@ -2034,7 +2037,8 @@ static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr)
 			goto sigbus;
 	}
 
-	switch (mips16inst.ri.opcode) {
+	opcode = mips16inst.ri.opcode;
+	switch (opcode) {
 	case MIPS16e_i64_op:	/* I64 or RI64 instruction */
 		switch (mips16inst.i64.func) {	/* I64/RI64 func field check */
 		case MIPS16e_ldpc_func:
@@ -2054,9 +2058,40 @@ static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr)
 		goto sigbus;
 
 	case MIPS16e_swsp_op:
+		reg = reg16to32[mips16inst.ri.rx];
+		if (extended && cpu_has_mips16e2)
+			switch (mips16inst.ri.imm >> 5) {
+			case 0:		/* SWSP */
+			case 1:		/* SWGP */
+				break;
+			case 2:		/* SHGP */
+				opcode = MIPS16e_sh_op;
+				break;
+			default:
+				goto sigbus;
+			}
+		break;
+
 	case MIPS16e_lwpc_op:
+		reg = reg16to32[mips16inst.ri.rx];
+		break;
+
 	case MIPS16e_lwsp_op:
 		reg = reg16to32[mips16inst.ri.rx];
+		if (extended && cpu_has_mips16e2)
+			switch (mips16inst.ri.imm >> 5) {
+			case 0:		/* LWSP */
+			case 1:		/* LWGP */
+				break;
+			case 2:		/* LHGP */
+				opcode = MIPS16e_lh_op;
+				break;
+			case 4:		/* LHUGP */
+				opcode = MIPS16e_lhu_op;
+				break;
+			default:
+				goto sigbus;
+			}
 		break;
 
 	case MIPS16e_i8_op:
@@ -2070,7 +2105,7 @@ static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr)
 		break;
 	}
 
-	switch (mips16inst.ri.opcode) {
+	switch (opcode) {
 
 	case MIPS16e_lb_op:
 	case MIPS16e_lbu_op:
-- 
GitLab


From b82ce24426a4071da9529d726057e4e642948667 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 4 Jul 2017 12:21:12 +0800
Subject: [PATCH 0704/1958] crypto: sha1-ssse3 - Disable avx2

It has been reported that sha1-avx2 can cause page faults by reading
beyond the end of the input.  This patch disables it until it can be
fixed.

Cc: <stable@vger.kernel.org>
Fixes: 7c1da8d0d046 ("crypto: sha - SHA1 transform x86_64 AVX2")
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha1_ssse3_glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index fc61739150e7c..f960a043cdeba 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
 
 static bool avx2_usable(void)
 {
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+	if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
 		&& boot_cpu_has(X86_FEATURE_BMI1)
 		&& boot_cpu_has(X86_FEATURE_BMI2))
 		return true;
-- 
GitLab


From 8a05aa4ce3bf767b7bd72851da89c23d084d1ecb Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 5 Jul 2017 12:14:27 +0800
Subject: [PATCH 0705/1958] mtip32xx: avoid to read HOST_CAP from HW in
 .queue_rq()

It is observed reading the register from HW takes a bit long,
for example in my box, the following difference of 'perf report
--no-children fio ...' can be seen when running I/O:

1) V4.12 without patch
+    9.28%       fio  [mtip32xx]           [k] mtip_irq_handler
+    8.48%       fio  [mtip32xx]           [k] mtip_init_cmd_header

2) V4.12 with the following patch
+    9.14%       fio  [mtip32xx]           [k] mtip_irq_handler
......
+    1.14%       fio  [mtip32xx]           [k] mtip_init_cmd_header

IOPS can be increased by ~5% with this patch too.

Fixes: a4e84aae8139(mtip32xx: use runtime tag to initialize command header)
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/mtip32xx/mtip32xx.c | 4 ++--
 drivers/block/mtip32xx/mtip32xx.h | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 61b046f256ca7..ae910347b7cfb 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -174,7 +174,6 @@ static void mtip_init_cmd_header(struct request *rq)
 {
 	struct driver_data *dd = rq->q->queuedata;
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
 
 	/* Point the command headers at the command tables. */
 	cmd->command_header = dd->port->command_list +
@@ -182,7 +181,7 @@ static void mtip_init_cmd_header(struct request *rq)
 	cmd->command_header_dma = dd->port->command_list_dma +
 				(sizeof(struct mtip_cmd_hdr) * rq->tag);
 
-	if (host_cap_64)
+	if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
 		cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
 
 	cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
@@ -386,6 +385,7 @@ static void mtip_init_port(struct mtip_port *port)
 			 port->mmio + PORT_LST_ADDR_HI);
 		writel((port->rxfis_dma >> 16) >> 16,
 			 port->mmio + PORT_FIS_ADDR_HI);
+		set_bit(MTIP_PF_HOST_CAP_64, &port->flags);
 	}
 
 	writel(port->command_list_dma & 0xFFFFFFFF,
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e8286af50e16b..e20e55dab4436 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -140,6 +140,7 @@ enum {
 				(1 << MTIP_PF_SE_ACTIVE_BIT) |
 				(1 << MTIP_PF_DM_ACTIVE_BIT) |
 				(1 << MTIP_PF_TO_ACTIVE_BIT)),
+	MTIP_PF_HOST_CAP_64         = 10, /* cache HOST_CAP_64 */
 
 	MTIP_PF_SVC_THD_ACTIVE_BIT  = 4,
 	MTIP_PF_ISSUE_CMDS_BIT      = 5,
-- 
GitLab


From 74a1eb565c3f4ca55af2db8dc42c31ae311a79c0 Mon Sep 17 00:00:00 2001
From: Priyalee Kushwaha <priyalee.kushwaha@intel.com>
Date: Sat, 3 Jun 2017 10:21:24 -0700
Subject: [PATCH 0706/1958] platform/x86: intel_telemetry_debugfs: fix oops
 when load/unload module

This fixes an oops found while testing load/unload of the
intel_telemetry_debugfs module. module_init uses register_pm_notifier
for PM callbacks, but unregister_pm_notifier was missing from
module_exit.

 [ 97.481860] BUG: unable to handle kernel paging request at ffffffffa006f010
 [ 97.489742] IP: blocking_notifier_chain_register+0x3a/0xa0
 [ 97.495898] PGD 2e0a067
 [ 97.495899] PUD 2e0b063
 [ 97.498737] PMD 179e29067
 [ 97.501573] PTE 0

 [ 97.508423] Oops: 0000 1 PREEMPT SMP
 [ 97.512724] Modules linked in: intel_telemetry_debugfs intel_rapl gpio_keys dwc3 udc_core intel_telemetry_pltdrv intel_punit_ipc intel_telemetry_core rtc_cmos efivars x86_pkg_temp_thermal iwlwifi snd_hda_codec_hdmi soc_button_array btusb cfg80211 btrtl mei_me hci_uart btbcm mei btintel i915 bluetooth intel_pmc_ipc snd_hda_intel spi_pxa2xx_platform snd_hda_codec dwc3_pci snd_hda_core tpm_tis tpm_tis_core tpm efivarfs
 [ 97.558453] CPU: 0 PID: 889 Comm: modprobe Not tainted 4.11.0-rc6-intel-dev-bkc #1
 [ 97.566950] Hardware name: Intel Corp. Joule DVT3/SDS, BIOS GTPP181A.X64.0143.B30.1701132137 01/13/2017
 [ 97.577518] task: ffff8801793a21c0 task.stack: ffff8801793f0000
 [ 97.584162] RIP: 0010:blocking_notifier_chain_register+0x3a/0xa0
 [ 97.590903] RSP: 0018:ffff8801793f3c58 EFLAGS: 00010286
 [ 97.596802] RAX: ffffffffa006f000 RBX: ffffffff81e3ea20 RCX: 0000000000000000
 [ 97.604812] RDX: ffff880179eaf210 RSI: ffffffffa0131000 RDI: ffffffff81e3ea20
 [ 97.612821] RBP: ffff8801793f3c68 R08: 0000000000000006 R09: 000000000000005c
 [ 97.620847] R10: 0000000000000000 R11: 0000000000000006 R12: ffffffffa0131000
 [ 97.628855] R13: 0000000000000000 R14: ffff880176e35f48 R15: ffff8801793f3ea8
 [ 97.636865] FS: 00007f7eeba07700(0000) GS:ffff88017fc00000(0000) knlGS:0000000000000000
 [ 97.645948] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 [ 97.652423] CR2: ffffffffa006f010 CR3: 00000001775ef000 CR4: 00000000003406f0
 [ 97.660423] Call Trace:
 [ 97.663166] ? 0xffffffffa0031000
 [ 97.666885] register_pm_notifier+0x18/0x20
 [ 97.671581] telemetry_debugfs_init+0x92/0x1000

Signed-off-by: Priyalee Kushwaha <priyalee.kushwaha@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/intel_telemetry_debugfs.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index ef29f18b19516..4cc2f4ea0a250 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -97,11 +97,9 @@
 	} \
 }
 
-#ifdef CONFIG_PM_SLEEP
 static u8 suspend_prep_ok;
 static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
 static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
-#endif
 
 struct telemetry_susp_stats {
 	u32 shlw_swake_ctr;
@@ -807,7 +805,6 @@ static const struct file_operations telem_ioss_trc_verb_ops = {
 	.release	= single_release,
 };
 
-#ifdef CONFIG_PM_SLEEP
 static int pm_suspend_prep_cb(void)
 {
 	struct telemetry_evtlog evtlog[TELEM_MAX_OS_ALLOCATED_EVENTS];
@@ -937,7 +934,6 @@ static int pm_notification(struct notifier_block *this,
 static struct notifier_block pm_notifier = {
 	.notifier_call = pm_notification,
 };
-#endif /* CONFIG_PM_SLEEP */
 
 static int __init telemetry_debugfs_init(void)
 {
@@ -960,14 +956,13 @@ static int __init telemetry_debugfs_init(void)
 	if (err < 0)
 		return -EINVAL;
 
-
-#ifdef CONFIG_PM_SLEEP
 	register_pm_notifier(&pm_notifier);
-#endif /* CONFIG_PM_SLEEP */
 
 	debugfs_conf->telemetry_dbg_dir = debugfs_create_dir("telemetry", NULL);
-	if (!debugfs_conf->telemetry_dbg_dir)
-		return -ENOMEM;
+	if (!debugfs_conf->telemetry_dbg_dir) {
+		err = -ENOMEM;
+		goto out_pm;
+	}
 
 	f = debugfs_create_file("pss_info", S_IFREG | S_IRUGO,
 				debugfs_conf->telemetry_dbg_dir, NULL,
@@ -1014,6 +1009,8 @@ static int __init telemetry_debugfs_init(void)
 out:
 	debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
 	debugfs_conf->telemetry_dbg_dir = NULL;
+out_pm:
+	unregister_pm_notifier(&pm_notifier);
 
 	return err;
 }
@@ -1022,6 +1019,7 @@ static void __exit telemetry_debugfs_exit(void)
 {
 	debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
 	debugfs_conf->telemetry_dbg_dir = NULL;
+	unregister_pm_notifier(&pm_notifier);
 }
 
 late_initcall(telemetry_debugfs_init);
-- 
GitLab


From 7a3a51ab9467fb634dcfcd18eade61f418f540dd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 30 Jun 2017 10:52:18 +0300
Subject: [PATCH 0707/1958] platform/x86: intel_telemetry_debugfs: fix some
 error codes in init

There are bunch of "goto out;" paths where we don't set the error code.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/intel_telemetry_debugfs.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index 4cc2f4ea0a250..b8be058a87545 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -938,7 +938,7 @@ static struct notifier_block pm_notifier = {
 static int __init telemetry_debugfs_init(void)
 {
 	const struct x86_cpu_id *id;
-	int err = -ENOMEM;
+	int err;
 	struct dentry *f;
 
 	/* Only APL supported for now */
@@ -958,11 +958,10 @@ static int __init telemetry_debugfs_init(void)
 
 	register_pm_notifier(&pm_notifier);
 
+	err = -ENOMEM;
 	debugfs_conf->telemetry_dbg_dir = debugfs_create_dir("telemetry", NULL);
-	if (!debugfs_conf->telemetry_dbg_dir) {
-		err = -ENOMEM;
+	if (!debugfs_conf->telemetry_dbg_dir)
 		goto out_pm;
-	}
 
 	f = debugfs_create_file("pss_info", S_IFREG | S_IRUGO,
 				debugfs_conf->telemetry_dbg_dir, NULL,
-- 
GitLab


From bc8c47abd486014aae543ecdea59554f098df9df Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
Date: Fri, 23 Jun 2017 22:22:35 +0530
Subject: [PATCH 0708/1958] platform/x86: intel_telemetry: Add debugfs entry
 for S0ix residency

This adds a debugfs consumer for the exported kernel API
intel_pmc_read_s0ix_residency. This debugfs entry reads S0ix residency
directly from the PMC hardware counters.

TEST:
- echo freeze > /sys/power/state
- Wake the system, read the S0ix residency i.e.
  cat /sys/kernel/debug/telemetry/s0ix_residency_usec

Signed-off-by: Shanth Murthy <shanth.murthy@intel.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 .../platform/x86/intel_telemetry_debugfs.c    | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index b8be058a87545..cd21df982abd6 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -710,6 +710,24 @@ static const struct file_operations telem_socstate_ops = {
 	.release	= single_release,
 };
 
+static int telem_s0ix_res_get(void *data, u64 *val)
+{
+	u64 s0ix_total_res;
+	int ret;
+
+	ret = intel_pmc_s0ix_counter_read(&s0ix_total_res);
+	if (ret) {
+		pr_err("Failed to read S0ix residency");
+		return ret;
+	}
+
+	*val = s0ix_total_res;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(telem_s0ix_fops, telem_s0ix_res_get, NULL, "%llu\n");
+
 static int telem_pss_trc_verb_show(struct seq_file *s, void *unused)
 {
 	u32 verbosity;
@@ -987,6 +1005,14 @@ static int __init telemetry_debugfs_init(void)
 		goto out;
 	}
 
+	f = debugfs_create_file("s0ix_residency_usec", S_IFREG | S_IRUGO,
+				debugfs_conf->telemetry_dbg_dir,
+				NULL, &telem_s0ix_fops);
+	if (!f) {
+		pr_err("s0ix_residency_usec debugfs register failed\n");
+		goto out;
+	}
+
 	f = debugfs_create_file("pss_trace_verbosity", S_IFREG | S_IRUGO,
 				debugfs_conf->telemetry_dbg_dir, NULL,
 				&telem_pss_trc_verb_ops);
-- 
GitLab


From 0df4b805cbccbe3f8378f49c415adb2fcffdd3dc Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 1 Jul 2017 08:20:18 +0200
Subject: [PATCH 0709/1958] platform/x86: ideapad-laptop: Add IdeaPad
 V510-15IKB to no_hw_rfkill

Like other Lenovo models the IdeaPad V510-15IKB does not have an hw
rfkill switch. This results in hard-blocked radios after boot, resulting
in always blocked radios rendering them unusable.

Add the IdeaPad V510-15IKB to the no_hw_rfkill DMI list and allows using
the built-in radios.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/ideapad-laptop.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 527e5d9ab9bf8..f23e4ad7c5646 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -915,6 +915,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 		        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-15ISK"),
 		},
 	},
+	{
+		.ident = "Lenovo V510-15IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V510-15IKB"),
+		},
+	},
 	{
 		.ident = "Lenovo ideapad 310-15IKB",
 		.matches = {
-- 
GitLab


From 710c059c248a24609051f5a3dd1d8468cdc675b0 Mon Sep 17 00:00:00 2001
From: Yang Jiaxun <yjx@flygoat.com>
Date: Tue, 4 Jul 2017 14:39:19 +0000
Subject: [PATCH 0710/1958] platform/x86: ideapad-laptop: Add several models to
 no_hw_rfkill

Some Lenovo ideapad models do not have hardware rfkill switches, but
trying to read the rfkill switches through the ideapad-laptop module.
It caused to always reported blocking breaking wifi.

Fix it by adding those models to no_hw_rfkill_list.

Signed-off-by: Yang Jiaxun <yjx@flygoat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/ideapad-laptop.c | 70 +++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index f23e4ad7c5646..993366a48d5db 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -908,6 +908,27 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G50-30"),
 		},
 	},
+	{
+		.ident = "Lenovo V310-14IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-14IKB"),
+		},
+	},
+	{
+		.ident = "Lenovo V310-14ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-14ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo V310-15IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-15IKB"),
+		},
+	},
 	{
 		.ident = "Lenovo V310-15ISK",
 		.matches = {
@@ -922,6 +943,41 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V510-15IKB"),
 		},
 	},
+	{
+		.ident = "Lenovo ideapad 300-15IBR",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 300-15IBR"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 300-15IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 300-15IKB"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 300S-11IBR",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 300S-11BR"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 310-15ABR",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15ABR"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 310-15IAP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15IAP"),
+		},
+	},
 	{
 		.ident = "Lenovo ideapad 310-15IKB",
 		.matches = {
@@ -929,6 +985,20 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 		        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15IKB"),
 		},
 	},
+	{
+		.ident = "Lenovo ideapad 310-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad Y700-14ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-14ISK"),
+		},
+	},
 	{
 		.ident = "Lenovo ideapad Y700-15ACZ",
 		.matches = {
-- 
GitLab


From 5e8f42aa49d9c8686f5ca56a1734767c476c62d5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Jul 2017 18:34:39 +0300
Subject: [PATCH 0711/1958] platform/x86: ideapad-laptop: Fix indentation in
 DMI table

There are couple of places where 8 spaces are used instead of tabs.
Replace former by latter. No functional change intended.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/ideapad-laptop.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 993366a48d5db..603fc60509713 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -932,8 +932,8 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 	{
 		.ident = "Lenovo V310-15ISK",
 		.matches = {
-		        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-15ISK"),
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-15ISK"),
 		},
 	},
 	{
@@ -981,8 +981,8 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 	{
 		.ident = "Lenovo ideapad 310-15IKB",
 		.matches = {
-		        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15IKB"),
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15IKB"),
 		},
 	},
 	{
-- 
GitLab


From 2192b0baea395274ffcd38ec70e284389e3c243d Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 5 Jul 2017 12:07:52 -0700
Subject: [PATCH 0712/1958] xfs: fix contiguous dquot chunk iteration livelock

The patch below updated xfs_dq_get_next_id() to use the XFS iext
lookup helpers to locate the next quota id rather than to seek for
data in the quota file. The updated code fails to correctly handle
the case where the quota inode might have contiguous chunks part of
the same extent. In this case, the start block offset is calculated
based on the next expected id but the extent lookup returns the same
start offset as for the previous chunk. This causes the returned id
to go backwards and livelocks the quota iteration. This problem is
reproduced intermittently by generic/232.

To handle this case, check whether the startoff from the extent
lookup is behind the startoff calculated from the next quota id. If
so, bump up got.br_startoff to the specific file offset that is
expected to hold the next dquot chunk.

Fixes: bda250dbaf39 ("xfs: rewrite xfs_dq_get_next_id using xfs_iext_lookup_extent")
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_dquot.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 8848b397e7a81..f89f7b5241e68 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -733,10 +733,15 @@ xfs_dq_get_next_id(
 			return error;
 	}
 
-	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got))
+	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got)) {
+		/* contiguous chunk, bump startoff for the id calculation */
+		if (got.br_startoff < start)
+			got.br_startoff = start;
 		*id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
-	else
+	} else {
 		error = -ENOENT;
+	}
+
 	xfs_iunlock(quotip, lock_flags);
 
 	return error;
-- 
GitLab


From c4f07ecee22ec720936eb58e7c63b89a1e86bf32 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Mon, 26 Jun 2017 14:15:03 -0700
Subject: [PATCH 0713/1958] rtc: brcmstb-waketimer: Add Broadcom STB wake-timer

This adds support for the Broadcom STB wake-timer which is a timer in
the chip's 27Mhz clock domain that offers the ability to wake the system
(wake-up source) from suspend states (S2, S3, S5). It is supported using
the rtc framework allowing us to configure alarms for system wake-up.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Markus Mayer <mmayer@broadcom.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/Kconfig                 |  11 +
 drivers/rtc/Makefile                |   1 +
 drivers/rtc/rtc-brcmstb-waketimer.c | 330 ++++++++++++++++++++++++++++
 3 files changed, 342 insertions(+)
 create mode 100644 drivers/rtc/rtc-brcmstb-waketimer.c

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8d3b957283266..8e1aa67fe533f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -197,6 +197,17 @@ config RTC_DRV_AC100
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ac100.
 
+config RTC_DRV_BRCMSTB
+	tristate "Broadcom STB wake-timer"
+	depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST
+	default ARCH_BRCMSTB || BMIPS_GENERIC
+	help
+	  If you say yes here you get support for the wake-timer found on
+	  Broadcom STB SoCs (BCM7xxx).
+
+	  This driver can also be built as a module. If so, the module will
+	  be called rtc-brcmstb-waketimer.
+
 config RTC_DRV_AS3722
 	tristate "ams AS3722 RTC driver"
 	depends on MFD_AS3722
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 13857d2fce09b..df89cac1f9ae7 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
 obj-$(CONFIG_RTC_DRV_AT91SAM9)	+= rtc-at91sam9.o
 obj-$(CONFIG_RTC_DRV_AU1XXX)	+= rtc-au1xxx.o
 obj-$(CONFIG_RTC_DRV_BFIN)	+= rtc-bfin.o
+obj-$(CONFIG_RTC_DRV_BRCMSTB)	+= rtc-brcmstb-waketimer.o
 obj-$(CONFIG_RTC_DRV_BQ32K)	+= rtc-bq32k.o
 obj-$(CONFIG_RTC_DRV_BQ4802)	+= rtc-bq4802.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
diff --git a/drivers/rtc/rtc-brcmstb-waketimer.c b/drivers/rtc/rtc-brcmstb-waketimer.c
new file mode 100644
index 0000000000000..796ac792a381f
--- /dev/null
+++ b/drivers/rtc/rtc-brcmstb-waketimer.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_wakeup.h>
+#include <linux/reboot.h>
+#include <linux/rtc.h>
+#include <linux/stat.h>
+#include <linux/suspend.h>
+
+struct brcmstb_waketmr {
+	struct rtc_device *rtc;
+	struct device *dev;
+	void __iomem *base;
+	int irq;
+	struct notifier_block reboot_notifier;
+	struct clk *clk;
+	u32 rate;
+};
+
+#define BRCMSTB_WKTMR_EVENT		0x00
+#define BRCMSTB_WKTMR_COUNTER		0x04
+#define BRCMSTB_WKTMR_ALARM		0x08
+#define BRCMSTB_WKTMR_PRESCALER		0x0C
+#define BRCMSTB_WKTMR_PRESCALER_VAL	0x10
+
+#define BRCMSTB_WKTMR_DEFAULT_FREQ	27000000
+
+static inline void brcmstb_waketmr_clear_alarm(struct brcmstb_waketmr *timer)
+{
+	writel_relaxed(1, timer->base + BRCMSTB_WKTMR_EVENT);
+	(void)readl_relaxed(timer->base + BRCMSTB_WKTMR_EVENT);
+}
+
+static void brcmstb_waketmr_set_alarm(struct brcmstb_waketmr *timer,
+				      unsigned int secs)
+{
+	brcmstb_waketmr_clear_alarm(timer);
+
+	writel_relaxed(secs + 1, timer->base + BRCMSTB_WKTMR_ALARM);
+}
+
+static irqreturn_t brcmstb_waketmr_irq(int irq, void *data)
+{
+	struct brcmstb_waketmr *timer = data;
+
+	pm_wakeup_event(timer->dev, 0);
+
+	return IRQ_HANDLED;
+}
+
+struct wktmr_time {
+	u32 sec;
+	u32 pre;
+};
+
+static void wktmr_read(struct brcmstb_waketmr *timer,
+		       struct wktmr_time *t)
+{
+	u32 tmp;
+
+	do {
+		t->sec = readl_relaxed(timer->base + BRCMSTB_WKTMR_COUNTER);
+		tmp = readl_relaxed(timer->base + BRCMSTB_WKTMR_PRESCALER_VAL);
+	} while (tmp >= timer->rate);
+
+	t->pre = timer->rate - tmp;
+}
+
+static int brcmstb_waketmr_prepare_suspend(struct brcmstb_waketmr *timer)
+{
+	struct device *dev = timer->dev;
+	int ret = 0;
+
+	if (device_may_wakeup(dev)) {
+		ret = enable_irq_wake(timer->irq);
+		if (ret) {
+			dev_err(dev, "failed to enable wake-up interrupt\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/* If enabled as a wakeup-source, arm the timer when powering off */
+static int brcmstb_waketmr_reboot(struct notifier_block *nb,
+		unsigned long action, void *data)
+{
+	struct brcmstb_waketmr *timer;
+
+	timer = container_of(nb, struct brcmstb_waketmr, reboot_notifier);
+
+	/* Set timer for cold boot */
+	if (action == SYS_POWER_OFF)
+		brcmstb_waketmr_prepare_suspend(timer);
+
+	return NOTIFY_DONE;
+}
+
+static int brcmstb_waketmr_gettime(struct device *dev,
+				   struct rtc_time *tm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	struct wktmr_time now;
+
+	wktmr_read(timer, &now);
+
+	rtc_time_to_tm(now.sec, tm);
+
+	return 0;
+}
+
+static int brcmstb_waketmr_settime(struct device *dev,
+				   struct rtc_time *tm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	time64_t sec;
+
+	sec = rtc_tm_to_time64(tm);
+
+	if (sec > U32_MAX || sec < 0)
+		return -EINVAL;
+
+	writel_relaxed(sec, timer->base + BRCMSTB_WKTMR_COUNTER);
+
+	return 0;
+}
+
+static int brcmstb_waketmr_getalarm(struct device *dev,
+				    struct rtc_wkalrm *alarm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	time64_t sec;
+	u32 reg;
+
+	sec = readl_relaxed(timer->base + BRCMSTB_WKTMR_ALARM);
+	if (sec != 0) {
+		/* Alarm is enabled */
+		alarm->enabled = 1;
+		rtc_time64_to_tm(sec, &alarm->time);
+	}
+
+	reg = readl_relaxed(timer->base + BRCMSTB_WKTMR_EVENT);
+	alarm->pending = !!(reg & 1);
+
+	return 0;
+}
+
+static int brcmstb_waketmr_setalarm(struct device *dev,
+				     struct rtc_wkalrm *alarm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	time64_t sec;
+
+	if (alarm->enabled)
+		sec = rtc_tm_to_time64(&alarm->time);
+	else
+		sec = 0;
+
+	if (sec > U32_MAX || sec < 0)
+		return -EINVAL;
+
+	brcmstb_waketmr_set_alarm(timer, sec);
+
+	return 0;
+}
+
+/*
+ * Does not do much but keep the RTC class happy. We always support
+ * alarms.
+ */
+static int brcmstb_waketmr_alarm_enable(struct device *dev,
+					unsigned int enabled)
+{
+	return 0;
+}
+
+static const struct rtc_class_ops brcmstb_waketmr_ops = {
+	.read_time	= brcmstb_waketmr_gettime,
+	.set_time	= brcmstb_waketmr_settime,
+	.read_alarm	= brcmstb_waketmr_getalarm,
+	.set_alarm	= brcmstb_waketmr_setalarm,
+	.alarm_irq_enable = brcmstb_waketmr_alarm_enable,
+};
+
+static int brcmstb_waketmr_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct brcmstb_waketmr *timer;
+	struct resource *res;
+	int ret;
+
+	timer = devm_kzalloc(dev, sizeof(*timer), GFP_KERNEL);
+	if (!timer)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, timer);
+	timer->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	timer->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(timer->base))
+		return PTR_ERR(timer->base);
+
+	/*
+	 * Set wakeup capability before requesting wakeup interrupt, so we can
+	 * process boot-time "wakeups" (e.g., from S5 soft-off)
+	 */
+	device_set_wakeup_capable(dev, true);
+	device_wakeup_enable(dev);
+
+	timer->irq = platform_get_irq(pdev, 0);
+	if (timer->irq < 0)
+		return -ENODEV;
+
+	timer->clk = devm_clk_get(dev, NULL);
+	if (!IS_ERR(timer->clk)) {
+		ret = clk_prepare_enable(timer->clk);
+		if (ret)
+			return ret;
+		timer->rate = clk_get_rate(timer->clk);
+		if (!timer->rate)
+			timer->rate = BRCMSTB_WKTMR_DEFAULT_FREQ;
+	} else {
+		timer->rate = BRCMSTB_WKTMR_DEFAULT_FREQ;
+		timer->clk = NULL;
+	}
+
+	ret = devm_request_irq(dev, timer->irq, brcmstb_waketmr_irq, 0,
+			       "brcmstb-waketimer", timer);
+	if (ret < 0)
+		return ret;
+
+	timer->reboot_notifier.notifier_call = brcmstb_waketmr_reboot;
+	register_reboot_notifier(&timer->reboot_notifier);
+
+	timer->rtc = rtc_device_register("brcmstb-waketmr", dev,
+					 &brcmstb_waketmr_ops, THIS_MODULE);
+	if (IS_ERR(timer->rtc)) {
+		dev_err(dev, "unable to register device\n");
+		unregister_reboot_notifier(&timer->reboot_notifier);
+		return PTR_ERR(timer->rtc);
+	}
+
+	dev_info(dev, "registered, with irq %d\n", timer->irq);
+
+	return ret;
+}
+
+static int brcmstb_waketmr_remove(struct platform_device *pdev)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(&pdev->dev);
+
+	unregister_reboot_notifier(&timer->reboot_notifier);
+	rtc_device_unregister(timer->rtc);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int brcmstb_waketmr_suspend(struct device *dev)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+
+	return brcmstb_waketmr_prepare_suspend(timer);
+}
+
+static int brcmstb_waketmr_resume(struct device *dev)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	int ret;
+
+	if (!device_may_wakeup(dev))
+		return 0;
+
+	ret = disable_irq_wake(timer->irq);
+
+	brcmstb_waketmr_clear_alarm(timer);
+
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(brcmstb_waketmr_pm_ops,
+			 brcmstb_waketmr_suspend, brcmstb_waketmr_resume);
+
+static const struct of_device_id brcmstb_waketmr_of_match[] = {
+	{ .compatible = "brcm,brcmstb-waketimer" },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver brcmstb_waketmr_driver = {
+	.probe			= brcmstb_waketmr_probe,
+	.remove			= brcmstb_waketmr_remove,
+	.driver = {
+		.name		= "brcmstb-waketimer",
+		.pm		= &brcmstb_waketmr_pm_ops,
+		.of_match_table	= of_match_ptr(brcmstb_waketmr_of_match),
+	}
+};
+module_platform_driver(brcmstb_waketmr_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Brian Norris");
+MODULE_AUTHOR("Markus Mayer");
+MODULE_DESCRIPTION("Wake-up timer driver for STB chips");
-- 
GitLab


From 078f3f6452c8be74fd406733f99f2cb0d9c50efc Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 5 Jun 2017 17:57:29 +0200
Subject: [PATCH 0714/1958] rtc: ds1307: use regmap_update_bits where
 applicable

After the switch to regmap we can now make use of regmap_update_bits
to simplify read/modify/write ops.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 82 ++++++++++------------------------------
 1 file changed, 20 insertions(+), 62 deletions(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 3cbb7636b405c..9b4106a7e8e4e 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -297,7 +297,7 @@ static irqreturn_t ds1307_irq(int irq, void *dev_id)
 {
 	struct ds1307		*ds1307 = dev_id;
 	struct mutex		*lock = &ds1307->rtc->ops_lock;
-	int			stat, control, ret;
+	int			stat, ret;
 
 	mutex_lock(lock);
 	ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &stat);
@@ -308,13 +308,11 @@ static irqreturn_t ds1307_irq(int irq, void *dev_id)
 		stat &= ~DS1337_BIT_A1I;
 		regmap_write(ds1307->regmap, DS1337_REG_STATUS, stat);
 
-		ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+		ret = regmap_update_bits(ds1307->regmap, DS1337_REG_CONTROL,
+					 DS1337_BIT_A1IE, 0);
 		if (ret)
 			goto out;
 
-		control &= ~DS1337_BIT_A1IE;
-		regmap_write(ds1307->regmap, DS1337_REG_CONTROL, control);
-
 		rtc_update_irq(ds1307->rtc, 1, RTC_AF | RTC_IRQF);
 	}
 
@@ -567,21 +565,13 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 static int ds1307_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct ds1307		*ds1307 = dev_get_drvdata(dev);
-	int			control, ret;
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -ENOTTY;
 
-	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
-	if (ret)
-		return ret;
-
-	if (enabled)
-		control |= DS1337_BIT_A1IE;
-	else
-		control &= ~DS1337_BIT_A1IE;
-
-	return regmap_write(ds1307->regmap, DS1337_REG_CONTROL, control);
+	return regmap_update_bits(ds1307->regmap, DS1337_REG_CONTROL,
+				  DS1337_BIT_A1IE,
+				  enabled ? DS1337_BIT_A1IE : 0);
 }
 
 static const struct rtc_class_ops ds13xx_rtc_ops = {
@@ -795,11 +785,8 @@ static irqreturn_t mcp794xx_irq(int irq, void *dev_id)
 		goto out;
 
 	/* Disable alarm 0. */
-	ret = regmap_read(ds1307->regmap, MCP794XX_REG_CONTROL, &reg);
-	if (ret)
-		goto out;
-	reg &= ~MCP794XX_BIT_ALM0_EN;
-	ret = regmap_write(ds1307->regmap, MCP794XX_REG_CONTROL, reg);
+	ret = regmap_update_bits(ds1307->regmap, MCP794XX_REG_CONTROL,
+				 MCP794XX_BIT_ALM0_EN, 0);
 	if (ret)
 		goto out;
 
@@ -897,21 +884,13 @@ static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 static int mcp794xx_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct ds1307 *ds1307 = dev_get_drvdata(dev);
-	int reg, ret;
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -EINVAL;
 
-	ret = regmap_read(ds1307->regmap, MCP794XX_REG_CONTROL, &reg);
-	if (ret)
-		return ret;
-
-	if (enabled)
-		reg |= MCP794XX_BIT_ALM0_EN;
-	else
-		reg &= ~MCP794XX_BIT_ALM0_EN;
-
-	return regmap_write(ds1307->regmap, MCP794XX_REG_CONTROL, reg);
+	return regmap_update_bits(ds1307->regmap, MCP794XX_REG_CONTROL,
+				  MCP794XX_BIT_ALM0_EN,
+				  enabled ? MCP794XX_BIT_ALM0_EN : 0);
 }
 
 static const struct rtc_class_ops mcp794xx_rtc_ops = {
@@ -1114,20 +1093,11 @@ static int ds3231_clk_sqw_rates[] = {
 static int ds1337_write_control(struct ds1307 *ds1307, u8 mask, u8 value)
 {
 	struct mutex *lock = &ds1307->rtc->ops_lock;
-	int control;
 	int ret;
 
 	mutex_lock(lock);
-
-	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
-	if (ret)
-		goto out;
-
-	control &= ~mask;
-	control |= value;
-
-	ret = regmap_write(ds1307->regmap, DS1337_REG_CONTROL, control);
-out:
+	ret = regmap_update_bits(ds1307->regmap, DS1337_REG_CONTROL,
+				 mask, value);
 	mutex_unlock(lock);
 
 	return ret;
@@ -1233,22 +1203,12 @@ static unsigned long ds3231_clk_32khz_recalc_rate(struct clk_hw *hw,
 static int ds3231_clk_32khz_control(struct ds1307 *ds1307, bool enable)
 {
 	struct mutex *lock = &ds1307->rtc->ops_lock;
-	int status;
 	int ret;
 
 	mutex_lock(lock);
-
-	ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &status);
-	if (ret)
-		goto out;
-
-	if (enable)
-		status |= DS3231_BIT_EN32KHZ;
-	else
-		status &= ~DS3231_BIT_EN32KHZ;
-
-	ret = regmap_write(ds1307->regmap, DS1337_REG_STATUS, status);
-out:
+	ret = regmap_update_bits(ds1307->regmap, DS1337_REG_STATUS,
+				 DS3231_BIT_EN32KHZ,
+				 enable ? DS3231_BIT_EN32KHZ : 0);
 	mutex_unlock(lock);
 
 	return ret;
@@ -1712,12 +1672,10 @@ static int ds1307_probe(struct i2c_client *client,
 	 * If different then set the wday which we computed using
 	 * timestamp
 	 */
-	if (wday != tm.tm_wday) {
-		regmap_read(ds1307->regmap, MCP794XX_REG_WEEKDAY, &wday);
-		wday = wday & ~MCP794XX_REG_WEEKDAY_WDAY_MASK;
-		wday = wday | (tm.tm_wday + 1);
-		regmap_write(ds1307->regmap, MCP794XX_REG_WEEKDAY, wday);
-	}
+	if (wday != tm.tm_wday)
+		regmap_update_bits(ds1307->regmap, MCP794XX_REG_WEEKDAY,
+				   MCP794XX_REG_WEEKDAY_WDAY_MASK,
+				   tm.tm_wday + 1);
 
 	if (want_irq) {
 		device_set_wakeup_capable(ds1307->dev, true);
-- 
GitLab


From e03c78ac2d7709b7556a9b8dd584e2969c6fce67 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 18 May 2017 02:14:57 +0900
Subject: [PATCH 0715/1958] um: Use printk instead of printf in make_uml_dir

Since this function will be called after printk buffer
initialized, use printk as other functions do.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/os-Linux/umid.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index c1dc89261f678..37cfaba47aa62 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -35,8 +35,9 @@ static int __init make_uml_dir(void)
 
 		err = -ENOENT;
 		if (home == NULL) {
-			printk(UM_KERN_ERR "make_uml_dir : no value in "
-			       "environment for $HOME\n");
+			printk(UM_KERN_ERR
+				"%s: no value in environment for $HOME\n",
+				__func__);
 			goto err;
 		}
 		strlcpy(dir, home, sizeof(dir));
@@ -50,13 +51,15 @@ static int __init make_uml_dir(void)
 	err = -ENOMEM;
 	uml_dir = malloc(strlen(dir) + 1);
 	if (uml_dir == NULL) {
-		printf("make_uml_dir : malloc failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : malloc failed, errno = %d\n",
+			__func__, errno);
 		goto err;
 	}
 	strcpy(uml_dir, dir);
 
 	if ((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)) {
-	        printf("Failed to mkdir '%s': %s\n", uml_dir, strerror(errno));
+		printk(UM_KERN_ERR "Failed to mkdir '%s': %s\n",
+			uml_dir, strerror(errno));
 		err = -errno;
 		goto err_free;
 	}
-- 
GitLab


From f7887ee11082b468517f31ae315b474eaa48b843 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 18 May 2017 02:16:05 +0900
Subject: [PATCH 0716/1958] um: Add os_info() for pre-boot information messages

Add os_info() for printing out pre-boot information
level messages in stderr. The messages via os_info()
are suppressed by "quiet" kernel command line.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/shared/os.h |  2 ++
 arch/um/os-Linux/util.c     | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index cd1fa97776c30..9e95bcebaf9b1 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -242,6 +242,8 @@ extern void setup_hostinfo(char *buf, int len);
 extern void os_dump_core(void) __attribute__ ((noreturn));
 extern void um_early_printk(const char *s, unsigned int n);
 extern void os_fix_helper_signals(void);
+extern void os_info(const char *fmt, ...)
+	__attribute__ ((format (printf, 1, 2)));
 
 /* time.c */
 extern void os_idle_sleep(unsigned long long nsecs);
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index faee55ef6d2f4..c9bad1bca1087 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -13,6 +13,7 @@
 #include <wait.h>
 #include <sys/mman.h>
 #include <sys/utsname.h>
+#include <init.h>
 #include <os.h>
 
 void stack_protections(unsigned long address)
@@ -152,3 +153,27 @@ void um_early_printk(const char *s, unsigned int n)
 {
 	printf("%.*s", n, s);
 }
+
+static int quiet_info;
+
+static int __init quiet_cmd_param(char *str, int *add)
+{
+	quiet_info = 1;
+	return 0;
+}
+
+__uml_setup("quiet", quiet_cmd_param,
+"quiet\n"
+"    Turns off information messages during boot.\n\n");
+
+void os_info(const char *fmt, ...)
+{
+	va_list list;
+
+	if (quiet_info)
+		return;
+
+	va_start(list, fmt);
+	vfprintf(stderr, fmt, list);
+	va_end(list);
+}
-- 
GitLab


From d3878bb8003009d148cd787e1a2d74cff914a7b2 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 18 May 2017 02:17:14 +0900
Subject: [PATCH 0717/1958] um: Use os_info for the messages on normal path

Use os_info() for printing out the messages on the
normal execution path.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/kernel/um_arch.c    |  8 ++++----
 arch/um/os-Linux/main.c     |  3 +--
 arch/um/os-Linux/mem.c      | 18 +++++++++---------
 arch/um/os-Linux/start_up.c | 26 ++++++++++++++------------
 4 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 64a1fd06f3fde..5df91d845a0d2 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -289,8 +289,8 @@ int __init linux_main(int argc, char **argv)
 
 	diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
 	if (diff > 1024 * 1024) {
-		printf("Adding %ld bytes to physical memory to account for "
-		       "exec-shield gap\n", diff);
+		os_info("Adding %ld bytes to physical memory to account for "
+			"exec-shield gap\n", diff);
 		physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
 	}
 
@@ -330,8 +330,8 @@ int __init linux_main(int argc, char **argv)
 	end_vm = start_vm + virtmem_size;
 
 	if (virtmem_size < physmem_size)
-		printf("Kernel virtual memory size shrunk to %lu bytes\n",
-		       virtmem_size);
+		os_info("Kernel virtual memory size shrunk to %lu bytes\n",
+			virtmem_size);
 
 	os_flush_stdout();
 
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 9d499de87e63e..f880dcd6ecc9c 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -184,14 +184,13 @@ int __init main(int argc, char **argv, char **envp)
 	 */
 	unblock_signals();
 
+	os_info("\n");
 	/* Reboot */
 	if (ret) {
-		printf("\n");
 		execvp(new_argv[0], new_argv);
 		perror("Failed to exec kernel");
 		ret = 1;
 	}
-	printf("\n");
 	return uml_exitcode;
 }
 
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 8b1767668515f..a7f6023301cd2 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -25,13 +25,13 @@ static int __init check_tmpfs(const char *dir)
 {
 	struct statfs st;
 
-	printf("Checking if %s is on tmpfs...", dir);
+	os_info("Checking if %s is on tmpfs...", dir);
 	if (statfs(dir, &st) < 0) {
-		printf("%s\n", strerror(errno));
+		os_info("%s\n", strerror(errno));
 	} else if (st.f_type != TMPFS_MAGIC) {
-		printf("no\n");
+		os_info("no\n");
 	} else {
-		printf("OK\n");
+		os_info("OK\n");
 		return 0;
 	}
 	return -1;
@@ -61,18 +61,18 @@ static char * __init choose_tempdir(void)
 	int i;
 	const char *dir;
 
-	printf("Checking environment variables for a tempdir...");
+	os_info("Checking environment variables for a tempdir...");
 	for (i = 0; vars[i]; i++) {
 		dir = getenv(vars[i]);
 		if ((dir != NULL) && (*dir != '\0')) {
-			printf("%s\n", dir);
+			os_info("%s\n", dir);
 			if (check_tmpfs(dir) >= 0)
 				goto done;
 			else
 				goto warn;
 		}
 	}
-	printf("none found\n");
+	os_info("none found\n");
 
 	for (i = 0; tmpfs_dirs[i]; i++) {
 		dir = tmpfs_dirs[i];
@@ -194,7 +194,7 @@ void __init check_tmpexec(void)
 
 	addr = mmap(NULL, UM_KERN_PAGE_SIZE,
 		    PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
-	printf("Checking PROT_EXEC mmap in %s...", tempdir);
+	os_info("Checking PROT_EXEC mmap in %s...", tempdir);
 	if (addr == MAP_FAILED) {
 		err = errno;
 		printf("%s\n", strerror(err));
@@ -203,7 +203,7 @@ void __init check_tmpexec(void)
 			printf("%s must be not mounted noexec\n", tempdir);
 		exit(1);
 	}
-	printf("OK\n");
+	os_info("OK\n");
 	munmap(addr, UM_KERN_PAGE_SIZE);
 
 	close(fd);
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 22a358ef1b0cd..f16e001998e58 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -166,7 +166,7 @@ static void __init check_sysemu(void)
 	unsigned long regs[MAX_REG_NR];
 	int pid, n, status, count=0;
 
-	non_fatal("Checking syscall emulation patch for ptrace...");
+	os_info("Checking syscall emulation patch for ptrace...");
 	sysemu_supported = 0;
 	pid = start_ptraced_child();
 
@@ -199,10 +199,10 @@ static void __init check_sysemu(void)
 		goto fail_stopped;
 
 	sysemu_supported = 1;
-	non_fatal("OK\n");
+	os_info("OK\n");
 	set_using_sysemu(!force_sysemu_disabled);
 
-	non_fatal("Checking advanced syscall emulation patch for ptrace...");
+	os_info("Checking advanced syscall emulation patch for ptrace...");
 	pid = start_ptraced_child();
 
 	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
@@ -244,7 +244,7 @@ static void __init check_sysemu(void)
 		goto fail_stopped;
 
 	sysemu_supported = 2;
-	non_fatal("OK\n");
+	os_info("OK\n");
 
 	if (!force_sysemu_disabled)
 		set_using_sysemu(sysemu_supported);
@@ -260,7 +260,7 @@ static void __init check_ptrace(void)
 {
 	int pid, syscall, n, status;
 
-	non_fatal("Checking that ptrace can change system call numbers...");
+	os_info("Checking that ptrace can change system call numbers...");
 	pid = start_ptraced_child();
 
 	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
@@ -292,7 +292,7 @@ static void __init check_ptrace(void)
 		}
 	}
 	stop_ptraced_child(pid, 0, 1);
-	non_fatal("OK\n");
+	os_info("OK\n");
 	check_sysemu();
 }
 
@@ -308,15 +308,17 @@ static void __init check_coredump_limit(void)
 		return;
 	}
 
-	printf("Core dump limits :\n\tsoft - ");
+	os_info("Core dump limits :\n\tsoft - ");
 	if (lim.rlim_cur == RLIM_INFINITY)
-		printf("NONE\n");
-	else printf("%lu\n", lim.rlim_cur);
+		os_info("NONE\n");
+	else
+		os_info("%llu\n", (unsigned long long)lim.rlim_cur);
 
-	printf("\thard - ");
+	os_info("\thard - ");
 	if (lim.rlim_max == RLIM_INFINITY)
-		printf("NONE\n");
-	else printf("%lu\n", lim.rlim_max);
+		os_info("NONE\n");
+	else
+		os_info("%llu\n", (unsigned long long)lim.rlim_max);
 }
 
 void __init os_early_checks(void)
-- 
GitLab


From 721ccae88d04f7e9334fde4cbb3eecfaa70a31ac Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 18 May 2017 02:18:22 +0900
Subject: [PATCH 0718/1958] um: Add os_warn() for pre-boot warning/error
 messages

Add os_warn() for printing out pre-boot warning/error
messages in stderr. The messages via os_warn() are not
suppressed by quiet option.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/shared/os.h | 2 ++
 arch/um/os-Linux/util.c     | 9 +++++++++
 2 files changed, 11 insertions(+)

diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 9e95bcebaf9b1..574e03fc7ba25 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -244,6 +244,8 @@ extern void um_early_printk(const char *s, unsigned int n);
 extern void os_fix_helper_signals(void);
 extern void os_info(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
+extern void os_warn(const char *fmt, ...)
+	__attribute__ ((format (printf, 1, 2)));
 
 /* time.c */
 extern void os_idle_sleep(unsigned long long nsecs);
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index c9bad1bca1087..8cc8b2617a670 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -177,3 +177,12 @@ void os_info(const char *fmt, ...)
 	vfprintf(stderr, fmt, list);
 	va_end(list);
 }
+
+void os_warn(const char *fmt, ...)
+{
+	va_list list;
+
+	va_start(list, fmt);
+	vfprintf(stderr, fmt, list);
+	va_end(list);
+}
-- 
GitLab


From 0936d4f3d5a3d5daf453bc341f02ba4e68cfe40e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 18 May 2017 02:19:31 +0900
Subject: [PATCH 0719/1958] um: Use os_warn to print out pre-boot warning/error
 messages

Use os_warn() instead of printf/fprintf to print out
pre-boot warning/error messages to stderr.
Note that the help message and version message are
kept to print out to stdout, because user explicitly
specifies those options to get such information.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/kernel/physmem.c    | 10 +++++-----
 arch/um/kernel/um_arch.c    |  8 +++++---
 arch/um/kernel/umid.c       |  4 ++--
 arch/um/os-Linux/execvp.c   |  2 +-
 arch/um/os-Linux/main.c     |  6 +++---
 arch/um/os-Linux/mem.c      | 10 +++++-----
 arch/um/os-Linux/start_up.c |  2 +-
 arch/um/os-Linux/umid.c     |  8 ++++----
 8 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 4c9861b421fd9..f02596e9931dc 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -89,8 +89,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 	offset = uml_reserved - uml_physmem;
 	map_size = len - offset;
 	if(map_size <= 0) {
-		printf("Too few physical memory! Needed=%d, given=%d\n",
-		       offset, len);
+		os_warn("Too few physical memory! Needed=%lu, given=%lu\n",
+			offset, len);
 		exit(1);
 	}
 
@@ -99,9 +99,9 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 	err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
 			    map_size, 1, 1, 1);
 	if (err < 0) {
-		printf("setup_physmem - mapping %ld bytes of memory at 0x%p "
-		       "failed - errno = %d\n", map_size,
-		       (void *) uml_reserved, err);
+		os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p "
+			"failed - errno = %d\n", map_size,
+			(void *) uml_reserved, err);
 		exit(1);
 	}
 
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 5df91d845a0d2..9c5d111f46bea 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -34,7 +34,7 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 };
 static void __init add_arg(char *arg)
 {
 	if (strlen(command_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) {
-		printf("add_arg: Too many command line arguments!\n");
+		os_warn("add_arg: Too many command line arguments!\n");
 		exit(1);
 	}
 	if (strlen(command_line) > 0)
@@ -126,6 +126,7 @@ static const char *usage_string =
 
 static int __init uml_version_setup(char *line, int *add)
 {
+	/* Explicitly use printf() to show version in stdout */
 	printf("%s\n", init_utsname()->release);
 	exit(0);
 
@@ -154,8 +155,8 @@ __uml_setup("root=", uml_root_setup,
 
 static int __init no_skas_debug_setup(char *line, int *add)
 {
-	printf("'debug' is not necessary to gdb UML in skas mode - run \n");
-	printf("'gdb linux'\n");
+	os_warn("'debug' is not necessary to gdb UML in skas mode - run\n");
+	os_warn("'gdb linux'\n");
 
 	return 0;
 }
@@ -171,6 +172,7 @@ static int __init Usage(char *line, int *add)
 
 	printf(usage_string, init_utsname()->release);
 	p = &__uml_help_start;
+	/* Explicitly use printf() to show help in stdout */
 	while (p < &__uml_help_end) {
 		printf("%s", *p);
 		p++;
diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
index f6cc3bd61781f..10bf4aca529f8 100644
--- a/arch/um/kernel/umid.c
+++ b/arch/um/kernel/umid.c
@@ -16,14 +16,14 @@ static int __init set_umid_arg(char *name, int *add)
 	int err;
 
 	if (umid_inited) {
-		printf("umid already set\n");
+		os_warn("umid already set\n");
 		return 0;
 	}
 
 	*add = 0;
 	err = set_umid(name);
 	if (err == -EEXIST)
-		printf("umid '%s' already in use\n", name);
+		os_warn("umid '%s' already in use\n", name);
 	else if (!err)
 		umid_inited = 1;
 
diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c
index 8fb25ca07c46f..84a0777c2a450 100644
--- a/arch/um/os-Linux/execvp.c
+++ b/arch/um/os-Linux/execvp.c
@@ -136,7 +136,7 @@ int main(int argc, char**argv)
 	int ret;
 	argc--;
 	if (!argc) {
-		fprintf(stderr, "Not enough arguments\n");
+		os_warn("Not enough arguments\n");
 		return 1;
 	}
 	argv++;
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index f880dcd6ecc9c..5f970ece5ac3c 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -74,8 +74,8 @@ static void install_fatal_handler(int sig)
 	action.sa_restorer = NULL;
 	action.sa_handler = last_ditch_exit;
 	if (sigaction(sig, &action, NULL) < 0) {
-		printf("failed to install handler for signal %d - errno = %d\n",
-		       sig, errno);
+		os_warn("failed to install handler for signal %d "
+			"- errno = %d\n", sig, errno);
 		exit(1);
 	}
 }
@@ -175,7 +175,7 @@ int __init main(int argc, char **argv, char **envp)
 	/* disable SIGIO for the fds and set SIGIO to be ignored */
 	err = deactivate_all_fds();
 	if (err)
-		printf("deactivate_all_fds failed, errno = %d\n", -err);
+		os_warn("deactivate_all_fds failed, errno = %d\n", -err);
 
 	/*
 	 * Let any pending signals fire now.  This ensures
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index a7f6023301cd2..e162a95ad7ddb 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -82,7 +82,7 @@ static char * __init choose_tempdir(void)
 
 	dir = fallback_dir;
 warn:
-	printf("Warning: tempdir %s is not on tmpfs\n", dir);
+	os_warn("Warning: tempdir %s is not on tmpfs\n", dir);
 done:
 	/* Make a copy since getenv results may not remain valid forever. */
 	return strdup(dir);
@@ -100,7 +100,7 @@ static int __init make_tempfile(const char *template)
 	if (tempdir == NULL) {
 		tempdir = choose_tempdir();
 		if (tempdir == NULL) {
-			fprintf(stderr, "Failed to choose tempdir: %s\n",
+			os_warn("Failed to choose tempdir: %s\n",
 				strerror(errno));
 			return -1;
 		}
@@ -125,7 +125,7 @@ static int __init make_tempfile(const char *template)
 	strcat(tempname, template);
 	fd = mkstemp(tempname);
 	if (fd < 0) {
-		fprintf(stderr, "open - cannot create %s: %s\n", tempname,
+		os_warn("open - cannot create %s: %s\n", tempname,
 			strerror(errno));
 		goto out;
 	}
@@ -197,10 +197,10 @@ void __init check_tmpexec(void)
 	os_info("Checking PROT_EXEC mmap in %s...", tempdir);
 	if (addr == MAP_FAILED) {
 		err = errno;
-		printf("%s\n", strerror(err));
+		os_warn("%s\n", strerror(err));
 		close(fd);
 		if (err == EPERM)
-			printf("%s must be not mounted noexec\n", tempdir);
+			os_warn("%s must be not mounted noexec\n", tempdir);
 		exit(1);
 	}
 	os_info("OK\n");
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index f16e001998e58..b1b6b75c5b173 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -351,7 +351,7 @@ int __init parse_iomem(char *str, int *add)
 	driver = str;
 	file = strchr(str,',');
 	if (file == NULL) {
-		fprintf(stderr, "parse_iomem : failed to parse iomem\n");
+		os_warn("parse_iomem : failed to parse iomem\n");
 		goto out;
 	}
 	*file = '\0';
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 37cfaba47aa62..998fbb4454586 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -354,7 +354,7 @@ char *get_umid(void)
 static int __init set_uml_dir(char *name, int *add)
 {
 	if (*name == '\0') {
-		printf("uml_dir can't be an empty string\n");
+		os_warn("uml_dir can't be an empty string\n");
 		return 0;
 	}
 
@@ -365,7 +365,7 @@ static int __init set_uml_dir(char *name, int *add)
 
 	uml_dir = malloc(strlen(name) + 2);
 	if (uml_dir == NULL) {
-		printf("Failed to malloc uml_dir - error = %d\n", errno);
+		os_warn("Failed to malloc uml_dir - error = %d\n", errno);
 
 		/*
 		 * Return 0 here because do_initcalls doesn't look at
@@ -390,8 +390,8 @@ static void remove_umid_dir(void)
 	sprintf(dir, "%s%s", uml_dir, umid);
 	err = remove_files_and_dir(dir);
 	if (err)
-		printf("remove_umid_dir - remove_files_and_dir failed with "
-		       "err = %d\n", err);
+		os_warn("%s - remove_files_and_dir failed with err = %d\n",
+			__func__, err);
 }
 
 __uml_exitcall(remove_umid_dir);
-- 
GitLab


From 745a600cf1a6230f5e3910c340dcb312b718e114 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 18 May 2017 02:20:39 +0900
Subject: [PATCH 0720/1958] um: console: Ignore console= option

Ignore linux kernel's console= option at uml's console
option handler. Since uml's con= option is only for
setting up new console, and Linux kernel's console=
option specify to which console kernel output its
message, we can use both option for different purpose.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/drivers/stdio_console.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 7b361f36ca965..c90817b04da96 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -192,6 +192,9 @@ __uml_exitcall(console_exit);
 
 static int console_chan_setup(char *str)
 {
+	if (!strncmp(str, "sole=", 5))	/* console= option specifies tty */
+		return 0;
+
 	line_setup(vt_conf, MAX_TTYS, &def_conf, str, "console");
 	return 1;
 }
-- 
GitLab


From f44f1e7da7c8e3f4575d5d61c4df978496903fcc Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 23 May 2017 17:32:31 -0700
Subject: [PATCH 0721/1958] um: Avoid longjmp/setjmp symbol clashes with
 libpthread.a

Building a statically linked UML kernel on a Centos 6.9 host resulted in
the following linking failure (GCC 4.4, glibc-2.12):

/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../lib64/libpthread.a(libpthread.o):
In function `siglongjmp':
(.text+0x8490): multiple definition of `longjmp'
arch/x86/um/built-in.o:/local/users/fainelli/openwrt/trunk/build_dir/target-x86_64_musl/linux-uml/linux-4.4.69/arch/x86/um/setjmp_64.S:44:
first defined here
/usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../lib64/libpthread.a(libpthread.o):
In function `sem_open':
(.text+0x77cd): warning: the use of `mktemp' is dangerous, better use
`mkstemp'
collect2: ld returned 1 exit status
make[4]: *** [vmlinux] Error 1

Adopt a solution similar to the one done for vmap where we define
longjmp/setjmp to be kernel_longjmp/setjmp. In the process, make sure we
do rename the functions in arch/x86/um/setjmp_*.S accordingly.

Fixes: a7df4716d195 ("um: link with -lpthread")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/Makefile        |  4 ++++
 arch/x86/um/setjmp_32.S | 16 ++++++++--------
 arch/x86/um/setjmp_64.S | 16 ++++++++--------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/arch/um/Makefile b/arch/um/Makefile
index 0ca46ededfc73..6ca4f66085c14 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -59,10 +59,14 @@ KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um
 # Same things for in6addr_loopback and mktime - found in libc. For these two we
 # only get link-time error, luckily.
 #
+# -Dlongjmp=kernel_longjmp prevents anything from referencing the libpthread.a
+# embedded copy of longjmp, same thing for setjmp.
+#
 # These apply to USER_CFLAGS to.
 
 KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
 	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap	\
+	-Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
 	-Din6addr_loopback=kernel_in6addr_loopback \
 	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
 
diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S
index b766792c99335..39053192918da 100644
--- a/arch/x86/um/setjmp_32.S
+++ b/arch/x86/um/setjmp_32.S
@@ -16,9 +16,9 @@
 
 	.text
 	.align 4
-	.globl setjmp
-	.type setjmp, @function
-setjmp:
+	.globl kernel_setjmp
+	.type kernel_setjmp, @function
+kernel_setjmp:
 #ifdef _REGPARM
 	movl %eax,%edx
 #else
@@ -35,13 +35,13 @@ setjmp:
 	movl %ecx,20(%edx)		# Return address
 	ret
 
-	.size setjmp,.-setjmp
+	.size kernel_setjmp,.-kernel_setjmp
 
 	.text
 	.align 4
-	.globl longjmp
-	.type longjmp, @function
-longjmp:
+	.globl kernel_longjmp
+	.type kernel_longjmp, @function
+kernel_longjmp:
 #ifdef _REGPARM
 	xchgl %eax,%edx
 #else
@@ -55,4 +55,4 @@ longjmp:
 	movl 16(%edx),%edi
 	jmp *20(%edx)
 
-	.size longjmp,.-longjmp
+	.size kernel_longjmp,.-kernel_longjmp
diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S
index 45f547b4043ee..c56942e1a38ca 100644
--- a/arch/x86/um/setjmp_64.S
+++ b/arch/x86/um/setjmp_64.S
@@ -18,9 +18,9 @@
 
 	.text
 	.align 4
-	.globl setjmp
-	.type setjmp, @function
-setjmp:
+	.globl kernel_setjmp
+	.type kernel_setjmp, @function
+kernel_setjmp:
 	pop  %rsi			# Return address, and adjust the stack
 	xorl %eax,%eax			# Return value
 	movq %rbx,(%rdi)
@@ -34,13 +34,13 @@ setjmp:
 	movq %rsi,56(%rdi)		# Return address
 	ret
 
-	.size setjmp,.-setjmp
+	.size kernel_setjmp,.-kernel_setjmp
 
 	.text
 	.align 4
-	.globl longjmp
-	.type longjmp, @function
-longjmp:
+	.globl kernel_longjmp
+	.type kernel_longjmp, @function
+kernel_longjmp:
 	movl %esi,%eax			# Return value (int)
 	movq (%rdi),%rbx
 	movq 8(%rdi),%rsp
@@ -51,4 +51,4 @@ longjmp:
 	movq 48(%rdi),%r15
 	jmp *56(%rdi)
 
-	.size longjmp,.-longjmp
+	.size kernel_longjmp,.-kernel_longjmp
-- 
GitLab


From 0a987645672ebde7844a9c0732a5a25f3d4bb6c6 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 25 May 2017 11:36:26 -0700
Subject: [PATCH 0722/1958] um: Allow building and running on older hosts

Commit a78ff1112263 ("um: add extended processor state save/restore
support") and b6024b21fec8 ("um: extend fpstate to _xstate to support
YMM registers") forced the use of the x86 FP _xstate and
PTRACE_GETREGSET/SETREGSET. On older hosts, we would neither be able to
build UML nor run it anymore with these two commits applied because we
don't have definitions for struct _xstate nor these two ptrace requests.

We can determine at build time which fp context structure to check
against, just like we can keep using the old i387 fp save/restore if
PTRACE_GETRESET/SETREGSET are not defined.

Fixes: a78ff1112263 ("um: add extended processor state save/restore support")
Fixes: b6024b21fec8 ("um: extend fpstate to _xstate to support YMM registers")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/os-Linux/registers.c | 12 ++++++++----
 arch/x86/um/user-offsets.c       |  4 ++++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 00f54a91bb4b0..28775f55bde24 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -26,6 +26,7 @@ int save_i387_registers(int pid, unsigned long *fp_regs)
 
 int save_fp_registers(int pid, unsigned long *fp_regs)
 {
+#ifdef PTRACE_GETREGSET
 	struct iovec iov;
 
 	if (have_xstate_support) {
@@ -34,9 +35,9 @@ int save_fp_registers(int pid, unsigned long *fp_regs)
 		if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
 			return -errno;
 		return 0;
-	} else {
+	} else
+#endif
 		return save_i387_registers(pid, fp_regs);
-	}
 }
 
 int restore_i387_registers(int pid, unsigned long *fp_regs)
@@ -48,6 +49,7 @@ int restore_i387_registers(int pid, unsigned long *fp_regs)
 
 int restore_fp_registers(int pid, unsigned long *fp_regs)
 {
+#ifdef PTRACE_SETREGSET
 	struct iovec iov;
 
 	if (have_xstate_support) {
@@ -56,9 +58,9 @@ int restore_fp_registers(int pid, unsigned long *fp_regs)
 		if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
 			return -errno;
 		return 0;
-	} else {
+	} else
+#endif
 		return restore_i387_registers(pid, fp_regs);
-	}
 }
 
 #ifdef __i386__
@@ -122,6 +124,7 @@ int put_fp_registers(int pid, unsigned long *regs)
 
 void arch_init_registers(int pid)
 {
+#ifdef PTRACE_GETREGSET
 	struct _xstate fp_regs;
 	struct iovec iov;
 
@@ -129,6 +132,7 @@ void arch_init_registers(int pid)
 	iov.iov_len = sizeof(struct _xstate);
 	if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0)
 		have_xstate_support = 1;
+#endif
 }
 #endif
 
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index cb3c22370cf58..8af0fb5d27807 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -50,7 +50,11 @@ void foo(void)
 	DEFINE(HOST_GS, GS);
 	DEFINE(HOST_ORIG_AX, ORIG_EAX);
 #else
+#if defined(PTRACE_GETREGSET) && defined(PTRACE_SETREGSET)
 	DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long));
+#else
+	DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
+#endif
 	DEFINE_LONGS(HOST_BX, RBX);
 	DEFINE_LONGS(HOST_CX, RCX);
 	DEFINE_LONGS(HOST_DI, RDI);
-- 
GitLab


From 1bcbfbfdeb0091036db7a32e1cd31b49cce5983a Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 24 May 2017 15:42:15 -0600
Subject: [PATCH 0723/1958] um: add dummy ioremap and iounmap functions

The user mode architecture does not provide ioremap or iounmap, and
because of this, the arch won't build when the functions are used in some
core libraries.

I have designs to use these functions in scatterlist.c where they'd
almost certainly never be called on the um architecture but it does need
to compile.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Stephen Bates <sbates@raithlin.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/asm/io.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 arch/um/include/asm/io.h

diff --git a/arch/um/include/asm/io.h b/arch/um/include/asm/io.h
new file mode 100644
index 0000000000000..8f35d574f35bc
--- /dev/null
+++ b/arch/um/include/asm/io.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_UM_IO_H
+#define _ASM_UM_IO_H
+
+#define ioremap ioremap
+static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
+{
+	return (void __iomem *)(unsigned long)offset;
+}
+
+#define iounmap iounmap
+static inline void iounmap(void __iomem *addr)
+{
+}
+
+#include <asm-generic/io.h>
+
+#endif
-- 
GitLab


From 4afb9996a254a8ec33801f4b33992d45670164b2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 13 Jun 2017 16:47:55 -0700
Subject: [PATCH 0724/1958] ubifs: require key for truncate(2) of encrypted
 file

Currently, filesystems allow truncate(2) on an encrypted file without
the encryption key.  However, it's impossible to correctly handle the
case where the size being truncated to is not a multiple of the
filesystem block size, because that would require decrypting the final
block, zeroing the part beyond i_size, then encrypting the block.

As other modifications to encrypted file contents are prohibited without
the key, just prohibit truncate(2) as well, making it fail with ENOKEY.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/file.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2cda3d67e2d01..ee3ff4c6bf4a5 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1284,6 +1284,14 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
+	if (ubifs_crypt_is_encrypted(inode) && (attr->ia_valid & ATTR_SIZE)) {
+		err = fscrypt_get_encryption_info(inode);
+		if (err)
+			return err;
+		if (!fscrypt_has_encryption_key(inode))
+			return -ENOKEY;
+	}
+
 	if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size)
 		/* Truncation to a smaller size */
 		err = do_truncation(c, inode, attr);
-- 
GitLab


From af65936a7a5bb8ec1d05b2a7670f84ca2014628d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 May 2017 17:39:44 -0700
Subject: [PATCH 0725/1958] ubifs: don't bother checking for encryption key in
 ->mmap()

Since only an open file can be mmap'ed, and we only allow open()ing an
encrypted file when its key is available, there is no need to check for
the key again before permitting each mmap().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/file.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index ee3ff4c6bf4a5..c1d352842ee2e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1615,15 +1615,6 @@ static const struct vm_operations_struct ubifs_file_vm_ops = {
 static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int err;
-	struct inode *inode = file->f_mapping->host;
-
-	if (ubifs_crypt_is_encrypted(inode)) {
-		err = fscrypt_get_encryption_info(inode);
-		if (err)
-			return -EACCES;
-		if (!fscrypt_has_encryption_key(inode))
-			return -ENOKEY;
-	}
 
 	err = generic_file_mmap(file, vma);
 	if (err)
-- 
GitLab


From da55b1ad4b29b6ab44d5dc8ea8306260246d2699 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Mon, 3 Jul 2017 07:59:26 -0500
Subject: [PATCH 0726/1958] PM / devfreq: rk3399_dmc: fix error return code in
 rk3399_dmcfreq_probe()

platform_get_irq() returns an error code, but the rk3399_dmc
driver ignores it and always returns -EINVAL. This is not correct,
and prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/rk3399_dmc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index 40a2499730fcb..1b89ebbad02c0 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -336,8 +336,9 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "Cannot get the dmc interrupt resource\n");
-		return -EINVAL;
+		dev_err(&pdev->dev,
+			"Cannot get the dmc interrupt resource: %d\n", irq);
+		return irq;
 	}
 	data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL);
 	if (!data)
-- 
GitLab


From 9e578b37505018622dfafc40eed7cd78ff2af221 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Mon, 3 Jul 2017 07:47:38 -0500
Subject: [PATCH 0727/1958] PM / devfreq: tegra: fix error return code in
 tegra_devfreq_probe()

platform_get_irq() returns an error code, but the tegra-devfreq
driver ignores it and always returns -ENODEV. This is not correct,
and prevents -EPROBE_DEFER from being propagated properly.

Notice that platform_get_irq() no longer returns 0 on error:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af

Print and propagate the return value of platform_get_irq on failure.

Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/tegra-devfreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
index 214fff96fa4a6..ae712159246fb 100644
--- a/drivers/devfreq/tegra-devfreq.c
+++ b/drivers/devfreq/tegra-devfreq.c
@@ -688,9 +688,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0) {
-		dev_err(&pdev->dev, "Failed to get IRQ\n");
-		return -ENODEV;
+	if (irq < 0) {
+		dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq);
+		return irq;
 	}
 
 	platform_set_drvdata(pdev, tegra);
-- 
GitLab


From 37d644aa01c092686af8a96f34df93c74e55b480 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 3 Jul 2017 15:40:04 +0530
Subject: [PATCH 0728/1958] PM / devfreq: constify attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with const
attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
    621	    176	      0	    797	    31d	drivers/devfreq/governor_userspace.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   670	    144	      0	    814	    32e	drivers/devfreq/governor_userspace.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/governor_userspace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 176976068bcd1..77028c27593c6 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -86,7 +86,7 @@ static struct attribute *dev_entries[] = {
 	&dev_attr_set_freq.attr,
 	NULL,
 };
-static struct attribute_group dev_attr_group = {
+static const struct attribute_group dev_attr_group = {
 	.name	= "userspace",
 	.attrs	= dev_entries,
 };
-- 
GitLab


From df8f4c6c0275216498340a64c7d2674377bd3e78 Mon Sep 17 00:00:00 2001
From: Ulrich Hecht <ulrich.hecht+renesas@gmail.com>
Date: Thu, 27 Apr 2017 16:37:43 +0200
Subject: [PATCH 0729/1958] dt-bindings: pwm: Add R-Car M3-W device tree
 bindings

Add device tree bindings for the PWM controller found on R-Car M3-W SoCs.

Signed-off-by: Ulrich Hecht <ulrich.hecht+renesas@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
index d6de643350223..7e94b802395d6 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
@@ -8,6 +8,7 @@ Required Properties:
  - "renesas,pwm-r8a7791": for R-Car M2-W
  - "renesas,pwm-r8a7794": for R-Car E2
  - "renesas,pwm-r8a7795": for R-Car H3
+ - "renesas,pwm-r8a7796": for R-Car M3-W
 - reg: base address and length of the registers block for the PWM.
 - #pwm-cells: should be 2. See pwm.txt in this directory for a description of
   the cells format.
-- 
GitLab


From d7f673d8a0776f3f791fd795b409060ba808b62a Mon Sep 17 00:00:00 2001
From: Fabrice GASNIER <fabrice.gasnier@st.com>
Date: Wed, 14 Jun 2017 17:13:16 +0200
Subject: [PATCH 0730/1958] dt-bindings: pwm: Update STM32 timers clock names

Clock name has been updated during driver/DT binding review:
https://lkml.org/lkml/2016/12/13/718
Update DT binding doc to reflect this.

Fixes: cd9a99c2f8e8 (dt-bindings: pwm: Add STM32 bindings)
Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/pwm-stm32.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
index 6dd040363e5ea..3e6d55018d7ac 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
@@ -24,7 +24,7 @@ Example:
 		compatible = "st,stm32-timers";
 		reg = <0x40010000 0x400>;
 		clocks = <&rcc 0 160>;
-		clock-names = "clk_int";
+		clock-names = "int";
 
 		pwm {
 			compatible = "st,stm32-pwm";
-- 
GitLab


From 0e1921dcd825004551bd4dd2df99697641434a52 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 16 May 2017 11:40:09 +0200
Subject: [PATCH 0731/1958] pwm: bfin: Remove unneeded error message

Omit an extra message for a memory allocation failure in this function.

This issue was detected by using the Coccinelle software.

Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf
Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-bfin.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/pwm/pwm-bfin.c b/drivers/pwm/pwm-bfin.c
index d2ed0a2a18e88..a9a88137f2cb4 100644
--- a/drivers/pwm/pwm-bfin.c
+++ b/drivers/pwm/pwm-bfin.c
@@ -118,10 +118,8 @@ static int bfin_pwm_probe(struct platform_device *pdev)
 	int ret;
 
 	pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
-	if (!pwm) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (!pwm)
 		return -ENOMEM;
-	}
 
 	platform_set_drvdata(pdev, pwm);
 
-- 
GitLab


From c571123c8a94cfbc88e70be4e8883529181417ce Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sat, 17 Jun 2017 12:26:44 -0300
Subject: [PATCH 0732/1958] pwm: Standardize document format

Each text file under Documentation follows a different format. Some
don't even have titles!

Change its representation to follow the adopted standard, using ReST
markup for it to be parseable by Sphinx:

- mark document title;
- mark literal blocks;
- better format the parameters.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/pwm.txt | 46 ++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
index 789b27c6ec996..8fbf0aa3ba2d5 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/pwm.txt
@@ -1,4 +1,6 @@
+======================================
 Pulse Width Modulation (PWM) interface
+======================================
 
 This provides an overview about the Linux PWM interface
 
@@ -16,7 +18,7 @@ Users of the legacy PWM API use unique IDs to refer to PWM devices.
 
 Instead of referring to a PWM device via its unique ID, board setup code
 should instead register a static mapping that can be used to match PWM
-consumers to providers, as given in the following example:
+consumers to providers, as given in the following example::
 
 	static struct pwm_lookup board_pwm_lookup[] = {
 		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
@@ -40,9 +42,9 @@ New users should use the pwm_get() function and pass to it the consumer
 device or a consumer name. pwm_put() is used to free the PWM device. Managed
 variants of these functions, devm_pwm_get() and devm_pwm_put(), also exist.
 
-After being requested, a PWM has to be configured using:
+After being requested, a PWM has to be configured using::
 
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
+	int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
 
 This API controls both the PWM period/duty_cycle config and the
 enable/disable state.
@@ -72,11 +74,14 @@ interface is provided to use the PWMs from userspace. It is exposed at
 pwmchipN, where N is the base of the PWM chip. Inside the directory you
 will find:
 
-npwm - The number of PWM channels this chip supports (read-only).
+  npwm
+    The number of PWM channels this chip supports (read-only).
 
-export - Exports a PWM channel for use with sysfs (write-only).
+  export
+    Exports a PWM channel for use with sysfs (write-only).
 
-unexport - Unexports a PWM channel from sysfs (write-only).
+  unexport
+   Unexports a PWM channel from sysfs (write-only).
 
 The PWM channels are numbered using a per-chip index from 0 to npwm-1.
 
@@ -84,21 +89,26 @@ When a PWM channel is exported a pwmX directory will be created in the
 pwmchipN directory it is associated with, where X is the number of the
 channel that was exported. The following properties will then be available:
 
-period - The total period of the PWM signal (read/write).
-	Value is in nanoseconds and is the sum of the active and inactive
-	time of the PWM.
+  period
+    The total period of the PWM signal (read/write).
+    Value is in nanoseconds and is the sum of the active and inactive
+    time of the PWM.
 
-duty_cycle - The active time of the PWM signal (read/write).
-	Value is in nanoseconds and must be less than the period.
+  duty_cycle
+    The active time of the PWM signal (read/write).
+    Value is in nanoseconds and must be less than the period.
 
-polarity - Changes the polarity of the PWM signal (read/write).
-	Writes to this property only work if the PWM chip supports changing
-	the polarity. The polarity can only be changed if the PWM is not
-	enabled. Value is the string "normal" or "inversed".
+  polarity
+    Changes the polarity of the PWM signal (read/write).
+    Writes to this property only work if the PWM chip supports changing
+    the polarity. The polarity can only be changed if the PWM is not
+    enabled. Value is the string "normal" or "inversed".
 
-enable - Enable/disable the PWM signal (read/write).
-	0 - disabled
-	1 - enabled
+  enable
+    Enable/disable the PWM signal (read/write).
+
+	- 0 - disabled
+	- 1 - enabled
 
 Implementing a PWM driver
 -------------------------
-- 
GitLab


From 93c292ef3cbb46db78853763fc4d9a5b9dfb97d5 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 23 May 2017 18:05:03 +0200
Subject: [PATCH 0733/1958] pwm: Silently error out on EPROBE_DEFER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In of_pwm_get(), if we fail to get the PWM chip due to probe deferal, we
shouldn't print an error message. Just be silent in this case.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index a0860b30bd931..1581f6ab1b1f4 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -678,7 +678,9 @@ struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id)
 
 	pc = of_node_to_pwmchip(args.np);
 	if (IS_ERR(pc)) {
-		pr_err("%s(): PWM chip not found\n", __func__);
+		if (PTR_ERR(pc) != -EPROBE_DEFER)
+			pr_err("%s(): PWM chip not found\n", __func__);
+
 		pwm = ERR_CAST(pc);
 		goto put;
 	}
-- 
GitLab


From c034a6fda0179a85cd571c386ed6748188b0a79c Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 13 Jun 2017 15:26:41 +0530
Subject: [PATCH 0734/1958] pwm: hibvt: Constify hibvt_pwm_ops

File size before:
   text	   data	    bss	    dec	    hex	filename
   1510	    296	      0	   1806	    70e	drivers/pwm/pwm-hibvt.o
File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   1606	    192	      0	   1798	    706	drivers/pwm/pwm-hibvt.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-hibvt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
index d0e8f8542626a..8dadc58d6cdfe 100644
--- a/drivers/pwm/pwm-hibvt.c
+++ b/drivers/pwm/pwm-hibvt.c
@@ -165,7 +165,7 @@ static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	return 0;
 }
 
-static struct pwm_ops hibvt_pwm_ops = {
+static const struct pwm_ops hibvt_pwm_ops = {
 	.get_state = hibvt_pwm_get_state,
 	.apply = hibvt_pwm_apply,
 
-- 
GitLab


From 5e599d73c1c1816af07f94ddba879499aa39b43c Mon Sep 17 00:00:00 2001
From: Marta Rybczynska <mrybczyn@kalray.eu>
Date: Tue, 6 Jun 2017 13:27:21 +0200
Subject: [PATCH 0735/1958] nvme-rdma: remove race conditions from IB
 signalling

This patch improves the way the RDMA IB signalling is done by using atomic
operations for the signalling variable. This avoids race conditions on
sig_count.

The signalling interval changes slightly and is now the largest power of
two not larger than queue depth / 2.

ilog() usage idea by Bart Van Assche.

Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
---
 drivers/nvme/host/rdma.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cfb22531fc161..0ab163e6e640e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -86,7 +86,7 @@ enum nvme_rdma_queue_flags {
 
 struct nvme_rdma_queue {
 	struct nvme_rdma_qe	*rsp_ring;
-	u8			sig_count;
+	atomic_t		sig_count;
 	int			queue_size;
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
@@ -523,6 +523,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
 	queue->queue_size = queue_size;
+	atomic_set(&queue->sig_count, 0);
 
 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -1009,17 +1010,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		nvme_rdma_wr_error(cq, wc, "SEND");
 }
 
-static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+/*
+ * We want to signal completion at least every queue depth/2.  This returns the
+ * largest power of two that is not above half of (queue size + 1) to optimize
+ * (avoid divisions).
+ */
+static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
 {
-	int sig_limit;
+	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
 
-	/*
-	 * We signal completion every queue depth/2 and also handle the
-	 * degenerated case of a  device with queue_depth=1, where we
-	 * would need to signal every message.
-	 */
-	sig_limit = max(queue->queue_size / 2, 1);
-	return (++queue->sig_count % sig_limit) == 0;
+	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
-- 
GitLab


From fb051339727cd8134dd6ef0305a120e6e265dded Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:33:32 +0300
Subject: [PATCH 0736/1958] nvme-rdma: quiesce/unquiesce admin_q instead of
 start/stop its hw queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.
Also make sure to kick the requeue list when appropriate.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0ab163e6e640e..6f8f9ffcbc005 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -779,7 +779,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	if (ctrl->ctrl.queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 
 	/* We must take care of fastfail/requeue all our inflight requests */
 	if (ctrl->ctrl.queue_count > 1)
@@ -792,7 +792,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	 * queues are not a live anymore, so restart the queues to fail fast
 	 * new IO
 	 */
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_start_queues(&ctrl->ctrl);
 
 	nvme_rdma_reconnect_or_remove(ctrl);
@@ -1636,9 +1636,10 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 	if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl);
 }
 
-- 
GitLab


From f9c5af5f8ff14a31468546b9b1a876d537019e9a Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:39:34 +0300
Subject: [PATCH 0737/1958] nvme-fc: quiesce/unquiesce admin_q instead of
 start/stop its hw queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.

Also, make sure to unquiesce before cleanup the admin queue.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-By: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 50cc3b2b0e118..8d55e78279322 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1703,6 +1703,7 @@ nvme_fc_ctrl_free(struct kref *ref)
 	list_del(&ctrl->ctrl_list);
 	spin_unlock_irqrestore(&ctrl->rport->lock, flags);
 
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 
@@ -2321,7 +2322,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		goto out_delete_hw_queue;
 
 	if (ctrl->ctrl.state != NVME_CTRL_NEW)
-		blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
@@ -2475,7 +2476,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * use blk_mq_tagset_busy_itr() and the transport routine to
 	 * terminate the exchanges.
 	 */
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 
-- 
GitLab


From c1c0ffff3a99caf40c3bd21447135db459194123 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:40:17 +0300
Subject: [PATCH 0738/1958] nvme-loop: quiesce/unquiesce admin_q instead of
 start/stop its hw queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/loop.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 3d51341e62ee5..6a0b70685e774 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -434,9 +434,10 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_loop_destroy_admin_queue(ctrl);
 }
 
-- 
GitLab


From c81545f991a6612d3bdab18a71b3487023ec6b69 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 15:53:27 +0300
Subject: [PATCH 0739/1958] nvme-pci: quiesce/unquiesce admin_q instead of
 start/stop its hw queues

unlike blk_mq_stop_hw_queues and blk_mq_start_stopped_hw_queues
quiescing/unquiescing respects the submission path rcu grace.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index eb729ff70e7d1..d9c0010a9bbcb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1125,7 +1125,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
-		blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
 
 	pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
 
@@ -1315,7 +1315,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
 		 * user requests may be waiting on a stopped queue. Start the
 		 * queue to flush these to completion.
 		 */
-		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -1352,7 +1352,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 			return -ENODEV;
 		}
 	} else
-		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 
 	return 0;
 }
-- 
GitLab


From 8d7b8fafad87c3404f72ce2d36c79c48be1129a6 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 18:16:58 +0300
Subject: [PATCH 0740/1958] nvme: kick requeue list when requeueing a request
 instead of when starting the queues

When we requeue a request, we can always insert the request
back to the scheduler instead of doing it when restarting
the queues and kicking the requeue work, so get rid of
the requeue kick in nvme (core and drivers).

Also, now there is no need start hw queues in nvme_kill_queues
We don't stop the hw queues anymore, so no need to
start them.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d70df1d0072d4..48cafaa6fbc50 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -131,7 +131,7 @@ void nvme_complete_rq(struct request *req)
 {
 	if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
 		nvme_req(req)->retries++;
-		blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+		blk_mq_requeue_request(req, true);
 		return;
 	}
 
@@ -2694,9 +2694,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	blk_mq_unquiesce_queue(ctrl->admin_q);
 
-	/* Forcibly start all queues to avoid having stuck requests */
-	blk_mq_start_hw_queues(ctrl->admin_q);
-
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
@@ -2709,16 +2706,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 
 		/* Forcibly unquiesce queues to avoid blocking dispatch */
 		blk_mq_unquiesce_queue(ns->queue);
-
-		/*
-		 * Forcibly start all queues to avoid having stuck requests.
-		 * Note that we must ensure the queues are not stopped
-		 * when the final removal happens.
-		 */
-		blk_mq_start_hw_queues(ns->queue);
-
-		/* draining requests in requeue list */
-		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
@@ -2787,10 +2774,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_unquiesce_queue(ns->queue);
-		blk_mq_kick_requeue_list(ns->queue);
-	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
-- 
GitLab


From b52c2e92546ee794a5bbab4d8ea435c1de85a8cb Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 09:57:09 +0300
Subject: [PATCH 0741/1958] nbd: quiesce request queues to make sure no
 submissions are inflight

Unlike blk_mq_stop_hw_queues, blk_mq_quiesce_queue respects the
submission path rcu grace. quiesce the queue before iterating
on live tags.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/block/nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 977ec960dd2f9..dea7d85134ee6 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -661,9 +661,9 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 
 static void nbd_clear_que(struct nbd_device *nbd)
 {
-	blk_mq_stop_hw_queues(nbd->disk->queue);
+	blk_mq_quiesce_queue(nbd->disk->queue);
 	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
-	blk_mq_start_hw_queues(nbd->disk->queue);
+	blk_mq_unquiesce_queue(nbd->disk->queue);
 	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 
-- 
GitLab


From 436c15ab6596b12cfee7618ecaa69a4341ac3c51 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 10:00:41 +0300
Subject: [PATCH 0742/1958] mtip32xx: quiesce request queues to make sure no
 submissions are inflight

Unlike blk_mq_stop_hw_queues, blk_mq_quiesce_queue respects the
submission path rcu grace. quiesce the queue before iterating
on live tags, or performing device io quiescing.

While were at it, verify that the request started in mtip_abort_cmd
amd mtip_queue_cmd tag iteration calls.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/block/mtip32xx/mtip32xx.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 61b046f256ca7..87717a1a5c89d 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -950,7 +950,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 	unsigned long to;
 	bool active = true;
 
-	blk_mq_stop_hw_queues(port->dd->queue);
+	blk_mq_quiesce_queue(port->dd->queue);
 
 	to = jiffies + msecs_to_jiffies(timeout);
 	do {
@@ -970,10 +970,10 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 			break;
 	} while (time_before(jiffies, to));
 
-	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+	blk_mq_unquiesce_queue(port->dd->queue);
 	return active ? -EBUSY : 0;
 err_fault:
-	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+	blk_mq_unquiesce_queue(port->dd->queue);
 	return -EFAULT;
 }
 
@@ -2737,6 +2737,9 @@ static void mtip_abort_cmd(struct request *req, void *data,
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
 
+	if (!blk_mq_request_started(req))
+		return;
+
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2749,6 +2752,9 @@ static void mtip_queue_cmd(struct request *req, void *data,
 {
 	struct driver_data *dd = data;
 
+	if (!blk_mq_request_started(req))
+		return;
+
 	set_bit(req->tag, dd->port->cmds_to_issue);
 	blk_abort_request(req);
 }
@@ -2814,6 +2820,8 @@ static int mtip_service_thread(void *data)
 				dev_warn(&dd->pdev->dev,
 					"Completion workers still active!");
 
+			blk_mq_quiesce_queue(dd->queue);
+
 			spin_lock(dd->queue->queue_lock);
 			blk_mq_tagset_busy_iter(&dd->tags,
 							mtip_queue_cmd, dd);
@@ -2826,6 +2834,8 @@ static int mtip_service_thread(void *data)
 							mtip_abort_cmd, dd);
 
 			clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
+
+			blk_mq_unquiesce_queue(dd->queue);
 		}
 
 		if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
@@ -3995,8 +4005,9 @@ static int mtip_block_remove(struct driver_data *dd)
 						dd->disk->disk_name);
 
 	blk_freeze_queue_start(dd->queue);
-	blk_mq_stop_hw_queues(dd->queue);
+	blk_mq_quiesce_queue(dd->queue);
 	blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
+	blk_mq_unquiesce_queue(dd->queue);
 
 	/*
 	 * Delete our gendisk structure. This also removes the device
-- 
GitLab


From 9b3e99058453399b506087b3ac99d67a80343333 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 4 Jul 2017 10:03:03 +0300
Subject: [PATCH 0743/1958] virtio_blk: quiesce/unquiesce live IO when entering
 PM states

Without it its not guaranteed that no .queue_rq is inflight.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: virtio-dev@lists.oasis-open.org
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/block/virtio_blk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0297ad7c1452a..4e02aa5fdac05 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -840,7 +840,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vblk->config_work);
 
-	blk_mq_stop_hw_queues(vblk->disk->queue);
+	blk_mq_quiesce_queue(vblk->disk->queue);
 
 	vdev->config->del_vqs(vdev);
 	return 0;
@@ -857,7 +857,7 @@ static int virtblk_restore(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
-	blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	blk_mq_unquiesce_queue(vblk->disk->queue);
 	return 0;
 }
 #endif
-- 
GitLab


From d09f2b45f346f0a9e5e1b5fcea531b1b393671dc Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 2 Jul 2017 10:56:43 +0300
Subject: [PATCH 0744/1958] nvme: split nvme_uninit_ctrl into stop and uninit

Usually before we teardown the controller we want to:
1. complete/cancel any ctrl inflight works
2. remove ctrl namespaces (only for removal though, resets
   shouldn't remove any namespaces).

but we do not want to destroy the controller device as
we might use it for logging during the teardown stage.

This patch adds nvme_start_ctrl() which queues inflight
controller works (aen, ns scan, queue start and keep-alive
if kato is set) and nvme_stop_ctrl() which cancels the works
namespace removal is left to the callers to handle.

Move nvme_uninit_ctrl after we are done with the
controller device.

Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c   | 21 +++++++++++++++++++--
 drivers/nvme/host/fc.c     | 16 ++++------------
 drivers/nvme/host/nvme.h   |  2 ++
 drivers/nvme/host/pci.c    | 16 ++++------------
 drivers/nvme/host/rdma.c   | 29 ++++++++---------------------
 drivers/nvme/target/loop.c | 19 ++++++-------------
 6 files changed, 43 insertions(+), 60 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 48cafaa6fbc50..cb96f4a7ae3a9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2591,12 +2591,29 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
 	spin_unlock(&dev_list_lock);
 }
 
-void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
+	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	flush_work(&ctrl->scan_work);
-	nvme_remove_namespaces(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
+
+void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+{
+	if (ctrl->kato)
+		nvme_start_keep_alive(ctrl);
+
+	if (ctrl->queue_count > 1) {
+		nvme_queue_scan(ctrl);
+		nvme_queue_async_events(ctrl);
+		nvme_start_queues(ctrl);
+	}
+}
+EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
 	device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
 
 	spin_lock(&dev_list_lock);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 8d55e78279322..d666ada39a9be 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2232,7 +2232,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 out_delete_hw_queues:
 	nvme_fc_delete_hw_io_queues(ctrl);
 out_cleanup_blk_queue:
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	blk_cleanup_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
 	blk_mq_free_tag_set(&ctrl->tag_set);
@@ -2366,8 +2365,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		goto out_disconnect_admin_queue;
 	}
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	/* FC-NVME supports normal SGL Data Block Descriptors */
 
 	if (opts->queue_size > ctrl->ctrl.maxcmd) {
@@ -2401,17 +2398,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return 0;	/* Success */
 
 out_term_aen_ops:
 	nvme_fc_term_aen_ops(ctrl);
-	nvme_stop_keep_alive(&ctrl->ctrl);
 out_disconnect_admin_queue:
 	/* send a Disconnect(association) LS to fc-nvme target */
 	nvme_fc_xmt_disconnect_assoc(ctrl);
@@ -2434,8 +2426,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 {
 	unsigned long flags;
 
-	nvme_stop_keep_alive(&ctrl->ctrl);
-
 	spin_lock_irqsave(&ctrl->lock, flags);
 	ctrl->flags |= FCCTRL_TERMIO;
 	ctrl->iocnt = 0;
@@ -2517,7 +2507,8 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
 	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
-
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	/*
 	 * kill the association on the link side.  this will block
 	 * waiting for io to terminate
@@ -2612,6 +2603,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
 	int ret;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e0b83311d5deb..8f2a168ddc013 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -280,6 +280,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		const struct nvme_ctrl_ops *ops, unsigned long quirks);
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_start_ctrl(struct nvme_ctrl *ctrl);
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
 void nvme_put_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_identify(struct nvme_ctrl *ctrl);
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d9c0010a9bbcb..882ed36771176 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2134,15 +2134,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	/*
-	 * A controller that can not execute IO typically requires user
-	 * intervention to correct. For such degraded controllers, the driver
-	 * should not submit commands the user did not request, so skip
-	 * registering for asynchronous event notification on this condition.
-	 */
-	if (dev->online_queues > 1)
-		nvme_queue_async_events(&dev->ctrl);
-
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
 	 * any working I/O queue.
@@ -2163,8 +2154,7 @@ static void nvme_reset_work(struct work_struct *work)
 		goto out;
 	}
 
-	if (dev->online_queues > 1)
-		nvme_queue_scan(&dev->ctrl);
+	nvme_start_ctrl(&dev->ctrl);
 	return;
 
  out:
@@ -2341,11 +2331,13 @@ static void nvme_remove(struct pci_dev *pdev)
 	}
 
 	flush_work(&dev->ctrl.reset_work);
-	nvme_uninit_ctrl(&dev->ctrl);
+	nvme_stop_ctrl(&dev->ctrl);
+	nvme_remove_namespaces(&dev->ctrl);
 	nvme_dev_disable(dev, true);
 	nvme_free_host_mem(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
+	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_release_prp_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6f8f9ffcbc005..ccdbd99d5a1c8 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -732,8 +732,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	if (ctrl->ctrl.queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
@@ -751,10 +749,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	WARN_ON_ONCE(!changed);
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
 
@@ -772,7 +767,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 			struct nvme_rdma_ctrl, err_work);
 	int i;
 
-	nvme_stop_keep_alive(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
 
 	for (i = 0; i < ctrl->ctrl.queue_count; i++)
 		clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
@@ -1603,8 +1598,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	return 0;
 
 out_cleanup_queue:
@@ -1622,7 +1615,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 {
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->reconnect_work);
 
@@ -1645,10 +1637,12 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 
 static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	if (shutdown)
 		nvme_rdma_shutdown_ctrl(ctrl);
 
+	nvme_uninit_ctrl(&ctrl->ctrl);
 	if (ctrl->ctrl.tagset) {
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
 		blk_mq_free_tag_set(&ctrl->tag_set);
@@ -1710,6 +1704,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	int ret;
 	bool changed;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl);
 
 	ret = nvme_rdma_configure_admin_queue(ctrl);
@@ -1739,11 +1734,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return;
 
@@ -1931,15 +1922,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return &ctrl->ctrl;
 
 out_remove_admin_queue:
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	nvme_rdma_destroy_admin_queue(ctrl);
 out_kfree_queues:
 	kfree(ctrl->queues);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 6a0b70685e774..717ed7ddb2f6d 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -407,8 +407,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	return 0;
 
 out_cleanup_queue:
@@ -422,8 +420,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 
 static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
-	nvme_stop_keep_alive(&ctrl->ctrl);
-
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
@@ -446,8 +442,10 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
 	struct nvme_loop_ctrl *ctrl = container_of(work,
 				struct nvme_loop_ctrl, delete_work);
 
-	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	nvme_loop_shutdown_ctrl(ctrl);
+	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
@@ -495,6 +493,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	bool changed;
 	int ret;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_loop_shutdown_ctrl(ctrl);
 
 	ret = nvme_loop_configure_admin_queue(ctrl);
@@ -515,10 +514,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	nvme_queue_scan(&ctrl->ctrl);
-	nvme_queue_async_events(&ctrl->ctrl);
-
-	nvme_start_queues(&ctrl->ctrl);
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return;
 
@@ -653,10 +649,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
 	mutex_unlock(&nvme_loop_ctrl_mutex);
 
-	if (opts->nr_io_queues) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return &ctrl->ctrl;
 
-- 
GitLab


From 842594c8775b585c58459e044708c0335b6aa6b7 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 5 Jul 2017 09:56:13 +0300
Subject: [PATCH 0745/1958] nvme-rdma: unconditionally recycle the request mr

When our RDMA queue-pair is torn down with high load
of I/O traffic, we have no way of knowing if the
memory region was actually registered by the reg_mr
work request as it completion flushes with error (hw
might have done it or not).

So in order to not deal with all this uncertanty, we
simply recycle the MR in reinit_request.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index ccdbd99d5a1c8..da04df1af2317 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -272,9 +272,6 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	int ret = 0;
 
-	if (!req->mr->need_inval)
-		goto out;
-
 	ib_dereg_mr(req->mr);
 
 	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-- 
GitLab


From 93e0dfb2c52f9cb7a7899156475b4e7b0bee7de3 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 30 May 2017 21:32:07 +0200
Subject: [PATCH 0746/1958] pwm: sun4i: Improve hardware read out

Implement .get_state instead of only reading the polarity at probe time.
This allows to get the proper state, period and duty cycle.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-sun4i.c | 65 +++++++++++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index 1284ffa05921f..175a69245a8aa 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c
@@ -44,6 +44,10 @@
 
 #define PWM_DTY_MASK		GENMASK(15, 0)
 
+#define PWM_REG_PRD(reg)	((((reg) >> 16) & PWM_PRD_MASK) + 1)
+#define PWM_REG_DTY(reg)	((reg) & PWM_DTY_MASK)
+#define PWM_REG_PRESCAL(reg, chan)	(((reg) >> ((chan) * PWMCH_OFFSET)) & PWM_PRESCAL_MASK)
+
 #define BIT_CH(bit, chan)	((bit) << ((chan) * PWMCH_OFFSET))
 
 static const u32 prescaler_table[] = {
@@ -96,6 +100,46 @@ static inline void sun4i_pwm_writel(struct sun4i_pwm_chip *chip,
 	writel(val, chip->base + offset);
 }
 
+static void sun4i_pwm_get_state(struct pwm_chip *chip,
+				struct pwm_device *pwm,
+				struct pwm_state *state)
+{
+	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
+	u64 clk_rate, tmp;
+	u32 val;
+	unsigned int prescaler;
+
+	clk_rate = clk_get_rate(sun4i_pwm->clk);
+
+	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
+
+	if ((val == PWM_PRESCAL_MASK) && sun4i_pwm->data->has_prescaler_bypass)
+		prescaler = 1;
+	else
+		prescaler = prescaler_table[PWM_REG_PRESCAL(val, pwm->hwpwm)];
+
+	if (prescaler == 0)
+		return;
+
+	if (val & BIT_CH(PWM_ACT_STATE, pwm->hwpwm))
+		state->polarity = PWM_POLARITY_NORMAL;
+	else
+		state->polarity = PWM_POLARITY_INVERSED;
+
+	if (val & BIT_CH(PWM_CLK_GATING | PWM_EN, pwm->hwpwm))
+		state->enabled = true;
+	else
+		state->enabled = false;
+
+	val = sun4i_pwm_readl(sun4i_pwm, PWM_CH_PRD(pwm->hwpwm));
+
+	tmp = prescaler * NSEC_PER_SEC * PWM_REG_DTY(val);
+	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate);
+
+	tmp = prescaler * NSEC_PER_SEC * PWM_REG_PRD(val);
+	state->period = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate);
+}
+
 static int sun4i_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			    int duty_ns, int period_ns)
 {
@@ -257,6 +301,7 @@ static const struct pwm_ops sun4i_pwm_ops = {
 	.set_polarity = sun4i_pwm_set_polarity,
 	.enable = sun4i_pwm_enable,
 	.disable = sun4i_pwm_disable,
+	.get_state = sun4i_pwm_get_state,
 	.owner = THIS_MODULE,
 };
 
@@ -316,8 +361,7 @@ static int sun4i_pwm_probe(struct platform_device *pdev)
 {
 	struct sun4i_pwm_chip *pwm;
 	struct resource *res;
-	u32 val;
-	int i, ret;
+	int ret;
 	const struct of_device_id *match;
 
 	match = of_match_device(sun4i_pwm_dt_ids, &pdev->dev);
@@ -353,24 +397,7 @@ static int sun4i_pwm_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, pwm);
 
-	ret = clk_prepare_enable(pwm->clk);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to enable PWM clock\n");
-		goto clk_error;
-	}
-
-	val = sun4i_pwm_readl(pwm, PWM_CTRL_REG);
-	for (i = 0; i < pwm->chip.npwm; i++)
-		if (!(val & BIT_CH(PWM_ACT_STATE, i)))
-			pwm_set_polarity(&pwm->chip.pwms[i],
-					 PWM_POLARITY_INVERSED);
-	clk_disable_unprepare(pwm->clk);
-
 	return 0;
-
-clk_error:
-	pwmchip_remove(&pwm->chip);
-	return ret;
 }
 
 static int sun4i_pwm_remove(struct platform_device *pdev)
-- 
GitLab


From c32c5c50d4fe156da1ecd32f2a16c1b8fcfba392 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 30 May 2017 21:32:08 +0200
Subject: [PATCH 0747/1958] pwm: sun4i: Switch to atomic PWM

Switch the driver to atomic PWM. This makes it easier to wait a proper
amount of time when changing the duty cycle before disabling the channel
(main use case is switching the duty cycle to 0 before disabling).

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-sun4i.c | 166 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)

diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index 175a69245a8aa..cd8737d0804f1 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c
@@ -8,8 +8,10 @@
 
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -81,6 +83,8 @@ struct sun4i_pwm_chip {
 	void __iomem *base;
 	spinlock_t ctrl_lock;
 	const struct sun4i_pwm_data *data;
+	unsigned long next_period[2];
+	bool needs_delay[2];
 };
 
 static inline struct sun4i_pwm_chip *to_sun4i_pwm_chip(struct pwm_chip *chip)
@@ -140,6 +144,167 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
 	state->period = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate);
 }
 
+static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
+			       struct pwm_state *state,
+			       u32 *dty, u32 *prd, unsigned int *prsclr)
+{
+	u64 clk_rate, div = 0;
+	unsigned int pval, prescaler = 0;
+
+	clk_rate = clk_get_rate(sun4i_pwm->clk);
+
+	if (sun4i_pwm->data->has_prescaler_bypass) {
+		/* First, test without any prescaler when available */
+		prescaler = PWM_PRESCAL_MASK;
+		pval = 1;
+		/*
+		 * When not using any prescaler, the clock period in nanoseconds
+		 * is not an integer so round it half up instead of
+		 * truncating to get less surprising values.
+		 */
+		div = clk_rate * state->period + NSEC_PER_SEC / 2;
+		do_div(div, NSEC_PER_SEC);
+		if (div - 1 > PWM_PRD_MASK)
+			prescaler = 0;
+	}
+
+	if (prescaler == 0) {
+		/* Go up from the first divider */
+		for (prescaler = 0; prescaler < PWM_PRESCAL_MASK; prescaler++) {
+			if (!prescaler_table[prescaler])
+				continue;
+			pval = prescaler_table[prescaler];
+			div = clk_rate;
+			do_div(div, pval);
+			div = div * state->period;
+			do_div(div, NSEC_PER_SEC);
+			if (div - 1 <= PWM_PRD_MASK)
+				break;
+		}
+
+		if (div - 1 > PWM_PRD_MASK)
+			return -EINVAL;
+	}
+
+	*prd = div;
+	div *= state->duty_cycle;
+	do_div(div, state->period);
+	*dty = div;
+	*prsclr = prescaler;
+
+	div = (u64)pval * NSEC_PER_SEC * *prd;
+	state->period = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
+
+	div = (u64)pval * NSEC_PER_SEC * *dty;
+	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
+
+	return 0;
+}
+
+static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			   struct pwm_state *state)
+{
+	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
+	struct pwm_state cstate;
+	u32 ctrl;
+	int ret;
+	unsigned int delay_us;
+	unsigned long now;
+
+	pwm_get_state(pwm, &cstate);
+
+	if (!cstate.enabled) {
+		ret = clk_prepare_enable(sun4i_pwm->clk);
+		if (ret) {
+			dev_err(chip->dev, "failed to enable PWM clock\n");
+			return ret;
+		}
+	}
+
+	spin_lock(&sun4i_pwm->ctrl_lock);
+	ctrl = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
+
+	if ((cstate.period != state->period) ||
+	    (cstate.duty_cycle != state->duty_cycle)) {
+		u32 period, duty, val;
+		unsigned int prescaler;
+
+		ret = sun4i_pwm_calculate(sun4i_pwm, state,
+					  &duty, &period, &prescaler);
+		if (ret) {
+			dev_err(chip->dev, "period exceeds the maximum value\n");
+			spin_unlock(&sun4i_pwm->ctrl_lock);
+			if (!cstate.enabled)
+				clk_disable_unprepare(sun4i_pwm->clk);
+			return ret;
+		}
+
+		if (PWM_REG_PRESCAL(ctrl, pwm->hwpwm) != prescaler) {
+			/* Prescaler changed, the clock has to be gated */
+			ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
+			sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG);
+
+			ctrl &= ~BIT_CH(PWM_PRESCAL_MASK, pwm->hwpwm);
+			ctrl |= BIT_CH(prescaler, pwm->hwpwm);
+		}
+
+		val = (duty & PWM_DTY_MASK) | PWM_PRD(period);
+		sun4i_pwm_writel(sun4i_pwm, val, PWM_CH_PRD(pwm->hwpwm));
+		sun4i_pwm->next_period[pwm->hwpwm] = jiffies +
+			usecs_to_jiffies(cstate.period / 1000 + 1);
+		sun4i_pwm->needs_delay[pwm->hwpwm] = true;
+	}
+
+	if (state->polarity != PWM_POLARITY_NORMAL)
+		ctrl &= ~BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
+	else
+		ctrl |= BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
+
+	ctrl |= BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
+	if (state->enabled) {
+		ctrl |= BIT_CH(PWM_EN, pwm->hwpwm);
+	} else if (!sun4i_pwm->needs_delay[pwm->hwpwm]) {
+		ctrl &= ~BIT_CH(PWM_EN, pwm->hwpwm);
+		ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
+	}
+
+	sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG);
+
+	spin_unlock(&sun4i_pwm->ctrl_lock);
+
+	if (state->enabled)
+		return 0;
+
+	if (!sun4i_pwm->needs_delay[pwm->hwpwm]) {
+		clk_disable_unprepare(sun4i_pwm->clk);
+		return 0;
+	}
+
+	/* We need a full period to elapse before disabling the channel. */
+	now = jiffies;
+	if (sun4i_pwm->needs_delay[pwm->hwpwm] &&
+	    time_before(now, sun4i_pwm->next_period[pwm->hwpwm])) {
+		delay_us = jiffies_to_usecs(sun4i_pwm->next_period[pwm->hwpwm] -
+					   now);
+		if ((delay_us / 500) > MAX_UDELAY_MS)
+			msleep(delay_us / 1000 + 1);
+		else
+			usleep_range(delay_us, delay_us * 2);
+	}
+	sun4i_pwm->needs_delay[pwm->hwpwm] = false;
+
+	spin_lock(&sun4i_pwm->ctrl_lock);
+	ctrl = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
+	ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
+	ctrl &= ~BIT_CH(PWM_EN, pwm->hwpwm);
+	sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG);
+	spin_unlock(&sun4i_pwm->ctrl_lock);
+
+	clk_disable_unprepare(sun4i_pwm->clk);
+
+	return 0;
+}
+
 static int sun4i_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			    int duty_ns, int period_ns)
 {
@@ -301,6 +466,7 @@ static const struct pwm_ops sun4i_pwm_ops = {
 	.set_polarity = sun4i_pwm_set_polarity,
 	.enable = sun4i_pwm_enable,
 	.disable = sun4i_pwm_disable,
+	.apply = sun4i_pwm_apply,
 	.get_state = sun4i_pwm_get_state,
 	.owner = THIS_MODULE,
 };
-- 
GitLab


From a054c4d68408cdbb260ba9384415fb53edb29d7a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 30 May 2017 21:32:09 +0200
Subject: [PATCH 0748/1958] pwm: sun4i: Drop legacy callbacks

Remove the legacy callbacks .enable(), .disable(), .set_polarity() and
.config().

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-sun4i.c | 160 ----------------------------------------
 1 file changed, 160 deletions(-)

diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index cd8737d0804f1..6d23f1d1c9b73 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c
@@ -305,167 +305,7 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	return 0;
 }
 
-static int sun4i_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-			    int duty_ns, int period_ns)
-{
-	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 prd, dty, val, clk_gate;
-	u64 clk_rate, div = 0;
-	unsigned int prescaler = 0;
-	int err;
-
-	clk_rate = clk_get_rate(sun4i_pwm->clk);
-
-	if (sun4i_pwm->data->has_prescaler_bypass) {
-		/* First, test without any prescaler when available */
-		prescaler = PWM_PRESCAL_MASK;
-		/*
-		 * When not using any prescaler, the clock period in nanoseconds
-		 * is not an integer so round it half up instead of
-		 * truncating to get less surprising values.
-		 */
-		div = clk_rate * period_ns + NSEC_PER_SEC / 2;
-		do_div(div, NSEC_PER_SEC);
-		if (div - 1 > PWM_PRD_MASK)
-			prescaler = 0;
-	}
-
-	if (prescaler == 0) {
-		/* Go up from the first divider */
-		for (prescaler = 0; prescaler < PWM_PRESCAL_MASK; prescaler++) {
-			if (!prescaler_table[prescaler])
-				continue;
-			div = clk_rate;
-			do_div(div, prescaler_table[prescaler]);
-			div = div * period_ns;
-			do_div(div, NSEC_PER_SEC);
-			if (div - 1 <= PWM_PRD_MASK)
-				break;
-		}
-
-		if (div - 1 > PWM_PRD_MASK) {
-			dev_err(chip->dev, "period exceeds the maximum value\n");
-			return -EINVAL;
-		}
-	}
-
-	prd = div;
-	div *= duty_ns;
-	do_div(div, period_ns);
-	dty = div;
-
-	err = clk_prepare_enable(sun4i_pwm->clk);
-	if (err) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return err;
-	}
-
-	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-
-	if (sun4i_pwm->data->has_rdy && (val & PWM_RDY(pwm->hwpwm))) {
-		spin_unlock(&sun4i_pwm->ctrl_lock);
-		clk_disable_unprepare(sun4i_pwm->clk);
-		return -EBUSY;
-	}
-
-	clk_gate = val & BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-	if (clk_gate) {
-		val &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-		sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-	}
-
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-	val &= ~BIT_CH(PWM_PRESCAL_MASK, pwm->hwpwm);
-	val |= BIT_CH(prescaler, pwm->hwpwm);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-
-	val = (dty & PWM_DTY_MASK) | PWM_PRD(prd);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CH_PRD(pwm->hwpwm));
-
-	if (clk_gate) {
-		val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-		val |= clk_gate;
-		sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-	}
-
-	spin_unlock(&sun4i_pwm->ctrl_lock);
-	clk_disable_unprepare(sun4i_pwm->clk);
-
-	return 0;
-}
-
-static int sun4i_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
-				  enum pwm_polarity polarity)
-{
-	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 val;
-	int ret;
-
-	ret = clk_prepare_enable(sun4i_pwm->clk);
-	if (ret) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return ret;
-	}
-
-	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-
-	if (polarity != PWM_POLARITY_NORMAL)
-		val &= ~BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
-	else
-		val |= BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
-
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-
-	spin_unlock(&sun4i_pwm->ctrl_lock);
-	clk_disable_unprepare(sun4i_pwm->clk);
-
-	return 0;
-}
-
-static int sun4i_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 val;
-	int ret;
-
-	ret = clk_prepare_enable(sun4i_pwm->clk);
-	if (ret) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return ret;
-	}
-
-	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-	val |= BIT_CH(PWM_EN, pwm->hwpwm);
-	val |= BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-	spin_unlock(&sun4i_pwm->ctrl_lock);
-
-	return 0;
-}
-
-static void sun4i_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 val;
-
-	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-	val &= ~BIT_CH(PWM_EN, pwm->hwpwm);
-	val &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-	spin_unlock(&sun4i_pwm->ctrl_lock);
-
-	clk_disable_unprepare(sun4i_pwm->clk);
-}
-
 static const struct pwm_ops sun4i_pwm_ops = {
-	.config = sun4i_pwm_config,
-	.set_polarity = sun4i_pwm_set_polarity,
-	.enable = sun4i_pwm_enable,
-	.disable = sun4i_pwm_disable,
 	.apply = sun4i_pwm_apply,
 	.get_state = sun4i_pwm_get_state,
 	.owner = THIS_MODULE,
-- 
GitLab


From d396b20a1e88ca2cabf7ec99c6c2138902aff1f3 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 8 Jun 2017 14:24:15 +0200
Subject: [PATCH 0749/1958] pwm: meson: Add compatible for the gxbb ao PWMs

On the gxbb (and gxl) family, the PWMs of the AO domain require a
specific compatible because the possible input clocks are different
from the EE PWMs input clocks.

Since the number of possible input clocks is also different, the
'num_parents' field is added to all the Meson PWM data.

Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-meson.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index 045ef9fa6fe39..defc27d880f3b 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -103,6 +103,7 @@ struct meson_pwm_channel {
 
 struct meson_pwm_data {
 	const char * const *parent_names;
+	unsigned int num_parents;
 };
 
 struct meson_pwm {
@@ -381,6 +382,7 @@ static const char * const pwm_meson8b_parent_names[] = {
 
 static const struct meson_pwm_data pwm_meson8b_data = {
 	.parent_names = pwm_meson8b_parent_names,
+	.num_parents = ARRAY_SIZE(pwm_meson8b_parent_names),
 };
 
 static const char * const pwm_gxbb_parent_names[] = {
@@ -389,11 +391,35 @@ static const char * const pwm_gxbb_parent_names[] = {
 
 static const struct meson_pwm_data pwm_gxbb_data = {
 	.parent_names = pwm_gxbb_parent_names,
+	.num_parents = ARRAY_SIZE(pwm_gxbb_parent_names),
+};
+
+/*
+ * Only the 2 first inputs of the GXBB AO PWMs are valid
+ * The last 2 are grounded
+ */
+static const char * const pwm_gxbb_ao_parent_names[] = {
+	"xtal", "clk81"
+};
+
+static const struct meson_pwm_data pwm_gxbb_ao_data = {
+	.parent_names = pwm_gxbb_ao_parent_names,
+	.num_parents = ARRAY_SIZE(pwm_gxbb_ao_parent_names),
 };
 
 static const struct of_device_id meson_pwm_matches[] = {
-	{ .compatible = "amlogic,meson8b-pwm", .data = &pwm_meson8b_data },
-	{ .compatible = "amlogic,meson-gxbb-pwm", .data = &pwm_gxbb_data },
+	{
+		.compatible = "amlogic,meson8b-pwm",
+		.data = &pwm_meson8b_data
+	},
+	{
+		.compatible = "amlogic,meson-gxbb-pwm",
+		.data = &pwm_gxbb_data
+	},
+	{
+		.compatible = "amlogic,meson-gxbb-ao-pwm",
+		.data = &pwm_gxbb_ao_data
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, meson_pwm_matches);
@@ -417,7 +443,7 @@ static int meson_pwm_init_channels(struct meson_pwm *meson,
 		init.ops = &clk_mux_ops;
 		init.flags = CLK_IS_BASIC;
 		init.parent_names = meson->data->parent_names;
-		init.num_parents = 1 << MISC_CLK_SEL_WIDTH;
+		init.num_parents = meson->data->num_parents;
 
 		channel->mux.reg = meson->base + REG_MISC_AB;
 		channel->mux.shift = mux_reg_shifts[i];
-- 
GitLab


From 6ede2b7df92f4f8da1abfa831a038688fcf409ea Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 5 Jul 2017 13:02:27 -0700
Subject: [PATCH 0750/1958] ALSA: opl4: Move inline before return type

Make the code like the rest of the kernel.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/drivers/opl4/opl4_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/drivers/opl4/opl4_lib.c b/sound/drivers/opl4/opl4_lib.c
index bc345d564f8d0..db76a5bf2bd2e 100644
--- a/sound/drivers/opl4/opl4_lib.c
+++ b/sound/drivers/opl4/opl4_lib.c
@@ -29,7 +29,7 @@ MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
 MODULE_DESCRIPTION("OPL4 driver");
 MODULE_LICENSE("GPL");
 
-static void inline snd_opl4_wait(struct snd_opl4 *opl4)
+static inline void snd_opl4_wait(struct snd_opl4 *opl4)
 {
 	int timeout = 10;
 	while ((inb(opl4->fm_port) & OPL4_STATUS_BUSY) && --timeout > 0)
-- 
GitLab


From f35157417215ec138c920320c746fdb3e04ef1d5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Jul 2017 17:25:02 +0100
Subject: [PATCH 0751/1958] Provide a function to create a NUL-terminated
 string from unterminated data

Provide a function, kmemdup_nul(), that will create a NUL-terminated string
from an unterminated character array where the length is known in advance.

This is better than kstrndup() in situations where we already know the
string length as the strnlen() in kstrndup() is superfluous.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/string.h |  1 +
 mm/util.c              | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/include/linux/string.h b/include/linux/string.h
index 537918f8a98ee..3dd944cfe171f 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -131,6 +131,7 @@ extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
 extern const char *kstrdup_const(const char *s, gfp_t gfp);
 extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
 extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
 
 extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
 extern void argv_free(char **argv);
diff --git a/mm/util.c b/mm/util.c
index 26be6407abd7e..21ddf90f883d9 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL(kstrdup_const);
  * @s: the string to duplicate
  * @max: read at most @max chars from @s
  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Note: Use kmemdup_nul() instead if the size is known exactly.
  */
 char *kstrndup(const char *s, size_t max, gfp_t gfp)
 {
@@ -120,6 +122,28 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
 }
 EXPORT_SYMBOL(kmemdup);
 
+/**
+ * kmemdup_nul - Create a NUL-terminated string from unterminated data
+ * @s: The data to stringify
+ * @len: The size of the data
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ */
+char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
+{
+	char *buf;
+
+	if (!s)
+		return NULL;
+
+	buf = kmalloc_track_caller(len + 1, gfp);
+	if (buf) {
+		memcpy(buf, s, len);
+		buf[len] = '\0';
+	}
+	return buf;
+}
+EXPORT_SYMBOL(kmemdup_nul);
+
 /**
  * memdup_user - duplicate memory region from user space
  *
-- 
GitLab


From dd111b31e951c4ffb3a525b51b11d240118693c1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Jul 2017 17:25:09 +0100
Subject: [PATCH 0752/1958] VFS: Clean up whitespace in fs/namespace.c and
 fs/super.c

Clean up line terminal whitespace in fs/namespace.c and fs/super.c.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 4 ++--
 fs/super.c     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 5a4438445bf78..544ab84642ebd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1656,7 +1656,7 @@ void __detach_mounts(struct dentry *dentry)
 	namespace_unlock();
 }
 
-/* 
+/*
  * Is the caller allowed to modify his namespace?
  */
 static inline bool may_mount(void)
@@ -2210,7 +2210,7 @@ static int do_loopback(struct path *path, const char *old_name,
 
 	err = -EINVAL;
 	if (mnt_ns_loop(old_path.dentry))
-		goto out; 
+		goto out;
 
 	mp = lock_mount(path);
 	err = PTR_ERR(mp);
diff --git a/fs/super.c b/fs/super.c
index adb0c0de428c2..dfb56a9665d85 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -508,7 +508,7 @@ struct super_block *sget_userns(struct file_system_type *type,
 			return ERR_PTR(-ENOMEM);
 		goto retry;
 	}
-		
+
 	err = set(s, data);
 	if (err) {
 		spin_unlock(&sb_lock);
@@ -771,7 +771,7 @@ struct super_block *get_active_super(struct block_device *bdev)
 	spin_unlock(&sb_lock);
 	return NULL;
 }
- 
+
 struct super_block *user_get_super(dev_t dev)
 {
 	struct super_block *sb;
-- 
GitLab


From ee416bcdba9975065de571e09de1f7ebfde2156a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Jul 2017 17:25:16 +0100
Subject: [PATCH 0753/1958] VFS: Make get_filesystem() return the affected
 filesystem

Make get_filesystem() return a pointer to the filesystem on which it just
got a ref.

Suggested-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/filesystems.c   | 3 ++-
 include/linux/fs.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/filesystems.c b/fs/filesystems.c
index cac75547d35cc..591e52d23ed4d 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -33,9 +33,10 @@ static struct file_system_type *file_systems;
 static DEFINE_RWLOCK(file_systems_lock);
 
 /* WARNING: This can be used only if we _already_ own a reference */
-void get_filesystem(struct file_system_type *fs)
+struct file_system_type *get_filesystem(struct file_system_type *fs)
 {
 	__module_get(fs->owner);
+	return fs;
 }
 
 void put_filesystem(struct file_system_type *fs)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9b26542..bc0c054894b9e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2956,7 +2956,7 @@ extern int generic_block_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo, u64 start,
 				u64 len, get_block_t *get_block);
 
-extern void get_filesystem(struct file_system_type *fs);
+extern struct file_system_type *get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
-- 
GitLab


From cdf01226b26e98c79c13b335fbe0cbbbe850cf44 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 4 Jul 2017 17:25:22 +0100
Subject: [PATCH 0754/1958] VFS: Provide empty name qstr

Provide an empty name (ie. "") qstr for general use.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 8 ++++++--
 fs/gfs2/dir.c          | 3 +--
 fs/namei.c             | 3 +--
 fs/nsfs.c              | 3 +--
 fs/pipe.c              | 3 +--
 include/linux/dcache.h | 5 +++++
 6 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index a9f995f6859eb..95efb7b2bf841 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -90,6 +90,11 @@ EXPORT_SYMBOL(rename_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
+const struct qstr empty_name = QSTR_INIT("", 0);
+EXPORT_SYMBOL(empty_name);
+const struct qstr slash_name = QSTR_INIT("/", 1);
+EXPORT_SYMBOL(slash_name);
+
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
@@ -1578,8 +1583,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	 */
 	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
 	if (unlikely(!name)) {
-		static const struct qstr anon = QSTR_INIT("/", 1);
-		name = &anon;
+		name = &slash_name;
 		dname = dentry->d_iname;
 	} else if (name->len > DNAME_INLINE_LEN-1) {
 		size_t size = offsetof(struct external_name, name[1]);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 79113219be5f9..a5dfff6a033eb 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -872,7 +872,6 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
 	struct buffer_head *bh;
 	struct gfs2_leaf *leaf;
 	struct gfs2_dirent *dent;
-	struct qstr name = { .name = "" };
 	struct timespec tv = current_time(inode);
 
 	error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
@@ -896,7 +895,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
 	leaf->lf_sec = cpu_to_be64(tv.tv_sec);
 	memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2));
 	dent = (struct gfs2_dirent *)(leaf+1);
-	gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
+	gfs2_qstr2dirent(&empty_name, bh->b_size - sizeof(struct gfs2_leaf), dent);
 	*pbh = bh;
 	return leaf;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 6571a5f5112ed..0d35760fee00a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3400,7 +3400,6 @@ static int do_last(struct nameidata *nd,
 
 struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
 {
-	static const struct qstr name = QSTR_INIT("/", 1);
 	struct dentry *child = NULL;
 	struct inode *dir = dentry->d_inode;
 	struct inode *inode;
@@ -3414,7 +3413,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
 	if (!dir->i_op->tmpfile)
 		goto out_err;
 	error = -ENOMEM;
-	child = d_alloc(dentry, &name);
+	child = d_alloc(dentry, &slash_name);
 	if (unlikely(!child))
 		goto out_err;
 	error = dir->i_op->tmpfile(dir, child, mode);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index f3db56e83dd20..08127a2b8559a 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -53,7 +53,6 @@ static void nsfs_evict(struct inode *inode)
 static void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = nsfs_mnt;
-	struct qstr qname = { .name = "", };
 	struct dentry *dentry;
 	struct inode *inode;
 	unsigned long d;
@@ -85,7 +84,7 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns)
 	inode->i_fop = &ns_file_operations;
 	inode->i_private = ns;
 
-	dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
+	dentry = d_alloc_pseudo(mnt->mnt_sb, &empty_name);
 	if (!dentry) {
 		iput(inode);
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/pipe.c b/fs/pipe.c
index 73b84baf58f87..97e5be897753e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -739,13 +739,12 @@ int create_pipe_files(struct file **res, int flags)
 	struct inode *inode = get_pipe_inode();
 	struct file *f;
 	struct path path;
-	static struct qstr name = { .name = "" };
 
 	if (!inode)
 		return -ENFILE;
 
 	err = -ENOMEM;
-	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
+	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name);
 	if (!path.dentry)
 		goto err_inode;
 	path.mnt = mntget(pipe_mnt);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d2e38dc6172c0..3f65a4fa72eda 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -55,6 +55,11 @@ struct qstr {
 
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
 
+extern const char empty_string[];
+extern const struct qstr empty_name;
+extern const char slash_string[];
+extern const struct qstr slash_name;
+
 struct dentry_stat_t {
 	long nr_dentry;
 	long nr_unused;
-- 
GitLab


From c3d98ea08291ca26144780f601e1fd39e4f20f7e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:09 +0100
Subject: [PATCH 0755/1958] VFS: Don't use save/replace_mount_options if not
 using generic_show_options

btrfs, debugfs, reiserfs and tracefs call save_mount_options() and reiserfs
calls replace_mount_options(), but they then implement their own
->show_options() methods and don't touch s_options, rendering the saved
options unnecessary.  I'm trying to eliminate s_options to make it easier
to implement a context-based mount where the mount options can be passed
individually over a file descriptor.

Remove the calls to save/replace_mount_options() call in these cases.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Chris Mason <clm@fb.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Steven Rostedt <rostedt@goodmis.org>
cc: linux-btrfs@vger.kernel.org
cc: reiserfs-devel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/super.c    | 1 -
 fs/debugfs/inode.c  | 2 --
 fs/reiserfs/super.c | 4 ----
 fs/tracefs/inode.c  | 2 --
 4 files changed, 9 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4f1cdd5058f12..8e9758b3eb232 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1164,7 +1164,6 @@ static int btrfs_fill_super(struct super_block *sb,
 		goto fail_close;
 	}
 
-	save_mount_options(sb, data);
 	cleancache_init_fs(sb);
 	sb->s_flags |= MS_ACTIVE;
 	return 0;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e892ae7d89f8b..0dc9e9c0e0f81 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -203,8 +203,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
 	struct debugfs_fs_info *fsi;
 	int err;
 
-	save_mount_options(sb, data);
-
 	fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
 	sb->s_fs_info = fsi;
 	if (!fsi) {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 685f1e0569986..306e4e9d172d4 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1599,8 +1599,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	}
 
 out_ok_unlocked:
-	if (new_opts)
-		replace_mount_options(s, new_opts);
 	return 0;
 
 out_err_unlock:
@@ -1916,8 +1914,6 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	char *qf_names[REISERFS_MAXQUOTAS] = {};
 	unsigned int qfmt = 0;
 
-	save_mount_options(s, data);
-
 	sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
 	if (!sbi)
 		return -ENOMEM;
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 328e89c2cf835..bea8ad876bf9a 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -270,8 +270,6 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
 	struct tracefs_fs_info *fsi;
 	int err;
 
-	save_mount_options(sb, data);
-
 	fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
 	sb->s_fs_info = fsi;
 	if (!fsi) {
-- 
GitLab


From 4a25220d4e43bb2461823dbc7eb1502a34087958 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:18 +0100
Subject: [PATCH 0756/1958] hugetlbfs: Implement show_options

Implement the show_options superblock op for hugetlbfs as part of a bid to
get rid of s_options and generic_show_options() to make it easier to
implement a context-based mount where the mount options can be passed
individually over a file descriptor.

Note that the uid and gid should possibly be displayed relative to the
viewer's user namespace.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Nadia Yvette Chambers <nyc@holomorphy.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hugetlbfs/inode.c    | 70 ++++++++++++++++++++++++++++++++---------
 include/linux/hugetlb.h |  3 ++
 2 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d44f5456eb9ba..99b3b9836575c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -46,13 +46,13 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
 static const struct inode_operations hugetlbfs_inode_operations;
 
 struct hugetlbfs_config {
-	kuid_t   uid;
-	kgid_t   gid;
-	umode_t mode;
-	long	max_hpages;
-	long	nr_inodes;
-	struct hstate *hstate;
-	long    min_hpages;
+	struct hstate		*hstate;
+	long			max_hpages;
+	long			nr_inodes;
+	long			min_hpages;
+	kuid_t			uid;
+	kgid_t			gid;
+	umode_t			mode;
 };
 
 struct hugetlbfs_inode_info {
@@ -851,6 +851,46 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
 	return MIGRATEPAGE_SUCCESS;
 }
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int hugetlbfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(root->d_sb);
+	struct hugepage_subpool *spool = sbinfo->spool;
+	unsigned long hpage_size = huge_page_size(sbinfo->hstate);
+	unsigned hpage_shift = huge_page_shift(sbinfo->hstate);
+	char mod;
+
+	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, sbinfo->uid));
+	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, sbinfo->gid));
+	if (sbinfo->mode != 0755)
+		seq_printf(m, ",mode=%o", sbinfo->mode);
+	if (sbinfo->max_inodes != -1)
+		seq_printf(m, ",nr_inodes=%lu", sbinfo->max_inodes);
+
+	hpage_size /= 1024;
+	mod = 'K';
+	if (hpage_size >= 1024) {
+		hpage_size /= 1024;
+		mod = 'M';
+	}
+	seq_printf(m, ",pagesize=%lu%c", hpage_size, mod);
+	if (spool) {
+		if (spool->max_hpages != -1)
+			seq_printf(m, ",size=%llu",
+				   (unsigned long long)spool->max_hpages << hpage_shift);
+		if (spool->min_hpages != -1)
+			seq_printf(m, ",min_size=%llu",
+				   (unsigned long long)spool->min_hpages << hpage_shift);
+	}
+	return 0;
+}
+
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -1008,19 +1048,19 @@ static const struct super_operations hugetlbfs_ops = {
 	.evict_inode	= hugetlbfs_evict_inode,
 	.statfs		= hugetlbfs_statfs,
 	.put_super	= hugetlbfs_put_super,
-	.show_options	= generic_show_options,
+	.show_options	= hugetlbfs_show_options,
 };
 
-enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
+enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
 
 /*
  * Convert size option passed from command line to number of huge pages
  * in the pool specified by hstate.  Size option could be in bytes
  * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
  */
-static long long
+static long
 hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
-								int val_type)
+			 enum hugetlbfs_size_type val_type)
 {
 	if (val_type == NO_SIZE)
 		return -1;
@@ -1042,7 +1082,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 	substring_t args[MAX_OPT_ARGS];
 	int option;
 	unsigned long long max_size_opt = 0, min_size_opt = 0;
-	int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
+	enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE;
 
 	if (!options)
 		return 0;
@@ -1156,8 +1196,6 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	struct hugetlbfs_config config;
 	struct hugetlbfs_sb_info *sbinfo;
 
-	save_mount_options(sb, data);
-
 	config.max_hpages = -1; /* No limit on size by default */
 	config.nr_inodes = -1; /* No limit on number of inodes by default */
 	config.uid = current_fsuid();
@@ -1178,6 +1216,10 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
 	sbinfo->spool = NULL;
+	sbinfo->uid = config.uid;
+	sbinfo->gid = config.gid;
+	sbinfo->mode = config.mode;
+
 	/*
 	 * Allocate and initialize subpool if maximum or minimum size is
 	 * specified.  Any needed reservations (for minimim size) are taken
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b857fc8cc2eca..3b6eeaad2f779 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -262,6 +262,9 @@ struct hugetlbfs_sb_info {
 	spinlock_t	stat_lock;
 	struct hstate *hstate;
 	struct hugepage_subpool *spool;
+	kuid_t	uid;
+	kgid_t	gid;
+	umode_t mode;
 };
 
 static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
-- 
GitLab


From d86efb0df98afe0acdda7ed94963684c3fa7cccd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:27 +0100
Subject: [PATCH 0757/1958] omfs: Implement show_options

Implement the show_options superblock op for omfs as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Note that the uid and gid should possibly be displayed relative to the
viewer's user namespace.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Bob Copeland <me@bobcopeland.com>
cc: linux-karma-devel@lists.sourceforge.net
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/omfs/inode.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 8c9034ee7383a..ee14af9e26f28 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -13,6 +13,7 @@
 #include <linux/buffer_head.h>
 #include <linux/vmalloc.h>
 #include <linux/writeback.h>
+#include <linux/seq_file.h>
 #include <linux/crc-itu-t.h>
 #include "omfs.h"
 
@@ -290,12 +291,40 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int omfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(root->d_sb);
+	umode_t cur_umask = current_umask();
+
+	if (!uid_eq(sbi->s_uid, current_uid()))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, sbi->s_uid));
+	if (!gid_eq(sbi->s_gid, current_gid()))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, sbi->s_gid));
+
+	if (sbi->s_dmask == sbi->s_fmask) {
+		if (sbi->s_fmask != cur_umask)
+			seq_printf(m, ",umask=%o", sbi->s_fmask);
+	} else {
+		if (sbi->s_dmask != cur_umask)
+			seq_printf(m, ",dmask=%o", sbi->s_dmask);
+		if (sbi->s_fmask != cur_umask)
+			seq_printf(m, ",fmask=%o", sbi->s_fmask);
+	}
+
+	return 0;
+}
+
 static const struct super_operations omfs_sops = {
 	.write_inode	= omfs_write_inode,
 	.evict_inode	= omfs_evict_inode,
 	.put_super	= omfs_put_super,
 	.statfs		= omfs_statfs,
-	.show_options	= generic_show_options,
+	.show_options	= omfs_show_options,
 };
 
 /*
@@ -434,8 +463,6 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 	struct inode *root;
 	int ret = -EINVAL;
 
-	save_mount_options(sb, (char *) data);
-
 	sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL);
 	if (!sbi)
 		return -ENOMEM;
-- 
GitLab


From 349d743895e2371bda9a02a5b465b50cc24d2825 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:34 +0100
Subject: [PATCH 0758/1958] pstore: Implement show_options

Implement the show_options superblock op for pstore as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Kees Cook <keescook@chromium.org>
cc: Anton Vorontsov <anton@enomsg.org>
cc: Colin Cross <ccross@android.com>
cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pstore/inode.c    | 14 +++++++++++---
 fs/pstore/internal.h |  3 +++
 fs/pstore/platform.c |  2 +-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 792a4e5f9226d..913e839ac1f5b 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -283,6 +283,16 @@ static void parse_options(char *options)
 	}
 }
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int pstore_show_options(struct seq_file *m, struct dentry *root)
+{
+	if (kmsg_bytes != PSTORE_DEFAULT_KMSG_BYTES)
+		seq_printf(m, ",kmsg_bytes=%lu", kmsg_bytes);
+	return 0;
+}
+
 static int pstore_remount(struct super_block *sb, int *flags, char *data)
 {
 	sync_filesystem(sb);
@@ -296,7 +306,7 @@ static const struct super_operations pstore_ops = {
 	.drop_inode	= generic_delete_inode,
 	.evict_inode	= pstore_evict_inode,
 	.remount_fs	= pstore_remount,
-	.show_options	= generic_show_options,
+	.show_options	= pstore_show_options,
 };
 
 static struct super_block *pstore_sb;
@@ -448,8 +458,6 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
 
-	save_mount_options(sb, data);
-
 	pstore_sb = sb;
 
 	sb->s_maxbytes		= MAX_LFS_FILESIZE;
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index c416e653dc4f5..4d5913130580e 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -5,6 +5,9 @@
 #include <linux/time.h>
 #include <linux/pstore.h>
 
+#define PSTORE_DEFAULT_KMSG_BYTES 10240
+extern unsigned long kmsg_bytes;
+
 #ifdef CONFIG_PSTORE_FTRACE
 extern void pstore_register_ftrace(void);
 extern void pstore_unregister_ftrace(void);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index d468eec9b8a6b..6640df40908d4 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -99,7 +99,7 @@ static char *big_oops_buf;
 static size_t big_oops_buf_sz;
 
 /* How much of the console log to snapshot */
-static unsigned long kmsg_bytes = 10240;
+unsigned long kmsg_bytes = PSTORE_DEFAULT_KMSG_BYTES;
 
 void pstore_set_kmsg_bytes(int bytes)
 {
-- 
GitLab


From 604ecf4288933dec3c235ed1ee575a154670c882 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:42 +0100
Subject: [PATCH 0759/1958] ramfs: Implement show_options

Implement the show_options superblock op for ramfs as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ramfs/inode.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 26e45863e4995..11201b2d06b92 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -38,6 +38,14 @@
 #include <linux/uaccess.h>
 #include "internal.h"
 
+struct ramfs_mount_opts {
+	umode_t mode;
+};
+
+struct ramfs_fs_info {
+	struct ramfs_mount_opts mount_opts;
+};
+
 #define RAMFS_DEFAULT_MODE	0755
 
 static const struct super_operations ramfs_ops;
@@ -149,14 +157,22 @@ static const struct inode_operations ramfs_dir_inode_operations = {
 	.rename		= simple_rename,
 };
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int ramfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct ramfs_fs_info *fsi = root->d_sb->s_fs_info;
+
+	if (fsi->mount_opts.mode != RAMFS_DEFAULT_MODE)
+		seq_printf(m, ",mode=%o", fsi->mount_opts.mode);
+	return 0;
+}
+
 static const struct super_operations ramfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
-	.show_options	= generic_show_options,
-};
-
-struct ramfs_mount_opts {
-	umode_t mode;
+	.show_options	= ramfs_show_options,
 };
 
 enum {
@@ -169,10 +185,6 @@ static const match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
-struct ramfs_fs_info {
-	struct ramfs_mount_opts mount_opts;
-};
-
 static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
 {
 	substring_t args[MAX_OPT_ARGS];
@@ -211,8 +223,6 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent)
 	struct inode *inode;
 	int err;
 
-	save_mount_options(sb, data);
-
 	fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
 	sb->s_fs_info = fsi;
 	if (!fsi)
-- 
GitLab


From 4cc7c1864bbd4cf80f6bdc8ba3217de5aa5f4688 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:49 +0100
Subject: [PATCH 0760/1958] bpf: Implement show_options

Implement the show_options superblock op for bpf as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Alexei Starovoitov <ast@kernel.org>
cc: Daniel Borkmann <daniel@iogearbox.net>
cc: netdev@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/bpf/inode.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 9bbd33497d3d0..e833ed9143583 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -377,10 +377,22 @@ static void bpf_evict_inode(struct inode *inode)
 		bpf_any_put(inode->i_private, type);
 }
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int bpf_show_options(struct seq_file *m, struct dentry *root)
+{
+	umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX;
+
+	if (mode != S_IRWXUGO)
+		seq_printf(m, ",mode=%o", mode);
+	return 0;
+}
+
 static const struct super_operations bpf_super_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
-	.show_options	= generic_show_options,
+	.show_options	= bpf_show_options,
 	.evict_inode	= bpf_evict_inode,
 };
 
@@ -434,8 +446,6 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
 	struct inode *inode;
 	int ret;
 
-	save_mount_options(sb, data);
-
 	ret = bpf_parse_options(data, &opts);
 	if (ret)
 		return ret;
-- 
GitLab


From 8517bb1f19679bf2bf6c29a98b7a4f3a78629554 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 8 Jun 2017 14:24:14 +0200
Subject: [PATCH 0761/1958] dt-bindings: pwm: meson: Add compatible for gxbb ao
 PWMs

Add compatible string to properly handle the PWMs found in the AO domain
of the gxbb (and gxl) family.

Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/pwm-meson.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/pwm/pwm-meson.txt b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
index 5376a4468cb68..5b07bebbf6f7d 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-meson.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
@@ -2,7 +2,9 @@ Amlogic Meson PWM Controller
 ============================
 
 Required properties:
-- compatible: Shall contain "amlogic,meson8b-pwm" or "amlogic,meson-gxbb-pwm".
+- compatible: Shall contain "amlogic,meson8b-pwm"
+                         or "amlogic,meson-gxbb-pwm"
+                         or "amlogic,meson-gxbb-ao-pwm"
 - #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
   the cells format.
 
-- 
GitLab


From aa07b633846db4192e6bb35675fbdaf6e78c98c7 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Thu, 29 Jun 2017 15:06:08 +0300
Subject: [PATCH 0762/1958] net/mlx5: Add missing include in lib/gid.c

Fix warnings when building with -Wall:
drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c:38:6: warning: symbol 'mlx5_init_reserved_gids' was not declared. Should it be static?
drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c:47:6: warning: symbol 'mlx5_cleanup_reserved_gids' was not declared. Should it be static?
drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c:55:5: warning: symbol 'mlx5_core_reserve_gids' was not declared. Should it be static?
drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c:79:6: warning: symbol 'mlx5_core_unreserve_gids' was not declared. Should it be static?
drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c:92:5: warning: symbol 'mlx5_core_reserved_gid_alloc' was not declared. Should it be static?
drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c:109:6: warning: symbol 'mlx5_core_reserved_gid_free' was not declared. Should it be static?

Fixes: 52ec462eca9b ("net/mlx5: Add reserved-gids support")
Reported-by: Or Gerlitz <gerlitz.or@gmail.com>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index de2aed44ab858..573f59f46d41e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -34,6 +34,7 @@
 #include <linux/etherdevice.h>
 #include <linux/idr.h>
 #include "mlx5_core.h"
+#include "lib/mlx5.h"
 
 void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
 {
-- 
GitLab


From 5dfd87b67cd98375896890e01437edefeb45a707 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Sun, 2 Jul 2017 10:57:56 +0300
Subject: [PATCH 0763/1958] net/mlx5: IPSec, Fix 64-bit division on 32-bit
 builds

Fix warnings when building 386 kernel:
>> ERROR: "__udivdi3" [drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.ko] undefined!

Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path")
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 4a78aefdf1577..7d06c673851a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -363,6 +363,7 @@ void mlx5e_ipsec_build_inverse_table(void)
 {
 	u16 mss_inv;
 	u32 mss;
+	u64 n;
 
 	/* Calculate 1/x inverse table for use in GSO data path.
 	 * Using this table, we provide the IPSec accelerator with the value of
@@ -372,7 +373,8 @@ void mlx5e_ipsec_build_inverse_table(void)
 	 */
 	mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
 	for (mss = 2; mss < MAX_LSO_MSS; mss++) {
-		mss_inv = ((1ULL << 32) / mss) >> 16;
+		n = 1ULL << 32;
+		mss_inv = do_div(n, mss) >> 16;
 		mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
 	}
 }
-- 
GitLab


From c8af01692e4950504fb2e5c65ca87acbe73027cb Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Sun, 2 Jul 2017 14:36:57 +0300
Subject: [PATCH 0764/1958] net/mlx5: FPGA, make mlx5_fpga_device_brb static

Fix warning when building with -Wall:
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c:105:5: warning: symbol 'mlx5_fpga_device_brb' was not declared. Should it be static?

Fixes: c43051d72a8d ("net/mlx5: FPGA, Add SBU bypass and reset flows")
Reported-by: Or Gerlitz <gerlitz.or@gmail.com>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 31e5a2627eb8a..9034e9960a761 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -102,7 +102,7 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
 	return 0;
 }
 
-int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
 {
 	int err;
 	struct mlx5_core_dev *mdev = fdev->mdev;
-- 
GitLab


From 2a41d15b7906e8f749d3656e1bdc2a5ee7dd65ff Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Sun, 2 Jul 2017 14:38:14 +0300
Subject: [PATCH 0765/1958] net/mlx5: FPGA, Fix datatype mismatch

Fix warnings when building with -Wall:
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c:313:36: warning: cast to restricted __be32
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c:314:37: warning: cast to restricted __be32

Fixes: bebb23e6cb02 ("net/mlx5: Accel, Add IPSec acceleration interface")
Reported-by: Or Gerlitz <gerlitz.or@gmail.com>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 42970e2a05ff4..35d0e33381ca0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -275,7 +275,7 @@ int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 {
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned int i;
-	u32 *data;
+	__be32 *data;
 	u32 count;
 	u64 addr;
 	int ret;
@@ -290,7 +290,7 @@ int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 
 	count = mlx5_fpga_ipsec_counters_count(mdev);
 
-	data = kzalloc(sizeof(u32) * count * 2, GFP_KERNEL);
+	data = kzalloc(sizeof(*data) * count * 2, GFP_KERNEL);
 	if (!data) {
 		ret = -ENOMEM;
 		goto out;
-- 
GitLab


From 111a676367699dc8e3bc567ab17ab573ade54f0d Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Mon, 3 Jul 2017 10:01:19 +0300
Subject: [PATCH 0766/1958] net/mlx5: Build wq.o even if MLX5_CORE_EN is not
 selected

Both the ethernet and FPGA portions of MLX5 now require the wq functions,
and we get a link error when CONFIG_MLX5_CORE_EN is disabled:

drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.o: In function `mlx5_fpga_conn_create_cq':
conn.c:(.text+0x10b3): undefined reference to `mlx5_cqwq_create'
conn.c:(.text+0x10c6): undefined reference to `mlx5_cqwq_get_size'
conn.c:(.text+0x12bc): undefined reference to `mlx5_cqwq_destroy'

Build wq.o even if MLX5_CORE_EN is not selected.

Fixes: 537a50574175 ("net/mlx5: FPGA, Add high-speed connection routines")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index ca367445f8642..9d17e4e76d3a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,14 +4,14 @@ subdir-ccflags-y += -I$(src)
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-		fs_counters.o rl.o lag.o dev.o lib/gid.o
+		fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o
 
 mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
 		fpga/ipsec.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
 		en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
 		en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o
-- 
GitLab


From fb000f781783867ab442971c63f0005c2547c72b Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Mon, 3 Jul 2017 10:46:00 +0300
Subject: [PATCH 0767/1958] net/mlx5: Add Makefiles for subdirectories

Currently it is not possible to build just one .o file inside
a subdirectory, because the subdirectories lack a Makefile.

Add a Makefile to the mlx5 subdirectories.

Fixes: e29341fb3a5b ("net/mlx5: FPGA, Add basic support for Innova")
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile    | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile     | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile    | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile      | 1 +
 5 files changed, 5 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
new file mode 100644
index 0000000000000..d8e17110f25d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
new file mode 100644
index 0000000000000..d8e17110f25d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
new file mode 100644
index 0000000000000..d8e17110f25d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
new file mode 100644
index 0000000000000..d8e17110f25d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
new file mode 100644
index 0000000000000..d8e17110f25d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
-- 
GitLab


From d968f0f2e4404152f37ed2384b4a2269dd2dae5a Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Thu, 29 Jun 2017 16:50:01 -0500
Subject: [PATCH 0768/1958] net/mlx5e: Initialize CEE's getpermhwaddr address
 buffer to 0xff

Latest change in open-lldp code uses bytes 6-11 of perm_addr buffer
as the Ethernet source address for the host TLV packet.
Since our driver does not fill these bytes, they stay at zero and
the open-lldp code ends up sending the TLV packet with zero source
address and the switch drops this packet.

The fix is to initialize these bytes to 0xff. The open-lldp code
considers 0xff:ff:ff:ff:ff:ff as the invalid address and falls back to
use the host's mac address as the Ethernet source address.

Fixes: 3a6a931dfb8e ("net/mlx5e: Support DCBX CEE API")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 8fa23f6a1f67f..2eb54d36e16ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -464,6 +464,8 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
 	if (!perm_addr)
 		return;
 
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
 }
 
-- 
GitLab


From 99c621d704cf1c4eb74c3c42e674edf3df64f92d Mon Sep 17 00:00:00 2001
From: Michael Sartain <mikesart@fastmail.com>
Date: Wed, 5 Jul 2017 22:07:15 -0600
Subject: [PATCH 0769/1958] tracing: Add saved_tgids file to show cached pid to
 tgid mappings

Export the cached pid / tgid mappings in debugfs tracing saved_tgids file.
This allows user apps to translate the pids from a trace to their respective
thread group.

Example saved_tgids file with pid / tgid values separated by ' ':

  # cat saved_tgids
  1048 1048
  1047 1047
  7 7
  1049 1047
  1054 1047
  1053 1047

Link: http://lkml.kernel.org/r/20170630004023.064965233@goodmis.org
Link: http://lkml.kernel.org/r/20170706040713.unwkumbta5menygi@mikesart-cos

Reviewed-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Michael Sartain <mikesart@fastmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 00e2e4169b1e8..f079a8ca11175 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4688,6 +4688,76 @@ static const struct file_operations tracing_readme_fops = {
 	.llseek		= generic_file_llseek,
 };
 
+static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	int *ptr = v;
+
+	if (*pos || m->count)
+		ptr++;
+
+	(*pos)++;
+
+	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
+		if (trace_find_tgid(*ptr))
+			return ptr;
+	}
+
+	return NULL;
+}
+
+static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
+{
+	void *v;
+	loff_t l = 0;
+
+	if (!tgid_map)
+		return NULL;
+
+	v = &tgid_map[0];
+	while (l <= *pos) {
+		v = saved_tgids_next(m, v, &l);
+		if (!v)
+			return NULL;
+	}
+
+	return v;
+}
+
+static void saved_tgids_stop(struct seq_file *m, void *v)
+{
+}
+
+static int saved_tgids_show(struct seq_file *m, void *v)
+{
+	int pid = (int *)v - tgid_map;
+
+	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
+	return 0;
+}
+
+static const struct seq_operations tracing_saved_tgids_seq_ops = {
+	.start		= saved_tgids_start,
+	.stop		= saved_tgids_stop,
+	.next		= saved_tgids_next,
+	.show		= saved_tgids_show,
+};
+
+static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
+{
+	if (tracing_disabled)
+		return -ENODEV;
+
+	return seq_open(filp, &tracing_saved_tgids_seq_ops);
+}
+
+
+static const struct file_operations tracing_saved_tgids_fops = {
+	.open		= tracing_saved_tgids_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	unsigned int *ptr = v;
@@ -7920,6 +7990,9 @@ static __init int tracer_init_tracefs(void)
 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
 			  NULL, &tracing_saved_cmdlines_size_fops);
 
+	trace_create_file("saved_tgids", 0444, d_tracer,
+			NULL, &tracing_saved_tgids_fops);
+
 	trace_eval_init();
 
 	trace_create_eval_file(d_tracer);
-- 
GitLab


From fd7b2be8cbcf6cd6d9c9e843ffff36fb91388e51 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 8 Jun 2017 14:24:16 +0200
Subject: [PATCH 0770/1958] pwm: meson: Improve PWM calculation precision

When using input clocks with high rates, such as clk81 (166MHz), the
fin_ns = NSEC_PER_SEC / fin_freq can introduce a significant error.

Ex: fin_freq = 166666667, NSEC_PER_SEC = 1000000000
    fin_ns = 5,9999999

which is, of course, rounded down to 5. This introduces an error of ~20%
on the period requested from the PWM.

This patch uses ps instead of ns (and 64 bit integers) to perform the
calculation. This should give a good enough precision.

Fixes: 211ed630753d ("pwm: Add support for Meson PWM Controller")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>

squash! pwm: meson: Improve pwm calculation precision
---
 drivers/pwm/pwm-meson.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index defc27d880f3b..cb845edfe2b42 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -163,7 +163,8 @@ static int meson_pwm_calc(struct meson_pwm *meson,
 			  unsigned int duty, unsigned int period)
 {
 	unsigned int pre_div, cnt, duty_cnt;
-	unsigned long fin_freq = -1, fin_ns;
+	unsigned long fin_freq = -1;
+	u64 fin_ps;
 
 	if (~(meson->inverter_mask >> id) & 0x1)
 		duty = period - duty;
@@ -179,13 +180,15 @@ static int meson_pwm_calc(struct meson_pwm *meson,
 	}
 
 	dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq);
-	fin_ns = NSEC_PER_SEC / fin_freq;
+	fin_ps = (u64)NSEC_PER_SEC * 1000;
+	do_div(fin_ps, fin_freq);
 
 	/* Calc pre_div with the period */
 	for (pre_div = 0; pre_div < MISC_CLK_DIV_MASK; pre_div++) {
-		cnt = DIV_ROUND_CLOSEST(period, fin_ns * (pre_div + 1));
-		dev_dbg(meson->chip.dev, "fin_ns=%lu pre_div=%u cnt=%u\n",
-			fin_ns, pre_div, cnt);
+		cnt = DIV_ROUND_CLOSEST_ULL((u64)period * 1000,
+					    fin_ps * (pre_div + 1));
+		dev_dbg(meson->chip.dev, "fin_ps=%llu pre_div=%u cnt=%u\n",
+			fin_ps, pre_div, cnt);
 		if (cnt <= 0xffff)
 			break;
 	}
@@ -208,7 +211,8 @@ static int meson_pwm_calc(struct meson_pwm *meson,
 		channel->lo = cnt;
 	} else {
 		/* Then check is we can have the duty with the same pre_div */
-		duty_cnt = DIV_ROUND_CLOSEST(duty, fin_ns * (pre_div + 1));
+		duty_cnt = DIV_ROUND_CLOSEST_ULL((u64)duty * 1000,
+						 fin_ps * (pre_div + 1));
 		if (duty_cnt > 0xffff) {
 			dev_err(meson->chip.dev, "unable to get duty cycle\n");
 			return -EINVAL;
-- 
GitLab


From e47866a177cf0baba1d714fa93cb762f25bd6cef Mon Sep 17 00:00:00 2001
From: Nick Vaccaro <nvaccaro@google.com>
Date: Fri, 23 Jun 2017 14:52:47 -0700
Subject: [PATCH 0771/1958] pwm: cros-ec: Fix transposed param settings

The __cros_ec_pwm_get_duty() routine was transposing the insize and
outsize fields when calling cros_ec_cmd_xfer_status().

The original code worked without error due to size of the two particular
parameter blocks passed to cros_ec_cmd_xfer_status(), so this change is
not fixing an actual runtime problem, just correcting the calling usage.

Signed-off-by: Nick Vaccaro <nvaccaro@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-cros-ec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index f6ca4e8c6253f..9c13694eaa248 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c
@@ -75,8 +75,8 @@ static int __cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index,
 
 	msg->version = 0;
 	msg->command = EC_CMD_PWM_GET_DUTY;
-	msg->insize = sizeof(*params);
-	msg->outsize = sizeof(*resp);
+	msg->insize = sizeof(*resp);
+	msg->outsize = sizeof(*params);
 
 	params->pwm_type = EC_PWM_TYPE_GENERIC;
 	params->index = index;
-- 
GitLab


From c2d42edb528af75890cf73658e86e6dc9ac2b22b Mon Sep 17 00:00:00 2001
From: Allen Hubbe <Allen.Hubbe@dell.com>
Date: Tue, 27 Dec 2016 19:12:27 -0500
Subject: [PATCH 0772/1958] NTB: ntb_test: modprobe on remote host

Signed-off-by: Allen Hubbe <Allen.Hubbe@dell.com>
Acked-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 13f5198ba0ee7..1ee8ea350f65c 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -85,6 +85,10 @@ set -e
 function _modprobe()
 {
         modprobe "$@"
+
+	if [[ "$REMOTE_HOST" != "" ]]; then
+		ssh "$REMOTE_HOST" modprobe "$@"
+	fi
 }
 
 function split_remote()
-- 
GitLab


From 7c49c9855a890ce32ea08f01366ecb221a027ad8 Mon Sep 17 00:00:00 2001
From: Allen Hubbe <Allen.Hubbe@dell.com>
Date: Tue, 27 Dec 2016 19:12:28 -0500
Subject: [PATCH 0773/1958] NTB: ntb_test: add parameter for doorbell bitmask

If the test attempts to clear doorbell bits that are invalid for the
hardware, then the test will fail.  Provide a parameter to specify the
doorbell bits to clear.  Set default doorbell bits that work for XEON.

Signed-off-by: Allen Hubbe <Allen.Hubbe@dell.com>
Acked-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 tools/testing/selftests/ntb/ntb_test.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 1ee8ea350f65c..1c12b5855e4f9 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -18,6 +18,7 @@ LIST_DEVS=FALSE
 
 DEBUGFS=${DEBUGFS-/sys/kernel/debug}
 
+DB_BITMASK=0x7FFF
 PERF_RUN_ORDER=32
 MAX_MW_SIZE=0
 RUN_DMA_TESTS=
@@ -38,6 +39,7 @@ function show_help()
 	echo "be highly recommended."
 	echo
 	echo "Options:"
+	echo "  -b BITMASK      doorbell clear bitmask for ntb_tool"
 	echo "  -C              don't cleanup ntb modules on exit"
 	echo "  -d              run dma tests"
 	echo "  -h              show this help message"
@@ -52,8 +54,9 @@ function show_help()
 function parse_args()
 {
 	OPTIND=0
-	while getopts "Cdhlm:r:p:w:" opt; do
+	while getopts "b:Cdhlm:r:p:w:" opt; do
 		case "$opt" in
+		b)  DB_BITMASK=${OPTARG} ;;
 		C)  DONT_CLEANUP=1 ;;
 		d)  RUN_DMA_TESTS=1 ;;
 		h)  show_help; exit 0 ;;
@@ -158,7 +161,7 @@ function doorbell_test()
 
 	echo "Running db tests on: $(basename $LOC) / $(basename $REM)"
 
-	write_file "c 0xFFFFFFFF" "$REM/db"
+	write_file "c $DB_BITMASK" "$REM/db"
 
 	for ((i=1; i <= 8; i++)); do
 		let DB=$(read_file "$REM/db") || true
-- 
GitLab


From 60934b200ddd62187b149a7f32cc0f160c08a7ed Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:13 +0300
Subject: [PATCH 0774/1958] NTB: Make link-state API being declared first

Since link operations are usually performed before memory window access
operations, it's logically better to declare link-related API before any
of MW/Doorbell/Scratchpad methods.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 137 ++++++++++++++++++++++----------------------
 1 file changed, 69 insertions(+), 68 deletions(-)

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index de87ceac110e2..d7ab3e1ec88fc 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -179,13 +179,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 
 /**
  * struct ntb_ctx_ops - ntb device operations
+ * @link_is_up:		See ntb_link_is_up().
+ * @link_enable:	See ntb_link_enable().
+ * @link_disable:	See ntb_link_disable().
  * @mw_count:		See ntb_mw_count().
  * @mw_get_range:	See ntb_mw_get_range().
  * @mw_set_trans:	See ntb_mw_set_trans().
  * @mw_clear_trans:	See ntb_mw_clear_trans().
- * @link_is_up:		See ntb_link_is_up().
- * @link_enable:	See ntb_link_enable().
- * @link_disable:	See ntb_link_disable().
  * @db_is_unsafe:	See ntb_db_is_unsafe().
  * @db_valid_mask:	See ntb_db_valid_mask().
  * @db_vector_count:	See ntb_db_vector_count().
@@ -212,6 +212,12 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @peer_spad_write:	See ntb_peer_spad_write().
  */
 struct ntb_dev_ops {
+	int (*link_is_up)(struct ntb_dev *ntb,
+			  enum ntb_speed *speed, enum ntb_width *width);
+	int (*link_enable)(struct ntb_dev *ntb,
+			   enum ntb_speed max_speed, enum ntb_width max_width);
+	int (*link_disable)(struct ntb_dev *ntb);
+
 	int (*mw_count)(struct ntb_dev *ntb);
 	int (*mw_get_range)(struct ntb_dev *ntb, int idx,
 			    phys_addr_t *base, resource_size_t *size,
@@ -220,12 +226,6 @@ struct ntb_dev_ops {
 			    dma_addr_t addr, resource_size_t size);
 	int (*mw_clear_trans)(struct ntb_dev *ntb, int idx);
 
-	int (*link_is_up)(struct ntb_dev *ntb,
-			  enum ntb_speed *speed, enum ntb_width *width);
-	int (*link_enable)(struct ntb_dev *ntb,
-			   enum ntb_speed max_speed, enum ntb_width max_width);
-	int (*link_disable)(struct ntb_dev *ntb);
-
 	int (*db_is_unsafe)(struct ntb_dev *ntb);
 	u64 (*db_valid_mask)(struct ntb_dev *ntb);
 	int (*db_vector_count)(struct ntb_dev *ntb);
@@ -265,13 +265,14 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 {
 	/* commented callbacks are not required: */
 	return
+		ops->link_is_up				&&
+		ops->link_enable			&&
+		ops->link_disable			&&
 		ops->mw_count				&&
 		ops->mw_get_range			&&
 		ops->mw_set_trans			&&
 		/* ops->mw_clear_trans			&& */
-		ops->link_is_up				&&
-		ops->link_enable			&&
-		ops->link_disable			&&
+
 		/* ops->db_is_unsafe			&& */
 		ops->db_valid_mask			&&
 
@@ -440,6 +441,62 @@ void ntb_link_event(struct ntb_dev *ntb);
  */
 void ntb_db_event(struct ntb_dev *ntb, int vector);
 
+/**
+ * ntb_link_is_up() - get the current ntb link state
+ * @ntb:	NTB device context.
+ * @speed:	OUT - The link speed expressed as PCIe generation number.
+ * @width:	OUT - The link width expressed as the number of PCIe lanes.
+ *
+ * Get the current state of the ntb link.  It is recommended to query the link
+ * state once after every link event.  It is safe to query the link state in
+ * the context of the link event callback.
+ *
+ * Return: One if the link is up, zero if the link is down, otherwise a
+ *		negative value indicating the error number.
+ */
+static inline int ntb_link_is_up(struct ntb_dev *ntb,
+				 enum ntb_speed *speed, enum ntb_width *width)
+{
+	return ntb->ops->link_is_up(ntb, speed, width);
+}
+
+/**
+ * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * @ntb:	NTB device context.
+ * @max_speed:	The maximum link speed expressed as PCIe generation number.
+ * @max_width:	The maximum link width expressed as the number of PCIe lanes.
+ *
+ * Enable the link on the secondary side of the ntb.  This can only be done
+ * from the primary side of the ntb in primary or b2b topology.  The ntb device
+ * should train the link to its maximum speed and width, or the requested speed
+ * and width, whichever is smaller, if supported.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_link_enable(struct ntb_dev *ntb,
+				  enum ntb_speed max_speed,
+				  enum ntb_width max_width)
+{
+	return ntb->ops->link_enable(ntb, max_speed, max_width);
+}
+
+/**
+ * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * @ntb:	NTB device context.
+ *
+ * Disable the link on the secondary side of the ntb.  This can only be
+ * done from the primary side of the ntb in primary or b2b topology.  The ntb
+ * device should disable the link.  Returning from this call must indicate that
+ * a barrier has passed, though with no more writes may pass in either
+ * direction across the link, except if this call returns an error number.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_link_disable(struct ntb_dev *ntb)
+{
+	return ntb->ops->link_disable(ntb);
+}
+
 /**
  * ntb_mw_count() - get the number of memory windows
  * @ntb:	NTB device context.
@@ -516,62 +573,6 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
 	return ntb->ops->mw_clear_trans(ntb, idx);
 }
 
-/**
- * ntb_link_is_up() - get the current ntb link state
- * @ntb:	NTB device context.
- * @speed:	OUT - The link speed expressed as PCIe generation number.
- * @width:	OUT - The link width expressed as the number of PCIe lanes.
- *
- * Get the current state of the ntb link.  It is recommended to query the link
- * state once after every link event.  It is safe to query the link state in
- * the context of the link event callback.
- *
- * Return: One if the link is up, zero if the link is down, otherwise a
- *		negative value indicating the error number.
- */
-static inline int ntb_link_is_up(struct ntb_dev *ntb,
-				 enum ntb_speed *speed, enum ntb_width *width)
-{
-	return ntb->ops->link_is_up(ntb, speed, width);
-}
-
-/**
- * ntb_link_enable() - enable the link on the secondary side of the ntb
- * @ntb:	NTB device context.
- * @max_speed:	The maximum link speed expressed as PCIe generation number.
- * @max_width:	The maximum link width expressed as the number of PCIe lanes.
- *
- * Enable the link on the secondary side of the ntb.  This can only be done
- * from the primary side of the ntb in primary or b2b topology.  The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
- *
- * Return: Zero on success, otherwise an error number.
- */
-static inline int ntb_link_enable(struct ntb_dev *ntb,
-				  enum ntb_speed max_speed,
-				  enum ntb_width max_width)
-{
-	return ntb->ops->link_enable(ntb, max_speed, max_width);
-}
-
-/**
- * ntb_link_disable() - disable the link on the secondary side of the ntb
- * @ntb:	NTB device context.
- *
- * Disable the link on the secondary side of the ntb.  This can only be
- * done from the primary side of the ntb in primary or b2b topology.  The ntb
- * device should disable the link.  Returning from this call must indicate that
- * a barrier has passed, though with no more writes may pass in either
- * direction across the link, except if this call returns an error number.
- *
- * Return: Zero on success, otherwise an error number.
- */
-static inline int ntb_link_disable(struct ntb_dev *ntb)
-{
-	return ntb->ops->link_disable(ntb);
-}
-
 /**
  * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell
  * @ntb:	NTB device context.
-- 
GitLab


From 1e5301196a88961b02fe43c73a952f78b2c84712 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:14 +0300
Subject: [PATCH 0775/1958] NTB: Add indexed ports NTB API

There is some NTB hardware, which can combine more than just two domains
over NTB. For instance, some IDT PCIe-switches can have NTB-functions
activated on more than two-ports. The different domains are distinguished
by ports they are connected to. So the new port-related methods are added to
the NTB API:
 ntb_port_number() - return local port
 ntb_peer_port_count() - return number of peers local port can connect to
 ntb_peer_port_number(pdix) - return port number by it index
 ntb_peer_port_idx(port) - return port index by it number

Current test-drivers aren't changed much. They still support two-ports devices
for the time being while multi-ports hardware drivers aren't added.

By default port-related API is declared for two-ports hardware.
So corresponding hardware drivers won't need to implement it.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/ntb.c               |  54 +++++++++++
 drivers/ntb/ntb_transport.c     |   6 ++
 drivers/ntb/test/ntb_perf.c     |   4 +
 drivers/ntb/test/ntb_pingpong.c |   6 ++
 drivers/ntb/test/ntb_tool.c     |   5 +
 include/linux/ntb.h             | 156 ++++++++++++++++++++++++++++++++
 6 files changed, 231 insertions(+)

diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
index 2e2530743831a..1e92e52813aaa 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/ntb.c
@@ -191,6 +191,60 @@ void ntb_db_event(struct ntb_dev *ntb, int vector)
 }
 EXPORT_SYMBOL(ntb_db_event);
 
+int ntb_default_port_number(struct ntb_dev *ntb)
+{
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_PRI_USD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_SEC_DSD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_port_number);
+
+int ntb_default_peer_port_count(struct ntb_dev *ntb)
+{
+	return NTB_DEF_PEER_CNT;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_count);
+
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_SEC_DSD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_PRI_USD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_number);
+
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	int peer_port = ntb_default_peer_port_number(ntb, NTB_DEF_PEER_IDX);
+
+	if (peer_port == -EINVAL || port != peer_port)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_idx);
+
 static int ntb_probe(struct device *dev)
 {
 	struct ntb_dev *ntb;
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 10e5bf4601398..cc6ae35109b1b 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -95,6 +95,9 @@ MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
 
 static struct dentry *nt_debugfs_dir;
 
+/* Only two-ports NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
 struct ntb_queue_entry {
 	/* ntb_queue list reference */
 	struct list_head entry;
@@ -1064,6 +1067,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 		dev_dbg(&ndev->dev,
 			"scratchpad is unsafe, proceed anyway...\n");
 
+	if (ntb_peer_port_count(ndev) != NTB_DEF_PEER_CNT)
+		dev_warn(&ndev->dev, "Multi-port NTB devices unsupported\n");
+
 	node = dev_to_node(&ndev->dev);
 
 	nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node);
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 5cab2831ce99a..e52cc4eace905 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -76,6 +76,7 @@
 #define DMA_RETRIES		20
 #define SZ_4G			(1ULL << 32)
 #define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
+#define PIDX			NTB_DEF_PEER_IDX
 
 MODULE_LICENSE(DRIVER_LICENSE);
 MODULE_VERSION(DRIVER_VERSION);
@@ -766,6 +767,9 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 		return -EIO;
 	}
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
+
 	node = dev_to_node(&pdev->dev);
 
 	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index 435861189d97f..12f8b40cb11a7 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -90,6 +90,9 @@ static unsigned long db_init = 0x7;
 module_param(db_init, ulong, 0644);
 MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer");
 
+/* Only two-ports NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
 struct pp_ctx {
 	struct ntb_dev			*ntb;
 	u64				db_bits;
@@ -230,6 +233,9 @@ static int pp_probe(struct ntb_client *client,
 		}
 	}
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
 	pp = kmalloc(sizeof(*pp), GFP_KERNEL);
 	if (!pp) {
 		rc = -ENOMEM;
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index 61bf2ef87e0eb..690862d90411d 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -120,6 +120,8 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
 
 #define MAX_MWS 16
+/* Only two-ports devices are supported */
+#define PIDX	NTB_DEF_PEER_IDX
 
 static struct dentry *tool_dbgfs;
 
@@ -919,6 +921,9 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	if (ntb_spad_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
 	tc = kzalloc(sizeof(*tc), GFP_KERNEL);
 	if (!tc) {
 		rc = -ENOMEM;
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index d7ab3e1ec88fc..d23483bae6f39 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -139,6 +139,20 @@ enum ntb_width {
 	NTB_WIDTH_32 = 32,
 };
 
+/**
+ * enum ntb_default_port - NTB default port number
+ * @NTB_PORT_PRI_USD:	Default port of the NTB_TOPO_PRI/NTB_TOPO_B2B_USD
+ *			topologies
+ * @NTB_PORT_SEC_DSD:	Default port of the NTB_TOPO_SEC/NTB_TOPO_B2B_DSD
+ *			topologies
+ */
+enum ntb_default_port {
+	NTB_PORT_PRI_USD,
+	NTB_PORT_SEC_DSD
+};
+#define NTB_DEF_PEER_CNT	(1)
+#define NTB_DEF_PEER_IDX	(0)
+
 /**
  * struct ntb_client_ops - ntb client operations
  * @probe:		Notify client of a new device.
@@ -179,6 +193,10 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 
 /**
  * struct ntb_ctx_ops - ntb device operations
+ * @port_number:	See ntb_port_number().
+ * @peer_port_count:	See ntb_peer_port_count().
+ * @peer_port_number:	See ntb_peer_port_number().
+ * @peer_port_idx:	See ntb_peer_port_idx().
  * @link_is_up:		See ntb_link_is_up().
  * @link_enable:	See ntb_link_enable().
  * @link_disable:	See ntb_link_disable().
@@ -212,6 +230,11 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @peer_spad_write:	See ntb_peer_spad_write().
  */
 struct ntb_dev_ops {
+	int (*port_number)(struct ntb_dev *ntb);
+	int (*peer_port_count)(struct ntb_dev *ntb);
+	int (*peer_port_number)(struct ntb_dev *ntb, int pidx);
+	int (*peer_port_idx)(struct ntb_dev *ntb, int port);
+
 	int (*link_is_up)(struct ntb_dev *ntb,
 			  enum ntb_speed *speed, enum ntb_width *width);
 	int (*link_enable)(struct ntb_dev *ntb,
@@ -265,6 +288,9 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 {
 	/* commented callbacks are not required: */
 	return
+		!ops->peer_port_count == !ops->port_number	&&
+		!ops->peer_port_number == !ops->port_number	&&
+		!ops->peer_port_idx == !ops->port_number	&&
 		ops->link_is_up				&&
 		ops->link_enable			&&
 		ops->link_disable			&&
@@ -441,6 +467,136 @@ void ntb_link_event(struct ntb_dev *ntb);
  */
 void ntb_db_event(struct ntb_dev *ntb, int vector);
 
+/**
+ * ntb_default_port_number() - get the default local port number
+ * @ntb:	NTB device context.
+ *
+ * If hardware driver doesn't specify port_number() callback method, the NTB
+ * is considered with just two ports. So this method returns default local
+ * port number in compliance with topology.
+ *
+ * NOTE Don't call this method directly. The ntb_port_number() function should
+ * be used instead.
+ *
+ * Return: the default local port number
+ */
+int ntb_default_port_number(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_port_count() - get the default number of peer device ports
+ * @ntb:	NTB device context.
+ *
+ * By default hardware driver supports just one peer device.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_count() function
+ * should be used instead.
+ *
+ * Return: the default number of peer ports
+ */
+int ntb_default_peer_port_count(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_peer_port_number() - get the default peer port by given index
+ * @ntb:	NTB device context.
+ * @idx:	Peer port index (should not differ from zero).
+ *
+ * By default hardware driver supports just one peer device, so this method
+ * shall return the corresponding value from enum ntb_default_port.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_number() function
+ * should be used instead.
+ *
+ * Return: the peer device port or negative value indicating an error
+ */
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx);
+
+/**
+ * ntb_default_peer_port_idx() - get the default peer device port index by
+ *				 given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number (should be one of enum ntb_default_port).
+ *
+ * By default hardware driver supports just one peer device, so while
+ * specified port-argument indicates peer port from enum ntb_default_port,
+ * the return value shall be zero.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_idx() function
+ * should be used instead.
+ *
+ * Return: the peer port index or negative value indicating an error
+ */
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port);
+
+/**
+ * ntb_port_number() - get the local port number
+ * @ntb:	NTB device context.
+ *
+ * Hardware must support at least simple two-ports ntb connection
+ *
+ * Return: the local port number
+ */
+static inline int ntb_port_number(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->port_number)
+		return ntb_default_port_number(ntb);
+
+	return ntb->ops->port_number(ntb);
+}
+
+/**
+ * ntb_peer_port_count() - get the number of peer device ports
+ * @ntb:	NTB device context.
+ *
+ * Hardware may support an access to memory of several remote domains
+ * over multi-port NTB devices. This method returns the number of peers,
+ * local device can have shared memory with.
+ *
+ * Return: the number of peer ports
+ */
+static inline int ntb_peer_port_count(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->peer_port_count)
+		return ntb_default_peer_port_count(ntb);
+
+	return ntb->ops->peer_port_count(ntb);
+}
+
+/**
+ * ntb_peer_port_number() - get the peer port by given index
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * Peer ports are continuously enumerated by NTB API logic, so this method
+ * lets to retrieve port real number by its index.
+ *
+ * Return: the peer device port or negative value indicating an error
+ */
+static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	if (!ntb->ops->peer_port_number)
+		return ntb_default_peer_port_number(ntb, pidx);
+
+	return ntb->ops->peer_port_number(ntb, pidx);
+}
+
+/**
+ * ntb_peer_port_idx() - get the peer device port index by given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number.
+ *
+ * Inverse operation of ntb_peer_port_number(), so one can get port index
+ * by specified port number.
+ *
+ * Return: the peer port index or negative value indicating an error
+ */
+static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	if (!ntb->ops->peer_port_idx)
+		return ntb_default_peer_port_idx(ntb, port);
+
+	return ntb->ops->peer_port_idx(ntb, port);
+}
+
 /**
  * ntb_link_is_up() - get the current ntb link state
  * @ntb:	NTB device context.
-- 
GitLab


From 4e8c11b7fd29f70eb7af43bae908297689f2c3da Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:15 +0300
Subject: [PATCH 0776/1958] NTB: Alter link-state API to support multi-port
 devices

Multi-port devices permit the NTB connections between multiple domains,
so a local device can have NTB link being up with one peer and being
down with another. NTB link-state API is appropriately altered to return
a bitfield of the link-states between the local device and possible peers.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/amd/ntb_hw_amd.c     |  2 +-
 drivers/ntb/hw/intel/ntb_hw_intel.c |  2 +-
 include/linux/ntb.h                 | 31 +++++++++++++++--------------
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 019a158e1128d..e71ab4c1fe0ec 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -212,7 +212,7 @@ static int amd_link_is_up(struct amd_ntb_dev *ndev)
 	return 0;
 }
 
-static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+static u64 amd_ntb_link_is_up(struct ntb_dev *ntb,
 			      enum ntb_speed *speed,
 			      enum ntb_width *width)
 {
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 7b3b6fd63d7d7..6b25bf816f286 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -1171,7 +1171,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	return 0;
 }
 
-static int intel_ntb_link_is_up(struct ntb_dev *ntb,
+static u64 intel_ntb_link_is_up(struct ntb_dev *ntb,
 				enum ntb_speed *speed,
 				enum ntb_width *width)
 {
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index d23483bae6f39..b2b2924f5f432 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -235,7 +235,7 @@ struct ntb_dev_ops {
 	int (*peer_port_number)(struct ntb_dev *ntb, int pidx);
 	int (*peer_port_idx)(struct ntb_dev *ntb, int port);
 
-	int (*link_is_up)(struct ntb_dev *ntb,
+	u64 (*link_is_up)(struct ntb_dev *ntb,
 			  enum ntb_speed *speed, enum ntb_width *width);
 	int (*link_enable)(struct ntb_dev *ntb,
 			   enum ntb_speed max_speed, enum ntb_width max_width);
@@ -607,25 +607,26 @@ static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port)
  * state once after every link event.  It is safe to query the link state in
  * the context of the link event callback.
  *
- * Return: One if the link is up, zero if the link is down, otherwise a
- *		negative value indicating the error number.
+ * Return: bitfield of indexed ports link state: bit is set/cleared if the
+ *         link is up/down respectively.
  */
-static inline int ntb_link_is_up(struct ntb_dev *ntb,
+static inline u64 ntb_link_is_up(struct ntb_dev *ntb,
 				 enum ntb_speed *speed, enum ntb_width *width)
 {
 	return ntb->ops->link_is_up(ntb, speed, width);
 }
 
 /**
- * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * ntb_link_enable() - enable the local port ntb connection
  * @ntb:	NTB device context.
  * @max_speed:	The maximum link speed expressed as PCIe generation number.
  * @max_width:	The maximum link width expressed as the number of PCIe lanes.
  *
- * Enable the link on the secondary side of the ntb.  This can only be done
- * from the primary side of the ntb in primary or b2b topology.  The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
+ * Enable the NTB/PCIe link on the local or remote (for bridge-to-bridge
+ * topology) side of the bridge. If it's supported the ntb device should train
+ * the link to its maximum speed and width, or the requested speed and width,
+ * whichever is smaller. Some hardware doesn't support PCIe link training, so
+ * the last two arguments will be ignored then.
  *
  * Return: Zero on success, otherwise an error number.
  */
@@ -637,14 +638,14 @@ static inline int ntb_link_enable(struct ntb_dev *ntb,
 }
 
 /**
- * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * ntb_link_disable() - disable the local port ntb connection
  * @ntb:	NTB device context.
  *
- * Disable the link on the secondary side of the ntb.  This can only be
- * done from the primary side of the ntb in primary or b2b topology.  The ntb
- * device should disable the link.  Returning from this call must indicate that
- * a barrier has passed, though with no more writes may pass in either
- * direction across the link, except if this call returns an error number.
+ * Disable the link on the local or remote (for b2b topology) of the ntb.
+ * The ntb device should disable the link.  Returning from this call must
+ * indicate that a barrier has passed, though with no more writes may pass in
+ * either direction across the link, except if this call returns an error
+ * number.
  *
  * Return: Zero on success, otherwise an error number.
  */
-- 
GitLab


From 443b9a14ecbe811071467d54d6f2f1182835cc4d Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 11 Jan 2017 03:11:33 +0300
Subject: [PATCH 0777/1958] NTB: Alter MW API to support multi-ports devices

Multi-port NTB devices permit to share a memory between all accessible peers.
Memory Windows API is altered to correspondingly initialize and map memory
windows for such devices:
 ntb_mw_count(pidx); - number of inbound memory windows, which can be allocated
for shared buffer with specified peer device.
 ntb_mw_get_align(pidx, widx); - get alignment and size restriction parameters
to properly allocate inbound memory region.
 ntb_peer_mw_count(); - get number of outbound memory windows.
 ntb_peer_mw_get_addr(widx); - get mapping address of an outbound memory window

If hardware supports inbound translation configured on the local ntb port:
 ntb_mw_set_trans(pidx, widx); - set translation address of allocated inbound
memory window so a peer device could access it.
 ntb_mw_clear_trans(pidx, widx); - clear the translation address of an inbound
memory window.

If hardware supports outbound translation configured on the peer ntb port:
 ntb_peer_mw_set_trans(pidx, widx); - set translation address of a memory
window retrieved from a peer device
 ntb_peer_mw_clear_trans(pidx, widx); - clear the translation address of an
outbound memory window

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/amd/ntb_hw_amd.c     |  68 ++++++---
 drivers/ntb/hw/intel/ntb_hw_intel.c |  90 +++++++++---
 drivers/ntb/ntb.c                   |   2 +
 drivers/ntb/ntb_transport.c         |  21 ++-
 drivers/ntb/test/ntb_perf.c         |  17 ++-
 drivers/ntb/test/ntb_tool.c         |  43 +++---
 include/linux/ntb.h                 | 208 ++++++++++++++++++++++------
 7 files changed, 342 insertions(+), 107 deletions(-)

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index e71ab4c1fe0ec..bcefe1df9ce34 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -13,6 +14,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -79,40 +81,42 @@ static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
 	return 1 << idx;
 }
 
-static int amd_ntb_mw_count(struct ntb_dev *ntb)
+static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	return ntb_ndev(ntb)->mw_count;
 }
 
-static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				phys_addr_t *base,
-				resource_size_t *size,
-				resource_size_t *align,
-				resource_size_t *align_size)
+static int amd_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+				resource_size_t *addr_align,
+				resource_size_t *size_align,
+				resource_size_t *size_max)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	bar = ndev_mw_to_bar(ndev, idx);
 	if (bar < 0)
 		return bar;
 
-	if (base)
-		*base = pci_resource_start(ndev->ntb.pdev, bar);
-
-	if (size)
-		*size = pci_resource_len(ndev->ntb.pdev, bar);
+	if (addr_align)
+		*addr_align = SZ_4K;
 
-	if (align)
-		*align = SZ_4K;
+	if (size_align)
+		*size_align = 1;
 
-	if (align_size)
-		*align_size = 1;
+	if (size_max)
+		*size_max = pci_resource_len(ndev->ntb.pdev, bar);
 
 	return 0;
 }
 
-static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				dma_addr_t addr, resource_size_t size)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
@@ -122,6 +126,9 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base_addr, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	bar = ndev_mw_to_bar(ndev, idx);
 	if (bar < 0)
 		return bar;
@@ -284,6 +291,31 @@ static int amd_ntb_link_disable(struct ntb_dev *ntb)
 	return 0;
 }
 
+static int amd_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* The same as for inbound MWs */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+				    phys_addr_t *base, resource_size_t *size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	return 0;
+}
+
 static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
 {
 	return ntb_ndev(ntb)->db_valid_mask;
@@ -431,8 +463,10 @@ static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
 
 static const struct ntb_dev_ops amd_ntb_ops = {
 	.mw_count		= amd_ntb_mw_count,
-	.mw_get_range		= amd_ntb_mw_get_range,
+	.mw_get_align		= amd_ntb_mw_get_align,
 	.mw_set_trans		= amd_ntb_mw_set_trans,
+	.peer_mw_count		= amd_ntb_peer_mw_count,
+	.peer_mw_get_addr	= amd_ntb_peer_mw_get_addr,
 	.link_is_up		= amd_ntb_link_is_up,
 	.link_enable		= amd_ntb_link_enable,
 	.link_disable		= amd_ntb_link_disable,
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 6b25bf816f286..a9b4ed4d7b52c 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -6,6 +6,7 @@
  *
  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -15,6 +16,7 @@
  *
  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -1035,20 +1037,26 @@ static void ndev_deinit_debugfs(struct intel_ntb_dev *ndev)
 	debugfs_remove_recursive(ndev->debugfs_dir);
 }
 
-static int intel_ntb_mw_count(struct ntb_dev *ntb)
+static int intel_ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	return ntb_ndev(ntb)->mw_count;
 }
 
-static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				  phys_addr_t *base,
-				  resource_size_t *size,
-				  resource_size_t *align,
-				  resource_size_t *align_size)
+static int intel_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+				  resource_size_t *addr_align,
+				  resource_size_t *size_align,
+				  resource_size_t *size_max)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	resource_size_t bar_size, mw_size;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -1056,24 +1064,26 @@ static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
 	if (bar < 0)
 		return bar;
 
-	if (base)
-		*base = pci_resource_start(ndev->ntb.pdev, bar) +
-			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+	bar_size = pci_resource_len(ndev->ntb.pdev, bar);
 
-	if (size)
-		*size = pci_resource_len(ndev->ntb.pdev, bar) -
-			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+	if (idx == ndev->b2b_idx)
+		mw_size = bar_size - ndev->b2b_off;
+	else
+		mw_size = bar_size;
+
+	if (addr_align)
+		*addr_align = pci_resource_len(ndev->ntb.pdev, bar);
 
-	if (align)
-		*align = pci_resource_len(ndev->ntb.pdev, bar);
+	if (size_align)
+		*size_align = 1;
 
-	if (align_size)
-		*align_size = 1;
+	if (size_max)
+		*size_max = mw_size;
 
 	return 0;
 }
 
-static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				  dma_addr_t addr, resource_size_t size)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -1083,6 +1093,9 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -1249,6 +1262,36 @@ static int intel_ntb_link_disable(struct ntb_dev *ntb)
 	return 0;
 }
 
+static int intel_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* Numbers of inbound and outbound memory windows match */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int intel_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+				     phys_addr_t *base, resource_size_t *size)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+		idx += 1;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar) +
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar) -
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	return 0;
+}
+
 static int intel_ntb_db_is_unsafe(struct ntb_dev *ntb)
 {
 	return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_DB);
@@ -1902,7 +1945,7 @@ static int intel_ntb3_link_enable(struct ntb_dev *ntb,
 
 	return 0;
 }
-static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				   dma_addr_t addr, resource_size_t size)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -1912,6 +1955,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -2906,8 +2952,10 @@ static const struct intel_ntb_xlat_reg skx_sec_xlat = {
 /* operations for primary side of local ntb */
 static const struct ntb_dev_ops intel_ntb_ops = {
 	.mw_count		= intel_ntb_mw_count,
-	.mw_get_range		= intel_ntb_mw_get_range,
+	.mw_get_align		= intel_ntb_mw_get_align,
 	.mw_set_trans		= intel_ntb_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
 	.link_is_up		= intel_ntb_link_is_up,
 	.link_enable		= intel_ntb_link_enable,
 	.link_disable		= intel_ntb_link_disable,
@@ -2932,8 +2980,10 @@ static const struct ntb_dev_ops intel_ntb_ops = {
 
 static const struct ntb_dev_ops intel_ntb3_ops = {
 	.mw_count		= intel_ntb_mw_count,
-	.mw_get_range		= intel_ntb_mw_get_range,
+	.mw_get_align		= intel_ntb_mw_get_align,
 	.mw_set_trans		= intel_ntb3_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
 	.link_is_up		= intel_ntb_link_is_up,
 	.link_enable		= intel_ntb3_link_enable,
 	.link_disable		= intel_ntb_link_disable,
diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
index 1e92e52813aaa..2551bb2ff4a4c 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/ntb.c
@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -18,6 +19,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index cc6ae35109b1b..771b469cebf06 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -673,7 +673,7 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 	if (!mw->virt_addr)
 		return;
 
-	ntb_mw_clear_trans(nt->ndev, num_mw);
+	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
 	dma_free_coherent(&pdev->dev, mw->buff_size,
 			  mw->virt_addr, mw->dma_addr);
 	mw->xlat_size = 0;
@@ -730,7 +730,8 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 	}
 
 	/* Notify HW the memory location of the receive buffer */
-	rc = ntb_mw_set_trans(nt->ndev, num_mw, mw->dma_addr, mw->xlat_size);
+	rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
+			      mw->xlat_size);
 	if (rc) {
 		dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
 		ntb_free_mw(nt, num_mw);
@@ -1058,7 +1059,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	int node;
 	int rc, i;
 
-	mw_count = ntb_mw_count(ndev);
+	mw_count = ntb_mw_count(ndev, PIDX);
+
+	if (!ndev->ops->mw_set_trans) {
+		dev_err(&ndev->dev, "Inbound MW based NTB API is required\n");
+		return -EINVAL;
+	}
 
 	if (ntb_db_is_unsafe(ndev))
 		dev_dbg(&ndev->dev,
@@ -1100,8 +1106,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	for (i = 0; i < mw_count; i++) {
 		mw = &nt->mw_vec[i];
 
-		rc = ntb_mw_get_range(ndev, i, &mw->phys_addr, &mw->phys_size,
-				      &mw->xlat_align, &mw->xlat_align_size);
+		rc = ntb_mw_get_align(ndev, PIDX, i, &mw->xlat_align,
+				      &mw->xlat_align_size, NULL);
+		if (rc)
+			goto err1;
+
+		rc = ntb_peer_mw_get_addr(ndev, i, &mw->phys_addr,
+					  &mw->phys_size);
 		if (rc)
 			goto err1;
 
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index e52cc4eace905..7f89cceaf350e 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -455,7 +455,7 @@ static void perf_free_mw(struct perf_ctx *perf)
 	if (!mw->virt_addr)
 		return;
 
-	ntb_mw_clear_trans(perf->ntb, 0);
+	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
 	dma_free_coherent(&pdev->dev, mw->buf_size,
 			  mw->virt_addr, mw->dma_addr);
 	mw->xlat_size = 0;
@@ -491,7 +491,7 @@ static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
 		mw->buf_size = 0;
 	}
 
-	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
 	if (rc) {
 		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
 		perf_free_mw(perf);
@@ -562,8 +562,12 @@ static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
 
 	mw = &perf->mw;
 
-	rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
-			      &mw->xlat_align, &mw->xlat_align_size);
+	rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align,
+			      &mw->xlat_align_size, NULL);
+	if (rc)
+		return rc;
+
+	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
 	if (rc)
 		return rc;
 
@@ -767,6 +771,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 		return -EIO;
 	}
 
+	if (!ntb->ops->mw_set_trans) {
+		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
+		return -EINVAL;
+	}
+
 	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
 		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
 
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index 690862d90411d..cb692473d457f 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -119,7 +119,8 @@ MODULE_VERSION(DRIVER_VERSION);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
 
-#define MAX_MWS 16
+/* It is rare to have hadrware with greater than six MWs */
+#define MAX_MWS	6
 /* Only two-ports devices are supported */
 #define PIDX	NTB_DEF_PEER_IDX
 
@@ -670,28 +671,27 @@ static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size)
 {
 	int rc;
 	struct tool_mw *mw = &tc->mws[idx];
-	phys_addr_t base;
-	resource_size_t size, align, align_size;
+	resource_size_t size, align_addr, align_size;
 	char buf[16];
 
 	if (mw->peer)
 		return 0;
 
-	rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align,
-			      &align_size);
+	rc = ntb_mw_get_align(tc->ntb, PIDX, idx, &align_addr,
+				&align_size, &size);
 	if (rc)
 		return rc;
 
 	mw->size = min_t(resource_size_t, req_size, size);
-	mw->size = round_up(mw->size, align);
+	mw->size = round_up(mw->size, align_addr);
 	mw->size = round_up(mw->size, align_size);
 	mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size,
 				      &mw->peer_dma, GFP_KERNEL);
 
-	if (!mw->peer)
+	if (!mw->peer || !IS_ALIGNED(mw->peer_dma, align_addr))
 		return -ENOMEM;
 
-	rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size);
+	rc = ntb_mw_set_trans(tc->ntb, PIDX, idx, mw->peer_dma, mw->size);
 	if (rc)
 		goto err_free_dma;
 
@@ -718,7 +718,7 @@ static void tool_free_mw(struct tool_ctx *tc, int idx)
 	struct tool_mw *mw = &tc->mws[idx];
 
 	if (mw->peer) {
-		ntb_mw_clear_trans(tc->ntb, idx);
+		ntb_mw_clear_trans(tc->ntb, PIDX, idx);
 		dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
 				  mw->peer,
 				  mw->peer_dma);
@@ -744,8 +744,9 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 
 	phys_addr_t base;
 	resource_size_t mw_size;
-	resource_size_t align;
+	resource_size_t align_addr;
 	resource_size_t align_size;
+	resource_size_t max_size;
 
 	buf_size = min_t(size_t, size, 512);
 
@@ -753,8 +754,9 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 	if (!buf)
 		return -ENOMEM;
 
-	ntb_mw_get_range(mw->tc->ntb, mw->idx,
-			 &base, &mw_size, &align, &align_size);
+	ntb_mw_get_align(mw->tc->ntb, PIDX, mw->idx,
+			 &align_addr, &align_size, &max_size);
+	ntb_peer_mw_get_addr(mw->tc->ntb, mw->idx, &base, &mw_size);
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Peer MW %d Information:\n", mw->idx);
@@ -769,12 +771,16 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Alignment             \t%lld\n",
-			 (unsigned long long)align);
+			 (unsigned long long)align_addr);
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Size Alignment        \t%lld\n",
 			 (unsigned long long)align_size);
 
+	off += scnprintf(buf + off, buf_size - off,
+			 "Size Max              \t%lld\n",
+			 (unsigned long long)max_size);
+
 	off += scnprintf(buf + off, buf_size - off,
 			 "Ready                 \t%c\n",
 			 (mw->peer) ? 'Y' : 'N');
@@ -829,8 +835,7 @@ static int tool_init_mw(struct tool_ctx *tc, int idx)
 	phys_addr_t base;
 	int rc;
 
-	rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size,
-			      NULL, NULL);
+	rc = ntb_peer_mw_get_addr(tc->ntb, idx, &base, &mw->win_size);
 	if (rc)
 		return rc;
 
@@ -915,6 +920,12 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	int rc;
 	int i;
 
+	if (!ntb->ops->mw_set_trans) {
+		dev_dbg(&ntb->dev, "need inbound MW based NTB API\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
 	if (ntb_db_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
 
@@ -933,7 +944,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	tc->ntb = ntb;
 	init_waitqueue_head(&tc->link_wq);
 
-	tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS);
+	tc->mw_count = min(ntb_mw_count(tc->ntb, PIDX), MAX_MWS);
 	for (i = 0; i < tc->mw_count; i++) {
 		rc = tool_init_mw(tc, i);
 		if (rc)
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index b2b2924f5f432..2ea83f91a2366 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -18,6 +19,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -201,9 +203,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @link_enable:	See ntb_link_enable().
  * @link_disable:	See ntb_link_disable().
  * @mw_count:		See ntb_mw_count().
- * @mw_get_range:	See ntb_mw_get_range().
+ * @mw_get_align:	See ntb_mw_get_align().
  * @mw_set_trans:	See ntb_mw_set_trans().
  * @mw_clear_trans:	See ntb_mw_clear_trans().
+ * @peer_mw_count:	See ntb_peer_mw_count().
+ * @peer_mw_get_addr:	See ntb_peer_mw_get_addr().
+ * @peer_mw_set_trans:	See ntb_peer_mw_set_trans().
+ * @peer_mw_clear_trans:See ntb_peer_mw_clear_trans().
  * @db_is_unsafe:	See ntb_db_is_unsafe().
  * @db_valid_mask:	See ntb_db_valid_mask().
  * @db_vector_count:	See ntb_db_vector_count().
@@ -241,13 +247,20 @@ struct ntb_dev_ops {
 			   enum ntb_speed max_speed, enum ntb_width max_width);
 	int (*link_disable)(struct ntb_dev *ntb);
 
-	int (*mw_count)(struct ntb_dev *ntb);
-	int (*mw_get_range)(struct ntb_dev *ntb, int idx,
-			    phys_addr_t *base, resource_size_t *size,
-			resource_size_t *align, resource_size_t *align_size);
-	int (*mw_set_trans)(struct ntb_dev *ntb, int idx,
+	int (*mw_count)(struct ntb_dev *ntb, int pidx);
+	int (*mw_get_align)(struct ntb_dev *ntb, int pidx, int widx,
+			    resource_size_t *addr_align,
+			    resource_size_t *size_align,
+			    resource_size_t *size_max);
+	int (*mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
 			    dma_addr_t addr, resource_size_t size);
-	int (*mw_clear_trans)(struct ntb_dev *ntb, int idx);
+	int (*mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
+	int (*peer_mw_count)(struct ntb_dev *ntb);
+	int (*peer_mw_get_addr)(struct ntb_dev *ntb, int widx,
+				phys_addr_t *base, resource_size_t *size);
+	int (*peer_mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
+				 u64 addr, resource_size_t size);
+	int (*peer_mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
 
 	int (*db_is_unsafe)(struct ntb_dev *ntb);
 	u64 (*db_valid_mask)(struct ntb_dev *ntb);
@@ -295,9 +308,13 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		ops->link_enable			&&
 		ops->link_disable			&&
 		ops->mw_count				&&
-		ops->mw_get_range			&&
-		ops->mw_set_trans			&&
+		ops->mw_get_align			&&
+		(ops->mw_set_trans			||
+		 ops->peer_mw_set_trans)		&&
 		/* ops->mw_clear_trans			&& */
+		ops->peer_mw_count			&&
+		ops->peer_mw_get_addr			&&
+		/* ops->peer_mw_clear_trans		&& */
 
 		/* ops->db_is_unsafe			&& */
 		ops->db_valid_mask			&&
@@ -655,79 +672,180 @@ static inline int ntb_link_disable(struct ntb_dev *ntb)
 }
 
 /**
- * ntb_mw_count() - get the number of memory windows
+ * ntb_mw_count() - get the number of inbound memory windows, which could
+ *                  be created for a specified peer device
  * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
  *
  * Hardware and topology may support a different number of memory windows.
+ * Moreover different peer devices can support different number of memory
+ * windows. Simply speaking this method returns the number of possible inbound
+ * memory windows to share with specified peer device.
  *
  * Return: the number of memory windows.
  */
-static inline int ntb_mw_count(struct ntb_dev *ntb)
+static inline int ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
-	return ntb->ops->mw_count(ntb);
+	return ntb->ops->mw_count(ntb, pidx);
 }
 
 /**
- * ntb_mw_get_range() - get the range of a memory window
+ * ntb_mw_get_align() - get the restriction parameters of inbound memory window
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
- * @base:	OUT - the base address for mapping the memory window
- * @size:	OUT - the size for mapping the memory window
- * @align:	OUT - the base alignment for translating the memory window
- * @align_size:	OUT - the size alignment for translating the memory window
- *
- * Get the range of a memory window.  NULL may be given for any output
- * parameter if the value is not needed.  The base and size may be used for
- * mapping the memory window, to access the peer memory.  The alignment and
- * size may be used for translating the memory window, for the peer to access
- * memory on the local system.
- *
- * Return: Zero on success, otherwise an error number.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr_align:	OUT - the base alignment for translating the memory window
+ * @size_align:	OUT - the size alignment for translating the memory window
+ * @size_max:	OUT - the maximum size of the memory window
+ *
+ * Get the alignments of an inbound memory window with specified index.
+ * NULL may be given for any output parameter if the value is not needed.
+ * The alignment and size parameters may be used for allocation of proper
+ * shared memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
  */
-static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				   phys_addr_t *base, resource_size_t *size,
-		resource_size_t *align, resource_size_t *align_size)
+static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx,
+				   resource_size_t *addr_align,
+				   resource_size_t *size_align,
+				   resource_size_t *size_max)
 {
-	return ntb->ops->mw_get_range(ntb, idx, base, size,
-			align, align_size);
+	return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align,
+				      size_max);
 }
 
 /**
- * ntb_mw_set_trans() - set the translation of a memory window
+ * ntb_mw_set_trans() - set the translation of an inbound memory window
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
- * @addr:	The dma address local memory to expose to the peer.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of local memory to expose to the peer.
  * @size:	The size of the local memory to expose to the peer.
  *
  * Set the translation of a memory window.  The peer may access local memory
  * through the window starting at the address, up to the size.  The address
- * must be aligned to the alignment specified by ntb_mw_get_range().  The size
- * must be aligned to the size alignment specified by ntb_mw_get_range().
+ * and size must be aligned in compliance with restrictions of
+ * ntb_mw_get_align(). The region size should not exceed the size_max parameter
+ * of that method.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
 				   dma_addr_t addr, resource_size_t size)
 {
-	return ntb->ops->mw_set_trans(ntb, idx, addr, size);
+	if (!ntb->ops->mw_set_trans)
+		return 0;
+
+	return ntb->ops->mw_set_trans(ntb, pidx, widx, addr, size);
 }
 
 /**
- * ntb_mw_clear_trans() - clear the translation of a memory window
+ * ntb_mw_clear_trans() - clear the translation address of an inbound memory
+ *                        window
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
  *
- * Clear the translation of a memory window.  The peer may no longer access
- * local memory through the window.
+ * Clear the translation of an inbound memory window.  The peer may no longer
+ * access local memory through the window.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
+static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int pidx, int widx)
 {
 	if (!ntb->ops->mw_clear_trans)
-		return ntb->ops->mw_set_trans(ntb, idx, 0, 0);
+		return ntb_mw_set_trans(ntb, pidx, widx, 0, 0);
+
+	return ntb->ops->mw_clear_trans(ntb, pidx, widx);
+}
+
+/**
+ * ntb_peer_mw_count() - get the number of outbound memory windows, which could
+ *                       be mapped to access a shared memory
+ * @ntb:	NTB device context.
+ *
+ * Hardware and topology may support a different number of memory windows.
+ * This method returns the number of outbound memory windows supported by
+ * local device.
+ *
+ * Return: the number of memory windows.
+ */
+static inline int ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	return ntb->ops->peer_mw_count(ntb);
+}
+
+/**
+ * ntb_peer_mw_get_addr() - get map address of an outbound memory window
+ * @ntb:	NTB device context.
+ * @widx:	Memory window index (within ntb_peer_mw_count() return value).
+ * @base:	OUT - the base address of mapping region.
+ * @size:	OUT - the size of mapping region.
+ *
+ * Get base and size of memory region to map.  NULL may be given for any output
+ * parameter if the value is not needed.  The base and size may be used for
+ * mapping the memory window, to access the peer memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx,
+				      phys_addr_t *base, resource_size_t *size)
+{
+	return ntb->ops->peer_mw_get_addr(ntb, widx, base, size);
+}
+
+/**
+ * ntb_peer_mw_set_trans() - set a translation address of a memory window
+ *                           retrieved from a peer device
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device the translation address received from.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of the shared memory to access.
+ * @size:	The size of the shared memory to access.
+ *
+ * Set the translation of an outbound memory window.  The local device may
+ * access shared memory allocated by a peer device sent the address.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface, so a translation address can be only set on the side,
+ * where shared memory (inbound memory windows) is allocated.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+					u64 addr, resource_size_t size)
+{
+	if (!ntb->ops->peer_mw_set_trans)
+		return 0;
+
+	return ntb->ops->peer_mw_set_trans(ntb, pidx, widx, addr, size);
+}
+
+/**
+ * ntb_peer_mw_clear_trans() - clear the translation address of an outbound
+ *                             memory window
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ *
+ * Clear the translation of a outbound memory window.  The local device may no
+ * longer access a shared memory through the window.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx,
+					  int widx)
+{
+	if (!ntb->ops->peer_mw_clear_trans)
+		return ntb_peer_mw_set_trans(ntb, pidx, widx, 0, 0);
 
-	return ntb->ops->mw_clear_trans(ntb, idx);
+	return ntb->ops->peer_mw_clear_trans(ntb, pidx, widx);
 }
 
 /**
-- 
GitLab


From d67288a39584daad11edee9b03d53264ba147453 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 11 Jan 2017 03:13:20 +0300
Subject: [PATCH 0778/1958] NTB: Alter Scratchpads API to support multi-ports
 devices

Even though there is no any real NTB hardware, which would have both more
than two ports and Scratchpad registers, it is logically correct to have
Scratchpad API accepting a peer port index as well. Intel/AMD drivers utilize
Primary and Secondary topology to split Scratchpad between connected root
devices. Since port-index API introduced, Intel/AMD NTB hardware drivers can
use device port to determine which Scratchpad registers actually belong to
local and peer devices. The same approach can be used if some potential
hardware in future will be multi-port and have some set of Scratchpads.
Here are the brief of changes in the API:
 ntb_spad_count() - return number of Scratchpads per each port
 ntb_peer_spad_addr(pidx, sidx) - address of Scratchpad register of the
peer device with pidx-index
 ntb_peer_spad_read(pidx, sidx) - read specified Scratchpad register of the
peer with pidx-index
 ntb_peer_spad_write(pidx, sidx) - write data to Scratchpad register of the
peer with pidx-index

Since there is hardware which doesn't support Scratchpad registers, the
corresponding API methods are now made optional.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/amd/ntb_hw_amd.c     | 14 +++---
 drivers/ntb/hw/intel/ntb_hw_intel.c | 14 +++---
 drivers/ntb/ntb_transport.c         | 15 +++---
 drivers/ntb/test/ntb_perf.c         |  6 +--
 drivers/ntb/test/ntb_pingpong.c     |  8 +++-
 drivers/ntb/test/ntb_tool.c         | 21 +++++++--
 include/linux/ntb.h                 | 73 ++++++++++++++++++-----------
 7 files changed, 94 insertions(+), 57 deletions(-)

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index bcefe1df9ce34..891beb9f26c2b 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -432,30 +432,30 @@ static int amd_ntb_spad_write(struct ntb_dev *ntb,
 	return 0;
 }
 
-static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	void __iomem *mmio = ndev->self_mmio;
 	u32 offset;
 
-	if (idx < 0 || idx >= ndev->spad_count)
+	if (sidx < 0 || sidx >= ndev->spad_count)
 		return -EINVAL;
 
-	offset = ndev->peer_spad + (idx << 2);
+	offset = ndev->peer_spad + (sidx << 2);
 	return readl(mmio + AMD_SPAD_OFFSET + offset);
 }
 
-static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
-				   int idx, u32 val)
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+				   int sidx, u32 val)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	void __iomem *mmio = ndev->self_mmio;
 	u32 offset;
 
-	if (idx < 0 || idx >= ndev->spad_count)
+	if (sidx < 0 || sidx >= ndev->spad_count)
 		return -EINVAL;
 
-	offset = ndev->peer_spad + (idx << 2);
+	offset = ndev->peer_spad + (sidx << 2);
 	writel(val, mmio + AMD_SPAD_OFFSET + offset);
 
 	return 0;
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index a9b4ed4d7b52c..e00aa39a45444 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -1409,30 +1409,30 @@ static int intel_ntb_spad_write(struct ntb_dev *ntb,
 			       ndev->self_reg->spad);
 }
 
-static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
 				    phys_addr_t *spad_addr)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_addr(ndev, idx, spad_addr, ndev->peer_addr,
+	return ndev_spad_addr(ndev, sidx, spad_addr, ndev->peer_addr,
 			      ndev->peer_reg->spad);
 }
 
-static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_read(ndev, idx,
+	return ndev_spad_read(ndev, sidx,
 			      ndev->peer_mmio +
 			      ndev->peer_reg->spad);
 }
 
-static int intel_ntb_peer_spad_write(struct ntb_dev *ntb,
-				     int idx, u32 val)
+static int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+				     int sidx, u32 val)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_write(ndev, idx, val,
+	return ndev_spad_write(ndev, sidx, val,
 			       ndev->peer_mmio +
 			       ndev->peer_reg->spad);
 }
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 771b469cebf06..9a03c5871efe6 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -862,17 +862,17 @@ static void ntb_transport_link_work(struct work_struct *work)
 			size = max_mw_size;
 
 		spad = MW0_SZ_HIGH + (i * 2);
-		ntb_peer_spad_write(ndev, spad, upper_32_bits(size));
+		ntb_peer_spad_write(ndev, PIDX, spad, upper_32_bits(size));
 
 		spad = MW0_SZ_LOW + (i * 2);
-		ntb_peer_spad_write(ndev, spad, lower_32_bits(size));
+		ntb_peer_spad_write(ndev, PIDX, spad, lower_32_bits(size));
 	}
 
-	ntb_peer_spad_write(ndev, NUM_MWS, nt->mw_count);
+	ntb_peer_spad_write(ndev, PIDX, NUM_MWS, nt->mw_count);
 
-	ntb_peer_spad_write(ndev, NUM_QPS, nt->qp_count);
+	ntb_peer_spad_write(ndev, PIDX, NUM_QPS, nt->qp_count);
 
-	ntb_peer_spad_write(ndev, VERSION, NTB_TRANSPORT_VERSION);
+	ntb_peer_spad_write(ndev, PIDX, VERSION, NTB_TRANSPORT_VERSION);
 
 	/* Query the remote side for its info */
 	val = ntb_spad_read(ndev, VERSION);
@@ -948,7 +948,7 @@ static void ntb_qp_link_work(struct work_struct *work)
 
 	val = ntb_spad_read(nt->ndev, QP_LINKS);
 
-	ntb_peer_spad_write(nt->ndev, QP_LINKS, val | BIT(qp->qp_num));
+	ntb_peer_spad_write(nt->ndev, PIDX, QP_LINKS, val | BIT(qp->qp_num));
 
 	/* query remote spad for qp ready bits */
 	dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val);
@@ -2108,8 +2108,7 @@ void ntb_transport_link_down(struct ntb_transport_qp *qp)
 
 	val = ntb_spad_read(qp->ndev, QP_LINKS);
 
-	ntb_peer_spad_write(qp->ndev, QP_LINKS,
-			    val & ~BIT(qp->qp_num));
+	ntb_peer_spad_write(qp->ndev, PIDX, QP_LINKS, val & ~BIT(qp->qp_num));
 
 	if (qp->link_is_up)
 		ntb_send_link_down(qp);
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 7f89cceaf350e..42756a98a728a 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -518,9 +518,9 @@ static void perf_link_work(struct work_struct *work)
 	if (max_mw_size && size > max_mw_size)
 		size = max_mw_size;
 
-	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
-	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
-	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);
 
 	/* now read what peer wrote */
 	val = ntb_spad_read(ndev, VERSION);
diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index 12f8b40cb11a7..938a18bcfc3f8 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c
@@ -138,7 +138,7 @@ static void pp_ping(unsigned long ctx)
 			"Ping bits %#llx read %#x write %#x\n",
 			db_bits, spad_rd, spad_wr);
 
-		ntb_peer_spad_write(pp->ntb, 0, spad_wr);
+		ntb_peer_spad_write(pp->ntb, PIDX, 0, spad_wr);
 		ntb_peer_db_set(pp->ntb, db_bits);
 		ntb_db_clear_mask(pp->ntb, db_mask);
 
@@ -225,6 +225,12 @@ static int pp_probe(struct ntb_client *client,
 		}
 	}
 
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_pp;
+	}
+
 	if (ntb_spad_is_unsafe(ntb)) {
 		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
 		if (!unsafe) {
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index cb692473d457f..f002bf48a08db 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -462,13 +462,22 @@ static TOOL_FOPS_RDWR(tool_spad_fops,
 		      tool_spad_read,
 		      tool_spad_write);
 
+static u32 ntb_tool_peer_spad_read(struct ntb_dev *ntb, int sidx)
+{
+	return ntb_peer_spad_read(ntb, PIDX, sidx);
+}
+
 static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf,
 				   size_t size, loff_t *offp)
 {
 	struct tool_ctx *tc = filep->private_data;
 
-	return tool_spadfn_read(tc, ubuf, size, offp,
-				tc->ntb->ops->peer_spad_read);
+	return tool_spadfn_read(tc, ubuf, size, offp, ntb_tool_peer_spad_read);
+}
+
+static int ntb_tool_peer_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
+{
+	return ntb_peer_spad_write(ntb, PIDX, sidx, val);
 }
 
 static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
@@ -477,7 +486,7 @@ static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
 	struct tool_ctx *tc = filep->private_data;
 
 	return tool_spadfn_write(tc, ubuf, size, offp,
-				 tc->ntb->ops->peer_spad_write);
+				 ntb_tool_peer_spad_write);
 }
 
 static TOOL_FOPS_RDWR(tool_peer_spad_fops,
@@ -926,6 +935,12 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 		goto err_tc;
 	}
 
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
 	if (ntb_db_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
 
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 2ea83f91a2366..4e3cd56af7321 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -288,13 +288,14 @@ struct ntb_dev_ops {
 	int (*spad_is_unsafe)(struct ntb_dev *ntb);
 	int (*spad_count)(struct ntb_dev *ntb);
 
-	u32 (*spad_read)(struct ntb_dev *ntb, int idx);
-	int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+	u32 (*spad_read)(struct ntb_dev *ntb, int sidx);
+	int (*spad_write)(struct ntb_dev *ntb, int sidx, u32 val);
 
-	int (*peer_spad_addr)(struct ntb_dev *ntb, int idx,
+	int (*peer_spad_addr)(struct ntb_dev *ntb, int pidx, int sidx,
 			      phys_addr_t *spad_addr);
-	u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx);
-	int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+	u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx);
+	int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx,
+			       u32 val);
 };
 
 static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
@@ -335,13 +336,12 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* ops->peer_db_read_mask		&& */
 		/* ops->peer_db_set_mask		&& */
 		/* ops->peer_db_clear_mask		&& */
-		/* ops->spad_is_unsafe			&& */
-		ops->spad_count				&&
-		ops->spad_read				&&
-		ops->spad_write				&&
-		/* ops->peer_spad_addr			&& */
-		/* ops->peer_spad_read			&& */
-		ops->peer_spad_write			&&
+		/* !ops->spad_is_unsafe == !ops->spad_count	&& */
+		!ops->spad_read == !ops->spad_count		&&
+		!ops->spad_write == !ops->spad_count		&&
+		/* !ops->peer_spad_addr == !ops->spad_count	&& */
+		/* !ops->peer_spad_read == !ops->spad_count	&& */
+		!ops->peer_spad_write == !ops->spad_count	&&
 		1;
 }
 
@@ -1176,47 +1176,58 @@ static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb)
  * @ntb:	NTB device context.
  *
  * Hardware and topology may support a different number of scratchpads.
+ * Although it must be the same for all ports per NTB device.
  *
  * Return: the number of scratchpads.
  */
 static inline int ntb_spad_count(struct ntb_dev *ntb)
 {
+	if (!ntb->ops->spad_count)
+		return 0;
+
 	return ntb->ops->spad_count(ntb);
 }
 
 /**
  * ntb_spad_read() - read the local scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @sidx:	Scratchpad index.
  *
  * Read the local scratchpad register, and return the value.
  *
  * Return: The value of the local scratchpad register.
  */
-static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_spad_read(struct ntb_dev *ntb, int sidx)
 {
-	return ntb->ops->spad_read(ntb, idx);
+	if (!ntb->ops->spad_read)
+		return ~(u32)0;
+
+	return ntb->ops->spad_read(ntb, sidx);
 }
 
 /**
  * ntb_spad_write() - write the local scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @sidx:	Scratchpad index.
  * @val:	Scratchpad value.
  *
  * Write the value to the local scratchpad register.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
 {
-	return ntb->ops->spad_write(ntb, idx, val);
+	if (!ntb->ops->spad_write)
+		return -EINVAL;
+
+	return ntb->ops->spad_write(ntb, sidx, val);
 }
 
 /**
  * ntb_peer_spad_addr() - address of the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  * @spad_addr:	OUT - The address of the peer scratchpad register.
  *
  * Return the address of the peer doorbell register.  This may be used, for
@@ -1224,45 +1235,51 @@ static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
 				     phys_addr_t *spad_addr)
 {
 	if (!ntb->ops->peer_spad_addr)
 		return -EINVAL;
 
-	return ntb->ops->peer_spad_addr(ntb, idx, spad_addr);
+	return ntb->ops->peer_spad_addr(ntb, pidx, sidx, spad_addr);
 }
 
 /**
  * ntb_peer_spad_read() - read the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  *
  * Read the peer scratchpad register, and return the value.
  *
  * Return: The value of the local scratchpad register.
  */
-static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	if (!ntb->ops->peer_spad_read)
-		return 0;
+		return ~(u32)0;
 
-	return ntb->ops->peer_spad_read(ntb, idx);
+	return ntb->ops->peer_spad_read(ntb, pidx, sidx);
 }
 
 /**
  * ntb_peer_spad_write() - write the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  * @val:	Scratchpad value.
  *
  * Write the value to the peer scratchpad register.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
+				      u32 val)
 {
-	return ntb->ops->peer_spad_write(ntb, idx, val);
+	if (!ntb->ops->peer_spad_write)
+		return -EINVAL;
+
+	return ntb->ops->peer_spad_write(ntb, pidx, sidx, val);
 }
 
 #endif
-- 
GitLab


From bc3e49adc279c5505d6df8dd8c7fca45d6d3d21a Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 20 Dec 2016 12:48:20 +0300
Subject: [PATCH 0779/1958] NTB: Add Messaging NTB API

Some IDT NTB-capable PCIe-switches have message registers to communicate with
peer devices. This patch adds new NTB API callback methods, which can be used
to utilize these registers functionality:
 ntb_msg_count(); - get number of message registers
 ntb_msg_inbits(); - get bitfield of inbound message registers status
 ntb_msg_outbits(); - get bitfield of outbound message registers status
 ntb_msg_read_sts(); - read the inbound and outbound message registers status
 ntb_msg_clear_sts(); - clear status bits of message registers
 ntb_msg_set_mask(); - mask interrupts raised by status bits of message
registers.
 ntb_msg_clear_mask(); - clear interrupts mask bits of message registers
 ntb_msg_read(midx, *pidx); - read message register with specified index,
additionally getting peer port index which data received from
 ntb_msg_write(midx, pidx); - write data to the specified message register
sending it to the passed peer device connected over a pidx port
 ntb_msg_event(); - notify driver context of a new message event

Of course there is hardware which doesn't support Message registers, so
this API is made optional.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/ntb.c   |  13 +++
 include/linux/ntb.h | 205 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 218 insertions(+)

diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
index 2551bb2ff4a4c..03b80d89b9800 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/ntb.c
@@ -193,6 +193,19 @@ void ntb_db_event(struct ntb_dev *ntb, int vector)
 }
 EXPORT_SYMBOL(ntb_db_event);
 
+void ntb_msg_event(struct ntb_dev *ntb)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		if (ntb->ctx_ops && ntb->ctx_ops->msg_event)
+			ntb->ctx_ops->msg_event(ntb->ctx);
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+}
+EXPORT_SYMBOL(ntb_msg_event);
+
 int ntb_default_port_number(struct ntb_dev *ntb)
 {
 	switch (ntb->topo) {
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 4e3cd56af7321..d59688f916187 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -178,10 +178,12 @@ static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops)
  * struct ntb_ctx_ops - ntb driver context operations
  * @link_event:		See ntb_link_event().
  * @db_event:		See ntb_db_event().
+ * @msg_event:		See ntb_msg_event().
  */
 struct ntb_ctx_ops {
 	void (*link_event)(void *ctx);
 	void (*db_event)(void *ctx, int db_vector);
+	void (*msg_event)(void *ctx);
 };
 
 static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
@@ -190,6 +192,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 	return
 		/* ops->link_event		&& */
 		/* ops->db_event		&& */
+		/* ops->msg_event		&& */
 		1;
 }
 
@@ -234,6 +237,15 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @peer_spad_addr:	See ntb_peer_spad_addr().
  * @peer_spad_read:	See ntb_peer_spad_read().
  * @peer_spad_write:	See ntb_peer_spad_write().
+ * @msg_count:		See ntb_msg_count().
+ * @msg_inbits:		See ntb_msg_inbits().
+ * @msg_outbits:	See ntb_msg_outbits().
+ * @msg_read_sts:	See ntb_msg_read_sts().
+ * @msg_clear_sts:	See ntb_msg_clear_sts().
+ * @msg_set_mask:	See ntb_msg_set_mask().
+ * @msg_clear_mask:	See ntb_msg_clear_mask().
+ * @msg_read:		See ntb_msg_read().
+ * @msg_write:		See ntb_msg_write().
  */
 struct ntb_dev_ops {
 	int (*port_number)(struct ntb_dev *ntb);
@@ -296,6 +308,16 @@ struct ntb_dev_ops {
 	u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx);
 	int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx,
 			       u32 val);
+
+	int (*msg_count)(struct ntb_dev *ntb);
+	u64 (*msg_inbits)(struct ntb_dev *ntb);
+	u64 (*msg_outbits)(struct ntb_dev *ntb);
+	u64 (*msg_read_sts)(struct ntb_dev *ntb);
+	int (*msg_clear_sts)(struct ntb_dev *ntb, u64 sts_bits);
+	int (*msg_set_mask)(struct ntb_dev *ntb, u64 mask_bits);
+	int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits);
+	int (*msg_read)(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg);
+	int (*msg_write)(struct ntb_dev *ntb, int midx, int pidx, u32 msg);
 };
 
 static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
@@ -342,6 +364,15 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* !ops->peer_spad_addr == !ops->spad_count	&& */
 		/* !ops->peer_spad_read == !ops->spad_count	&& */
 		!ops->peer_spad_write == !ops->spad_count	&&
+
+		!ops->msg_inbits == !ops->msg_count		&&
+		!ops->msg_outbits == !ops->msg_count		&&
+		!ops->msg_read_sts == !ops->msg_count		&&
+		!ops->msg_clear_sts == !ops->msg_count		&&
+		/* !ops->msg_set_mask == !ops->msg_count	&& */
+		/* !ops->msg_clear_mask == !ops->msg_count	&& */
+		!ops->msg_read == !ops->msg_count		&&
+		!ops->msg_write == !ops->msg_count		&&
 		1;
 }
 
@@ -484,6 +515,18 @@ void ntb_link_event(struct ntb_dev *ntb);
  */
 void ntb_db_event(struct ntb_dev *ntb, int vector);
 
+/**
+ * ntb_msg_event() - notify driver context of a message event
+ * @ntb:	NTB device context.
+ *
+ * Notify the driver context of a message event.  If hardware supports
+ * message registers, this event indicates, that a new message arrived in
+ * some incoming message register or last sent message couldn't be delivered.
+ * The events can be masked/unmasked by the methods ntb_msg_set_mask() and
+ * ntb_msg_clear_mask().
+ */
+void ntb_msg_event(struct ntb_dev *ntb);
+
 /**
  * ntb_default_port_number() - get the default local port number
  * @ntb:	NTB device context.
@@ -1282,4 +1325,166 @@ static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
 	return ntb->ops->peer_spad_write(ntb, pidx, sidx, val);
 }
 
+/**
+ * ntb_msg_count() - get the number of message registers
+ * @ntb:	NTB device context.
+ *
+ * Hardware may support a different number of message registers.
+ *
+ * Return: the number of message registers.
+ */
+static inline int ntb_msg_count(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_count)
+		return 0;
+
+	return ntb->ops->msg_count(ntb);
+}
+
+/**
+ * ntb_msg_inbits() - get a bitfield of inbound message registers status
+ * @ntb:	NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which related
+ * to inbound message registers.
+ *
+ * Return: bitfield of inbound message registers.
+ */
+static inline u64 ntb_msg_inbits(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_inbits)
+		return 0;
+
+	return ntb->ops->msg_inbits(ntb);
+}
+
+/**
+ * ntb_msg_outbits() - get a bitfield of outbound message registers status
+ * @ntb:	NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which related
+ * to outbound message registers.
+ *
+ * Return: bitfield of outbound message registers.
+ */
+static inline u64 ntb_msg_outbits(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_outbits)
+		return 0;
+
+	return ntb->ops->msg_outbits(ntb);
+}
+
+/**
+ * ntb_msg_read_sts() - read the message registers status
+ * @ntb:	NTB device context.
+ *
+ * Read the status of message register. Inbound and outbound message registers
+ * related bits can be filtered by masks retrieved from ntb_msg_inbits() and
+ * ntb_msg_outbits().
+ *
+ * Return: status bits of message registers
+ */
+static inline u64 ntb_msg_read_sts(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_read_sts)
+		return 0;
+
+	return ntb->ops->msg_read_sts(ntb);
+}
+
+/**
+ * ntb_msg_clear_sts() - clear status bits of message registers
+ * @ntb:	NTB device context.
+ * @sts_bits:	Status bits to clear.
+ *
+ * Clear bits in the status register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits)
+{
+	if (!ntb->ops->msg_clear_sts)
+		return -EINVAL;
+
+	return ntb->ops->msg_clear_sts(ntb, sts_bits);
+}
+
+/**
+ * ntb_msg_set_mask() - set mask of message register status bits
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits.
+ *
+ * Mask the message registers status bits from raising the message event.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	if (!ntb->ops->msg_set_mask)
+		return -EINVAL;
+
+	return ntb->ops->msg_set_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_clear_mask() - clear message registers mask
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits to clear.
+ *
+ * Clear bits in the message events mask register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	if (!ntb->ops->msg_clear_mask)
+		return -EINVAL;
+
+	return ntb->ops->msg_clear_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_read() - read message register with specified index
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	OUT - Port index of peer device a message retrieved from
+ * @msg:	OUT - Data
+ *
+ * Read data from the specified message register. Source port index of a
+ * message is retrieved as well.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx,
+			       u32 *msg)
+{
+	if (!ntb->ops->msg_read)
+		return -EINVAL;
+
+	return ntb->ops->msg_read(ntb, midx, pidx, msg);
+}
+
+/**
+ * ntb_msg_write() - write data to the specified message register
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	Port index of peer device a message being sent to
+ * @msg:	Data to send
+ *
+ * Send data to a specified peer device using the defined message register.
+ * Message event can be raised if the midx registers isn't empty while
+ * calling this method and the corresponding interrupt isn't masked.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx,
+				u32 msg)
+{
+	if (!ntb->ops->msg_write)
+		return -EINVAL;
+
+	return ntb->ops->msg_write(ntb, midx, pidx, msg);
+}
+
 #endif
-- 
GitLab


From cdcca896aee19e338adf3000512cade4befa5c69 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:19 +0300
Subject: [PATCH 0780/1958] NTB: Add new Memory Windows API documentation

Since the new API slightly changes the way a typical NTB client driver
works, the documentation file needs to be appropriately updated.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 Documentation/ntb.txt | 99 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 91 insertions(+), 8 deletions(-)

diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt
index 1d9bbabb6c79a..a5af4f0159f34 100644
--- a/Documentation/ntb.txt
+++ b/Documentation/ntb.txt
@@ -1,14 +1,16 @@
 # NTB Drivers
 
 NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects
-the separate memory systems of two computers to the same PCI-Express fabric.
-Existing NTB hardware supports a common feature set, including scratchpad
-registers, doorbell registers, and memory translation windows.  Scratchpad
-registers are read-and-writable registers that are accessible from either side
-of the device, so that peers can exchange a small amount of information at a
-fixed address.  Doorbell registers provide a way for peers to send interrupt
-events.  Memory windows allow translated read and write access to the peer
-memory.
+the separate memory systems of two or more computers to the same PCI-Express
+fabric. Existing NTB hardware supports a common feature set: doorbell
+registers and memory translation windows, as well as non common features like
+scratchpad and message registers. Scratchpad registers are read-and-writable
+registers that are accessible from either side of the device, so that peers can
+exchange a small amount of information at a fixed address. Message registers can
+be utilized for the same purpose. Additionally they are provided with with
+special status bits to make sure the information isn't rewritten by another
+peer. Doorbell registers provide a way for peers to send interrupt events.
+Memory windows allow translated read and write access to the peer memory.
 
 ## NTB Core Driver (ntb)
 
@@ -26,6 +28,87 @@ as ntb hardware, or hardware drivers, are inserted and removed.  The
 registration uses the Linux Device framework, so it should feel familiar to
 anyone who has written a pci driver.
 
+### NTB Typical client driver implementation
+
+Primary purpose of NTB is to share some peace of memory between at least two
+systems. So the NTB device features like Scratchpad/Message registers are
+mainly used to perform the proper memory window initialization. Typically
+there are two types of memory window interfaces supported by the NTB API:
+inbound translation configured on the local ntb port and outbound translation
+configured by the peer, on the peer ntb port. The first type is
+depicted on the next figure
+
+Inbound translation:
+ Memory:              Local NTB Port:      Peer NTB Port:      Peer MMIO:
+  ____________
+ | dma-mapped |-ntb_mw_set_trans(addr)  |
+ | memory     |        _v____________   |   ______________
+ | (addr)     |<======| MW xlat addr |<====| MW base addr |<== memory-mapped IO
+ |------------|       |--------------|  |  |--------------|
+
+So typical scenario of the first type memory window initialization looks:
+1) allocate a memory region, 2) put translated address to NTB config,
+3) somehow notify a peer device of performed initialization, 4) peer device
+maps corresponding outbound memory window so to have access to the shared
+memory region.
+
+The second type of interface, that implies the shared windows being
+initialized by a peer device, is depicted on the figure:
+
+Outbound translation:
+ Memory:        Local NTB Port:    Peer NTB Port:      Peer MMIO:
+  ____________                      ______________
+ | dma-mapped |                |   | MW base addr |<== memory-mapped IO
+ | memory     |                |   |--------------|
+ | (addr)     |<===================| MW xlat addr |<-ntb_peer_mw_set_trans(addr)
+ |------------|                |   |--------------|
+
+Typical scenario of the second type interface initialization would be:
+1) allocate a memory region, 2) somehow deliver a translated address to a peer
+device, 3) peer puts the translated address to NTB config, 4) peer device maps
+outbound memory window so to have access to the shared memory region.
+
+As one can see the described scenarios can be combined in one portable
+algorithm.
+ Local device:
+  1) Allocate memory for a shared window
+  2) Initialize memory window by translated address of the allocated region
+     (it may fail if local memory window initialization is unsupported)
+  3) Send the translated address and memory window index to a peer device
+ Peer device:
+  1) Initialize memory window with retrieved address of the allocated
+     by another device memory region (it may fail if peer memory window
+     initialization is unsupported)
+  2) Map outbound memory window
+
+In accordance with this scenario, the NTB Memory Window API can be used as
+follows:
+ Local device:
+  1) ntb_mw_count(pidx) - retrieve number of memory ranges, which can
+     be allocated for memory windows between local device and peer device
+     of port with specified index.
+  2) ntb_get_align(pidx, midx) - retrieve parameters restricting the
+     shared memory region alignment and size. Then memory can be properly
+     allocated.
+  3) Allocate physically contiguous memory region in compliance with
+     restrictions retrieved in 2).
+  4) ntb_mw_set_trans(pidx, midx) - try to set translation address of
+     the memory window with specified index for the defined peer device
+     (it may fail if local translated address setting is not supported)
+  5) Send translated base address (usually together with memory window
+     number) to the peer device using, for instance, scratchpad or message
+     registers.
+ Peer device:
+  1) ntb_peer_mw_set_trans(pidx, midx) - try to set received from other
+     device (related to pidx) translated address for specified memory
+     window. It may fail if retrieved address, for instance, exceeds
+     maximum possible address or isn't properly aligned.
+  2) ntb_peer_mw_get_addr(widx) - retrieve MMIO address to map the memory
+     window so to have an access to the shared memory.
+
+Also it is worth to note, that method ntb_mw_count(pidx) should return the
+same value as ntb_peer_mw_count() on the peer with port index - pidx.
+
 ### NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev)
 
 The primary client for NTB is the Transport client, used in tandem with NTB
-- 
GitLab


From 85dce3aaae98a8440f4a1a2404bcbab890574b46 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 14 Dec 2016 02:49:20 +0300
Subject: [PATCH 0781/1958] NTB: Add PCIe Gen4 link speed

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index d59688f916187..a6f5697278457 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -108,6 +108,7 @@ static inline char *ntb_topo_string(enum ntb_topo topo)
  * @NTB_SPEED_GEN1:	Link is trained to gen1 speed.
  * @NTB_SPEED_GEN2:	Link is trained to gen2 speed.
  * @NTB_SPEED_GEN3:	Link is trained to gen3 speed.
+ * @NTB_SPEED_GEN4:	Link is trained to gen4 speed.
  */
 enum ntb_speed {
 	NTB_SPEED_AUTO = -1,
@@ -115,6 +116,7 @@ enum ntb_speed {
 	NTB_SPEED_GEN1 = 1,
 	NTB_SPEED_GEN2 = 2,
 	NTB_SPEED_GEN3 = 3,
+	NTB_SPEED_GEN4 = 4
 };
 
 /**
-- 
GitLab


From 3c69f5d6731c43a5b6b9e78b385948e8d76460be Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 20 Dec 2016 12:50:09 +0300
Subject: [PATCH 0782/1958] NTB: Add ntb.h comments

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index a6f5697278457..609e232c00da8 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -326,12 +326,17 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 {
 	/* commented callbacks are not required: */
 	return
+		/* Port operations are required for multiport devices */
 		!ops->peer_port_count == !ops->port_number	&&
 		!ops->peer_port_number == !ops->port_number	&&
 		!ops->peer_port_idx == !ops->port_number	&&
+
+		/* Link operations are required */
 		ops->link_is_up				&&
 		ops->link_enable			&&
 		ops->link_disable			&&
+
+		/* One or both MW interfaces should be developed */
 		ops->mw_count				&&
 		ops->mw_get_align			&&
 		(ops->mw_set_trans			||
@@ -341,12 +346,11 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		ops->peer_mw_get_addr			&&
 		/* ops->peer_mw_clear_trans		&& */
 
+		/* Doorbell operations are mostly required */
 		/* ops->db_is_unsafe			&& */
 		ops->db_valid_mask			&&
-
 		/* both set, or both unset */
-		(!ops->db_vector_count == !ops->db_vector_mask) &&
-
+		(!ops->db_vector_count == !ops->db_vector_mask)	&&
 		ops->db_read				&&
 		/* ops->db_set				&& */
 		ops->db_clear				&&
@@ -360,6 +364,8 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* ops->peer_db_read_mask		&& */
 		/* ops->peer_db_set_mask		&& */
 		/* ops->peer_db_clear_mask		&& */
+
+		/* Scrachpads interface is optional */
 		/* !ops->spad_is_unsafe == !ops->spad_count	&& */
 		!ops->spad_read == !ops->spad_count		&&
 		!ops->spad_write == !ops->spad_count		&&
@@ -367,6 +373,7 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* !ops->peer_spad_read == !ops->spad_count	&& */
 		!ops->peer_spad_write == !ops->spad_count	&&
 
+		/* Messaging interface is optional */
 		!ops->msg_inbits == !ops->msg_count		&&
 		!ops->msg_outbits == !ops->msg_count		&&
 		!ops->msg_read_sts == !ops->msg_count		&&
@@ -387,13 +394,12 @@ struct ntb_client {
 	struct device_driver		drv;
 	const struct ntb_client_ops	ops;
 };
-
 #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv)
 
 /**
  * struct ntb_device - ntb device
  * @dev:		Linux device object.
- * @pdev:		Pci device entry of the ntb.
+ * @pdev:		PCI device entry of the ntb.
  * @topo:		Detected topology of the ntb.
  * @ops:		See &ntb_dev_ops.
  * @ctx:		See &ntb_ctx_ops.
@@ -414,7 +420,6 @@ struct ntb_dev {
 	/* block unregister until device is fully released */
 	struct completion		released;
 };
-
 #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
 
 /**
@@ -511,7 +516,7 @@ void ntb_link_event(struct ntb_dev *ntb);
  * multiple interrupt vectors for doorbells, the vector number indicates which
  * vector received the interrupt.  The vector number is relative to the first
  * vector used for doorbells, starting at zero, and must be less than
- ** ntb_db_vector_count().  The driver may call ntb_db_read() to check which
+ * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
  * doorbell bits need service, and ntb_db_vector_mask() to determine which of
  * those bits are associated with the vector number.
  */
-- 
GitLab


From 0f9bfb979a5fae2936afa128c04f29ab5e07a9ad Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Tue, 10 Jan 2017 17:33:36 -0700
Subject: [PATCH 0783/1958] ntb_hw_amd: Style fixes: open code macros that just
 obfuscate code

As per a comments in [1] by Greg Kroah-Hartman, the ndev_* macros should
be cleaned up. This makes it more clear what's actually going on when
reading the code.

[1] http://www.spinics.net/lists/linux-pci/msg56904.html

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/amd/ntb_hw_amd.c | 55 +++++++++++++++++----------------
 drivers/ntb/hw/amd/ntb_hw_amd.h |  3 --
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 891beb9f26c2b..f0788aae05c9c 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -133,7 +133,7 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 	if (bar < 0)
 		return bar;
 
-	mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+	mw_size = pci_resource_len(ntb->pdev, bar);
 
 	/* make sure the range fits in the usable mw size */
 	if (size > mw_size)
@@ -142,7 +142,7 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 	mmio = ndev->self_mmio;
 	peer_mmio = ndev->peer_mmio;
 
-	base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+	base_addr = pci_resource_start(ntb->pdev, bar);
 
 	if (bar != 1) {
 		xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 2);
@@ -232,7 +232,7 @@ static u64 amd_ntb_link_is_up(struct ntb_dev *ntb,
 		if (width)
 			*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
 
-		dev_dbg(ndev_dev(ndev), "link is up.\n");
+		dev_dbg(&ntb->pdev->dev, "link is up.\n");
 
 		ret = 1;
 	} else {
@@ -241,7 +241,7 @@ static u64 amd_ntb_link_is_up(struct ntb_dev *ntb,
 		if (width)
 			*width = NTB_WIDTH_NONE;
 
-		dev_dbg(ndev_dev(ndev), "link is down.\n");
+		dev_dbg(&ntb->pdev->dev, "link is down.\n");
 	}
 
 	return ret;
@@ -261,7 +261,7 @@ static int amd_ntb_link_enable(struct ntb_dev *ntb,
 
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
-	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+	dev_dbg(&ntb->pdev->dev, "Enabling Link.\n");
 
 	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
 	ntb_ctl |= (PMM_REG_CTL | SMM_REG_CTL);
@@ -282,7 +282,7 @@ static int amd_ntb_link_disable(struct ntb_dev *ntb)
 
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
-	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+	dev_dbg(&ntb->pdev->dev, "Enabling Link.\n");
 
 	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
 	ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
@@ -500,18 +500,19 @@ static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
 static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
 {
 	void __iomem *mmio = ndev->self_mmio;
+	struct device *dev = &ndev->ntb.pdev->dev;
 	u32 status;
 
 	status = readl(mmio + AMD_INTSTAT_OFFSET);
 	if (!(status & AMD_EVENT_INTMASK))
 		return;
 
-	dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+	dev_dbg(dev, "status = 0x%x and vec = %d\n", status, vec);
 
 	status &= AMD_EVENT_INTMASK;
 	switch (status) {
 	case AMD_PEER_FLUSH_EVENT:
-		dev_info(ndev_dev(ndev), "Flush is done.\n");
+		dev_info(dev, "Flush is done.\n");
 		break;
 	case AMD_PEER_RESET_EVENT:
 		amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
@@ -537,7 +538,7 @@ static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
 		status = readl(mmio + AMD_PMESTAT_OFFSET);
 		/* check if this is WAKEUP event */
 		if (status & 0x1)
-			dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+			dev_info(dev, "Wakeup is done.\n");
 
 		amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
 
@@ -546,14 +547,14 @@ static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
 				      AMD_LINK_HB_TIMEOUT);
 		break;
 	default:
-		dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+		dev_info(dev, "event status = 0x%x.\n", status);
 		break;
 	}
 }
 
 static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
 {
-	dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+	dev_dbg(&ndev->ntb.pdev->dev, "vec %d\n", vec);
 
 	if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
 		amd_handle_event(ndev, vec);
@@ -575,7 +576,7 @@ static irqreturn_t ndev_irq_isr(int irq, void *dev)
 {
 	struct amd_ntb_dev *ndev = dev;
 
-	return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+	return ndev_interrupt(ndev, irq - ndev->ntb.pdev->irq);
 }
 
 static int ndev_init_isr(struct amd_ntb_dev *ndev,
@@ -584,7 +585,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 	struct pci_dev *pdev;
 	int rc, i, msix_count, node;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	node = dev_to_node(&pdev->dev);
 
@@ -626,7 +627,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 			goto err_msix_request;
 	}
 
-	dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+	dev_dbg(&pdev->dev, "Using msix interrupts\n");
 	ndev->db_count = msix_min;
 	ndev->msix_vec_count = msix_max;
 	return 0;
@@ -653,7 +654,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 	if (rc)
 		goto err_msi_request;
 
-	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+	dev_dbg(&pdev->dev, "Using msi interrupts\n");
 	ndev->db_count = 1;
 	ndev->msix_vec_count = 1;
 	return 0;
@@ -670,7 +671,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 	if (rc)
 		goto err_intx_request;
 
-	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+	dev_dbg(&pdev->dev, "Using intx interrupts\n");
 	ndev->db_count = 1;
 	ndev->msix_vec_count = 1;
 	return 0;
@@ -685,7 +686,7 @@ static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
 	void __iomem *mmio = ndev->self_mmio;
 	int i;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	/* Mask all doorbell interrupts */
 	ndev->db_mask = ndev->db_valid_mask;
@@ -811,7 +812,8 @@ static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
 		ndev->debugfs_info = NULL;
 	} else {
 		ndev->debugfs_dir =
-			debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+			debugfs_create_dir(pci_name(ndev->ntb.pdev),
+					   debugfs_dir);
 		if (!ndev->debugfs_dir)
 			ndev->debugfs_info = NULL;
 		else
@@ -846,7 +848,7 @@ static int amd_poll_link(struct amd_ntb_dev *ndev)
 	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
 	reg &= NTB_LIN_STA_ACTIVE_BIT;
 
-	dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
+	dev_dbg(&ndev->ntb.pdev->dev, "%s: reg_val = 0x%x.\n", __func__, reg);
 
 	if (reg == ndev->cntl_sta)
 		return 0;
@@ -928,7 +930,8 @@ static int amd_init_ntb(struct amd_ntb_dev *ndev)
 
 		break;
 	default:
-		dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+		dev_err(&ndev->ntb.pdev->dev,
+			"AMD NTB does not support B2B mode.\n");
 		return -EINVAL;
 	}
 
@@ -957,10 +960,10 @@ static int amd_init_dev(struct amd_ntb_dev *ndev)
 	struct pci_dev *pdev;
 	int rc = 0;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	ndev->ntb.topo = amd_get_topo(ndev);
-	dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+	dev_dbg(&pdev->dev, "AMD NTB topo is %s\n",
 		ntb_topo_string(ndev->ntb.topo));
 
 	rc = amd_init_ntb(ndev);
@@ -969,7 +972,7 @@ static int amd_init_dev(struct amd_ntb_dev *ndev)
 
 	rc = amd_init_isr(ndev);
 	if (rc) {
-		dev_err(ndev_dev(ndev), "fail to init isr.\n");
+		dev_err(&pdev->dev, "fail to init isr.\n");
 		return rc;
 	}
 
@@ -1007,7 +1010,7 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
 		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
 	}
 
 	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1015,7 +1018,7 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
 		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
 	}
 
 	ndev->self_mmio = pci_iomap(pdev, 0, 0);
@@ -1038,7 +1041,7 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
 
 static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
 {
-	struct pci_dev *pdev = ndev_pdev(ndev);
+	struct pci_dev *pdev = ndev->ntb.pdev;
 
 	pci_iounmap(pdev, ndev->self_mmio);
 
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
index 13d73ed94a52d..8f3617a46292d 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.h
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
@@ -211,9 +211,6 @@ struct amd_ntb_dev {
 	struct dentry *debugfs_info;
 };
 
-#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
-#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
-#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
 #define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
 #define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
 
-- 
GitLab


From 48ea02184a9d000f877f901951d37843b26d26ad Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Tue, 10 Jan 2017 17:33:37 -0700
Subject: [PATCH 0784/1958] ntb_hw_intel: Style fixes: open code macros that
 just obfuscate code

As per a comments in [1] by Greg Kroah-Hartman, the ndev_* macros should
be cleaned up. This makes it more clear what's actually going on when
reading the code.

[1] http://www.spinics.net/lists/linux-pci/msg56904.html

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/intel/ntb_hw_intel.c | 192 ++++++++++++++--------------
 drivers/ntb/hw/intel/ntb_hw_intel.h |   3 -
 2 files changed, 95 insertions(+), 100 deletions(-)

diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index e00aa39a45444..2557e2c05b90c 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -272,12 +272,12 @@ static inline int ndev_db_addr(struct intel_ntb_dev *ndev,
 
 	if (db_addr) {
 		*db_addr = reg_addr + reg;
-		dev_dbg(ndev_dev(ndev), "Peer db addr %llx\n", *db_addr);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db addr %llx\n", *db_addr);
 	}
 
 	if (db_size) {
 		*db_size = ndev->reg->db_size;
-		dev_dbg(ndev_dev(ndev), "Peer db size %llx\n", *db_size);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db size %llx\n", *db_size);
 	}
 
 	return 0;
@@ -370,7 +370,8 @@ static inline int ndev_spad_addr(struct intel_ntb_dev *ndev, int idx,
 
 	if (spad_addr) {
 		*spad_addr = reg_addr + reg + (idx << 2);
-		dev_dbg(ndev_dev(ndev), "Peer spad addr %llx\n", *spad_addr);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer spad addr %llx\n",
+			*spad_addr);
 	}
 
 	return 0;
@@ -411,7 +412,7 @@ static irqreturn_t ndev_interrupt(struct intel_ntb_dev *ndev, int vec)
 	if ((ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD) && (vec == 31))
 		vec_mask |= ndev->db_link_mask;
 
-	dev_dbg(ndev_dev(ndev), "vec %d vec_mask %llx\n", vec, vec_mask);
+	dev_dbg(&ndev->ntb.pdev->dev, "vec %d vec_mask %llx\n", vec, vec_mask);
 
 	ndev->last_ts = jiffies;
 
@@ -430,7 +431,7 @@ static irqreturn_t ndev_vec_isr(int irq, void *dev)
 {
 	struct intel_ntb_vec *nvec = dev;
 
-	dev_dbg(ndev_dev(nvec->ndev), "irq: %d  nvec->num: %d\n",
+	dev_dbg(&nvec->ndev->ntb.pdev->dev, "irq: %d  nvec->num: %d\n",
 		irq, nvec->num);
 
 	return ndev_interrupt(nvec->ndev, nvec->num);
@@ -440,7 +441,7 @@ static irqreturn_t ndev_irq_isr(int irq, void *dev)
 {
 	struct intel_ntb_dev *ndev = dev;
 
-	return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+	return ndev_interrupt(ndev, irq - ndev->ntb.pdev->irq);
 }
 
 static int ndev_init_isr(struct intel_ntb_dev *ndev,
@@ -450,7 +451,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 	struct pci_dev *pdev;
 	int rc, i, msix_count, node;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	node = dev_to_node(&pdev->dev);
 
@@ -489,7 +490,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 			goto err_msix_request;
 	}
 
-	dev_dbg(ndev_dev(ndev), "Using %d msix interrupts\n", msix_count);
+	dev_dbg(&pdev->dev, "Using %d msix interrupts\n", msix_count);
 	ndev->db_vec_count = msix_count;
 	ndev->db_vec_shift = msix_shift;
 	return 0;
@@ -517,7 +518,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 	if (rc)
 		goto err_msi_request;
 
-	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+	dev_dbg(&pdev->dev, "Using msi interrupts\n");
 	ndev->db_vec_count = 1;
 	ndev->db_vec_shift = total_shift;
 	return 0;
@@ -535,7 +536,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 	if (rc)
 		goto err_intx_request;
 
-	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+	dev_dbg(&pdev->dev, "Using intx interrupts\n");
 	ndev->db_vec_count = 1;
 	ndev->db_vec_shift = total_shift;
 	return 0;
@@ -549,7 +550,7 @@ static void ndev_deinit_isr(struct intel_ntb_dev *ndev)
 	struct pci_dev *pdev;
 	int i;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	/* Mask all doorbell interrupts */
 	ndev->db_mask = ndev->db_valid_mask;
@@ -746,7 +747,7 @@ static ssize_t ndev_ntb_debugfs_read(struct file *filp, char __user *ubuf,
 	union { u64 v64; u32 v32; u16 v16; u8 v8; } u;
 
 	ndev = filp->private_data;
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 	mmio = ndev->self_mmio;
 
 	buf_size = min(count, 0x800ul);
@@ -1021,7 +1022,8 @@ static void ndev_init_debugfs(struct intel_ntb_dev *ndev)
 		ndev->debugfs_info = NULL;
 	} else {
 		ndev->debugfs_dir =
-			debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+			debugfs_create_dir(pci_name(ndev->ntb.pdev),
+					   debugfs_dir);
 		if (!ndev->debugfs_dir)
 			ndev->debugfs_info = NULL;
 		else
@@ -1219,13 +1221,13 @@ static int intel_ntb_link_enable(struct ntb_dev *ntb,
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
 
-	dev_dbg(ndev_dev(ndev),
+	dev_dbg(&ntb->pdev->dev,
 		"Enabling link with max_speed %d max_width %d\n",
 		max_speed, max_width);
 	if (max_speed != NTB_SPEED_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_speed %d\n", max_speed);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_speed %d\n", max_speed);
 	if (max_width != NTB_WIDTH_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_width %d\n", max_width);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_width %d\n", max_width);
 
 	ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
 	ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK);
@@ -1248,7 +1250,7 @@ static int intel_ntb_link_disable(struct ntb_dev *ntb)
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
 
-	dev_dbg(ndev_dev(ndev), "Disabling link\n");
+	dev_dbg(&ntb->pdev->dev, "Disabling link\n");
 
 	/* Bring NTB link down */
 	ntb_cntl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
@@ -1485,30 +1487,33 @@ static int atom_link_is_err(struct intel_ntb_dev *ndev)
 
 static inline enum ntb_topo atom_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd)
 {
+	struct device *dev = &ndev->ntb.pdev->dev;
+
 	switch (ppd & ATOM_PPD_TOPO_MASK) {
 	case ATOM_PPD_TOPO_B2B_USD:
-		dev_dbg(ndev_dev(ndev), "PPD %d B2B USD\n", ppd);
+		dev_dbg(dev, "PPD %d B2B USD\n", ppd);
 		return NTB_TOPO_B2B_USD;
 
 	case ATOM_PPD_TOPO_B2B_DSD:
-		dev_dbg(ndev_dev(ndev), "PPD %d B2B DSD\n", ppd);
+		dev_dbg(dev, "PPD %d B2B DSD\n", ppd);
 		return NTB_TOPO_B2B_DSD;
 
 	case ATOM_PPD_TOPO_PRI_USD:
 	case ATOM_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */
 	case ATOM_PPD_TOPO_SEC_USD:
 	case ATOM_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */
-		dev_dbg(ndev_dev(ndev), "PPD %d non B2B disabled\n", ppd);
+		dev_dbg(dev, "PPD %d non B2B disabled\n", ppd);
 		return NTB_TOPO_NONE;
 	}
 
-	dev_dbg(ndev_dev(ndev), "PPD %d invalid\n", ppd);
+	dev_dbg(dev, "PPD %d invalid\n", ppd);
 	return NTB_TOPO_NONE;
 }
 
 static void atom_link_hb(struct work_struct *work)
 {
 	struct intel_ntb_dev *ndev = hb_ndev(work);
+	struct device *dev = &ndev->ntb.pdev->dev;
 	unsigned long poll_ts;
 	void __iomem *mmio;
 	u32 status32;
@@ -1546,30 +1551,30 @@ static void atom_link_hb(struct work_struct *work)
 
 	/* Clear AER Errors, write to clear */
 	status32 = ioread32(mmio + ATOM_ERRCORSTS_OFFSET);
-	dev_dbg(ndev_dev(ndev), "ERRCORSTS = %x\n", status32);
+	dev_dbg(dev, "ERRCORSTS = %x\n", status32);
 	status32 &= PCI_ERR_COR_REP_ROLL;
 	iowrite32(status32, mmio + ATOM_ERRCORSTS_OFFSET);
 
 	/* Clear unexpected electrical idle event in LTSSM, write to clear */
 	status32 = ioread32(mmio + ATOM_LTSSMERRSTS0_OFFSET);
-	dev_dbg(ndev_dev(ndev), "LTSSMERRSTS0 = %x\n", status32);
+	dev_dbg(dev, "LTSSMERRSTS0 = %x\n", status32);
 	status32 |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
 	iowrite32(status32, mmio + ATOM_LTSSMERRSTS0_OFFSET);
 
 	/* Clear DeSkew Buffer error, write to clear */
 	status32 = ioread32(mmio + ATOM_DESKEWSTS_OFFSET);
-	dev_dbg(ndev_dev(ndev), "DESKEWSTS = %x\n", status32);
+	dev_dbg(dev, "DESKEWSTS = %x\n", status32);
 	status32 |= ATOM_DESKEWSTS_DBERR;
 	iowrite32(status32, mmio + ATOM_DESKEWSTS_OFFSET);
 
 	status32 = ioread32(mmio + ATOM_IBSTERRRCRVSTS0_OFFSET);
-	dev_dbg(ndev_dev(ndev), "IBSTERRRCRVSTS0 = %x\n", status32);
+	dev_dbg(dev, "IBSTERRRCRVSTS0 = %x\n", status32);
 	status32 &= ATOM_IBIST_ERR_OFLOW;
 	iowrite32(status32, mmio + ATOM_IBSTERRRCRVSTS0_OFFSET);
 
 	/* Releases the NTB state machine to allow the link to retrain */
 	status32 = ioread32(mmio + ATOM_LTSSMSTATEJMP_OFFSET);
-	dev_dbg(ndev_dev(ndev), "LTSSMSTATEJMP = %x\n", status32);
+	dev_dbg(dev, "LTSSMSTATEJMP = %x\n", status32);
 	status32 &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
 	iowrite32(status32, mmio + ATOM_LTSSMSTATEJMP_OFFSET);
 
@@ -1742,11 +1747,11 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	int b2b_bar;
 	u8 bar_sz;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 	mmio = ndev->self_mmio;
 
 	if (ndev->b2b_idx == UINT_MAX) {
-		dev_dbg(ndev_dev(ndev), "not using b2b mw\n");
+		dev_dbg(&pdev->dev, "not using b2b mw\n");
 		b2b_bar = 0;
 		ndev->b2b_off = 0;
 	} else {
@@ -1754,24 +1759,21 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		if (b2b_bar < 0)
 			return -EIO;
 
-		dev_dbg(ndev_dev(ndev), "using b2b mw bar %d\n", b2b_bar);
+		dev_dbg(&pdev->dev, "using b2b mw bar %d\n", b2b_bar);
 
 		bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar);
 
-		dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size);
+		dev_dbg(&pdev->dev, "b2b bar size %#llx\n", bar_size);
 
 		if (b2b_mw_share && ((bar_size >> 1) >= XEON_B2B_MIN_SIZE)) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using first half of bar\n");
+			dev_dbg(&pdev->dev, "b2b using first half of bar\n");
 			ndev->b2b_off = bar_size >> 1;
 		} else if (bar_size >= XEON_B2B_MIN_SIZE) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using whole bar\n");
+			dev_dbg(&pdev->dev, "b2b using whole bar\n");
 			ndev->b2b_off = 0;
 			--ndev->mw_count;
 		} else {
-			dev_dbg(ndev_dev(ndev),
-				"b2b bar size is too small\n");
+			dev_dbg(&pdev->dev, "b2b bar size is too small\n");
 			return -EIO;
 		}
 	}
@@ -1781,7 +1783,7 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	 * except disable or halve the size of the b2b secondary bar.
 	 */
 	pci_read_config_byte(pdev, SKX_IMBAR1SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "IMBAR1SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "IMBAR1SZ %#x\n", bar_sz);
 	if (b2b_bar == 1) {
 		if (ndev->b2b_off)
 			bar_sz -= 1;
@@ -1791,10 +1793,10 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 
 	pci_write_config_byte(pdev, SKX_EMBAR1SZ_OFFSET, bar_sz);
 	pci_read_config_byte(pdev, SKX_EMBAR1SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "EMBAR1SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "EMBAR1SZ %#x\n", bar_sz);
 
 	pci_read_config_byte(pdev, SKX_IMBAR2SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "IMBAR2SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "IMBAR2SZ %#x\n", bar_sz);
 	if (b2b_bar == 2) {
 		if (ndev->b2b_off)
 			bar_sz -= 1;
@@ -1804,7 +1806,7 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 
 	pci_write_config_byte(pdev, SKX_EMBAR2SZ_OFFSET, bar_sz);
 	pci_read_config_byte(pdev, SKX_EMBAR2SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "EMBAR2SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "EMBAR2SZ %#x\n", bar_sz);
 
 	/* SBAR01 hit by first part of the b2b bar */
 	if (b2b_bar == 0)
@@ -1820,12 +1822,12 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = addr->bar2_addr64 + (b2b_bar == 1 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + SKX_IMBAR1XLMT_OFFSET);
 	bar_addr = ioread64(mmio + SKX_IMBAR1XLMT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "IMBAR1XLMT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "IMBAR1XLMT %#018llx\n", bar_addr);
 
 	bar_addr = addr->bar4_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + SKX_IMBAR2XLMT_OFFSET);
 	bar_addr = ioread64(mmio + SKX_IMBAR2XLMT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "IMBAR2XLMT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "IMBAR2XLMT %#018llx\n", bar_addr);
 
 	/* zero incoming translation addrs */
 	iowrite64(0, mmio + SKX_IMBAR1XBASE_OFFSET);
@@ -1895,7 +1897,7 @@ static int skx_init_dev(struct intel_ntb_dev *ndev)
 	u8 ppd;
 	int rc;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	ndev->reg = &skx_reg;
 
@@ -1904,7 +1906,7 @@ static int skx_init_dev(struct intel_ntb_dev *ndev)
 		return -EIO;
 
 	ndev->ntb.topo = xeon_ppd_topo(ndev, ppd);
-	dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd,
+	dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd,
 		ntb_topo_string(ndev->ntb.topo));
 	if (ndev->ntb.topo == NTB_TOPO_NONE)
 		return -EINVAL;
@@ -1928,14 +1930,14 @@ static int intel_ntb3_link_enable(struct ntb_dev *ntb,
 
 	ndev = container_of(ntb, struct intel_ntb_dev, ntb);
 
-	dev_dbg(ndev_dev(ndev),
+	dev_dbg(&ntb->pdev->dev,
 		"Enabling link with max_speed %d max_width %d\n",
 		max_speed, max_width);
 
 	if (max_speed != NTB_SPEED_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_speed %d\n", max_speed);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_speed %d\n", max_speed);
 	if (max_width != NTB_WIDTH_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_width %d\n", max_width);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_width %d\n", max_width);
 
 	ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
 	ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK);
@@ -1999,7 +2001,7 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 		return -EIO;
 	}
 
-	dev_dbg(ndev_dev(ndev), "BAR %d IMBARXBASE: %#Lx\n", bar, reg_val);
+	dev_dbg(&ntb->pdev->dev, "BAR %d IMBARXBASE: %#Lx\n", bar, reg_val);
 
 	/* set and verify setting the limit */
 	iowrite64(limit, mmio + limit_reg);
@@ -2010,7 +2012,7 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 		return -EIO;
 	}
 
-	dev_dbg(ndev_dev(ndev), "BAR %d IMBARXLMT: %#Lx\n", bar, reg_val);
+	dev_dbg(&ntb->pdev->dev, "BAR %d IMBARXLMT: %#Lx\n", bar, reg_val);
 
 	/* setup the EP */
 	limit_reg = ndev->xlat_reg->bar2_limit + (idx * 0x10) + 0x4000;
@@ -2031,7 +2033,7 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 		return -EIO;
 	}
 
-	dev_dbg(ndev_dev(ndev), "BAR %d EMBARXLMT: %#Lx\n", bar, reg_val);
+	dev_dbg(&ntb->pdev->dev, "BAR %d EMBARXLMT: %#Lx\n", bar, reg_val);
 
 	return 0;
 }
@@ -2138,7 +2140,7 @@ static inline enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd)
 static inline int xeon_ppd_bar4_split(struct intel_ntb_dev *ndev, u8 ppd)
 {
 	if (ppd & XEON_PPD_SPLIT_BAR_MASK) {
-		dev_dbg(ndev_dev(ndev), "PPD %d split bar\n", ppd);
+		dev_dbg(&ndev->ntb.pdev->dev, "PPD %d split bar\n", ppd);
 		return 1;
 	}
 	return 0;
@@ -2168,11 +2170,11 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	int b2b_bar;
 	u8 bar_sz;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 	mmio = ndev->self_mmio;
 
 	if (ndev->b2b_idx == UINT_MAX) {
-		dev_dbg(ndev_dev(ndev), "not using b2b mw\n");
+		dev_dbg(&pdev->dev, "not using b2b mw\n");
 		b2b_bar = 0;
 		ndev->b2b_off = 0;
 	} else {
@@ -2180,24 +2182,21 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		if (b2b_bar < 0)
 			return -EIO;
 
-		dev_dbg(ndev_dev(ndev), "using b2b mw bar %d\n", b2b_bar);
+		dev_dbg(&pdev->dev, "using b2b mw bar %d\n", b2b_bar);
 
 		bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar);
 
-		dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size);
+		dev_dbg(&pdev->dev, "b2b bar size %#llx\n", bar_size);
 
 		if (b2b_mw_share && XEON_B2B_MIN_SIZE <= bar_size >> 1) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using first half of bar\n");
+			dev_dbg(&pdev->dev, "b2b using first half of bar\n");
 			ndev->b2b_off = bar_size >> 1;
 		} else if (XEON_B2B_MIN_SIZE <= bar_size) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using whole bar\n");
+			dev_dbg(&pdev->dev, "b2b using whole bar\n");
 			ndev->b2b_off = 0;
 			--ndev->mw_count;
 		} else {
-			dev_dbg(ndev_dev(ndev),
-				"b2b bar size is too small\n");
+			dev_dbg(&pdev->dev, "b2b bar size is too small\n");
 			return -EIO;
 		}
 	}
@@ -2209,7 +2208,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	 * offsets are not in a consistent order (bar5sz comes after ppd, odd).
 	 */
 	pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "PBAR23SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "PBAR23SZ %#x\n", bar_sz);
 	if (b2b_bar == 2) {
 		if (ndev->b2b_off)
 			bar_sz -= 1;
@@ -2218,11 +2217,11 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	}
 	pci_write_config_byte(pdev, XEON_SBAR23SZ_OFFSET, bar_sz);
 	pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "SBAR23SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "SBAR23SZ %#x\n", bar_sz);
 
 	if (!ndev->bar4_split) {
 		pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "PBAR45SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "PBAR45SZ %#x\n", bar_sz);
 		if (b2b_bar == 4) {
 			if (ndev->b2b_off)
 				bar_sz -= 1;
@@ -2231,10 +2230,10 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		}
 		pci_write_config_byte(pdev, XEON_SBAR45SZ_OFFSET, bar_sz);
 		pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "SBAR45SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "SBAR45SZ %#x\n", bar_sz);
 	} else {
 		pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "PBAR4SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "PBAR4SZ %#x\n", bar_sz);
 		if (b2b_bar == 4) {
 			if (ndev->b2b_off)
 				bar_sz -= 1;
@@ -2243,10 +2242,10 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		}
 		pci_write_config_byte(pdev, XEON_SBAR4SZ_OFFSET, bar_sz);
 		pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "SBAR4SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "SBAR4SZ %#x\n", bar_sz);
 
 		pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "PBAR5SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "PBAR5SZ %#x\n", bar_sz);
 		if (b2b_bar == 5) {
 			if (ndev->b2b_off)
 				bar_sz -= 1;
@@ -2255,7 +2254,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		}
 		pci_write_config_byte(pdev, XEON_SBAR5SZ_OFFSET, bar_sz);
 		pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "SBAR5SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "SBAR5SZ %#x\n", bar_sz);
 	}
 
 	/* SBAR01 hit by first part of the b2b bar */
@@ -2272,7 +2271,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	else
 		return -EIO;
 
-	dev_dbg(ndev_dev(ndev), "SBAR01 %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "SBAR01 %#018llx\n", bar_addr);
 	iowrite64(bar_addr, mmio + XEON_SBAR0BASE_OFFSET);
 
 	/* Other SBAR are normally hit by the PBAR xlat, except for b2b bar.
@@ -2283,26 +2282,26 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + XEON_SBAR23BASE_OFFSET);
 	bar_addr = ioread64(mmio + XEON_SBAR23BASE_OFFSET);
-	dev_dbg(ndev_dev(ndev), "SBAR23 %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "SBAR23 %#018llx\n", bar_addr);
 
 	if (!ndev->bar4_split) {
 		bar_addr = addr->bar4_addr64 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite64(bar_addr, mmio + XEON_SBAR45BASE_OFFSET);
 		bar_addr = ioread64(mmio + XEON_SBAR45BASE_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR45 %#018llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR45 %#018llx\n", bar_addr);
 	} else {
 		bar_addr = addr->bar4_addr32 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR4BASE_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR4BASE_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR4 %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR4 %#010llx\n", bar_addr);
 
 		bar_addr = addr->bar5_addr32 +
 			(b2b_bar == 5 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR5BASE_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR5BASE_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR5 %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR5 %#010llx\n", bar_addr);
 	}
 
 	/* setup incoming bar limits == base addrs (zero length windows) */
@@ -2310,26 +2309,26 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + XEON_SBAR23LMT_OFFSET);
 	bar_addr = ioread64(mmio + XEON_SBAR23LMT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "SBAR23LMT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "SBAR23LMT %#018llx\n", bar_addr);
 
 	if (!ndev->bar4_split) {
 		bar_addr = addr->bar4_addr64 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite64(bar_addr, mmio + XEON_SBAR45LMT_OFFSET);
 		bar_addr = ioread64(mmio + XEON_SBAR45LMT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR45LMT %#018llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR45LMT %#018llx\n", bar_addr);
 	} else {
 		bar_addr = addr->bar4_addr32 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR4LMT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR4LMT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR4LMT %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR4LMT %#010llx\n", bar_addr);
 
 		bar_addr = addr->bar5_addr32 +
 			(b2b_bar == 5 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR5LMT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR5LMT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR5LMT %#05llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR5LMT %#05llx\n", bar_addr);
 	}
 
 	/* zero incoming translation addrs */
@@ -2355,23 +2354,23 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = peer_addr->bar2_addr64;
 	iowrite64(bar_addr, mmio + XEON_PBAR23XLAT_OFFSET);
 	bar_addr = ioread64(mmio + XEON_PBAR23XLAT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "PBAR23XLAT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "PBAR23XLAT %#018llx\n", bar_addr);
 
 	if (!ndev->bar4_split) {
 		bar_addr = peer_addr->bar4_addr64;
 		iowrite64(bar_addr, mmio + XEON_PBAR45XLAT_OFFSET);
 		bar_addr = ioread64(mmio + XEON_PBAR45XLAT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "PBAR45XLAT %#018llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "PBAR45XLAT %#018llx\n", bar_addr);
 	} else {
 		bar_addr = peer_addr->bar4_addr32;
 		iowrite32(bar_addr, mmio + XEON_PBAR4XLAT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_PBAR4XLAT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "PBAR4XLAT %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "PBAR4XLAT %#010llx\n", bar_addr);
 
 		bar_addr = peer_addr->bar5_addr32;
 		iowrite32(bar_addr, mmio + XEON_PBAR5XLAT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_PBAR5XLAT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "PBAR5XLAT %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "PBAR5XLAT %#010llx\n", bar_addr);
 	}
 
 	/* set the translation offset for b2b registers */
@@ -2389,7 +2388,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		return -EIO;
 
 	/* B2B_XLAT_OFFSET is 64bit, but can only take 32bit writes */
-	dev_dbg(ndev_dev(ndev), "B2BXLAT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "B2BXLAT %#018llx\n", bar_addr);
 	iowrite32(bar_addr, mmio + XEON_B2B_XLAT_OFFSETL);
 	iowrite32(bar_addr >> 32, mmio + XEON_B2B_XLAT_OFFSETU);
 
@@ -2408,6 +2407,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 
 static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 {
+	struct device *dev = &ndev->ntb.pdev->dev;
 	int rc;
 	u32 ntb_ctl;
 
@@ -2423,7 +2423,7 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 	switch (ndev->ntb.topo) {
 	case NTB_TOPO_PRI:
 		if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) {
-			dev_err(ndev_dev(ndev), "NTB Primary config disabled\n");
+			dev_err(dev, "NTB Primary config disabled\n");
 			return -EINVAL;
 		}
 
@@ -2441,7 +2441,7 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 
 	case NTB_TOPO_SEC:
 		if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) {
-			dev_err(ndev_dev(ndev), "NTB Secondary config disabled\n");
+			dev_err(dev, "NTB Secondary config disabled\n");
 			return -EINVAL;
 		}
 		/* use half the spads for the peer */
@@ -2466,18 +2466,17 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 				ndev->b2b_idx = b2b_mw_idx;
 
 			if (ndev->b2b_idx >= ndev->mw_count) {
-				dev_dbg(ndev_dev(ndev),
+				dev_dbg(dev,
 					"b2b_mw_idx %d invalid for mw_count %u\n",
 					b2b_mw_idx, ndev->mw_count);
 				return -EINVAL;
 			}
 
-			dev_dbg(ndev_dev(ndev),
-				"setting up b2b mw idx %d means %d\n",
+			dev_dbg(dev, "setting up b2b mw idx %d means %d\n",
 				b2b_mw_idx, ndev->b2b_idx);
 
 		} else if (ndev->hwerr_flags & NTB_HWERR_B2BDOORBELL_BIT14) {
-			dev_warn(ndev_dev(ndev), "Reduce doorbell count by 1\n");
+			dev_warn(dev, "Reduce doorbell count by 1\n");
 			ndev->db_count -= 1;
 		}
 
@@ -2518,7 +2517,7 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
 	u8 ppd;
 	int rc, mem;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	switch (pdev->device) {
 	/* There is a Xeon hardware errata related to writes to SDOORBELL or
@@ -2594,14 +2593,14 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
 		return -EIO;
 
 	ndev->ntb.topo = xeon_ppd_topo(ndev, ppd);
-	dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd,
+	dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd,
 		ntb_topo_string(ndev->ntb.topo));
 	if (ndev->ntb.topo == NTB_TOPO_NONE)
 		return -EINVAL;
 
 	if (ndev->ntb.topo != NTB_TOPO_SEC) {
 		ndev->bar4_split = xeon_ppd_bar4_split(ndev, ppd);
-		dev_dbg(ndev_dev(ndev), "ppd %#x bar4_split %d\n",
+		dev_dbg(&pdev->dev, "ppd %#x bar4_split %d\n",
 			ppd, ndev->bar4_split);
 	} else {
 		/* This is a way for transparent BAR to figure out if we are
@@ -2611,7 +2610,7 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
 		mem = pci_select_bars(pdev, IORESOURCE_MEM);
 		ndev->bar4_split = hweight32(mem) ==
 			HSX_SPLIT_BAR_MW_COUNT + 1;
-		dev_dbg(ndev_dev(ndev), "mem %#x bar4_split %d\n",
+		dev_dbg(&pdev->dev, "mem %#x bar4_split %d\n",
 			mem, ndev->bar4_split);
 	}
 
@@ -2648,7 +2647,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
 		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
 	}
 
 	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -2656,7 +2655,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
 		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
 	}
 
 	ndev->self_mmio = pci_iomap(pdev, 0, 0);
@@ -2682,7 +2681,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
 
 static void intel_ntb_deinit_pci(struct intel_ntb_dev *ndev)
 {
-	struct pci_dev *pdev = ndev_pdev(ndev);
+	struct pci_dev *pdev = ndev->ntb.pdev;
 
 	if (ndev->peer_mmio && ndev->peer_mmio != ndev->self_mmio)
 		pci_iounmap(pdev, ndev->peer_mmio);
@@ -3058,4 +3057,3 @@ static void __exit intel_ntb_pci_driver_exit(void)
 	debugfs_remove_recursive(debugfs_dir);
 }
 module_exit(intel_ntb_pci_driver_exit);
-
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index f2cf8a783f1ee..2d6c38afb128f 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -382,9 +382,6 @@ struct intel_ntb_dev {
 	struct dentry			*debugfs_info;
 };
 
-#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
-#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
-#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
 #define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
 #define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
 				     hb_timer.work)
-- 
GitLab


From bf2a952d31d2cd28bb3454f15645a76fda70addd Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 12 Apr 2017 15:44:45 +0300
Subject: [PATCH 0785/1958] NTB: Add IDT 89HPESxNTx PCIe-switches support

IDT 89HPESxNTx device series is PCIe-switches, which support
Non-Transparent bridging between domains connected to the device ports.
Since new NTB API exposes multi-port interface and messaging API, the
IDT NT-functions can be now supported in the kernel. This driver adds
the following functionality:
1) Multi-port NTB API to have information of possible NT-functions
activated in compliance with available device ports.
2) Memory windows of direct and look up table based address translation
with all possible combinations of BARs setup.
3) Traditional doorbell NTB API.
4) One-on-one messaging NTB API.

There are some IDT PCIe-switch setups, which must be done before any of
the NTB peers started. It can be performed either by system BIOS via
IDT SMBus-slave interface or by pre-initialized IDT PCIe-switch EEPROM:
1) NT-functions of corresponding ports must be activated using
SWPARTxCTL and SWPORTxCTL registers.
2) BAR0 must be configured to expose NT-function configuration
registers map.
3) The rest of the BARs must have at least one memory window
configured, otherwise the driver will just return an error.
Temperature sensor of IDT PCIe-switches can be also optionally
activated by BIOS or EEPROM.
(See IDT documentations for details of how the pre-initialization can
be done)

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 MAINTAINERS                     |    6 +
 drivers/ntb/hw/Kconfig          |    1 +
 drivers/ntb/hw/Makefile         |    1 +
 drivers/ntb/hw/idt/Kconfig      |   31 +
 drivers/ntb/hw/idt/Makefile     |    1 +
 drivers/ntb/hw/idt/ntb_hw_idt.c | 2712 +++++++++++++++++++++++++++++++
 drivers/ntb/hw/idt/ntb_hw_idt.h | 1149 +++++++++++++
 7 files changed, 3901 insertions(+)
 create mode 100644 drivers/ntb/hw/idt/Kconfig
 create mode 100644 drivers/ntb/hw/idt/Makefile
 create mode 100644 drivers/ntb/hw/idt/ntb_hw_idt.c
 create mode 100644 drivers/ntb/hw/idt/ntb_hw_idt.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 767e9d202adf8..66f7649fad950 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9122,6 +9122,12 @@ F:	include/linux/ntb.h
 F:	include/linux/ntb_transport.h
 F:	tools/testing/selftests/ntb/
 
+NTB IDT DRIVER
+M:	Serge Semin <fancer.lancer@gmail.com>
+L:	linux-ntb@googlegroups.com
+S:	Supported
+F:	drivers/ntb/hw/idt/
+
 NTB INTEL DRIVER
 M:	Jon Mason <jdmason@kudzu.us>
 M:	Dave Jiang <dave.jiang@intel.com>
diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig
index 7116472b46257..a89243c9fdd33 100644
--- a/drivers/ntb/hw/Kconfig
+++ b/drivers/ntb/hw/Kconfig
@@ -1,2 +1,3 @@
 source "drivers/ntb/hw/amd/Kconfig"
+source "drivers/ntb/hw/idt/Kconfig"
 source "drivers/ntb/hw/intel/Kconfig"
diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile
index 532e0859b4a11..87332c3905f07 100644
--- a/drivers/ntb/hw/Makefile
+++ b/drivers/ntb/hw/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_NTB_AMD)	+= amd/
+obj-$(CONFIG_NTB_IDT)	+= idt/
 obj-$(CONFIG_NTB_INTEL)	+= intel/
diff --git a/drivers/ntb/hw/idt/Kconfig b/drivers/ntb/hw/idt/Kconfig
new file mode 100644
index 0000000000000..b360e5613b9f1
--- /dev/null
+++ b/drivers/ntb/hw/idt/Kconfig
@@ -0,0 +1,31 @@
+config NTB_IDT
+	tristate "IDT PCIe-switch Non-Transparent Bridge support"
+	depends on PCI
+	help
+	 This driver supports NTB of cappable IDT PCIe-switches.
+
+	 Some of the pre-initializations must be made before IDT PCIe-switch
+	 exposes it NT-functions correctly. It should be done by either proper
+	 initialisation of EEPROM connected to master smbus of the switch or
+	 by BIOS using slave-SMBus interface changing corresponding registers
+	 value. Evidently it must be done before PCI bus enumeration is
+	 finished in Linux kernel.
+
+	 First of all partitions must be activated and properly assigned to all
+	 the ports with NT-functions intended to be activated (see SWPARTxCTL
+	 and SWPORTxCTL registers). Then all NT-function BARs must be enabled
+	 with chosen valid aperture. For memory windows related BARs the
+	 aperture settings shall determine the maximum size of memory windows
+	 accepted by a BAR. Note that BAR0 must map PCI configuration space
+	 registers.
+
+	 It's worth to note, that since a part of this driver relies on the
+	 BAR settings of peer NT-functions, the BAR setups can't be done over
+	 kernel PCI fixups. That's why the alternative pre-initialization
+	 techniques like BIOS using SMBus interface or EEPROM should be
+	 utilized. Additionally if one needs to have temperature sensor
+	 information printed to system log, the corresponding registers must
+	 be initialized within BIOS/EEPROM as well.
+
+	 If unsure, say N.
+
diff --git a/drivers/ntb/hw/idt/Makefile b/drivers/ntb/hw/idt/Makefile
new file mode 100644
index 0000000000000..a102cf154be08
--- /dev/null
+++ b/drivers/ntb/hw/idt/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_IDT) += ntb_hw_idt.o
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
new file mode 100644
index 0000000000000..d44d7ef38fe88
--- /dev/null
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -0,0 +1,2712 @@
+/*
+ *   This file is provided under a GPLv2 license.  When using or
+ *   redistributing this file, you may do so under that license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ *   Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, one can be found http://www.gnu.org/licenses/.
+ *
+ *   The full GNU General Public License is included in this distribution in
+ *   the file called "COPYING".
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * IDT PCIe-switch NTB Linux driver
+ *
+ * Contact Information:
+ * Serge Semin <fancer.lancer@gmail.com>, <Sergey.Semin@t-platforms.ru>
+ */
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/sizes.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/debugfs.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_idt.h"
+
+#define NTB_NAME	"ntb_hw_idt"
+#define NTB_DESC	"IDT PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER		"2.0"
+#define NTB_IRQNAME	"ntb_irq_idt"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("T-platforms");
+
+/*
+ * NT Endpoint registers table simplifying a loop access to the functionally
+ * related registers
+ */
+static const struct idt_ntb_regs ntdata_tbl = {
+	{ {IDT_NT_BARSETUP0,	IDT_NT_BARLIMIT0,
+	   IDT_NT_BARLTBASE0,	IDT_NT_BARUTBASE0},
+	  {IDT_NT_BARSETUP1,	IDT_NT_BARLIMIT1,
+	   IDT_NT_BARLTBASE1,	IDT_NT_BARUTBASE1},
+	  {IDT_NT_BARSETUP2,	IDT_NT_BARLIMIT2,
+	   IDT_NT_BARLTBASE2,	IDT_NT_BARUTBASE2},
+	  {IDT_NT_BARSETUP3,	IDT_NT_BARLIMIT3,
+	   IDT_NT_BARLTBASE3,	IDT_NT_BARUTBASE3},
+	  {IDT_NT_BARSETUP4,	IDT_NT_BARLIMIT4,
+	   IDT_NT_BARLTBASE4,	IDT_NT_BARUTBASE4},
+	  {IDT_NT_BARSETUP5,	IDT_NT_BARLIMIT5,
+	   IDT_NT_BARLTBASE5,	IDT_NT_BARUTBASE5} },
+	{ {IDT_NT_INMSG0,	IDT_NT_OUTMSG0,	IDT_NT_INMSGSRC0},
+	  {IDT_NT_INMSG1,	IDT_NT_OUTMSG1,	IDT_NT_INMSGSRC1},
+	  {IDT_NT_INMSG2,	IDT_NT_OUTMSG2,	IDT_NT_INMSGSRC2},
+	  {IDT_NT_INMSG3,	IDT_NT_OUTMSG3,	IDT_NT_INMSGSRC3} }
+};
+
+/*
+ * NT Endpoint ports data table with the corresponding pcie command, link
+ * status, control and BAR-related registers
+ */
+static const struct idt_ntb_port portdata_tbl[IDT_MAX_NR_PORTS] = {
+/*0*/	{ IDT_SW_NTP0_PCIECMDSTS,	IDT_SW_NTP0_PCIELCTLSTS,
+	  IDT_SW_NTP0_NTCTL,
+	  IDT_SW_SWPORT0CTL,		IDT_SW_SWPORT0STS,
+	  { {IDT_SW_NTP0_BARSETUP0,	IDT_SW_NTP0_BARLIMIT0,
+	     IDT_SW_NTP0_BARLTBASE0,	IDT_SW_NTP0_BARUTBASE0},
+	    {IDT_SW_NTP0_BARSETUP1,	IDT_SW_NTP0_BARLIMIT1,
+	     IDT_SW_NTP0_BARLTBASE1,	IDT_SW_NTP0_BARUTBASE1},
+	    {IDT_SW_NTP0_BARSETUP2,	IDT_SW_NTP0_BARLIMIT2,
+	     IDT_SW_NTP0_BARLTBASE2,	IDT_SW_NTP0_BARUTBASE2},
+	    {IDT_SW_NTP0_BARSETUP3,	IDT_SW_NTP0_BARLIMIT3,
+	     IDT_SW_NTP0_BARLTBASE3,	IDT_SW_NTP0_BARUTBASE3},
+	    {IDT_SW_NTP0_BARSETUP4,	IDT_SW_NTP0_BARLIMIT4,
+	     IDT_SW_NTP0_BARLTBASE4,	IDT_SW_NTP0_BARUTBASE4},
+	    {IDT_SW_NTP0_BARSETUP5,	IDT_SW_NTP0_BARLIMIT5,
+	     IDT_SW_NTP0_BARLTBASE5,	IDT_SW_NTP0_BARUTBASE5} } },
+/*1*/	{0},
+/*2*/	{ IDT_SW_NTP2_PCIECMDSTS,	IDT_SW_NTP2_PCIELCTLSTS,
+	  IDT_SW_NTP2_NTCTL,
+	  IDT_SW_SWPORT2CTL,		IDT_SW_SWPORT2STS,
+	  { {IDT_SW_NTP2_BARSETUP0,	IDT_SW_NTP2_BARLIMIT0,
+	     IDT_SW_NTP2_BARLTBASE0,	IDT_SW_NTP2_BARUTBASE0},
+	    {IDT_SW_NTP2_BARSETUP1,	IDT_SW_NTP2_BARLIMIT1,
+	     IDT_SW_NTP2_BARLTBASE1,	IDT_SW_NTP2_BARUTBASE1},
+	    {IDT_SW_NTP2_BARSETUP2,	IDT_SW_NTP2_BARLIMIT2,
+	     IDT_SW_NTP2_BARLTBASE2,	IDT_SW_NTP2_BARUTBASE2},
+	    {IDT_SW_NTP2_BARSETUP3,	IDT_SW_NTP2_BARLIMIT3,
+	     IDT_SW_NTP2_BARLTBASE3,	IDT_SW_NTP2_BARUTBASE3},
+	    {IDT_SW_NTP2_BARSETUP4,	IDT_SW_NTP2_BARLIMIT4,
+	     IDT_SW_NTP2_BARLTBASE4,	IDT_SW_NTP2_BARUTBASE4},
+	    {IDT_SW_NTP2_BARSETUP5,	IDT_SW_NTP2_BARLIMIT5,
+	     IDT_SW_NTP2_BARLTBASE5,	IDT_SW_NTP2_BARUTBASE5} } },
+/*3*/	{0},
+/*4*/	{ IDT_SW_NTP4_PCIECMDSTS,	IDT_SW_NTP4_PCIELCTLSTS,
+	  IDT_SW_NTP4_NTCTL,
+	  IDT_SW_SWPORT4CTL,		IDT_SW_SWPORT4STS,
+	  { {IDT_SW_NTP4_BARSETUP0,	IDT_SW_NTP4_BARLIMIT0,
+	     IDT_SW_NTP4_BARLTBASE0,	IDT_SW_NTP4_BARUTBASE0},
+	    {IDT_SW_NTP4_BARSETUP1,	IDT_SW_NTP4_BARLIMIT1,
+	     IDT_SW_NTP4_BARLTBASE1,	IDT_SW_NTP4_BARUTBASE1},
+	    {IDT_SW_NTP4_BARSETUP2,	IDT_SW_NTP4_BARLIMIT2,
+	     IDT_SW_NTP4_BARLTBASE2,	IDT_SW_NTP4_BARUTBASE2},
+	    {IDT_SW_NTP4_BARSETUP3,	IDT_SW_NTP4_BARLIMIT3,
+	     IDT_SW_NTP4_BARLTBASE3,	IDT_SW_NTP4_BARUTBASE3},
+	    {IDT_SW_NTP4_BARSETUP4,	IDT_SW_NTP4_BARLIMIT4,
+	     IDT_SW_NTP4_BARLTBASE4,	IDT_SW_NTP4_BARUTBASE4},
+	    {IDT_SW_NTP4_BARSETUP5,	IDT_SW_NTP4_BARLIMIT5,
+	     IDT_SW_NTP4_BARLTBASE5,	IDT_SW_NTP4_BARUTBASE5} } },
+/*5*/	{0},
+/*6*/	{ IDT_SW_NTP6_PCIECMDSTS,	IDT_SW_NTP6_PCIELCTLSTS,
+	  IDT_SW_NTP6_NTCTL,
+	  IDT_SW_SWPORT6CTL,		IDT_SW_SWPORT6STS,
+	  { {IDT_SW_NTP6_BARSETUP0,	IDT_SW_NTP6_BARLIMIT0,
+	     IDT_SW_NTP6_BARLTBASE0,	IDT_SW_NTP6_BARUTBASE0},
+	    {IDT_SW_NTP6_BARSETUP1,	IDT_SW_NTP6_BARLIMIT1,
+	     IDT_SW_NTP6_BARLTBASE1,	IDT_SW_NTP6_BARUTBASE1},
+	    {IDT_SW_NTP6_BARSETUP2,	IDT_SW_NTP6_BARLIMIT2,
+	     IDT_SW_NTP6_BARLTBASE2,	IDT_SW_NTP6_BARUTBASE2},
+	    {IDT_SW_NTP6_BARSETUP3,	IDT_SW_NTP6_BARLIMIT3,
+	     IDT_SW_NTP6_BARLTBASE3,	IDT_SW_NTP6_BARUTBASE3},
+	    {IDT_SW_NTP6_BARSETUP4,	IDT_SW_NTP6_BARLIMIT4,
+	     IDT_SW_NTP6_BARLTBASE4,	IDT_SW_NTP6_BARUTBASE4},
+	    {IDT_SW_NTP6_BARSETUP5,	IDT_SW_NTP6_BARLIMIT5,
+	     IDT_SW_NTP6_BARLTBASE5,	IDT_SW_NTP6_BARUTBASE5} } },
+/*7*/	{0},
+/*8*/	{ IDT_SW_NTP8_PCIECMDSTS,	IDT_SW_NTP8_PCIELCTLSTS,
+	  IDT_SW_NTP8_NTCTL,
+	  IDT_SW_SWPORT8CTL,		IDT_SW_SWPORT8STS,
+	  { {IDT_SW_NTP8_BARSETUP0,	IDT_SW_NTP8_BARLIMIT0,
+	     IDT_SW_NTP8_BARLTBASE0,	IDT_SW_NTP8_BARUTBASE0},
+	    {IDT_SW_NTP8_BARSETUP1,	IDT_SW_NTP8_BARLIMIT1,
+	     IDT_SW_NTP8_BARLTBASE1,	IDT_SW_NTP8_BARUTBASE1},
+	    {IDT_SW_NTP8_BARSETUP2,	IDT_SW_NTP8_BARLIMIT2,
+	     IDT_SW_NTP8_BARLTBASE2,	IDT_SW_NTP8_BARUTBASE2},
+	    {IDT_SW_NTP8_BARSETUP3,	IDT_SW_NTP8_BARLIMIT3,
+	     IDT_SW_NTP8_BARLTBASE3,	IDT_SW_NTP8_BARUTBASE3},
+	    {IDT_SW_NTP8_BARSETUP4,	IDT_SW_NTP8_BARLIMIT4,
+	     IDT_SW_NTP8_BARLTBASE4,	IDT_SW_NTP8_BARUTBASE4},
+	    {IDT_SW_NTP8_BARSETUP5,	IDT_SW_NTP8_BARLIMIT5,
+	     IDT_SW_NTP8_BARLTBASE5,	IDT_SW_NTP8_BARUTBASE5} } },
+/*9*/	{0},
+/*10*/	{0},
+/*11*/	{0},
+/*12*/	{ IDT_SW_NTP12_PCIECMDSTS,	IDT_SW_NTP12_PCIELCTLSTS,
+	  IDT_SW_NTP12_NTCTL,
+	  IDT_SW_SWPORT12CTL,		IDT_SW_SWPORT12STS,
+	  { {IDT_SW_NTP12_BARSETUP0,	IDT_SW_NTP12_BARLIMIT0,
+	     IDT_SW_NTP12_BARLTBASE0,	IDT_SW_NTP12_BARUTBASE0},
+	    {IDT_SW_NTP12_BARSETUP1,	IDT_SW_NTP12_BARLIMIT1,
+	     IDT_SW_NTP12_BARLTBASE1,	IDT_SW_NTP12_BARUTBASE1},
+	    {IDT_SW_NTP12_BARSETUP2,	IDT_SW_NTP12_BARLIMIT2,
+	     IDT_SW_NTP12_BARLTBASE2,	IDT_SW_NTP12_BARUTBASE2},
+	    {IDT_SW_NTP12_BARSETUP3,	IDT_SW_NTP12_BARLIMIT3,
+	     IDT_SW_NTP12_BARLTBASE3,	IDT_SW_NTP12_BARUTBASE3},
+	    {IDT_SW_NTP12_BARSETUP4,	IDT_SW_NTP12_BARLIMIT4,
+	     IDT_SW_NTP12_BARLTBASE4,	IDT_SW_NTP12_BARUTBASE4},
+	    {IDT_SW_NTP12_BARSETUP5,	IDT_SW_NTP12_BARLIMIT5,
+	     IDT_SW_NTP12_BARLTBASE5,	IDT_SW_NTP12_BARUTBASE5} } },
+/*13*/	{0},
+/*14*/	{0},
+/*15*/	{0},
+/*16*/	{ IDT_SW_NTP16_PCIECMDSTS,	IDT_SW_NTP16_PCIELCTLSTS,
+	  IDT_SW_NTP16_NTCTL,
+	  IDT_SW_SWPORT16CTL,		IDT_SW_SWPORT16STS,
+	  { {IDT_SW_NTP16_BARSETUP0,	IDT_SW_NTP16_BARLIMIT0,
+	     IDT_SW_NTP16_BARLTBASE0,	IDT_SW_NTP16_BARUTBASE0},
+	    {IDT_SW_NTP16_BARSETUP1,	IDT_SW_NTP16_BARLIMIT1,
+	     IDT_SW_NTP16_BARLTBASE1,	IDT_SW_NTP16_BARUTBASE1},
+	    {IDT_SW_NTP16_BARSETUP2,	IDT_SW_NTP16_BARLIMIT2,
+	     IDT_SW_NTP16_BARLTBASE2,	IDT_SW_NTP16_BARUTBASE2},
+	    {IDT_SW_NTP16_BARSETUP3,	IDT_SW_NTP16_BARLIMIT3,
+	     IDT_SW_NTP16_BARLTBASE3,	IDT_SW_NTP16_BARUTBASE3},
+	    {IDT_SW_NTP16_BARSETUP4,	IDT_SW_NTP16_BARLIMIT4,
+	     IDT_SW_NTP16_BARLTBASE4,	IDT_SW_NTP16_BARUTBASE4},
+	    {IDT_SW_NTP16_BARSETUP5,	IDT_SW_NTP16_BARLIMIT5,
+	     IDT_SW_NTP16_BARLTBASE5,	IDT_SW_NTP16_BARUTBASE5} } },
+/*17*/	{0},
+/*18*/	{0},
+/*19*/	{0},
+/*20*/	{ IDT_SW_NTP20_PCIECMDSTS,	IDT_SW_NTP20_PCIELCTLSTS,
+	  IDT_SW_NTP20_NTCTL,
+	  IDT_SW_SWPORT20CTL,		IDT_SW_SWPORT20STS,
+	  { {IDT_SW_NTP20_BARSETUP0,	IDT_SW_NTP20_BARLIMIT0,
+	     IDT_SW_NTP20_BARLTBASE0,	IDT_SW_NTP20_BARUTBASE0},
+	    {IDT_SW_NTP20_BARSETUP1,	IDT_SW_NTP20_BARLIMIT1,
+	     IDT_SW_NTP20_BARLTBASE1,	IDT_SW_NTP20_BARUTBASE1},
+	    {IDT_SW_NTP20_BARSETUP2,	IDT_SW_NTP20_BARLIMIT2,
+	     IDT_SW_NTP20_BARLTBASE2,	IDT_SW_NTP20_BARUTBASE2},
+	    {IDT_SW_NTP20_BARSETUP3,	IDT_SW_NTP20_BARLIMIT3,
+	     IDT_SW_NTP20_BARLTBASE3,	IDT_SW_NTP20_BARUTBASE3},
+	    {IDT_SW_NTP20_BARSETUP4,	IDT_SW_NTP20_BARLIMIT4,
+	     IDT_SW_NTP20_BARLTBASE4,	IDT_SW_NTP20_BARUTBASE4},
+	    {IDT_SW_NTP20_BARSETUP5,	IDT_SW_NTP20_BARLIMIT5,
+	     IDT_SW_NTP20_BARLTBASE5,	IDT_SW_NTP20_BARUTBASE5} } },
+/*21*/	{0},
+/*22*/	{0},
+/*23*/	{0}
+};
+
+/*
+ * IDT PCIe-switch partitions table with the corresponding control, status
+ * and messages control registers
+ */
+static const struct idt_ntb_part partdata_tbl[IDT_MAX_NR_PARTS] = {
+/*0*/	{ IDT_SW_SWPART0CTL,	IDT_SW_SWPART0STS,
+	  {IDT_SW_SWP0MSGCTL0,	IDT_SW_SWP0MSGCTL1,
+	   IDT_SW_SWP0MSGCTL2,	IDT_SW_SWP0MSGCTL3} },
+/*1*/	{ IDT_SW_SWPART1CTL,	IDT_SW_SWPART1STS,
+	  {IDT_SW_SWP1MSGCTL0,	IDT_SW_SWP1MSGCTL1,
+	   IDT_SW_SWP1MSGCTL2,	IDT_SW_SWP1MSGCTL3} },
+/*2*/	{ IDT_SW_SWPART2CTL,	IDT_SW_SWPART2STS,
+	  {IDT_SW_SWP2MSGCTL0,	IDT_SW_SWP2MSGCTL1,
+	   IDT_SW_SWP2MSGCTL2,	IDT_SW_SWP2MSGCTL3} },
+/*3*/	{ IDT_SW_SWPART3CTL,	IDT_SW_SWPART3STS,
+	  {IDT_SW_SWP3MSGCTL0,	IDT_SW_SWP3MSGCTL1,
+	   IDT_SW_SWP3MSGCTL2,	IDT_SW_SWP3MSGCTL3} },
+/*4*/	{ IDT_SW_SWPART4CTL,	IDT_SW_SWPART4STS,
+	  {IDT_SW_SWP4MSGCTL0,	IDT_SW_SWP4MSGCTL1,
+	   IDT_SW_SWP4MSGCTL2,	IDT_SW_SWP4MSGCTL3} },
+/*5*/	{ IDT_SW_SWPART5CTL,	IDT_SW_SWPART5STS,
+	  {IDT_SW_SWP5MSGCTL0,	IDT_SW_SWP5MSGCTL1,
+	   IDT_SW_SWP5MSGCTL2,	IDT_SW_SWP5MSGCTL3} },
+/*6*/	{ IDT_SW_SWPART6CTL,	IDT_SW_SWPART6STS,
+	  {IDT_SW_SWP6MSGCTL0,	IDT_SW_SWP6MSGCTL1,
+	   IDT_SW_SWP6MSGCTL2,	IDT_SW_SWP6MSGCTL3} },
+/*7*/	{ IDT_SW_SWPART7CTL,	IDT_SW_SWPART7STS,
+	  {IDT_SW_SWP7MSGCTL0,	IDT_SW_SWP7MSGCTL1,
+	   IDT_SW_SWP7MSGCTL2,	IDT_SW_SWP7MSGCTL3} }
+};
+
+/*
+ * DebugFS directory to place the driver debug file
+ */
+static struct dentry *dbgfs_topdir;
+
+/*=============================================================================
+ *                1. IDT PCIe-switch registers IO-functions
+ *
+ *    Beside ordinary configuration space registers IDT PCIe-switch expose
+ * global configuration registers, which are used to determine state of other
+ * device ports as well as being notified of some switch-related events.
+ * Additionally all the configuration space registers of all the IDT
+ * PCIe-switch functions are mapped to the Global Address space, so each
+ * function can determine a configuration of any other PCI-function.
+ *    Functions declared in this chapter are created to encapsulate access
+ * to configuration and global registers, so the driver code just need to
+ * provide IDT NTB hardware descriptor and a register address.
+ *=============================================================================
+ */
+
+/*
+ * idt_nt_write() - PCI configuration space registers write method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to write data to
+ * @data:	Value to write to the register
+ *
+ * IDT PCIe-switch registers are all Little endian.
+ */
+static void idt_nt_write(struct idt_ntb_dev *ndev,
+			 const unsigned int reg, const u32 data)
+{
+	/*
+	 * It's obvious bug to request a register exceeding the maximum possible
+	 * value as well as to have it unaligned.
+	 */
+	if (WARN_ON(reg > IDT_REG_PCI_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return;
+
+	/* Just write the value to the specified register */
+	iowrite32(data, ndev->cfgspc + (ptrdiff_t)reg);
+}
+
+/*
+ * idt_nt_read() - PCI configuration space registers read method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to write data to
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ *
+ * Return: register value
+ */
+static u32 idt_nt_read(struct idt_ntb_dev *ndev, const unsigned int reg)
+{
+	/*
+	 * It's obvious bug to request a register exceeding the maximum possible
+	 * value as well as to have it unaligned.
+	 */
+	if (WARN_ON(reg > IDT_REG_PCI_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return ~0;
+
+	/* Just read the value from the specified register */
+	return ioread32(ndev->cfgspc + (ptrdiff_t)reg);
+}
+
+/*
+ * idt_sw_write() - Global registers write method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to write data to
+ * @data:	Value to write to the register
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ */
+static void idt_sw_write(struct idt_ntb_dev *ndev,
+			 const unsigned int reg, const u32 data)
+{
+	unsigned long irqflags;
+
+	/*
+	 * It's obvious bug to request a register exceeding the maximum possible
+	 * value as well as to have it unaligned.
+	 */
+	if (WARN_ON(reg > IDT_REG_SW_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return;
+
+	/* Lock GASA registers operations */
+	spin_lock_irqsave(&ndev->gasa_lock, irqflags);
+	/* Set the global register address */
+	iowrite32((u32)reg, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASAADDR);
+	/* Put the new value of the register */
+	iowrite32(data, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASADATA);
+	/* Make sure the PCIe transactions are executed */
+	mmiowb();
+	/* Unlock GASA registers operations */
+	spin_unlock_irqrestore(&ndev->gasa_lock, irqflags);
+}
+
+/*
+ * idt_sw_read() - Global registers read method
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to write data to
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ *
+ * Return: register value
+ */
+static u32 idt_sw_read(struct idt_ntb_dev *ndev, const unsigned int reg)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	/*
+	 * It's obvious bug to request a register exceeding the maximum possible
+	 * value as well as to have it unaligned.
+	 */
+	if (WARN_ON(reg > IDT_REG_SW_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return ~0;
+
+	/* Lock GASA registers operations */
+	spin_lock_irqsave(&ndev->gasa_lock, irqflags);
+	/* Set the global register address */
+	iowrite32((u32)reg, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASAADDR);
+	/* Get the data of the register (read ops acts as MMIO barrier) */
+	data = ioread32(ndev->cfgspc + (ptrdiff_t)IDT_NT_GASADATA);
+	/* Unlock GASA registers operations */
+	spin_unlock_irqrestore(&ndev->gasa_lock, irqflags);
+
+	return data;
+}
+
+/*
+ * idt_reg_set_bits() - set bits of a passed register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to change bits of
+ * @reg_lock:	Register access spin lock
+ * @valid_mask:	Mask of valid bits
+ * @set_bits:	Bitmask to set
+ *
+ * Helper method to check whether a passed bitfield is valid and set
+ * corresponding bits of a register.
+ *
+ * WARNING! Make sure the passed register isn't accessed over plane
+ * idt_nt_write() method (read method is ok to be used concurrently).
+ *
+ * Return: zero on success, negative error on invalid bitmask.
+ */
+static inline int idt_reg_set_bits(struct idt_ntb_dev *ndev, unsigned int reg,
+				   spinlock_t *reg_lock,
+				   u64 valid_mask, u64 set_bits)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	if (set_bits & ~(u64)valid_mask)
+		return -EINVAL;
+
+	/* Lock access to the register unless the change is written back */
+	spin_lock_irqsave(reg_lock, irqflags);
+	data = idt_nt_read(ndev, reg) | (u32)set_bits;
+	idt_nt_write(ndev, reg, data);
+	/* Unlock the register */
+	spin_unlock_irqrestore(reg_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * idt_reg_clear_bits() - clear bits of a passed register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to change bits of
+ * @reg_lock:	Register access spin lock
+ * @set_bits:	Bitmask to clear
+ *
+ * Helper method to check whether a passed bitfield is valid and clear
+ * corresponding bits of a register.
+ *
+ * NOTE! Invalid bits are always considered cleared so it's not an error
+ * to clear them over.
+ *
+ * WARNING! Make sure the passed register isn't accessed over plane
+ * idt_nt_write() method (read method is ok to use concurrently).
+ */
+static inline void idt_reg_clear_bits(struct idt_ntb_dev *ndev,
+				     unsigned int reg, spinlock_t *reg_lock,
+				     u64 clear_bits)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	/* Lock access to the register unless the change is written back */
+	spin_lock_irqsave(reg_lock, irqflags);
+	data = idt_nt_read(ndev, reg) & ~(u32)clear_bits;
+	idt_nt_write(ndev, reg, data);
+	/* Unlock the register */
+	spin_unlock_irqrestore(reg_lock, irqflags);
+}
+
+/*===========================================================================
+ *                           2. Ports operations
+ *
+ *    IDT PCIe-switches can have from 3 up to 8 ports with possible
+ * NT-functions enabled. So all the possible ports need to be scanned looking
+ * for NTB activated. NTB API will have enumerated only the ports with NTB.
+ *===========================================================================
+ */
+
+/*
+ * idt_scan_ports() - scan IDT PCIe-switch ports collecting info in the tables
+ * @ndev:	Pointer to the PCI device descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_scan_ports(struct idt_ntb_dev *ndev)
+{
+	unsigned char pidx, port, part;
+	u32 data, portsts, partsts;
+
+	/* Retrieve the local port number */
+	data = idt_nt_read(ndev, IDT_NT_PCIELCAP);
+	ndev->port = GET_FIELD(PCIELCAP_PORTNUM, data);
+
+	/* Retrieve the local partition number */
+	portsts = idt_sw_read(ndev, portdata_tbl[ndev->port].sts);
+	ndev->part = GET_FIELD(SWPORTxSTS_SWPART, portsts);
+
+	/* Initialize port/partition -> index tables with invalid values */
+	memset(ndev->port_idx_map, -EINVAL, sizeof(ndev->port_idx_map));
+	memset(ndev->part_idx_map, -EINVAL, sizeof(ndev->part_idx_map));
+
+	/*
+	 * Walk over all the possible ports checking whether any of them has
+	 * NT-function activated
+	 */
+	ndev->peer_cnt = 0;
+	for (pidx = 0; pidx < ndev->swcfg->port_cnt; pidx++) {
+		port = ndev->swcfg->ports[pidx];
+		/* Skip local port */
+		if (port == ndev->port)
+			continue;
+
+		/* Read the port status register to get it partition */
+		portsts = idt_sw_read(ndev, portdata_tbl[port].sts);
+		part = GET_FIELD(SWPORTxSTS_SWPART, portsts);
+
+		/* Retrieve the partition status */
+		partsts = idt_sw_read(ndev, partdata_tbl[part].sts);
+		/* Check if partition state is active and port has NTB */
+		if (IS_FLD_SET(SWPARTxSTS_STATE, partsts, ACT) &&
+		    (IS_FLD_SET(SWPORTxSTS_MODE, portsts, NT) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, USNT) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, USNTDMA) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, NTDMA))) {
+			/* Save the port and partition numbers */
+			ndev->peers[ndev->peer_cnt].port = port;
+			ndev->peers[ndev->peer_cnt].part = part;
+			/* Fill in the port/partition -> index tables */
+			ndev->port_idx_map[port] = ndev->peer_cnt;
+			ndev->part_idx_map[part] = ndev->peer_cnt;
+			ndev->peer_cnt++;
+		}
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local port: %hhu, num of peers: %hhu\n",
+		ndev->port, ndev->peer_cnt);
+
+	/* It's useless to have this driver loaded if there is no any peer */
+	if (ndev->peer_cnt == 0) {
+		dev_warn(&ndev->ntb.pdev->dev, "No active peer found\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_ntb_port_number() - get the local port number
+ * @ntb:	NTB device context.
+ *
+ * Return: the local port number
+ */
+static int idt_ntb_port_number(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return ndev->port;
+}
+
+/*
+ * idt_ntb_peer_port_count() - get the number of peer ports
+ * @ntb:	NTB device context.
+ *
+ * Return the count of detected peer NT-functions.
+ *
+ * Return: number of peer ports
+ */
+static int idt_ntb_peer_port_count(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return ndev->peer_cnt;
+}
+
+/*
+ * idt_ntb_peer_port_number() - get peer port by given index
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * Return: peer port or negative error
+ */
+static int idt_ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	/* Return the detected NT-function port number */
+	return ndev->peers[pidx].port;
+}
+
+/*
+ * idt_ntb_peer_port_idx() - get peer port index by given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number.
+ *
+ * Internal port -> index table is pre-initialized with -EINVAL values,
+ * so we just need to return it value
+ *
+ * Return: peer NT-function port index or negative error
+ */
+static int idt_ntb_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (port < 0 || IDT_MAX_NR_PORTS <= port)
+		return -EINVAL;
+
+	return ndev->port_idx_map[port];
+}
+
+/*===========================================================================
+ *                         3. Link status operations
+ *    There is no any ready-to-use method to have peer ports notified if NTB
+ * link is set up or got down. Instead global signal can be used instead.
+ * In case if any one of ports changes local NTB link state, it sends
+ * global signal and clears corresponding global state bit. Then all the ports
+ * receive a notification of that, so to make client driver being aware of
+ * possible NTB link change.
+ *    Additionally each of active NT-functions is subscribed to PCIe-link
+ * state changes of peer ports.
+ *===========================================================================
+ */
+
+static void idt_ntb_local_link_disable(struct idt_ntb_dev *ndev);
+
+/*
+ * idt_init_link() - Initialize NTB link state notification subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Function performs the basic initialization of some global registers
+ * needed to enable IRQ-based notifications of PCIe Link Up/Down and
+ * Global Signal events.
+ * NOTE Since it's not possible to determine when all the NTB peer drivers are
+ * unloaded as well as have those registers accessed concurrently, we must
+ * preinitialize them with the same value and leave it uncleared on local
+ * driver unload.
+ */
+static void idt_init_link(struct idt_ntb_dev *ndev)
+{
+	u32 part_mask, port_mask, se_mask;
+	unsigned char pidx;
+
+	/* Initialize spin locker of Mapping Table access registers */
+	spin_lock_init(&ndev->mtbl_lock);
+
+	/* Walk over all detected peers collecting port and partition masks */
+	port_mask = ~BIT(ndev->port);
+	part_mask = ~BIT(ndev->part);
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		port_mask &= ~BIT(ndev->peers[pidx].port);
+		part_mask &= ~BIT(ndev->peers[pidx].part);
+	}
+
+	/* Clean the Link Up/Down and GLobal Signal status registers */
+	idt_sw_write(ndev, IDT_SW_SELINKUPSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SELINKDNSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)-1);
+
+	/* Unmask NT-activated partitions to receive Global Switch events */
+	idt_sw_write(ndev, IDT_SW_SEPMSK, part_mask);
+
+	/* Enable PCIe Link Up events of NT-activated ports */
+	idt_sw_write(ndev, IDT_SW_SELINKUPMSK, port_mask);
+
+	/* Enable PCIe Link Down events of NT-activated ports */
+	idt_sw_write(ndev, IDT_SW_SELINKDNMSK, port_mask);
+
+	/* Unmask NT-activated partitions to receive Global Signal events */
+	idt_sw_write(ndev, IDT_SW_SEGSIGMSK, part_mask);
+
+	/* Unmask Link Up/Down and Global Switch Events */
+	se_mask = ~(IDT_SEMSK_LINKUP | IDT_SEMSK_LINKDN | IDT_SEMSK_GSIGNAL);
+	idt_sw_write(ndev, IDT_SW_SEMSK, se_mask);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB link status events initialized");
+}
+
+/*
+ * idt_deinit_link() - deinitialize link subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just disable the link back.
+ */
+static void idt_deinit_link(struct idt_ntb_dev *ndev)
+{
+	/* Disable the link */
+	idt_ntb_local_link_disable(ndev);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB link status events deinitialized");
+}
+
+/*
+ * idt_se_isr() - switch events ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * This driver doesn't support IDT PCIe-switch dynamic reconfigurations,
+ * Failover capability, etc, so switch events are utilized to notify of
+ * PCIe and NTB link events.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_se_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	u32 sests;
+
+	/* Read Switch Events status */
+	sests = idt_sw_read(ndev, IDT_SW_SESTS);
+
+	/* Clean the Link Up/Down and Global Signal status registers */
+	idt_sw_write(ndev, IDT_SW_SELINKUPSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SELINKDNSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)-1);
+
+	/* Clean the corresponding interrupt bit */
+	idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_SEVENT);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "SE IRQ detected %#08x (SESTS %#08x)",
+			  ntint_sts, sests);
+
+	/* Notify the client driver of possible link state change */
+	ntb_link_event(&ndev->ntb);
+}
+
+/*
+ * idt_ntb_local_link_enable() - enable the local NTB link.
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * In order to enable the NTB link we need:
+ * - enable Completion TLPs translation
+ * - initialize mapping table to enable the Request ID translation
+ * - notify peers of NTB link state change
+ */
+static void idt_ntb_local_link_enable(struct idt_ntb_dev *ndev)
+{
+	u32 reqid, mtbldata = 0;
+	unsigned long irqflags;
+
+	/* Enable the ID protection and Completion TLPs translation */
+	idt_nt_write(ndev, IDT_NT_NTCTL, IDT_NTCTL_CPEN);
+
+	/* Retrieve the current Requester ID (Bus:Device:Function) */
+	reqid = idt_nt_read(ndev, IDT_NT_REQIDCAP);
+
+	/*
+	 * Set the corresponding NT Mapping table entry of port partition index
+	 * with the data to perform the Request ID translation
+	 */
+	mtbldata = SET_FIELD(NTMTBLDATA_REQID, 0, reqid) |
+		   SET_FIELD(NTMTBLDATA_PART, 0, ndev->part) |
+		   IDT_NTMTBLDATA_VALID;
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	idt_nt_write(ndev, IDT_NT_NTMTBLDATA, mtbldata);
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	/* Notify the peers by setting and clearing the global signal bit */
+	idt_nt_write(ndev, IDT_NT_NTGSIGNAL, IDT_NTGSIGNAL_SET);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)1 << ndev->part);
+}
+
+/*
+ * idt_ntb_local_link_disable() - disable the local NTB link.
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * In order to enable the NTB link we need:
+ * - disable Completion TLPs translation
+ * - clear corresponding mapping table entry
+ * - notify peers of NTB link state change
+ */
+static void idt_ntb_local_link_disable(struct idt_ntb_dev *ndev)
+{
+	unsigned long irqflags;
+
+	/* Disable Completion TLPs translation */
+	idt_nt_write(ndev, IDT_NT_NTCTL, 0);
+
+	/* Clear the corresponding NT Mapping table entry */
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	idt_nt_write(ndev, IDT_NT_NTMTBLDATA, 0);
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	/* Notify the peers by setting and clearing the global signal bit */
+	idt_nt_write(ndev, IDT_NT_NTGSIGNAL, IDT_NTGSIGNAL_SET);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)1 << ndev->part);
+}
+
+/*
+ * idt_ntb_local_link_is_up() - test wethter local NTB link is up
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Local link is up under the following conditions:
+ * - Bus mastering is enabled
+ * - NTCTL has Completion TLPs translation enabled
+ * - Mapping table permits Request TLPs translation
+ * NOTE: We don't need to check PCIe link state since it's obviously
+ * up while we are able to communicate with IDT PCIe-switch
+ *
+ * Return: true if link is up, otherwise false
+ */
+static bool idt_ntb_local_link_is_up(struct idt_ntb_dev *ndev)
+{
+	unsigned long irqflags;
+	u32 data;
+
+	/* Read the local Bus Master Enable status */
+	data = idt_nt_read(ndev, IDT_NT_PCICMDSTS);
+	if (!(data & IDT_PCICMDSTS_BME))
+		return false;
+
+	/* Read the local Completion TLPs translation enable status */
+	data = idt_nt_read(ndev, IDT_NT_NTCTL);
+	if (!(data & IDT_NTCTL_CPEN))
+		return false;
+
+	/* Read Mapping table entry corresponding to the local partition */
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	data = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	return !!(data & IDT_NTMTBLDATA_VALID);
+}
+
+/*
+ * idt_ntb_peer_link_is_up() - test whether peer NTB link is up
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @pidx:	Peer port index
+ *
+ * Peer link is up under the following conditions:
+ * - PCIe link is up
+ * - Bus mastering is enabled
+ * - NTCTL has Completion TLPs translation enabled
+ * - Mapping table permits Request TLPs translation
+ *
+ * Return: true if link is up, otherwise false
+ */
+static bool idt_ntb_peer_link_is_up(struct idt_ntb_dev *ndev, int pidx)
+{
+	unsigned long irqflags;
+	unsigned char port;
+	u32 data;
+
+	/* Retrieve the device port number */
+	port = ndev->peers[pidx].port;
+
+	/* Check whether PCIe link is up */
+	data = idt_sw_read(ndev, portdata_tbl[port].sts);
+	if (!(data & IDT_SWPORTxSTS_LINKUP))
+		return false;
+
+	/* Check whether bus mastering is enabled on the peer port */
+	data = idt_sw_read(ndev, portdata_tbl[port].pcicmdsts);
+	if (!(data & IDT_PCICMDSTS_BME))
+		return false;
+
+	/* Check if Completion TLPs translation is enabled on the peer port */
+	data = idt_sw_read(ndev, portdata_tbl[port].ntctl);
+	if (!(data & IDT_NTCTL_CPEN))
+		return false;
+
+	/* Read Mapping table entry corresponding to the peer partition */
+	spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->peers[pidx].part);
+	data = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+	spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+	return !!(data & IDT_NTMTBLDATA_VALID);
+}
+
+/*
+ * idt_ntb_link_is_up() - get the current ntb link state (NTB API callback)
+ * @ntb:	NTB device context.
+ * @speed:	OUT - The link speed expressed as PCIe generation number.
+ * @width:	OUT - The link width expressed as the number of PCIe lanes.
+ *
+ * Get the bitfield of NTB link states for all peer ports
+ *
+ * Return: bitfield of indexed ports link state: bit is set/cleared if the
+ *         link is up/down respectively.
+ */
+static u64 idt_ntb_link_is_up(struct ntb_dev *ntb,
+			      enum ntb_speed *speed, enum ntb_width *width)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	unsigned char pidx;
+	u64 status;
+	u32 data;
+
+	/* Retrieve the local link speed and width */
+	if (speed != NULL || width != NULL) {
+		data = idt_nt_read(ndev, IDT_NT_PCIELCTLSTS);
+		if (speed != NULL)
+			*speed = GET_FIELD(PCIELCTLSTS_CLS, data);
+		if (width != NULL)
+			*width = GET_FIELD(PCIELCTLSTS_NLW, data);
+	}
+
+	/* If local NTB link isn't up then all the links are considered down */
+	if (!idt_ntb_local_link_is_up(ndev))
+		return 0;
+
+	/* Collect all the peer ports link states into the bitfield */
+	status = 0;
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		if (idt_ntb_peer_link_is_up(ndev, pidx))
+			status |= ((u64)1 << pidx);
+	}
+
+	return status;
+}
+
+/*
+ * idt_ntb_link_enable() - enable local port ntb link (NTB API callback)
+ * @ntb:	NTB device context.
+ * @max_speed:	The maximum link speed expressed as PCIe generation number.
+ * @max_width:	The maximum link width expressed as the number of PCIe lanes.
+ *
+ * Enable just local NTB link. PCIe link parameters are ignored.
+ *
+ * Return: always zero.
+ */
+static int idt_ntb_link_enable(struct ntb_dev *ntb, enum ntb_speed speed,
+			       enum ntb_width width)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	/* Just enable the local NTB link */
+	idt_ntb_local_link_enable(ndev);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local NTB link enabled");
+
+	return 0;
+}
+
+/*
+ * idt_ntb_link_disable() - disable local port ntb link (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Disable just local NTB link.
+ *
+ * Return: always zero.
+ */
+static int idt_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	/* Just disable the local NTB link */
+	idt_ntb_local_link_disable(ndev);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local NTB link disabled");
+
+	return 0;
+}
+
+/*=============================================================================
+ *                         4. Memory Window operations
+ *
+ *    IDT PCIe-switches have two types of memory windows: MWs with direct
+ * address translation and MWs with LUT based translation. The first type of
+ * MWs is simple map of corresponding BAR address space to a memory space
+ * of specified target port. So it implemets just ont-to-one mapping. Lookup
+ * table in its turn can map one BAR address space to up to 24 different
+ * memory spaces of different ports.
+ *    NT-functions BARs can be turned on to implement either direct or lookup
+ * table based address translations, so:
+ * BAR0 - NT configuration registers space/direct address translation
+ * BAR1 - direct address translation/upper address of BAR0x64
+ * BAR2 - direct address translation/Lookup table with either 12 or 24 entries
+ * BAR3 - direct address translation/upper address of BAR2x64
+ * BAR4 - direct address translation/Lookup table with either 12 or 24 entries
+ * BAR5 - direct address translation/upper address of BAR4x64
+ *    Additionally BAR2 and BAR4 can't have 24-entries LUT enabled at the same
+ * time. Since the BARs setup can be rather complicated this driver implements
+ * a scanning algorithm to have all the possible memory windows configuration
+ * covered.
+ *
+ * NOTE 1 BAR setup must be done before Linux kernel enumerated NT-function
+ * of any port, so this driver would have memory windows configurations fixed.
+ * In this way all initializations must be performed either by platform BIOS
+ * or using EEPROM connected to IDT PCIe-switch master SMBus.
+ *
+ * NOTE 2 This driver expects BAR0 mapping NT-function configuration space.
+ * Easy calculation can give us an upper boundary of 29 possible memory windows
+ * per each NT-function if all the BARs are of 32bit type.
+ *=============================================================================
+ */
+
+/*
+ * idt_get_mw_count() - get memory window count
+ * @mw_type:	Memory window type
+ *
+ * Return: number of memory windows with respect to the BAR type
+ */
+static inline unsigned char idt_get_mw_count(enum idt_mw_type mw_type)
+{
+	switch (mw_type) {
+	case IDT_MW_DIR:
+		return 1;
+	case IDT_MW_LUT12:
+		return 12;
+	case IDT_MW_LUT24:
+		return 24;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_get_mw_name() - get memory window name
+ * @mw_type:	Memory window type
+ *
+ * Return: pointer to a string with name
+ */
+static inline char *idt_get_mw_name(enum idt_mw_type mw_type)
+{
+	switch (mw_type) {
+	case IDT_MW_DIR:
+		return "DIR  ";
+	case IDT_MW_LUT12:
+		return "LUT12";
+	case IDT_MW_LUT24:
+		return "LUT24";
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+/*
+ * idt_scan_mws() - scan memory windows of the port
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @port:	Port to get number of memory windows for
+ * @mw_cnt:	Out - number of memory windows
+ *
+ * It walks over BAR setup registers of the specified port and determines
+ * the memory windows parameters if any activated.
+ *
+ * Return: array of memory windows
+ */
+static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
+				       unsigned char *mw_cnt)
+{
+	struct idt_mw_cfg mws[IDT_MAX_NR_MWS], *ret_mws;
+	const struct idt_ntb_bar *bars;
+	enum idt_mw_type mw_type;
+	unsigned char widx, bidx, en_cnt;
+	bool bar_64bit = false;
+	int aprt_size;
+	u32 data;
+
+	/* Retrieve the array of the BARs registers */
+	bars = portdata_tbl[port].bars;
+
+	/* Scan all the BARs belonging to the port */
+	*mw_cnt = 0;
+	for (bidx = 0; bidx < IDT_BAR_CNT; bidx += 1 + bar_64bit) {
+		/* Read BARSETUP register value */
+		data = idt_sw_read(ndev, bars[bidx].setup);
+
+		/* Skip disabled BARs */
+		if (!(data & IDT_BARSETUP_EN)) {
+			bar_64bit = false;
+			continue;
+		}
+
+		/* Skip next BARSETUP if current one has 64bit addressing */
+		bar_64bit = IS_FLD_SET(BARSETUP_TYPE, data, 64);
+
+		/* Skip configuration space mapping BARs */
+		if (data & IDT_BARSETUP_MODE_CFG)
+			continue;
+
+		/* Retrieve MW type/entries count and aperture size */
+		mw_type = GET_FIELD(BARSETUP_ATRAN, data);
+		en_cnt = idt_get_mw_count(mw_type);
+		aprt_size = (u64)1 << GET_FIELD(BARSETUP_SIZE, data);
+
+		/* Save configurations of all available memory windows */
+		for (widx = 0; widx < en_cnt; widx++, (*mw_cnt)++) {
+			/*
+			 * IDT can expose a limited number of MWs, so it's bug
+			 * to have more than the driver expects
+			 */
+			if (*mw_cnt >= IDT_MAX_NR_MWS)
+				return ERR_PTR(-EINVAL);
+
+			/* Save basic MW info */
+			mws[*mw_cnt].type = mw_type;
+			mws[*mw_cnt].bar = bidx;
+			mws[*mw_cnt].idx = widx;
+			/* It's always DWORD aligned */
+			mws[*mw_cnt].addr_align = IDT_TRANS_ALIGN;
+			/* DIR and LUT approachs differently configure MWs */
+			if (mw_type == IDT_MW_DIR)
+				mws[*mw_cnt].size_max = aprt_size;
+			else if (mw_type == IDT_MW_LUT12)
+				mws[*mw_cnt].size_max = aprt_size / 16;
+			else
+				mws[*mw_cnt].size_max = aprt_size / 32;
+			mws[*mw_cnt].size_align = (mw_type == IDT_MW_DIR) ?
+				IDT_DIR_SIZE_ALIGN : mws[*mw_cnt].size_max;
+		}
+	}
+
+	/* Allocate memory for memory window descriptors */
+	ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt,
+				sizeof(*ret_mws), GFP_KERNEL);
+	if (IS_ERR_OR_NULL(ret_mws))
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy the info of detected memory windows */
+	memcpy(ret_mws, mws, (*mw_cnt)*sizeof(*ret_mws));
+
+	return ret_mws;
+}
+
+/*
+ * idt_init_mws() - initialize memory windows subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Scan BAR setup registers of local and peer ports to determine the
+ * outbound and inbound memory windows parameters
+ *
+ * Return: zero on success, otherwise a negative error number
+ */
+static int idt_init_mws(struct idt_ntb_dev *ndev)
+{
+	struct idt_ntb_peer *peer;
+	unsigned char pidx;
+
+	/* Scan memory windows of the local port */
+	ndev->mws = idt_scan_mws(ndev, ndev->port, &ndev->mw_cnt);
+	if (IS_ERR(ndev->mws)) {
+		dev_err(&ndev->ntb.pdev->dev,
+			"Failed to scan mws of local port %hhu", ndev->port);
+		return PTR_ERR(ndev->mws);
+	}
+
+	/* Scan memory windows of the peer ports */
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		peer = &ndev->peers[pidx];
+		peer->mws = idt_scan_mws(ndev, peer->port, &peer->mw_cnt);
+		if (IS_ERR(peer->mws)) {
+			dev_err(&ndev->ntb.pdev->dev,
+				"Failed to scan mws of port %hhu", peer->port);
+			return PTR_ERR(peer->mws);
+		}
+	}
+
+	/* Initialize spin locker of the LUT registers */
+	spin_lock_init(&ndev->lut_lock);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Outbound and inbound MWs initialized");
+
+	return 0;
+}
+
+/*
+ * idt_ntb_mw_count() - number of inbound memory windows (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ *
+ * The value is returned for the specified peer, so generally speaking it can
+ * be different for different port depending on the IDT PCIe-switch
+ * initialization.
+ *
+ * Return: the number of memory windows.
+ */
+static int idt_ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	return ndev->peers[pidx].mw_cnt;
+}
+
+/*
+ * idt_ntb_mw_get_align() - inbound memory window parameters (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr_align:	OUT - the base alignment for translating the memory window
+ * @size_align:	OUT - the size alignment for translating the memory window
+ * @size_max:	OUT - the maximum size of the memory window
+ *
+ * The peer memory window parameters have already been determined, so just
+ * return the corresponding values, which mustn't change within session.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static int idt_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx,
+				resource_size_t *addr_align,
+				resource_size_t *size_align,
+				resource_size_t *size_max)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_ntb_peer *peer;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	peer = &ndev->peers[pidx];
+
+	if (widx < 0 || peer->mw_cnt <= widx)
+		return -EINVAL;
+
+	if (addr_align != NULL)
+		*addr_align = peer->mws[widx].addr_align;
+
+	if (size_align != NULL)
+		*size_align = peer->mws[widx].size_align;
+
+	if (size_max != NULL)
+		*size_max = peer->mws[widx].size_max;
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_count() - number of outbound memory windows
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Outbound memory windows parameters have been determined based on the
+ * BAR setup registers value, which are mostly constants within one session.
+ *
+ * Return: the number of memory windows.
+ */
+static int idt_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return ndev->mw_cnt;
+}
+
+/*
+ * idt_ntb_peer_mw_get_addr() - get map address of an outbound memory window
+ *				(NTB API callback)
+ * @ntb:	NTB device context.
+ * @widx:	Memory window index (within ntb_peer_mw_count() return value).
+ * @base:	OUT - the base address of mapping region.
+ * @size:	OUT - the size of mapping region.
+ *
+ * Return just parameters of BAR resources mapping. Size reflects just the size
+ * of the resource
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static int idt_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx,
+				    phys_addr_t *base, resource_size_t *size)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	/* Mapping address is just properly shifted BAR resource start */
+	if (base != NULL)
+		*base = pci_resource_start(ntb->pdev, ndev->mws[widx].bar) +
+			ndev->mws[widx].idx * ndev->mws[widx].size_max;
+
+	/* Mapping size has already been calculated at MWs scanning */
+	if (size != NULL)
+		*size = ndev->mws[widx].size_max;
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_set_trans() - set a translation address of a memory window
+ *				 (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device the translation address received from.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of the shared memory to access.
+ * @size:	The size of the shared memory to access.
+ *
+ * The Direct address translation and LUT base translation is initialized a
+ * bit differenet. Although the parameters restriction are now determined by
+ * the same code.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static int idt_ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+				     u64 addr, resource_size_t size)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_mw_cfg *mw_cfg;
+	u32 data = 0, lutoff = 0;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	/*
+	 * Retrieve the memory window config to make sure the passed arguments
+	 * fit it restrictions
+	 */
+	mw_cfg = &ndev->mws[widx];
+	if (!IS_ALIGNED(addr, mw_cfg->addr_align))
+		return -EINVAL;
+	if (!IS_ALIGNED(size, mw_cfg->size_align) || size > mw_cfg->size_max)
+		return -EINVAL;
+
+	/* DIR and LUT based translations are initialized differently */
+	if (mw_cfg->type == IDT_MW_DIR) {
+		const struct idt_ntb_bar *bar = &ntdata_tbl.bars[mw_cfg->bar];
+		u64 limit;
+		/* Set destination partition of translation */
+		data = idt_nt_read(ndev, bar->setup);
+		data = SET_FIELD(BARSETUP_TPART, data, ndev->peers[pidx].part);
+		idt_nt_write(ndev, bar->setup, data);
+		/* Set translation base address */
+		idt_nt_write(ndev, bar->ltbase, (u32)addr);
+		idt_nt_write(ndev, bar->utbase, (u32)(addr >> 32));
+		/* Set the custom BAR aperture limit */
+		limit = pci_resource_start(ntb->pdev, mw_cfg->bar) + size;
+		idt_nt_write(ndev, bar->limit, (u32)limit);
+		if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
+			idt_nt_write(ndev, (bar + 1)->limit, (limit >> 32));
+	} else {
+		unsigned long irqflags;
+		/* Initialize corresponding LUT entry */
+		lutoff = SET_FIELD(LUTOFFSET_INDEX, 0, mw_cfg->idx) |
+			 SET_FIELD(LUTOFFSET_BAR, 0, mw_cfg->bar);
+		data = SET_FIELD(LUTUDATA_PART, 0, ndev->peers[pidx].part) |
+			IDT_LUTUDATA_VALID;
+		spin_lock_irqsave(&ndev->lut_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_LUTOFFSET, lutoff);
+		idt_nt_write(ndev, IDT_NT_LUTLDATA, (u32)addr);
+		idt_nt_write(ndev, IDT_NT_LUTMDATA, (u32)(addr >> 32));
+		idt_nt_write(ndev, IDT_NT_LUTUDATA, data);
+		mmiowb();
+		spin_unlock_irqrestore(&ndev->lut_lock, irqflags);
+		/* Limit address isn't specified since size is fixed for LUT */
+	}
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_clear_trans() - clear the outbound MW translation address
+ *				   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ *
+ * It effectively disables the translation over the specified outbound MW.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static int idt_ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx,
+					int widx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_mw_cfg *mw_cfg;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	mw_cfg = &ndev->mws[widx];
+
+	/* DIR and LUT based translations are initialized differently */
+	if (mw_cfg->type == IDT_MW_DIR) {
+		const struct idt_ntb_bar *bar = &ntdata_tbl.bars[mw_cfg->bar];
+		u32 data;
+		/* Read BARSETUP to check BAR type */
+		data = idt_nt_read(ndev, bar->setup);
+		/* Disable translation by specifying zero BAR limit */
+		idt_nt_write(ndev, bar->limit, 0);
+		if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
+			idt_nt_write(ndev, (bar + 1)->limit, 0);
+	} else {
+		unsigned long irqflags;
+		u32 lutoff;
+		/* Clear the corresponding LUT entry up */
+		lutoff = SET_FIELD(LUTOFFSET_INDEX, 0, mw_cfg->idx) |
+			 SET_FIELD(LUTOFFSET_BAR, 0, mw_cfg->bar);
+		spin_lock_irqsave(&ndev->lut_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_LUTOFFSET, lutoff);
+		idt_nt_write(ndev, IDT_NT_LUTLDATA, 0);
+		idt_nt_write(ndev, IDT_NT_LUTMDATA, 0);
+		idt_nt_write(ndev, IDT_NT_LUTUDATA, 0);
+		mmiowb();
+		spin_unlock_irqrestore(&ndev->lut_lock, irqflags);
+	}
+
+	return 0;
+}
+
+/*=============================================================================
+ *                          5. Doorbell operations
+ *
+ *    Doorbell functionality of IDT PCIe-switches is pretty unusual. First of
+ * all there is global doorbell register which state can by changed by any
+ * NT-function of the IDT device in accordance with global permissions. These
+ * permissions configs are not supported by NTB API, so it must be done by
+ * either BIOS or EEPROM settings. In the same way the state of the global
+ * doorbell is reflected to the NT-functions local inbound doorbell registers.
+ * It can lead to situations when client driver sets some peer doorbell bits
+ * and get them bounced back to local inbound doorbell if permissions are
+ * granted.
+ *    Secondly there is just one IRQ vector for Doorbell, Message, Temperature
+ * and Switch events, so if client driver left any of Doorbell bits set and
+ * some other event occurred, the driver will be notified of Doorbell event
+ * again.
+ *=============================================================================
+ */
+
+/*
+ * idt_db_isr() - doorbell event ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * Doorbell event happans when DBELL bit of NTINTSTS switches from 0 to 1.
+ * It happens only when unmasked doorbell bits are set to ones on completely
+ * zeroed doorbell register.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_db_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	/*
+	 * Doorbell IRQ status will be cleaned only when client
+	 * driver unsets all the doorbell bits.
+	 */
+	dev_dbg(&ndev->ntb.pdev->dev, "DB IRQ detected %#08x", ntint_sts);
+
+	/* Notify the client driver of possible doorbell state change */
+	ntb_db_event(&ndev->ntb, 0);
+}
+
+/*
+ * idt_ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * IDT PCIe-switches expose just one Doorbell register of DWORD size.
+ *
+ * Return: A mask of doorbell bits supported by the ntb.
+ */
+static u64 idt_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	return IDT_DBELL_MASK;
+}
+
+/*
+ * idt_ntb_db_read() - read the local doorbell register (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * There is just on inbound doorbell register of each NT-function, so
+ * this method return it value.
+ *
+ * Return: The bits currently set in the local doorbell register.
+ */
+static u64 idt_ntb_db_read(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_nt_read(ndev, IDT_NT_INDBELLSTS);
+}
+
+/*
+ * idt_ntb_db_clear() - clear bits in the local doorbell register
+ *			(NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to clear.
+ *
+ * Clear bits of inbound doorbell register by writing ones to it.
+ *
+ * NOTE! Invalid bits are always considered cleared so it's not an error
+ * to clear them over.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_nt_write(ndev, IDT_NT_INDBELLSTS, (u32)db_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_db_read_mask() - read the local doorbell mask (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Each inbound doorbell bit can be masked from generating IRQ by setting
+ * the corresponding bit in inbound doorbell mask. So this method returns
+ * the value of the register.
+ *
+ * Return: The bits currently set in the local doorbell mask register.
+ */
+static u64 idt_ntb_db_read_mask(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_nt_read(ndev, IDT_NT_INDBELLMSK);
+}
+
+/*
+ * idt_ntb_db_set_mask() - set bits in the local doorbell mask
+ *			   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell mask bits to set.
+ *
+ * The inbound doorbell register mask value must be read, then OR'ed with
+ * passed field and only then set back.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_reg_set_bits(ndev, IDT_NT_INDBELLMSK, &ndev->db_mask_lock,
+				IDT_DBELL_MASK, db_bits);
+}
+
+/*
+ * idt_ntb_db_clear_mask() - clear bits in the local doorbell mask
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to clear.
+ *
+ * The method just clears the set bits up in accordance with the passed
+ * bitfield. IDT PCIe-switch shall generate an interrupt if there hasn't
+ * been any unmasked bit set before current unmasking. Otherwise IRQ won't
+ * be generated since there is only one IRQ vector for all doorbells.
+ *
+ * Return: always zero as success
+ */
+static int idt_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_reg_clear_bits(ndev, IDT_NT_INDBELLMSK, &ndev->db_mask_lock,
+			   db_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_db_set() - set bits in the peer doorbell register
+ *			   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to set.
+ *
+ * IDT PCIe-switches exposes local outbound doorbell register to change peer
+ * inbound doorbell register state.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (db_bits & ~(u64)IDT_DBELL_MASK)
+		return -EINVAL;
+
+	idt_nt_write(ndev, IDT_NT_OUTDBELLSET, (u32)db_bits);
+	return 0;
+}
+
+/*=============================================================================
+ *                          6. Messaging operations
+ *
+ *    Each NT-function of IDT PCIe-switch has four inbound and four outbound
+ * message registers. Each outbound message register can be connected to one or
+ * even more than one peer inbound message registers by setting global
+ * configurations. Since NTB API permits one-on-one message registers mapping
+ * only, the driver acts in according with that restriction.
+ *=============================================================================
+ */
+
+/*
+ * idt_init_msg() - initialize messaging interface
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just initialize the message registers routing tables locker.
+ */
+static void idt_init_msg(struct idt_ntb_dev *ndev)
+{
+	unsigned char midx;
+
+	/* Init the messages routing table lockers */
+	for (midx = 0; midx < IDT_MSG_CNT; midx++)
+		spin_lock_init(&ndev->msg_locks[midx]);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB Messaging initialized");
+}
+
+/*
+ * idt_msg_isr() - message event ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * Message event happens when MSG bit of NTINTSTS switches from 0 to 1.
+ * It happens only when unmasked message status bits are set to ones on
+ * completely zeroed message status register.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_msg_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	/*
+	 * Message IRQ status will be cleaned only when client
+	 * driver unsets all the message status bits.
+	 */
+	dev_dbg(&ndev->ntb.pdev->dev, "Message IRQ detected %#08x", ntint_sts);
+
+	/* Notify the client driver of possible message status change */
+	ntb_msg_event(&ndev->ntb);
+}
+
+/*
+ * idt_ntb_msg_count() - get the number of message registers (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * IDT PCIe-switches support four message registers.
+ *
+ * Return: the number of message registers.
+ */
+static int idt_ntb_msg_count(struct ntb_dev *ntb)
+{
+	return IDT_MSG_CNT;
+}
+
+/*
+ * idt_ntb_msg_inbits() - get a bitfield of inbound message registers status
+ *			  (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * NT message status register is shared between inbound and outbound message
+ * registers status
+ *
+ * Return: bitfield of inbound message registers.
+ */
+static u64 idt_ntb_msg_inbits(struct ntb_dev *ntb)
+{
+	return (u64)IDT_INMSG_MASK;
+}
+
+/*
+ * idt_ntb_msg_outbits() - get a bitfield of outbound message registers status
+ *			  (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * NT message status register is shared between inbound and outbound message
+ * registers status
+ *
+ * Return: bitfield of outbound message registers.
+ */
+static u64 idt_ntb_msg_outbits(struct ntb_dev *ntb)
+{
+	return (u64)IDT_OUTMSG_MASK;
+}
+
+/*
+ * idt_ntb_msg_read_sts() - read the message registers status (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * IDT PCIe-switches expose message status registers to notify drivers of
+ * incoming data and failures in case if peer message register isn't freed.
+ *
+ * Return: status bits of message registers
+ */
+static u64 idt_ntb_msg_read_sts(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_nt_read(ndev, IDT_NT_MSGSTS);
+}
+
+/*
+ * idt_ntb_msg_clear_sts() - clear status bits of message registers
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ * @sts_bits:	Status bits to clear.
+ *
+ * Clear bits in the status register by writing ones.
+ *
+ * NOTE! Invalid bits are always considered cleared so it's not an error
+ * to clear them over.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_nt_write(ndev, IDT_NT_MSGSTS, sts_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_set_mask() - set mask of message register status bits
+ *			    (NTB API callback)
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits.
+ *
+ * Mask the message status bits from raising an IRQ.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_reg_set_bits(ndev, IDT_NT_MSGSTSMSK, &ndev->msg_mask_lock,
+				IDT_MSG_MASK, mask_bits);
+}
+
+/*
+ * idt_ntb_msg_clear_mask() - clear message registers mask
+ *			      (NTB API callback)
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits.
+ *
+ * Clear mask of message status bits IRQs.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_reg_clear_bits(ndev, IDT_NT_MSGSTSMSK, &ndev->msg_mask_lock,
+			   mask_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_read() - read message register with specified index
+ *			(NTB API callback)
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	OUT - Port index of peer device a message retrieved from
+ * @msg:	OUT - Data
+ *
+ * Read data from the specified message register and source register.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (midx < 0 || IDT_MSG_CNT <= midx)
+		return -EINVAL;
+
+	/* Retrieve source port index of the message */
+	if (pidx != NULL) {
+		u32 srcpart;
+
+		srcpart = idt_nt_read(ndev, ntdata_tbl.msgs[midx].src);
+		*pidx = ndev->part_idx_map[srcpart];
+
+		/* Sanity check partition index (for initial case) */
+		if (*pidx == -EINVAL)
+			*pidx = 0;
+	}
+
+	/* Retrieve data of the corresponding message register */
+	if (msg != NULL)
+		*msg = idt_nt_read(ndev, ntdata_tbl.msgs[midx].in);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_write() - write data to the specified message register
+ *			 (NTB API callback)
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	Port index of peer device a message being sent to
+ * @msg:	Data to send
+ *
+ * Just try to send data to a peer. Message status register should be
+ * checked by client driver.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx, u32 msg)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	unsigned long irqflags;
+	u32 swpmsgctl = 0;
+
+	if (midx < 0 || IDT_MSG_CNT <= midx)
+		return -EINVAL;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	/* Collect the routing information */
+	swpmsgctl = SET_FIELD(SWPxMSGCTL_REG, 0, midx) |
+		    SET_FIELD(SWPxMSGCTL_PART, 0, ndev->peers[pidx].part);
+
+	/* Lock the messages routing table of the specified register */
+	spin_lock_irqsave(&ndev->msg_locks[midx], irqflags);
+	/* Set the route and send the data */
+	idt_sw_write(ndev, partdata_tbl[ndev->part].msgctl[midx], swpmsgctl);
+	idt_nt_write(ndev, ntdata_tbl.msgs[midx].out, msg);
+	mmiowb();
+	/* Unlock the messages routing table */
+	spin_unlock_irqrestore(&ndev->msg_locks[midx], irqflags);
+
+	/* Client driver shall check the status register */
+	return 0;
+}
+
+/*=============================================================================
+ *                      7. Temperature sensor operations
+ *
+ *    IDT PCIe-switch has an embedded temperature sensor, which can be used to
+ * warn a user-space of possible chip overheating. Since workload temperature
+ * can be different on different platforms, temperature thresholds as well as
+ * general sensor settings must be setup in the framework of BIOS/EEPROM
+ * initializations. It includes the actual sensor enabling as well.
+ *=============================================================================
+ */
+
+/*
+ * idt_read_temp() - read temperature from chip sensor
+ * @ntb:	NTB device context.
+ * @val:	OUT - integer value of temperature
+ * @frac:	OUT - fraction
+ */
+static void idt_read_temp(struct idt_ntb_dev *ndev, unsigned char *val,
+			  unsigned char *frac)
+{
+	u32 data;
+
+	/* Read the data from TEMP field of the TMPSTS register */
+	data = idt_sw_read(ndev, IDT_SW_TMPSTS);
+	data = GET_FIELD(TMPSTS_TEMP, data);
+	/* TEMP field has one fractional bit and seven integer bits */
+	*val = data >> 1;
+	*frac = ((data & 0x1) ? 5 : 0);
+}
+
+/*
+ * idt_temp_isr() - temperature sensor alarm events ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * It handles events of temperature crossing alarm thresholds. Since reading
+ * of TMPALARM register clears it up, the function doesn't analyze the
+ * read value, instead the current temperature value just warningly printed to
+ * log.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	unsigned char val, frac;
+
+	/* Read the current temperature value */
+	idt_read_temp(ndev, &val, &frac);
+
+	/* Read the temperature alarm to clean the alarm status out */
+	/*(void)idt_sw_read(ndev, IDT_SW_TMPALARM);*/
+
+	/* Clean the corresponding interrupt bit */
+	idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_TMPSENSOR);
+
+	dev_dbg(&ndev->ntb.pdev->dev,
+		"Temp sensor IRQ detected %#08x", ntint_sts);
+
+	/* Print temperature value to log */
+	dev_warn(&ndev->ntb.pdev->dev, "Temperature %hhu.%hhu", val, frac);
+}
+
+/*=============================================================================
+ *                           8. ISRs related operations
+ *
+ *    IDT PCIe-switch has strangely developed IRQ system. There is just one
+ * interrupt vector for doorbell and message registers. So the hardware driver
+ * can't determine actual source of IRQ if, for example, message event happened
+ * while any of unmasked doorbell is still set. The similar situation may be if
+ * switch or temperature sensor events pop up. The difference is that SEVENT
+ * and TMPSENSOR bits of NT interrupt status register can be cleaned by
+ * IRQ handler so a next interrupt request won't have false handling of
+ * corresponding events.
+ *    The hardware driver has only bottom-half handler of the IRQ, since if any
+ * of events happened the device won't raise it again before the last one is
+ * handled by clearing of corresponding NTINTSTS bit.
+ *=============================================================================
+ */
+
+static irqreturn_t idt_thread_isr(int irq, void *devid);
+
+/*
+ * idt_init_isr() - initialize PCIe interrupt handler
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_init_isr(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	u32 ntint_mask;
+	int ret;
+
+	/* Allocate just one interrupt vector for the ISR */
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+	if (ret != 1) {
+		dev_err(&pdev->dev, "Failed to allocate IRQ vector");
+		return ret;
+	}
+
+	/* Retrieve the IRQ vector */
+	ret = pci_irq_vector(pdev, 0);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to get IRQ vector");
+		goto err_free_vectors;
+	}
+
+	/* Set the IRQ handler */
+	ret = devm_request_threaded_irq(&pdev->dev, ret, NULL, idt_thread_isr,
+					IRQF_ONESHOT, NTB_IRQNAME, ndev);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to set MSI IRQ handler, %d", ret);
+		goto err_free_vectors;
+	}
+
+	/* Unmask Message/Doorbell/SE/Temperature interrupts */
+	ntint_mask = idt_nt_read(ndev, IDT_NT_NTINTMSK) & ~IDT_NTINTMSK_ALL;
+	idt_nt_write(ndev, IDT_NT_NTINTMSK, ntint_mask);
+
+	/* From now on the interrupts are enabled */
+	dev_dbg(&pdev->dev, "NTB interrupts initialized");
+
+	return 0;
+
+err_free_vectors:
+	pci_free_irq_vectors(pdev);
+
+	return ret;
+}
+
+
+/*
+ * idt_deinit_ist() - deinitialize PCIe interrupt handler
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Disable corresponding interrupts and free allocated IRQ vectors.
+ */
+static void idt_deinit_isr(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	u32 ntint_mask;
+
+	/* Mask interrupts back */
+	ntint_mask = idt_nt_read(ndev, IDT_NT_NTINTMSK) | IDT_NTINTMSK_ALL;
+	idt_nt_write(ndev, IDT_NT_NTINTMSK, ntint_mask);
+
+	/* Manually free IRQ otherwise PCI free irq vectors will fail */
+	devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 0), ndev);
+
+	/* Free allocated IRQ vectors */
+	pci_free_irq_vectors(pdev);
+
+	dev_dbg(&pdev->dev, "NTB interrupts deinitialized");
+}
+
+/*
+ * idt_thread_isr() - NT function interrupts handler
+ * @irq:	IRQ number
+ * @devid:	Custom buffer
+ *
+ * It reads current NT interrupts state register and handles all the event
+ * it declares.
+ * The method is bottom-half routine of actual default PCIe IRQ handler.
+ */
+static irqreturn_t idt_thread_isr(int irq, void *devid)
+{
+	struct idt_ntb_dev *ndev = devid;
+	bool handled = false;
+	u32 ntint_sts;
+
+	/* Read the NT interrupts status register */
+	ntint_sts = idt_nt_read(ndev, IDT_NT_NTINTSTS);
+
+	/* Handle messaging interrupts */
+	if (ntint_sts & IDT_NTINTSTS_MSG) {
+		idt_msg_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	/* Handle doorbell interrupts */
+	if (ntint_sts & IDT_NTINTSTS_DBELL) {
+		idt_db_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	/* Handle switch event interrupts */
+	if (ntint_sts & IDT_NTINTSTS_SEVENT) {
+		idt_se_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	/* Handle temperature sensor interrupt */
+	if (ntint_sts & IDT_NTINTSTS_TMPSENSOR) {
+		idt_temp_isr(ndev, ntint_sts);
+		handled = true;
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "IDT IRQs 0x%08x handled", ntint_sts);
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*===========================================================================
+ *                     9. NTB hardware driver initialization
+ *===========================================================================
+ */
+
+/*
+ * NTB API operations
+ */
+static const struct ntb_dev_ops idt_ntb_ops = {
+	.port_number		= idt_ntb_port_number,
+	.peer_port_count	= idt_ntb_peer_port_count,
+	.peer_port_number	= idt_ntb_peer_port_number,
+	.peer_port_idx		= idt_ntb_peer_port_idx,
+	.link_is_up		= idt_ntb_link_is_up,
+	.link_enable		= idt_ntb_link_enable,
+	.link_disable		= idt_ntb_link_disable,
+	.mw_count		= idt_ntb_mw_count,
+	.mw_get_align		= idt_ntb_mw_get_align,
+	.peer_mw_count		= idt_ntb_peer_mw_count,
+	.peer_mw_get_addr	= idt_ntb_peer_mw_get_addr,
+	.peer_mw_set_trans	= idt_ntb_peer_mw_set_trans,
+	.peer_mw_clear_trans	= idt_ntb_peer_mw_clear_trans,
+	.db_valid_mask		= idt_ntb_db_valid_mask,
+	.db_read		= idt_ntb_db_read,
+	.db_clear		= idt_ntb_db_clear,
+	.db_read_mask		= idt_ntb_db_read_mask,
+	.db_set_mask		= idt_ntb_db_set_mask,
+	.db_clear_mask		= idt_ntb_db_clear_mask,
+	.peer_db_set		= idt_ntb_peer_db_set,
+	.msg_count		= idt_ntb_msg_count,
+	.msg_inbits		= idt_ntb_msg_inbits,
+	.msg_outbits		= idt_ntb_msg_outbits,
+	.msg_read_sts		= idt_ntb_msg_read_sts,
+	.msg_clear_sts		= idt_ntb_msg_clear_sts,
+	.msg_set_mask		= idt_ntb_msg_set_mask,
+	.msg_clear_mask		= idt_ntb_msg_clear_mask,
+	.msg_read		= idt_ntb_msg_read,
+	.msg_write		= idt_ntb_msg_write
+};
+
+/*
+ * idt_register_device() - register IDT NTB device
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_register_device(struct idt_ntb_dev *ndev)
+{
+	int ret;
+
+	/* Initialize the rest of NTB device structure and register it */
+	ndev->ntb.ops = &idt_ntb_ops;
+	ndev->ntb.topo = NTB_TOPO_PRI;
+
+	ret = ntb_register_device(&ndev->ntb);
+	if (ret != 0) {
+		dev_err(&ndev->ntb.pdev->dev, "Failed to register NTB device");
+		return ret;
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device successfully registered");
+
+	return 0;
+}
+
+/*
+ * idt_unregister_device() - unregister IDT NTB device
+ * @ndev:	IDT NTB hardware driver descriptor
+ */
+static void idt_unregister_device(struct idt_ntb_dev *ndev)
+{
+	/* Just unregister the NTB device */
+	ntb_unregister_device(&ndev->ntb);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device unregistered");
+}
+
+/*=============================================================================
+ *                        10. DebugFS node initialization
+ *=============================================================================
+ */
+
+static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
+				   size_t count, loff_t *offp);
+
+/*
+ * Driver DebugFS info file operations
+ */
+static const struct file_operations idt_dbgfs_info_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = idt_dbgfs_info_read
+};
+
+/*
+ * idt_dbgfs_info_read() - DebugFS read info node callback
+ * @file:	File node descriptor.
+ * @ubuf:	User-space buffer to put data to
+ * @count:	Size of the buffer
+ * @offp:	Offset within the buffer
+ */
+static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
+				   size_t count, loff_t *offp)
+{
+	struct idt_ntb_dev *ndev = filp->private_data;
+	unsigned char temp, frac, idx, pidx, cnt;
+	ssize_t ret = 0, off = 0;
+	unsigned long irqflags;
+	enum ntb_speed speed;
+	enum ntb_width width;
+	char *strbuf;
+	size_t size;
+	u32 data;
+
+	/* Lets limit the buffer size the way the Intel/AMD drivers do */
+	size = min_t(size_t, count, 0x1000U);
+
+	/* Allocate the memory for the buffer */
+	strbuf = kmalloc(size, GFP_KERNEL);
+	if (strbuf == NULL)
+		return -ENOMEM;
+
+	/* Put the data into the string buffer */
+	off += scnprintf(strbuf + off, size - off,
+		"\n\t\tIDT NTB device Information:\n\n");
+
+	/* General local device configurations */
+	off += scnprintf(strbuf + off, size - off,
+		"Local Port %hhu, Partition %hhu\n", ndev->port, ndev->part);
+
+	/* Peer ports information */
+	off += scnprintf(strbuf + off, size - off, "Peers:\n");
+	for (idx = 0; idx < ndev->peer_cnt; idx++) {
+		off += scnprintf(strbuf + off, size - off,
+			"\t%hhu. Port %hhu, Partition %hhu\n",
+			idx, ndev->peers[idx].port, ndev->peers[idx].part);
+	}
+
+	/* Links status */
+	data = idt_ntb_link_is_up(&ndev->ntb, &speed, &width);
+	off += scnprintf(strbuf + off, size - off,
+		"NTB link status\t- 0x%08x, ", data);
+	off += scnprintf(strbuf + off, size - off, "PCIe Gen %d x%d lanes\n",
+		speed, width);
+
+	/* Mapping table entries */
+	off += scnprintf(strbuf + off, size - off, "NTB Mapping Table:\n");
+	for (idx = 0; idx < IDT_MTBL_ENTRY_CNT; idx++) {
+		spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_NTMTBLADDR, idx);
+		data = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+		spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+		/* Print valid entries only */
+		if (data & IDT_NTMTBLDATA_VALID) {
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu. Partition %d, Requester ID 0x%04x\n",
+				idx, GET_FIELD(NTMTBLDATA_PART, data),
+				GET_FIELD(NTMTBLDATA_REQID, data));
+		}
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Outbound memory windows information */
+	off += scnprintf(strbuf + off, size - off,
+		"Outbound Memory Windows:\n");
+	for (idx = 0; idx < ndev->mw_cnt; idx += cnt) {
+		data = ndev->mws[idx].type;
+		cnt = idt_get_mw_count(data);
+
+		/* Print Memory Window information */
+		if (data == IDT_MW_DIR)
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu.\t", idx);
+		else
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu-%hhu.\t", idx, idx + cnt - 1);
+
+		off += scnprintf(strbuf + off, size - off, "%s BAR%hhu, ",
+			idt_get_mw_name(data), ndev->mws[idx].bar);
+
+		off += scnprintf(strbuf + off, size - off,
+			"Address align 0x%08llx, ", ndev->mws[idx].addr_align);
+
+		off += scnprintf(strbuf + off, size - off,
+			"Size align 0x%08llx, Size max %llu\n",
+			ndev->mws[idx].size_align, ndev->mws[idx].size_max);
+	}
+
+	/* Inbound memory windows information */
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		off += scnprintf(strbuf + off, size - off,
+			"Inbound Memory Windows for peer %hhu (Port %hhu):\n",
+			pidx, ndev->peers[pidx].port);
+
+		/* Print Memory Windows information */
+		for (idx = 0; idx < ndev->peers[pidx].mw_cnt; idx += cnt) {
+			data = ndev->peers[pidx].mws[idx].type;
+			cnt = idt_get_mw_count(data);
+
+			if (data == IDT_MW_DIR)
+				off += scnprintf(strbuf + off, size - off,
+					"\t%hhu.\t", idx);
+			else
+				off += scnprintf(strbuf + off, size - off,
+					"\t%hhu-%hhu.\t", idx, idx + cnt - 1);
+
+			off += scnprintf(strbuf + off, size - off,
+				"%s BAR%hhu, ", idt_get_mw_name(data),
+				ndev->peers[pidx].mws[idx].bar);
+
+			off += scnprintf(strbuf + off, size - off,
+				"Address align 0x%08llx, ",
+				ndev->peers[pidx].mws[idx].addr_align);
+
+			off += scnprintf(strbuf + off, size - off,
+				"Size align 0x%08llx, Size max %llu\n",
+				ndev->peers[pidx].mws[idx].size_align,
+				ndev->peers[pidx].mws[idx].size_max);
+		}
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Doorbell information */
+	data = idt_sw_read(ndev, IDT_SW_GDBELLSTS);
+	off += scnprintf(strbuf + off, size - off,
+		 "Global Doorbell state\t- 0x%08x\n", data);
+	data = idt_ntb_db_read(&ndev->ntb);
+	off += scnprintf(strbuf + off, size - off,
+		 "Local  Doorbell state\t- 0x%08x\n", data);
+	data = idt_nt_read(ndev, IDT_NT_INDBELLMSK);
+	off += scnprintf(strbuf + off, size - off,
+		 "Local  Doorbell mask\t- 0x%08x\n", data);
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Messaging information */
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event valid\t- 0x%08x\n", IDT_MSG_MASK);
+	data = idt_ntb_msg_read_sts(&ndev->ntb);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event status\t- 0x%08x\n", data);
+	data = idt_nt_read(ndev, IDT_NT_MSGSTSMSK);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event mask\t- 0x%08x\n", data);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message data:\n");
+	for (idx = 0; idx < IDT_MSG_CNT; idx++) {
+		int src;
+		(void)idt_ntb_msg_read(&ndev->ntb, idx, &src, &data);
+		off += scnprintf(strbuf + off, size - off,
+			"\t%hhu. 0x%08x from peer %hhu (Port %hhu)\n",
+			idx, data, src, ndev->peers[src].port);
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Current temperature */
+	idt_read_temp(ndev, &temp, &frac);
+	off += scnprintf(strbuf + off, size - off,
+		"Switch temperature\t\t- %hhu.%hhuC\n", temp, frac);
+
+	/* Copy the buffer to the User Space */
+	ret = simple_read_from_buffer(ubuf, count, offp, strbuf, off);
+	kfree(strbuf);
+
+	return ret;
+}
+
+/*
+ * idt_init_dbgfs() - initialize DebugFS node
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_init_dbgfs(struct idt_ntb_dev *ndev)
+{
+	char devname[64];
+
+	/* If the top directory is not created then do nothing */
+	if (IS_ERR_OR_NULL(dbgfs_topdir)) {
+		dev_info(&ndev->ntb.pdev->dev, "Top DebugFS directory absent");
+		return PTR_ERR(dbgfs_topdir);
+	}
+
+	/* Create the info file node */
+	snprintf(devname, 64, "info:%s", pci_name(ndev->ntb.pdev));
+	ndev->dbgfs_info = debugfs_create_file(devname, 0400, dbgfs_topdir,
+		ndev, &idt_dbgfs_info_ops);
+	if (IS_ERR(ndev->dbgfs_info)) {
+		dev_dbg(&ndev->ntb.pdev->dev, "Failed to create DebugFS node");
+		return PTR_ERR(ndev->dbgfs_info);
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device DebugFS node created");
+
+	return 0;
+}
+
+/*
+ * idt_deinit_dbgfs() - deinitialize DebugFS node
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just discard the info node from DebugFS
+ */
+static void idt_deinit_dbgfs(struct idt_ntb_dev *ndev)
+{
+	debugfs_remove(ndev->dbgfs_info);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB device DebugFS node discarded");
+}
+
+/*=============================================================================
+ *                     11. Basic PCIe device initialization
+ *=============================================================================
+ */
+
+/*
+ * idt_check_setup() - Check whether the IDT PCIe-swtich is properly
+ *		       pre-initialized
+ * @pdev:	Pointer to the PCI device descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_check_setup(struct pci_dev *pdev)
+{
+	u32 data;
+	int ret;
+
+	/* Read the BARSETUP0 */
+	ret = pci_read_config_dword(pdev, IDT_NT_BARSETUP0, &data);
+	if (ret != 0) {
+		dev_err(&pdev->dev,
+			"Failed to read BARSETUP0 config register");
+		return ret;
+	}
+
+	/* Check whether the BAR0 register is enabled to be of config space */
+	if (!(data & IDT_BARSETUP_EN) || !(data & IDT_BARSETUP_MODE_CFG)) {
+		dev_err(&pdev->dev, "BAR0 doesn't map config space");
+		return -EINVAL;
+	}
+
+	/* Configuration space BAR0 must have certain size */
+	if ((data & IDT_BARSETUP_SIZE_MASK) != IDT_BARSETUP_SIZE_CFG) {
+		dev_err(&pdev->dev, "Invalid size of config space");
+		return -EINVAL;
+	}
+
+	dev_dbg(&pdev->dev, "NTB device pre-initialized correctly");
+
+	return 0;
+}
+
+/*
+ * Create the IDT PCIe-switch driver descriptor
+ * @pdev:	Pointer to the PCI device descriptor
+ * @id:		IDT PCIe-device configuration
+ *
+ * It just allocates a memory for IDT PCIe-switch device structure and
+ * initializes some commonly used fields.
+ *
+ * No need of release method, since managed device resource is used for
+ * memory allocation.
+ *
+ * Return: pointer to the descriptor, otherwise a negative error number.
+ */
+static struct idt_ntb_dev *idt_create_dev(struct pci_dev *pdev,
+					  const struct pci_device_id *id)
+{
+	struct idt_ntb_dev *ndev;
+
+	/* Allocate memory for the IDT PCIe-device descriptor */
+	ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
+	if (IS_ERR_OR_NULL(ndev)) {
+		dev_err(&pdev->dev, "Memory allocation failed for descriptor");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Save the IDT PCIe-switch ports configuration */
+	ndev->swcfg = (struct idt_89hpes_cfg *)id->driver_data;
+	/* Save the PCI-device pointer inside the NTB device structure */
+	ndev->ntb.pdev = pdev;
+
+	/* Initialize spin locker of Doorbell, Message and GASA registers */
+	spin_lock_init(&ndev->db_mask_lock);
+	spin_lock_init(&ndev->msg_mask_lock);
+	spin_lock_init(&ndev->gasa_lock);
+
+	dev_info(&pdev->dev, "IDT %s discovered", ndev->swcfg->name);
+
+	dev_dbg(&pdev->dev, "NTB device descriptor created");
+
+	return ndev;
+}
+
+/*
+ * idt_init_pci() - initialize the basic PCI-related subsystem
+ * @ndev:	Pointer to the IDT PCIe-switch driver descriptor
+ *
+ * Managed device resources will be freed automatically in case of failure or
+ * driver detachment.
+ *
+ * Return: zero on success, otherwise negative error number.
+ */
+static int idt_init_pci(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	int ret;
+
+	/* Initialize the bit mask of DMA */
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret != 0) {
+		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (ret != 0) {
+			dev_err(&pdev->dev, "Failed to set DMA bit mask\n");
+			return ret;
+		}
+		dev_warn(&pdev->dev, "Cannot set DMA highmem bit mask\n");
+	}
+	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret != 0) {
+		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (ret != 0) {
+			dev_err(&pdev->dev,
+				"Failed to set consistent DMA bit mask\n");
+			return ret;
+		}
+		dev_warn(&pdev->dev,
+			"Cannot set consistent DMA highmem bit mask\n");
+	}
+
+	/*
+	 * Enable the device advanced error reporting. It's not critical to
+	 * have AER disabled in the kernel.
+	 */
+	ret = pci_enable_pcie_error_reporting(pdev);
+	if (ret != 0)
+		dev_warn(&pdev->dev, "PCIe AER capability disabled\n");
+	else /* Cleanup uncorrectable error status before getting to init */
+		pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	/* First enable the PCI device */
+	ret = pcim_enable_device(pdev);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to enable PCIe device\n");
+		goto err_disable_aer;
+	}
+
+	/*
+	 * Enable the bus mastering, which effectively enables MSI IRQs and
+	 * Request TLPs translation
+	 */
+	pci_set_master(pdev);
+
+	/* Request all BARs resources and map BAR0 only */
+	ret = pcim_iomap_regions_request_all(pdev, 1, NTB_NAME);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to request resources\n");
+		goto err_clear_master;
+	}
+
+	/* Retrieve virtual address of BAR0 - PCI configuration space */
+	ndev->cfgspc = pcim_iomap_table(pdev)[0];
+
+	/* Put the IDT driver data pointer to the PCI-device private pointer */
+	pci_set_drvdata(pdev, ndev);
+
+	dev_dbg(&pdev->dev, "NT-function PCIe interface initialized");
+
+	return 0;
+
+err_clear_master:
+	pci_clear_master(pdev);
+err_disable_aer:
+	(void)pci_disable_pcie_error_reporting(pdev);
+
+	return ret;
+}
+
+/*
+ * idt_deinit_pci() - deinitialize the basic PCI-related subsystem
+ * @ndev:	Pointer to the IDT PCIe-switch driver descriptor
+ *
+ * Managed resources will be freed on the driver detachment
+ */
+static void idt_deinit_pci(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+
+	/* Clean up the PCI-device private data pointer */
+	pci_set_drvdata(pdev, NULL);
+
+	/* Clear the bus master disabling the Request TLPs translation */
+	pci_clear_master(pdev);
+
+	/* Disable the AER capability */
+	(void)pci_disable_pcie_error_reporting(pdev);
+
+	dev_dbg(&pdev->dev, "NT-function PCIe interface cleared");
+}
+
+/*===========================================================================
+ *                       12. PCI bus callback functions
+ *===========================================================================
+ */
+
+/*
+ * idt_pci_probe() - PCI device probe callback
+ * @pdev:	Pointer to PCI device structure
+ * @id:		PCIe device custom descriptor
+ *
+ * Return: zero on success, otherwise negative error number
+ */
+static int idt_pci_probe(struct pci_dev *pdev,
+			 const struct pci_device_id *id)
+{
+	struct idt_ntb_dev *ndev;
+	int ret;
+
+	/* Check whether IDT PCIe-switch is properly pre-initialized */
+	ret = idt_check_setup(pdev);
+	if (ret != 0)
+		return ret;
+
+	/* Allocate the memory for IDT NTB device data */
+	ndev = idt_create_dev(pdev, id);
+	if (IS_ERR_OR_NULL(ndev))
+		return PTR_ERR(ndev);
+
+	/* Initialize the basic PCI subsystem of the device */
+	ret = idt_init_pci(ndev);
+	if (ret != 0)
+		return ret;
+
+	/* Scan ports of the IDT PCIe-switch */
+	(void)idt_scan_ports(ndev);
+
+	/* Initialize NTB link events subsystem */
+	idt_init_link(ndev);
+
+	/* Initialize MWs subsystem */
+	ret = idt_init_mws(ndev);
+	if (ret != 0)
+		goto err_deinit_link;
+
+	/* Initialize Messaging subsystem */
+	idt_init_msg(ndev);
+
+	/* Initialize IDT interrupts handler */
+	ret = idt_init_isr(ndev);
+	if (ret != 0)
+		goto err_deinit_link;
+
+	/* Register IDT NTB devices on the NTB bus */
+	ret = idt_register_device(ndev);
+	if (ret != 0)
+		goto err_deinit_isr;
+
+	/* Initialize DebugFS info node */
+	(void)idt_init_dbgfs(ndev);
+
+	/* IDT PCIe-switch NTB driver is finally initialized */
+	dev_info(&pdev->dev, "IDT NTB device is ready");
+
+	/* May the force be with us... */
+	return 0;
+
+err_deinit_isr:
+	idt_deinit_isr(ndev);
+err_deinit_link:
+	idt_deinit_link(ndev);
+	idt_deinit_pci(ndev);
+
+	return ret;
+}
+
+/*
+ * idt_pci_probe() - PCI device remove callback
+ * @pdev:	Pointer to PCI device structure
+ */
+static void idt_pci_remove(struct pci_dev *pdev)
+{
+	struct idt_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+	/* Deinit the DebugFS node */
+	idt_deinit_dbgfs(ndev);
+
+	/* Unregister NTB device */
+	idt_unregister_device(ndev);
+
+	/* Stop the interrupts handling */
+	idt_deinit_isr(ndev);
+
+	/* Deinitialize link event subsystem */
+	idt_deinit_link(ndev);
+
+	/* Deinit basic PCI subsystem */
+	idt_deinit_pci(ndev);
+
+	/* IDT PCIe-switch NTB driver is finally initialized */
+	dev_info(&pdev->dev, "IDT NTB device is removed");
+
+	/* Sayonara... */
+}
+
+/*
+ * IDT PCIe-switch models ports configuration structures
+ */
+static struct idt_89hpes_cfg idt_89hpes24nt6ag2_config = {
+	.name = "89HPES24NT6AG2",
+	.port_cnt = 6, .ports = {0, 2, 4, 6, 8, 12}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt8ag2_config = {
+	.name = "89HPES32NT8AG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt8bg2_config = {
+	.name = "89HPES32NT8BG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes12nt12g2_config = {
+	.name = "89HPES12NT12G2",
+	.port_cnt = 3, .ports = {0, 8, 16}
+};
+static struct idt_89hpes_cfg idt_89hpes16nt16g2_config = {
+	.name = "89HPES16NT16G2",
+	.port_cnt = 4, .ports = {0, 8, 12, 16}
+};
+static struct idt_89hpes_cfg idt_89hpes24nt24g2_config = {
+	.name = "89HPES24NT24G2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt24ag2_config = {
+	.name = "89HPES32NT24AG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt24bg2_config = {
+	.name = "89HPES32NT24BG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+
+/*
+ * PCI-ids table of the supported IDT PCIe-switch devices
+ */
+static const struct pci_device_id idt_pci_tbl[] = {
+	{IDT_PCI_DEVICE_IDS(89HPES24NT6AG2,  idt_89hpes24nt6ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT8AG2,  idt_89hpes32nt8ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT8BG2,  idt_89hpes32nt8bg2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES12NT12G2,  idt_89hpes12nt12g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES16NT16G2,  idt_89hpes16nt16g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES24NT24G2,  idt_89hpes24nt24g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT24AG2, idt_89hpes32nt24ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT24BG2, idt_89hpes32nt24bg2_config)},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, idt_pci_tbl);
+
+/*
+ * IDT PCIe-switch NT-function device driver structure definition
+ */
+static struct pci_driver idt_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.probe		= idt_pci_probe,
+	.remove		= idt_pci_remove,
+	.id_table	= idt_pci_tbl,
+};
+
+static int __init idt_pci_driver_init(void)
+{
+	pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+	/* Create the top DebugFS directory if the FS is initialized */
+	if (debugfs_initialized())
+		dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	/* Register the NTB hardware driver to handle the PCI device */
+	return pci_register_driver(&idt_pci_driver);
+}
+module_init(idt_pci_driver_init);
+
+static void __exit idt_pci_driver_exit(void)
+{
+	/* Unregister the NTB hardware driver */
+	pci_unregister_driver(&idt_pci_driver);
+
+	/* Discard the top DebugFS directory */
+	debugfs_remove_recursive(dbgfs_topdir);
+}
+module_exit(idt_pci_driver_exit);
+
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.h b/drivers/ntb/hw/idt/ntb_hw_idt.h
new file mode 100644
index 0000000000000..856fd182f6f4f
--- /dev/null
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.h
@@ -0,0 +1,1149 @@
+/*
+ *   This file is provided under a GPLv2 license.  When using or
+ *   redistributing this file, you may do so under that license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ *   Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, one can be found http://www.gnu.org/licenses/.
+ *
+ *   The full GNU General Public License is included in this distribution in
+ *   the file called "COPYING".
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * IDT PCIe-switch NTB Linux driver
+ *
+ * Contact Information:
+ * Serge Semin <fancer.lancer@gmail.com>, <Sergey.Semin@t-platforms.ru>
+ */
+
+#ifndef NTB_HW_IDT_H
+#define NTB_HW_IDT_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/ntb.h>
+
+
+/*
+ * Macro is used to create the struct pci_device_id that matches
+ * the supported IDT PCIe-switches
+ * @devname: Capitalized name of the particular device
+ * @data: Variable passed to the driver of the particular device
+ */
+#define IDT_PCI_DEVICE_IDS(devname, data) \
+	.vendor = PCI_VENDOR_ID_IDT, .device = PCI_DEVICE_ID_IDT_##devname, \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \
+	.class = (PCI_CLASS_BRIDGE_OTHER << 8), .class_mask = (0xFFFF00), \
+	.driver_data = (kernel_ulong_t)&data
+
+/*
+ * IDT PCIe-switches device IDs
+ */
+#define PCI_DEVICE_ID_IDT_89HPES24NT6AG2  0x8091
+#define PCI_DEVICE_ID_IDT_89HPES32NT8AG2  0x808F
+#define PCI_DEVICE_ID_IDT_89HPES32NT8BG2  0x8088
+#define PCI_DEVICE_ID_IDT_89HPES12NT12G2  0x8092
+#define PCI_DEVICE_ID_IDT_89HPES16NT16G2  0x8090
+#define PCI_DEVICE_ID_IDT_89HPES24NT24G2  0x808E
+#define PCI_DEVICE_ID_IDT_89HPES32NT24AG2 0x808C
+#define PCI_DEVICE_ID_IDT_89HPES32NT24BG2 0x808A
+
+/*
+ * NT-function Configuration Space registers
+ * NOTE 1) The IDT PCIe-switch internal data is little-endian
+ *      so it must be taken into account in the driver
+ *      internals.
+ *      2) Additionally the registers should be accessed either
+ *      with byte-enables corresponding to their native size or
+ *      the size of one DWORD
+ *
+ * So to simplify the driver code, there is only DWORD-sized read/write
+ * operations utilized.
+ */
+/* PCI Express Configuration Space */
+/* PCI Express command/status register	(DWORD) */
+#define IDT_NT_PCICMDSTS		0x00004U
+/* PCI Express Device Capabilities	(DWORD) */
+#define IDT_NT_PCIEDCAP			0x00044U
+/* PCI Express Device Control/Status	(WORD+WORD) */
+#define IDT_NT_PCIEDCTLSTS		0x00048U
+/* PCI Express Link Capabilities	(DWORD) */
+#define IDT_NT_PCIELCAP			0x0004CU
+/* PCI Express Link Control/Status	(WORD+WORD) */
+#define IDT_NT_PCIELCTLSTS		0x00050U
+/* PCI Express Device Capabilities 2	(DWORD) */
+#define IDT_NT_PCIEDCAP2		0x00064U
+/* PCI Express Device Control 2		(WORD+WORD) */
+#define IDT_NT_PCIEDCTL2		0x00068U
+/* PCI Power Management Control and Status (DWORD) */
+#define IDT_NT_PMCSR			0x000C4U
+/*==========================================*/
+/* IDT Proprietary NT-port-specific registers */
+/* NT-function main control registers */
+/* NT Endpoint Control			(DWORD) */
+#define IDT_NT_NTCTL			0x00400U
+/* NT Endpoint Interrupt Status/Mask	(DWORD) */
+#define IDT_NT_NTINTSTS			0x00404U
+#define IDT_NT_NTINTMSK			0x00408U
+/* NT Endpoint Signal Data		(DWORD) */
+#define IDT_NT_NTSDATA			0x0040CU
+/* NT Endpoint Global Signal		(DWORD) */
+#define IDT_NT_NTGSIGNAL		0x00410U
+/* Internal Error Reporting Mask 0/1	(DWORD) */
+#define IDT_NT_NTIERRORMSK0		0x00414U
+#define IDT_NT_NTIERRORMSK1		0x00418U
+/* Doorbel registers */
+/* NT Outbound Doorbell Set		(DWORD) */
+#define IDT_NT_OUTDBELLSET		0x00420U
+/* NT Inbound Doorbell Status/Mask	(DWORD) */
+#define IDT_NT_INDBELLSTS		0x00428U
+#define IDT_NT_INDBELLMSK		0x0042CU
+/* Message registers */
+/* Outbound Message N			(DWORD) */
+#define IDT_NT_OUTMSG0			0x00430U
+#define IDT_NT_OUTMSG1			0x00434U
+#define IDT_NT_OUTMSG2			0x00438U
+#define IDT_NT_OUTMSG3			0x0043CU
+/* Inbound Message N			(DWORD) */
+#define IDT_NT_INMSG0			0x00440U
+#define IDT_NT_INMSG1			0x00444U
+#define IDT_NT_INMSG2			0x00448U
+#define IDT_NT_INMSG3			0x0044CU
+/* Inbound Message Source N		(DWORD) */
+#define IDT_NT_INMSGSRC0		0x00450U
+#define IDT_NT_INMSGSRC1		0x00454U
+#define IDT_NT_INMSGSRC2		0x00458U
+#define IDT_NT_INMSGSRC3		0x0045CU
+/* Message Status			(DWORD) */
+#define IDT_NT_MSGSTS			0x00460U
+/* Message Status Mask			(DWORD) */
+#define IDT_NT_MSGSTSMSK		0x00464U
+/* BAR-setup registers */
+/* BAR N Setup/Limit Address/Lower and Upper Translated Base Address (DWORD) */
+#define IDT_NT_BARSETUP0		0x00470U
+#define IDT_NT_BARLIMIT0		0x00474U
+#define IDT_NT_BARLTBASE0		0x00478U
+#define IDT_NT_BARUTBASE0		0x0047CU
+#define IDT_NT_BARSETUP1		0x00480U
+#define IDT_NT_BARLIMIT1		0x00484U
+#define IDT_NT_BARLTBASE1		0x00488U
+#define IDT_NT_BARUTBASE1		0x0048CU
+#define IDT_NT_BARSETUP2		0x00490U
+#define IDT_NT_BARLIMIT2		0x00494U
+#define IDT_NT_BARLTBASE2		0x00498U
+#define IDT_NT_BARUTBASE2		0x0049CU
+#define IDT_NT_BARSETUP3		0x004A0U
+#define IDT_NT_BARLIMIT3		0x004A4U
+#define IDT_NT_BARLTBASE3		0x004A8U
+#define IDT_NT_BARUTBASE3		0x004ACU
+#define IDT_NT_BARSETUP4		0x004B0U
+#define IDT_NT_BARLIMIT4		0x004B4U
+#define IDT_NT_BARLTBASE4		0x004B8U
+#define IDT_NT_BARUTBASE4		0x004BCU
+#define IDT_NT_BARSETUP5		0x004C0U
+#define IDT_NT_BARLIMIT5		0x004C4U
+#define IDT_NT_BARLTBASE5		0x004C8U
+#define IDT_NT_BARUTBASE5		0x004CCU
+/* NT mapping table registers */
+/* NT Mapping Table Address/Status/Data	(DWORD) */
+#define IDT_NT_NTMTBLADDR		0x004D0U
+#define IDT_NT_NTMTBLSTS		0x004D4U
+#define IDT_NT_NTMTBLDATA		0x004D8U
+/* Requester ID (Bus:Device:Function) Capture	(DWORD) */
+#define IDT_NT_REQIDCAP			0x004DCU
+/* Memory Windows Lookup table registers */
+/* Lookup Table Offset/Lower, Middle and Upper data	(DWORD) */
+#define IDT_NT_LUTOFFSET		0x004E0U
+#define IDT_NT_LUTLDATA			0x004E4U
+#define IDT_NT_LUTMDATA			0x004E8U
+#define IDT_NT_LUTUDATA			0x004ECU
+/* NT Endpoint Uncorrectable/Correctable Errors Emulation registers (DWORD) */
+#define IDT_NT_NTUEEM			0x004F0U
+#define IDT_NT_NTCEEM			0x004F4U
+/* Global Address Space Access/Data registers	(DWARD) */
+#define IDT_NT_GASAADDR			0x00FF8U
+#define IDT_NT_GASADATA			0x00FFCU
+
+/*
+ * IDT PCIe-switch Global Configuration and Status registers
+ */
+/* Port N Configuration register in global space */
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP0_PCIECMDSTS		0x01004U
+#define IDT_SW_NTP0_PCIELCTLSTS		0x01050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP0_NTCTL		0x01400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP0_BARSETUP0		0x01470U
+#define IDT_SW_NTP0_BARLIMIT0		0x01474U
+#define IDT_SW_NTP0_BARLTBASE0		0x01478U
+#define IDT_SW_NTP0_BARUTBASE0		0x0147CU
+#define IDT_SW_NTP0_BARSETUP1		0x01480U
+#define IDT_SW_NTP0_BARLIMIT1		0x01484U
+#define IDT_SW_NTP0_BARLTBASE1		0x01488U
+#define IDT_SW_NTP0_BARUTBASE1		0x0148CU
+#define IDT_SW_NTP0_BARSETUP2		0x01490U
+#define IDT_SW_NTP0_BARLIMIT2		0x01494U
+#define IDT_SW_NTP0_BARLTBASE2		0x01498U
+#define IDT_SW_NTP0_BARUTBASE2		0x0149CU
+#define IDT_SW_NTP0_BARSETUP3		0x014A0U
+#define IDT_SW_NTP0_BARLIMIT3		0x014A4U
+#define IDT_SW_NTP0_BARLTBASE3		0x014A8U
+#define IDT_SW_NTP0_BARUTBASE3		0x014ACU
+#define IDT_SW_NTP0_BARSETUP4		0x014B0U
+#define IDT_SW_NTP0_BARLIMIT4		0x014B4U
+#define IDT_SW_NTP0_BARLTBASE4		0x014B8U
+#define IDT_SW_NTP0_BARUTBASE4		0x014BCU
+#define IDT_SW_NTP0_BARSETUP5		0x014C0U
+#define IDT_SW_NTP0_BARLIMIT5		0x014C4U
+#define IDT_SW_NTP0_BARLTBASE5		0x014C8U
+#define IDT_SW_NTP0_BARUTBASE5		0x014CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP2_PCIECMDSTS		0x05004U
+#define IDT_SW_NTP2_PCIELCTLSTS		0x05050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP2_NTCTL		0x05400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP2_BARSETUP0		0x05470U
+#define IDT_SW_NTP2_BARLIMIT0		0x05474U
+#define IDT_SW_NTP2_BARLTBASE0		0x05478U
+#define IDT_SW_NTP2_BARUTBASE0		0x0547CU
+#define IDT_SW_NTP2_BARSETUP1		0x05480U
+#define IDT_SW_NTP2_BARLIMIT1		0x05484U
+#define IDT_SW_NTP2_BARLTBASE1		0x05488U
+#define IDT_SW_NTP2_BARUTBASE1		0x0548CU
+#define IDT_SW_NTP2_BARSETUP2		0x05490U
+#define IDT_SW_NTP2_BARLIMIT2		0x05494U
+#define IDT_SW_NTP2_BARLTBASE2		0x05498U
+#define IDT_SW_NTP2_BARUTBASE2		0x0549CU
+#define IDT_SW_NTP2_BARSETUP3		0x054A0U
+#define IDT_SW_NTP2_BARLIMIT3		0x054A4U
+#define IDT_SW_NTP2_BARLTBASE3		0x054A8U
+#define IDT_SW_NTP2_BARUTBASE3		0x054ACU
+#define IDT_SW_NTP2_BARSETUP4		0x054B0U
+#define IDT_SW_NTP2_BARLIMIT4		0x054B4U
+#define IDT_SW_NTP2_BARLTBASE4		0x054B8U
+#define IDT_SW_NTP2_BARUTBASE4		0x054BCU
+#define IDT_SW_NTP2_BARSETUP5		0x054C0U
+#define IDT_SW_NTP2_BARLIMIT5		0x054C4U
+#define IDT_SW_NTP2_BARLTBASE5		0x054C8U
+#define IDT_SW_NTP2_BARUTBASE5		0x054CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP4_PCIECMDSTS		0x09004U
+#define IDT_SW_NTP4_PCIELCTLSTS		0x09050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP4_NTCTL		0x09400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP4_BARSETUP0		0x09470U
+#define IDT_SW_NTP4_BARLIMIT0		0x09474U
+#define IDT_SW_NTP4_BARLTBASE0		0x09478U
+#define IDT_SW_NTP4_BARUTBASE0		0x0947CU
+#define IDT_SW_NTP4_BARSETUP1		0x09480U
+#define IDT_SW_NTP4_BARLIMIT1		0x09484U
+#define IDT_SW_NTP4_BARLTBASE1		0x09488U
+#define IDT_SW_NTP4_BARUTBASE1		0x0948CU
+#define IDT_SW_NTP4_BARSETUP2		0x09490U
+#define IDT_SW_NTP4_BARLIMIT2		0x09494U
+#define IDT_SW_NTP4_BARLTBASE2		0x09498U
+#define IDT_SW_NTP4_BARUTBASE2		0x0949CU
+#define IDT_SW_NTP4_BARSETUP3		0x094A0U
+#define IDT_SW_NTP4_BARLIMIT3		0x094A4U
+#define IDT_SW_NTP4_BARLTBASE3		0x094A8U
+#define IDT_SW_NTP4_BARUTBASE3		0x094ACU
+#define IDT_SW_NTP4_BARSETUP4		0x094B0U
+#define IDT_SW_NTP4_BARLIMIT4		0x094B4U
+#define IDT_SW_NTP4_BARLTBASE4		0x094B8U
+#define IDT_SW_NTP4_BARUTBASE4		0x094BCU
+#define IDT_SW_NTP4_BARSETUP5		0x094C0U
+#define IDT_SW_NTP4_BARLIMIT5		0x094C4U
+#define IDT_SW_NTP4_BARLTBASE5		0x094C8U
+#define IDT_SW_NTP4_BARUTBASE5		0x094CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP6_PCIECMDSTS		0x0D004U
+#define IDT_SW_NTP6_PCIELCTLSTS		0x0D050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP6_NTCTL		0x0D400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP6_BARSETUP0		0x0D470U
+#define IDT_SW_NTP6_BARLIMIT0		0x0D474U
+#define IDT_SW_NTP6_BARLTBASE0		0x0D478U
+#define IDT_SW_NTP6_BARUTBASE0		0x0D47CU
+#define IDT_SW_NTP6_BARSETUP1		0x0D480U
+#define IDT_SW_NTP6_BARLIMIT1		0x0D484U
+#define IDT_SW_NTP6_BARLTBASE1		0x0D488U
+#define IDT_SW_NTP6_BARUTBASE1		0x0D48CU
+#define IDT_SW_NTP6_BARSETUP2		0x0D490U
+#define IDT_SW_NTP6_BARLIMIT2		0x0D494U
+#define IDT_SW_NTP6_BARLTBASE2		0x0D498U
+#define IDT_SW_NTP6_BARUTBASE2		0x0D49CU
+#define IDT_SW_NTP6_BARSETUP3		0x0D4A0U
+#define IDT_SW_NTP6_BARLIMIT3		0x0D4A4U
+#define IDT_SW_NTP6_BARLTBASE3		0x0D4A8U
+#define IDT_SW_NTP6_BARUTBASE3		0x0D4ACU
+#define IDT_SW_NTP6_BARSETUP4		0x0D4B0U
+#define IDT_SW_NTP6_BARLIMIT4		0x0D4B4U
+#define IDT_SW_NTP6_BARLTBASE4		0x0D4B8U
+#define IDT_SW_NTP6_BARUTBASE4		0x0D4BCU
+#define IDT_SW_NTP6_BARSETUP5		0x0D4C0U
+#define IDT_SW_NTP6_BARLIMIT5		0x0D4C4U
+#define IDT_SW_NTP6_BARLTBASE5		0x0D4C8U
+#define IDT_SW_NTP6_BARUTBASE5		0x0D4CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP8_PCIECMDSTS		0x11004U
+#define IDT_SW_NTP8_PCIELCTLSTS		0x11050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP8_NTCTL		0x11400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP8_BARSETUP0		0x11470U
+#define IDT_SW_NTP8_BARLIMIT0		0x11474U
+#define IDT_SW_NTP8_BARLTBASE0		0x11478U
+#define IDT_SW_NTP8_BARUTBASE0		0x1147CU
+#define IDT_SW_NTP8_BARSETUP1		0x11480U
+#define IDT_SW_NTP8_BARLIMIT1		0x11484U
+#define IDT_SW_NTP8_BARLTBASE1		0x11488U
+#define IDT_SW_NTP8_BARUTBASE1		0x1148CU
+#define IDT_SW_NTP8_BARSETUP2		0x11490U
+#define IDT_SW_NTP8_BARLIMIT2		0x11494U
+#define IDT_SW_NTP8_BARLTBASE2		0x11498U
+#define IDT_SW_NTP8_BARUTBASE2		0x1149CU
+#define IDT_SW_NTP8_BARSETUP3		0x114A0U
+#define IDT_SW_NTP8_BARLIMIT3		0x114A4U
+#define IDT_SW_NTP8_BARLTBASE3		0x114A8U
+#define IDT_SW_NTP8_BARUTBASE3		0x114ACU
+#define IDT_SW_NTP8_BARSETUP4		0x114B0U
+#define IDT_SW_NTP8_BARLIMIT4		0x114B4U
+#define IDT_SW_NTP8_BARLTBASE4		0x114B8U
+#define IDT_SW_NTP8_BARUTBASE4		0x114BCU
+#define IDT_SW_NTP8_BARSETUP5		0x114C0U
+#define IDT_SW_NTP8_BARLIMIT5		0x114C4U
+#define IDT_SW_NTP8_BARLTBASE5		0x114C8U
+#define IDT_SW_NTP8_BARUTBASE5		0x114CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP12_PCIECMDSTS		0x19004U
+#define IDT_SW_NTP12_PCIELCTLSTS	0x19050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP12_NTCTL		0x19400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP12_BARSETUP0		0x19470U
+#define IDT_SW_NTP12_BARLIMIT0		0x19474U
+#define IDT_SW_NTP12_BARLTBASE0		0x19478U
+#define IDT_SW_NTP12_BARUTBASE0		0x1947CU
+#define IDT_SW_NTP12_BARSETUP1		0x19480U
+#define IDT_SW_NTP12_BARLIMIT1		0x19484U
+#define IDT_SW_NTP12_BARLTBASE1		0x19488U
+#define IDT_SW_NTP12_BARUTBASE1		0x1948CU
+#define IDT_SW_NTP12_BARSETUP2		0x19490U
+#define IDT_SW_NTP12_BARLIMIT2		0x19494U
+#define IDT_SW_NTP12_BARLTBASE2		0x19498U
+#define IDT_SW_NTP12_BARUTBASE2		0x1949CU
+#define IDT_SW_NTP12_BARSETUP3		0x194A0U
+#define IDT_SW_NTP12_BARLIMIT3		0x194A4U
+#define IDT_SW_NTP12_BARLTBASE3		0x194A8U
+#define IDT_SW_NTP12_BARUTBASE3		0x194ACU
+#define IDT_SW_NTP12_BARSETUP4		0x194B0U
+#define IDT_SW_NTP12_BARLIMIT4		0x194B4U
+#define IDT_SW_NTP12_BARLTBASE4		0x194B8U
+#define IDT_SW_NTP12_BARUTBASE4		0x194BCU
+#define IDT_SW_NTP12_BARSETUP5		0x194C0U
+#define IDT_SW_NTP12_BARLIMIT5		0x194C4U
+#define IDT_SW_NTP12_BARLTBASE5		0x194C8U
+#define IDT_SW_NTP12_BARUTBASE5		0x194CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP16_PCIECMDSTS		0x21004U
+#define IDT_SW_NTP16_PCIELCTLSTS	0x21050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP16_NTCTL		0x21400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP16_BARSETUP0		0x21470U
+#define IDT_SW_NTP16_BARLIMIT0		0x21474U
+#define IDT_SW_NTP16_BARLTBASE0		0x21478U
+#define IDT_SW_NTP16_BARUTBASE0		0x2147CU
+#define IDT_SW_NTP16_BARSETUP1		0x21480U
+#define IDT_SW_NTP16_BARLIMIT1		0x21484U
+#define IDT_SW_NTP16_BARLTBASE1		0x21488U
+#define IDT_SW_NTP16_BARUTBASE1		0x2148CU
+#define IDT_SW_NTP16_BARSETUP2		0x21490U
+#define IDT_SW_NTP16_BARLIMIT2		0x21494U
+#define IDT_SW_NTP16_BARLTBASE2		0x21498U
+#define IDT_SW_NTP16_BARUTBASE2		0x2149CU
+#define IDT_SW_NTP16_BARSETUP3		0x214A0U
+#define IDT_SW_NTP16_BARLIMIT3		0x214A4U
+#define IDT_SW_NTP16_BARLTBASE3		0x214A8U
+#define IDT_SW_NTP16_BARUTBASE3		0x214ACU
+#define IDT_SW_NTP16_BARSETUP4		0x214B0U
+#define IDT_SW_NTP16_BARLIMIT4		0x214B4U
+#define IDT_SW_NTP16_BARLTBASE4		0x214B8U
+#define IDT_SW_NTP16_BARUTBASE4		0x214BCU
+#define IDT_SW_NTP16_BARSETUP5		0x214C0U
+#define IDT_SW_NTP16_BARLIMIT5		0x214C4U
+#define IDT_SW_NTP16_BARLTBASE5		0x214C8U
+#define IDT_SW_NTP16_BARUTBASE5		0x214CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP20_PCIECMDSTS		0x29004U
+#define IDT_SW_NTP20_PCIELCTLSTS	0x29050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP20_NTCTL		0x29400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP20_BARSETUP0		0x29470U
+#define IDT_SW_NTP20_BARLIMIT0		0x29474U
+#define IDT_SW_NTP20_BARLTBASE0		0x29478U
+#define IDT_SW_NTP20_BARUTBASE0		0x2947CU
+#define IDT_SW_NTP20_BARSETUP1		0x29480U
+#define IDT_SW_NTP20_BARLIMIT1		0x29484U
+#define IDT_SW_NTP20_BARLTBASE1		0x29488U
+#define IDT_SW_NTP20_BARUTBASE1		0x2948CU
+#define IDT_SW_NTP20_BARSETUP2		0x29490U
+#define IDT_SW_NTP20_BARLIMIT2		0x29494U
+#define IDT_SW_NTP20_BARLTBASE2		0x29498U
+#define IDT_SW_NTP20_BARUTBASE2		0x2949CU
+#define IDT_SW_NTP20_BARSETUP3		0x294A0U
+#define IDT_SW_NTP20_BARLIMIT3		0x294A4U
+#define IDT_SW_NTP20_BARLTBASE3		0x294A8U
+#define IDT_SW_NTP20_BARUTBASE3		0x294ACU
+#define IDT_SW_NTP20_BARSETUP4		0x294B0U
+#define IDT_SW_NTP20_BARLIMIT4		0x294B4U
+#define IDT_SW_NTP20_BARLTBASE4		0x294B8U
+#define IDT_SW_NTP20_BARUTBASE4		0x294BCU
+#define IDT_SW_NTP20_BARSETUP5		0x294C0U
+#define IDT_SW_NTP20_BARLIMIT5		0x294C4U
+#define IDT_SW_NTP20_BARLTBASE5		0x294C8U
+#define IDT_SW_NTP20_BARUTBASE5		0x294CCU
+/* IDT PCIe-switch control register	(DWORD) */
+#define IDT_SW_CTL			0x3E000U
+/* Boot Configuration Vector Status	(DWORD) */
+#define IDT_SW_BCVSTS			0x3E004U
+/* Port Clocking Mode			(DWORD) */
+#define IDT_SW_PCLKMODE			0x3E008U
+/* Reset Drain Delay			(DWORD) */
+#define IDT_SW_RDRAINDELAY		0x3E080U
+/* Port Operating Mode Change Drain Delay (DWORD) */
+#define IDT_SW_POMCDELAY		0x3E084U
+/* Side Effect Delay			(DWORD) */
+#define IDT_SW_SEDELAY			0x3E088U
+/* Upstream Secondary Bus Reset Delay	(DWORD) */
+#define IDT_SW_SSBRDELAY		0x3E08CU
+/* Switch partition N Control/Status/Failover registers */
+#define IDT_SW_SWPART0CTL		0x3E100U
+#define IDT_SW_SWPART0STS		0x3E104U
+#define IDT_SW_SWPART0FCTL		0x3E108U
+#define IDT_SW_SWPART1CTL		0x3E120U
+#define IDT_SW_SWPART1STS		0x3E124U
+#define IDT_SW_SWPART1FCTL		0x3E128U
+#define IDT_SW_SWPART2CTL		0x3E140U
+#define IDT_SW_SWPART2STS		0x3E144U
+#define IDT_SW_SWPART2FCTL		0x3E148U
+#define IDT_SW_SWPART3CTL		0x3E160U
+#define IDT_SW_SWPART3STS		0x3E164U
+#define IDT_SW_SWPART3FCTL		0x3E168U
+#define IDT_SW_SWPART4CTL		0x3E180U
+#define IDT_SW_SWPART4STS		0x3E184U
+#define IDT_SW_SWPART4FCTL		0x3E188U
+#define IDT_SW_SWPART5CTL		0x3E1A0U
+#define IDT_SW_SWPART5STS		0x3E1A4U
+#define IDT_SW_SWPART5FCTL		0x3E1A8U
+#define IDT_SW_SWPART6CTL		0x3E1C0U
+#define IDT_SW_SWPART6STS		0x3E1C4U
+#define IDT_SW_SWPART6FCTL		0x3E1C8U
+#define IDT_SW_SWPART7CTL		0x3E1E0U
+#define IDT_SW_SWPART7STS		0x3E1E4U
+#define IDT_SW_SWPART7FCTL		0x3E1E8U
+/* Switch port N control and status registers */
+#define IDT_SW_SWPORT0CTL		0x3E200U
+#define IDT_SW_SWPORT0STS		0x3E204U
+#define IDT_SW_SWPORT0FCTL		0x3E208U
+#define IDT_SW_SWPORT2CTL		0x3E240U
+#define IDT_SW_SWPORT2STS		0x3E244U
+#define IDT_SW_SWPORT2FCTL		0x3E248U
+#define IDT_SW_SWPORT4CTL		0x3E280U
+#define IDT_SW_SWPORT4STS		0x3E284U
+#define IDT_SW_SWPORT4FCTL		0x3E288U
+#define IDT_SW_SWPORT6CTL		0x3E2C0U
+#define IDT_SW_SWPORT6STS		0x3E2C4U
+#define IDT_SW_SWPORT6FCTL		0x3E2C8U
+#define IDT_SW_SWPORT8CTL		0x3E300U
+#define IDT_SW_SWPORT8STS		0x3E304U
+#define IDT_SW_SWPORT8FCTL		0x3E308U
+#define IDT_SW_SWPORT12CTL		0x3E380U
+#define IDT_SW_SWPORT12STS		0x3E384U
+#define IDT_SW_SWPORT12FCTL		0x3E388U
+#define IDT_SW_SWPORT16CTL		0x3E400U
+#define IDT_SW_SWPORT16STS		0x3E404U
+#define IDT_SW_SWPORT16FCTL		0x3E408U
+#define IDT_SW_SWPORT20CTL		0x3E480U
+#define IDT_SW_SWPORT20STS		0x3E484U
+#define IDT_SW_SWPORT20FCTL		0x3E488U
+/* Switch Event registers */
+/* Switch Event Status/Mask/Partition mask (DWORD) */
+#define IDT_SW_SESTS			0x3EC00U
+#define IDT_SW_SEMSK			0x3EC04U
+#define IDT_SW_SEPMSK			0x3EC08U
+/* Switch Event Link Up/Down Status/Mask (DWORD) */
+#define IDT_SW_SELINKUPSTS		0x3EC0CU
+#define IDT_SW_SELINKUPMSK		0x3EC10U
+#define IDT_SW_SELINKDNSTS		0x3EC14U
+#define IDT_SW_SELINKDNMSK		0x3EC18U
+/* Switch Event Fundamental Reset Status/Mask (DWORD) */
+#define IDT_SW_SEFRSTSTS		0x3EC1CU
+#define IDT_SW_SEFRSTMSK		0x3EC20U
+/* Switch Event Hot Reset Status/Mask	(DWORD) */
+#define IDT_SW_SEHRSTSTS		0x3EC24U
+#define IDT_SW_SEHRSTMSK		0x3EC28U
+/* Switch Event Failover Mask		(DWORD) */
+#define IDT_SW_SEFOVRMSK		0x3EC2CU
+/* Switch Event Global Signal Status/Mask (DWORD) */
+#define IDT_SW_SEGSIGSTS		0x3EC30U
+#define IDT_SW_SEGSIGMSK		0x3EC34U
+/* NT Global Doorbell Status		(DWORD) */
+#define IDT_SW_GDBELLSTS		0x3EC3CU
+/* Switch partition N message M control (msgs routing table) (DWORD) */
+#define IDT_SW_SWP0MSGCTL0		0x3EE00U
+#define IDT_SW_SWP1MSGCTL0		0x3EE04U
+#define IDT_SW_SWP2MSGCTL0		0x3EE08U
+#define IDT_SW_SWP3MSGCTL0		0x3EE0CU
+#define IDT_SW_SWP4MSGCTL0		0x3EE10U
+#define IDT_SW_SWP5MSGCTL0		0x3EE14U
+#define IDT_SW_SWP6MSGCTL0		0x3EE18U
+#define IDT_SW_SWP7MSGCTL0		0x3EE1CU
+#define IDT_SW_SWP0MSGCTL1		0x3EE20U
+#define IDT_SW_SWP1MSGCTL1		0x3EE24U
+#define IDT_SW_SWP2MSGCTL1		0x3EE28U
+#define IDT_SW_SWP3MSGCTL1		0x3EE2CU
+#define IDT_SW_SWP4MSGCTL1		0x3EE30U
+#define IDT_SW_SWP5MSGCTL1		0x3EE34U
+#define IDT_SW_SWP6MSGCTL1		0x3EE38U
+#define IDT_SW_SWP7MSGCTL1		0x3EE3CU
+#define IDT_SW_SWP0MSGCTL2		0x3EE40U
+#define IDT_SW_SWP1MSGCTL2		0x3EE44U
+#define IDT_SW_SWP2MSGCTL2		0x3EE48U
+#define IDT_SW_SWP3MSGCTL2		0x3EE4CU
+#define IDT_SW_SWP4MSGCTL2		0x3EE50U
+#define IDT_SW_SWP5MSGCTL2		0x3EE54U
+#define IDT_SW_SWP6MSGCTL2		0x3EE58U
+#define IDT_SW_SWP7MSGCTL2		0x3EE5CU
+#define IDT_SW_SWP0MSGCTL3		0x3EE60U
+#define IDT_SW_SWP1MSGCTL3		0x3EE64U
+#define IDT_SW_SWP2MSGCTL3		0x3EE68U
+#define IDT_SW_SWP3MSGCTL3		0x3EE6CU
+#define IDT_SW_SWP4MSGCTL3		0x3EE70U
+#define IDT_SW_SWP5MSGCTL3		0x3EE74U
+#define IDT_SW_SWP6MSGCTL3		0x3EE78U
+#define IDT_SW_SWP7MSGCTL3		0x3EE7CU
+/* SMBus Status and Control registers	(DWORD) */
+#define IDT_SW_SMBUSSTS			0x3F188U
+#define IDT_SW_SMBUSCTL			0x3F18CU
+/* Serial EEPROM Interface		(DWORD) */
+#define IDT_SW_EEPROMINTF		0x3F190U
+/* MBus I/O Expander Address N		(DWORD) */
+#define IDT_SW_IOEXPADDR0		0x3F198U
+#define IDT_SW_IOEXPADDR1		0x3F19CU
+#define IDT_SW_IOEXPADDR2		0x3F1A0U
+#define IDT_SW_IOEXPADDR3		0x3F1A4U
+#define IDT_SW_IOEXPADDR4		0x3F1A8U
+#define IDT_SW_IOEXPADDR5		0x3F1ACU
+/* General Purpose Events Control and Status registers (DWORD) */
+#define IDT_SW_GPECTL			0x3F1B0U
+#define IDT_SW_GPESTS			0x3F1B4U
+/* Temperature sensor Control/Status/Alarm/Adjustment/Slope registers */
+#define IDT_SW_TMPCTL			0x3F1D4U
+#define IDT_SW_TMPSTS			0x3F1D8U
+#define IDT_SW_TMPALARM			0x3F1DCU
+#define IDT_SW_TMPADJ			0x3F1E0U
+#define IDT_SW_TSSLOPE			0x3F1E4U
+/* SMBus Configuration Block header log	(DWORD) */
+#define IDT_SW_SMBUSCBHL		0x3F1E8U
+
+/*
+ * Common registers related constants
+ * @IDT_REG_ALIGN:	Registers alignment used in the driver
+ * @IDT_REG_PCI_MAX:	Maximum PCI configuration space register value
+ * @IDT_REG_SW_MAX:	Maximum global register value
+ */
+#define IDT_REG_ALIGN			4
+#define IDT_REG_PCI_MAX			0x00FFFU
+#define IDT_REG_SW_MAX			0x3FFFFU
+
+/*
+ * PCICMDSTS register fields related constants
+ * @IDT_PCICMDSTS_IOAE:	I/O access enable
+ * @IDT_PCICMDSTS_MAE:	Memory access enable
+ * @IDT_PCICMDSTS_BME:	Bus master enable
+ */
+#define IDT_PCICMDSTS_IOAE		0x00000001U
+#define IDT_PCICMDSTS_MAE		0x00000002U
+#define IDT_PCICMDSTS_BME		0x00000004U
+
+/*
+ * PCIEDCAP register fields related constants
+ * @IDT_PCIEDCAP_MPAYLOAD_MASK:	 Maximum payload size mask
+ * @IDT_PCIEDCAP_MPAYLOAD_FLD:	 Maximum payload size field offset
+ * @IDT_PCIEDCAP_MPAYLOAD_S128:	 Max supported payload size of 128 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S256:	 Max supported payload size of 256 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S512:	 Max supported payload size of 512 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S1024: Max supported payload size of 1024 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S2048: Max supported payload size of 2048 bytes
+ */
+#define IDT_PCIEDCAP_MPAYLOAD_MASK	0x00000007U
+#define IDT_PCIEDCAP_MPAYLOAD_FLD	0
+#define IDT_PCIEDCAP_MPAYLOAD_S128	0x00000000U
+#define IDT_PCIEDCAP_MPAYLOAD_S256	0x00000001U
+#define IDT_PCIEDCAP_MPAYLOAD_S512	0x00000002U
+#define IDT_PCIEDCAP_MPAYLOAD_S1024	0x00000003U
+#define IDT_PCIEDCAP_MPAYLOAD_S2048	0x00000004U
+
+/*
+ * PCIEDCTLSTS registers fields related constants
+ * @IDT_PCIEDCTL_MPS_MASK:	Maximum payload size mask
+ * @IDT_PCIEDCTL_MPS_FLD:	MPS field offset
+ * @IDT_PCIEDCTL_MPS_S128:	Max payload size of 128 bytes
+ * @IDT_PCIEDCTL_MPS_S256:	Max payload size of 256 bytes
+ * @IDT_PCIEDCTL_MPS_S512:	Max payload size of 512 bytes
+ * @IDT_PCIEDCTL_MPS_S1024:	Max payload size of 1024 bytes
+ * @IDT_PCIEDCTL_MPS_S2048:	Max payload size of 2048 bytes
+ * @IDT_PCIEDCTL_MPS_S4096:	Max payload size of 4096 bytes
+ */
+#define IDT_PCIEDCTLSTS_MPS_MASK	0x000000E0U
+#define IDT_PCIEDCTLSTS_MPS_FLD		5
+#define IDT_PCIEDCTLSTS_MPS_S128	0x00000000U
+#define IDT_PCIEDCTLSTS_MPS_S256	0x00000020U
+#define IDT_PCIEDCTLSTS_MPS_S512	0x00000040U
+#define IDT_PCIEDCTLSTS_MPS_S1024	0x00000060U
+#define IDT_PCIEDCTLSTS_MPS_S2048	0x00000080U
+#define IDT_PCIEDCTLSTS_MPS_S4096	0x000000A0U
+
+/*
+ * PCIELCAP register fields related constants
+ * @IDT_PCIELCAP_PORTNUM_MASK:	Port number field mask
+ * @IDT_PCIELCAP_PORTNUM_FLD:	Port number field offset
+ */
+#define IDT_PCIELCAP_PORTNUM_MASK	0xFF000000U
+#define IDT_PCIELCAP_PORTNUM_FLD	24
+
+/*
+ * PCIELCTLSTS registers fields related constants
+ * @IDT_PCIELSTS_CLS_MASK:	Current link speed mask
+ * @IDT_PCIELSTS_CLS_FLD:	Current link speed field offset
+ * @IDT_PCIELSTS_NLW_MASK:	Negotiated link width mask
+ * @IDT_PCIELSTS_NLW_FLD:	Negotiated link width field offset
+ * @IDT_PCIELSTS_SCLK_COM:	Common slot clock configuration
+ */
+#define IDT_PCIELCTLSTS_CLS_MASK	0x000F0000U
+#define IDT_PCIELCTLSTS_CLS_FLD		16
+#define IDT_PCIELCTLSTS_NLW_MASK	0x03F00000U
+#define IDT_PCIELCTLSTS_NLW_FLD		20
+#define IDT_PCIELCTLSTS_SCLK_COM	0x10000000U
+
+/*
+ * NTCTL register fields related constants
+ * @IDT_NTCTL_IDPROTDIS:	ID Protection check disable (disable MTBL)
+ * @IDT_NTCTL_CPEN:		Completion enable
+ * @IDT_NTCTL_RNS:		Request no snoop processing (if MTBL disabled)
+ * @IDT_NTCTL_ATP:		Address type processing (if MTBL disabled)
+ */
+#define IDT_NTCTL_IDPROTDIS		0x00000001U
+#define IDT_NTCTL_CPEN			0x00000002U
+#define IDT_NTCTL_RNS			0x00000004U
+#define IDT_NTCTL_ATP			0x00000008U
+
+/*
+ * NTINTSTS register fields related constants
+ * @IDT_NTINTSTS_MSG:		Message interrupt bit
+ * @IDT_NTINTSTS_DBELL:		Doorbell interrupt bit
+ * @IDT_NTINTSTS_SEVENT:	Switch Event interrupt bit
+ * @IDT_NTINTSTS_TMPSENSOR:	Temperature sensor interrupt bit
+ */
+#define IDT_NTINTSTS_MSG		0x00000001U
+#define IDT_NTINTSTS_DBELL		0x00000002U
+#define IDT_NTINTSTS_SEVENT		0x00000008U
+#define IDT_NTINTSTS_TMPSENSOR		0x00000080U
+
+/*
+ * NTINTMSK register fields related constants
+ * @IDT_NTINTMSK_MSG:		Message interrupt mask bit
+ * @IDT_NTINTMSK_DBELL:		Doorbell interrupt mask bit
+ * @IDT_NTINTMSK_SEVENT:	Switch Event interrupt mask bit
+ * @IDT_NTINTMSK_TMPSENSOR:	Temperature sensor interrupt mask bit
+ * @IDT_NTINTMSK_ALL:		All the useful interrupts mask
+ */
+#define IDT_NTINTMSK_MSG		0x00000001U
+#define IDT_NTINTMSK_DBELL		0x00000002U
+#define IDT_NTINTMSK_SEVENT		0x00000008U
+#define IDT_NTINTMSK_TMPSENSOR		0x00000080U
+#define IDT_NTINTMSK_ALL \
+	(IDT_NTINTMSK_MSG | IDT_NTINTMSK_DBELL | \
+	 IDT_NTINTMSK_SEVENT | IDT_NTINTMSK_TMPSENSOR)
+
+/*
+ * NTGSIGNAL register fields related constants
+ * @IDT_NTGSIGNAL_SET:	Set global signal of the local partition
+ */
+#define IDT_NTGSIGNAL_SET		0x00000001U
+
+/*
+ * BARSETUP register fields related constants
+ * @IDT_BARSETUP_TYPE_MASK:	Mask of the TYPE field
+ * @IDT_BARSETUP_TYPE_32:	32-bit addressing BAR
+ * @IDT_BARSETUP_TYPE_64:	64-bit addressing BAR
+ * @IDT_BARSETUP_PREF:		Value of the BAR prefetchable field
+ * @IDT_BARSETUP_SIZE_MASK:	Mask of the SIZE field
+ * @IDT_BARSETUP_SIZE_FLD:	SIZE field offset
+ * @IDT_BARSETUP_SIZE_CFG:	SIZE field value in case of config space MODE
+ * @IDT_BARSETUP_MODE_CFG:	Configuration space BAR mode
+ * @IDT_BARSETUP_ATRAN_MASK:	ATRAN field mask
+ * @IDT_BARSETUP_ATRAN_FLD:	ATRAN field offset
+ * @IDT_BARSETUP_ATRAN_DIR:	Direct address translation memory window
+ * @IDT_BARSETUP_ATRAN_LUT12:	12-entry lookup table
+ * @IDT_BARSETUP_ATRAN_LUT24:	24-entry lookup table
+ * @IDT_BARSETUP_TPART_MASK:	TPART field mask
+ * @IDT_BARSETUP_TPART_FLD:	TPART field offset
+ * @IDT_BARSETUP_EN:		BAR enable bit
+ */
+#define IDT_BARSETUP_TYPE_MASK		0x00000006U
+#define IDT_BARSETUP_TYPE_FLD		0
+#define IDT_BARSETUP_TYPE_32		0x00000000U
+#define IDT_BARSETUP_TYPE_64		0x00000004U
+#define IDT_BARSETUP_PREF		0x00000008U
+#define IDT_BARSETUP_SIZE_MASK		0x000003F0U
+#define IDT_BARSETUP_SIZE_FLD		4
+#define IDT_BARSETUP_SIZE_CFG		0x000000C0U
+#define IDT_BARSETUP_MODE_CFG		0x00000400U
+#define IDT_BARSETUP_ATRAN_MASK		0x00001800U
+#define IDT_BARSETUP_ATRAN_FLD		11
+#define IDT_BARSETUP_ATRAN_DIR		0x00000000U
+#define IDT_BARSETUP_ATRAN_LUT12	0x00000800U
+#define IDT_BARSETUP_ATRAN_LUT24	0x00001000U
+#define IDT_BARSETUP_TPART_MASK		0x0000E000U
+#define IDT_BARSETUP_TPART_FLD		13
+#define IDT_BARSETUP_EN			0x80000000U
+
+/*
+ * NTMTBLDATA register fields related constants
+ * @IDT_NTMTBLDATA_VALID:	Set the MTBL entry being valid
+ * @IDT_NTMTBLDATA_REQID_MASK:	Bus:Device:Function field mask
+ * @IDT_NTMTBLDATA_REQID_FLD:	Bus:Device:Function field offset
+ * @IDT_NTMTBLDATA_PART_MASK:	Partition field mask
+ * @IDT_NTMTBLDATA_PART_FLD:	Partition field offset
+ * @IDT_NTMTBLDATA_ATP_TRANS:	Enable AT field translation on request TLPs
+ * @IDT_NTMTBLDATA_CNS_INV:	Enable No Snoop attribute inversion of
+ *				Completion TLPs
+ * @IDT_NTMTBLDATA_RNS_INV:	Enable No Snoop attribute inversion of
+ *				Request TLPs
+ */
+#define IDT_NTMTBLDATA_VALID		0x00000001U
+#define IDT_NTMTBLDATA_REQID_MASK	0x0001FFFEU
+#define IDT_NTMTBLDATA_REQID_FLD	1
+#define IDT_NTMTBLDATA_PART_MASK	0x000E0000U
+#define IDT_NTMTBLDATA_PART_FLD		17
+#define IDT_NTMTBLDATA_ATP_TRANS	0x20000000U
+#define IDT_NTMTBLDATA_CNS_INV		0x40000000U
+#define IDT_NTMTBLDATA_RNS_INV		0x80000000U
+
+/*
+ * REQIDCAP register fields related constants
+ * @IDT_REQIDCAP_REQID_MASK:	Request ID field mask
+ * @IDT_REQIDCAP_REQID_FLD:	Request ID field offset
+ */
+#define IDT_REQIDCAP_REQID_MASK		0x0000FFFFU
+#define IDT_REQIDCAP_REQID_FLD		0
+
+/*
+ * LUTOFFSET register fields related constants
+ * @IDT_LUTOFFSET_INDEX_MASK:	Lookup table index field mask
+ * @IDT_LUTOFFSET_INDEX_FLD:	Lookup table index field offset
+ * @IDT_LUTOFFSET_BAR_MASK:	Lookup table BAR select field mask
+ * @IDT_LUTOFFSET_BAR_FLD:	Lookup table BAR select field offset
+ */
+#define IDT_LUTOFFSET_INDEX_MASK	0x0000001FU
+#define IDT_LUTOFFSET_INDEX_FLD		0
+#define IDT_LUTOFFSET_BAR_MASK		0x00000700U
+#define IDT_LUTOFFSET_BAR_FLD		8
+
+/*
+ * LUTUDATA register fields related constants
+ * @IDT_LUTUDATA_PART_MASK:	Partition field mask
+ * @IDT_LUTUDATA_PART_FLD:	Partition field offset
+ * @IDT_LUTUDATA_VALID:		Lookup table entry valid bit
+ */
+#define IDT_LUTUDATA_PART_MASK		0x0000000FU
+#define IDT_LUTUDATA_PART_FLD		0
+#define IDT_LUTUDATA_VALID		0x80000000U
+
+/*
+ * SWPARTxSTS register fields related constants
+ * @IDT_SWPARTxSTS_SCI:		Switch partition state change initiated
+ * @IDT_SWPARTxSTS_SCC:		Switch partition state change completed
+ * @IDT_SWPARTxSTS_STATE_MASK:	Switch partition state mask
+ * @IDT_SWPARTxSTS_STATE_FLD:	Switch partition state field offset
+ * @IDT_SWPARTxSTS_STATE_DIS:	Switch partition disabled
+ * @IDT_SWPARTxSTS_STATE_ACT:	Switch partition enabled
+ * @IDT_SWPARTxSTS_STATE_RES:	Switch partition in reset
+ * @IDT_SWPARTxSTS_US:		Switch partition has upstream port
+ * @IDT_SWPARTxSTS_USID_MASK:	Switch partition upstream port ID mask
+ * @IDT_SWPARTxSTS_USID_FLD:	Switch partition upstream port ID field offset
+ * @IDT_SWPARTxSTS_NT:		Upstream port has NT function
+ * @IDT_SWPARTxSTS_DMA:		Upstream port has DMA function
+ */
+#define IDT_SWPARTxSTS_SCI		0x00000001U
+#define IDT_SWPARTxSTS_SCC		0x00000002U
+#define IDT_SWPARTxSTS_STATE_MASK	0x00000060U
+#define IDT_SWPARTxSTS_STATE_FLD	5
+#define IDT_SWPARTxSTS_STATE_DIS	0x00000000U
+#define IDT_SWPARTxSTS_STATE_ACT	0x00000020U
+#define IDT_SWPARTxSTS_STATE_RES	0x00000060U
+#define IDT_SWPARTxSTS_US		0x00000100U
+#define IDT_SWPARTxSTS_USID_MASK	0x00003E00U
+#define IDT_SWPARTxSTS_USID_FLD		9
+#define IDT_SWPARTxSTS_NT		0x00004000U
+#define IDT_SWPARTxSTS_DMA		0x00008000U
+
+/*
+ * SWPORTxSTS register fields related constants
+ * @IDT_SWPORTxSTS_OMCI:	Operation mode change initiated
+ * @IDT_SWPORTxSTS_OMCC:	Operation mode change completed
+ * @IDT_SWPORTxSTS_LINKUP:	Link up status
+ * @IDT_SWPORTxSTS_DS:		Port lanes behave as downstream lanes
+ * @IDT_SWPORTxSTS_MODE_MASK:	Port mode field mask
+ * @IDT_SWPORTxSTS_MODE_FLD:	Port mode field offset
+ * @IDT_SWPORTxSTS_MODE_DIS:	Port mode - disabled
+ * @IDT_SWPORTxSTS_MODE_DS:	Port mode - downstream switch port
+ * @IDT_SWPORTxSTS_MODE_US:	Port mode - upstream switch port
+ * @IDT_SWPORTxSTS_MODE_NT:	Port mode - NT function
+ * @IDT_SWPORTxSTS_MODE_USNT:	Port mode - upstream switch port with NTB
+ * @IDT_SWPORTxSTS_MODE_UNAT:	Port mode - unattached
+ * @IDT_SWPORTxSTS_MODE_USDMA:	Port mode - upstream switch port with DMA
+ * @IDT_SWPORTxSTS_MODE_USNTDMA:Port mode - upstream port with NTB and DMA
+ * @IDT_SWPORTxSTS_MODE_NTDMA:	Port mode - NT function with DMA
+ * @IDT_SWPORTxSTS_SWPART_MASK:	Port partition field mask
+ * @IDT_SWPORTxSTS_SWPART_FLD:	Port partition field offset
+ * @IDT_SWPORTxSTS_DEVNUM_MASK:	Port device number field mask
+ * @IDT_SWPORTxSTS_DEVNUM_FLD:	Port device number field offset
+ */
+#define IDT_SWPORTxSTS_OMCI		0x00000001U
+#define IDT_SWPORTxSTS_OMCC		0x00000002U
+#define IDT_SWPORTxSTS_LINKUP		0x00000010U
+#define IDT_SWPORTxSTS_DS		0x00000020U
+#define IDT_SWPORTxSTS_MODE_MASK	0x000003C0U
+#define IDT_SWPORTxSTS_MODE_FLD		6
+#define IDT_SWPORTxSTS_MODE_DIS		0x00000000U
+#define IDT_SWPORTxSTS_MODE_DS		0x00000040U
+#define IDT_SWPORTxSTS_MODE_US		0x00000080U
+#define IDT_SWPORTxSTS_MODE_NT		0x000000C0U
+#define IDT_SWPORTxSTS_MODE_USNT	0x00000100U
+#define IDT_SWPORTxSTS_MODE_UNAT	0x00000140U
+#define IDT_SWPORTxSTS_MODE_USDMA	0x00000180U
+#define IDT_SWPORTxSTS_MODE_USNTDMA	0x000001C0U
+#define IDT_SWPORTxSTS_MODE_NTDMA	0x00000200U
+#define IDT_SWPORTxSTS_SWPART_MASK	0x00001C00U
+#define IDT_SWPORTxSTS_SWPART_FLD	10
+#define IDT_SWPORTxSTS_DEVNUM_MASK	0x001F0000U
+#define IDT_SWPORTxSTS_DEVNUM_FLD	16
+
+/*
+ * SEMSK register fields related constants
+ * @IDT_SEMSK_LINKUP:	Link Up event mask bit
+ * @IDT_SEMSK_LINKDN:	Link Down event mask bit
+ * @IDT_SEMSK_GSIGNAL:	Global Signal event mask bit
+ */
+#define IDT_SEMSK_LINKUP		0x00000001U
+#define IDT_SEMSK_LINKDN		0x00000002U
+#define IDT_SEMSK_GSIGNAL		0x00000020U
+
+/*
+ * SWPxMSGCTL register fields related constants
+ * @IDT_SWPxMSGCTL_REG_MASK:	Register select field mask
+ * @IDT_SWPxMSGCTL_REG_FLD:	Register select field offset
+ * @IDT_SWPxMSGCTL_PART_MASK:	Partition select field mask
+ * @IDT_SWPxMSGCTL_PART_FLD:	Partition select field offset
+ */
+#define IDT_SWPxMSGCTL_REG_MASK		0x00000003U
+#define IDT_SWPxMSGCTL_REG_FLD		0
+#define IDT_SWPxMSGCTL_PART_MASK	0x00000070U
+#define IDT_SWPxMSGCTL_PART_FLD		4
+
+/*
+ * TMPSTS register fields related constants
+ * @IDT_TMPSTS_TEMP_MASK:	Current temperature field mask
+ * @IDT_TMPSTS_TEMP_FLD:	Current temperature field offset
+ */
+#define IDT_TMPSTS_TEMP_MASK		0x000000FFU
+#define IDT_TMPSTS_TEMP_FLD		0
+
+/*
+ * Helper macro to get/set the corresponding field value
+ * @GET_FIELD:		Retrieve the value of the corresponding field
+ * @SET_FIELD:		Set the specified field up
+ * @IS_FLD_SET:		Check whether a field is set with value
+ */
+#define GET_FIELD(field, data) \
+	(((u32)(data) & IDT_ ##field## _MASK) >> IDT_ ##field## _FLD)
+#define SET_FIELD(field, data, value) \
+	(((u32)(data) & ~IDT_ ##field## _MASK) | \
+	 ((u32)(value) << IDT_ ##field## _FLD))
+#define IS_FLD_SET(field, data, value) \
+	(((u32)(data) & IDT_ ##field## _MASK) == IDT_ ##field## _ ##value)
+
+/*
+ * Useful registers masks:
+ * @IDT_DBELL_MASK:	Doorbell bits mask
+ * @IDT_OUTMSG_MASK:	Out messages status bits mask
+ * @IDT_INMSG_MASK:	In messages status bits mask
+ * @IDT_MSG_MASK:	Any message status bits mask
+ */
+#define IDT_DBELL_MASK		((u32)0xFFFFFFFFU)
+#define IDT_OUTMSG_MASK		((u32)0x0000000FU)
+#define IDT_INMSG_MASK		((u32)0x000F0000U)
+#define IDT_MSG_MASK		(IDT_INMSG_MASK | IDT_OUTMSG_MASK)
+
+/*
+ * Number of IDT NTB resources:
+ * @IDT_MSG_CNT:	Number of Message registers
+ * @IDT_BAR_CNT:	Number of BARs of each port
+ * @IDT_MTBL_ENTRY_CNT:	Number mapping table entries
+ */
+#define IDT_MSG_CNT		4
+#define IDT_BAR_CNT		6
+#define IDT_MTBL_ENTRY_CNT	64
+
+/*
+ * General IDT PCIe-switch constant
+ * @IDT_MAX_NR_PORTS:	Maximum number of ports per IDT PCIe-switch
+ * @IDT_MAX_NR_PARTS:	Maximum number of partitions per IDT PCIe-switch
+ * @IDT_MAX_NR_PEERS:	Maximum number of NT-peers per IDT PCIe-switch
+ * @IDT_MAX_NR_MWS:	Maximum number of Memory Widows
+ * @IDT_PCIE_REGSIZE:	Size of the registers in bytes
+ * @IDT_TRANS_ALIGN:	Alignment of translated base address
+ * @IDT_DIR_SIZE_ALIGN:	Alignment of size setting for direct translated MWs.
+ *			Even though the lower 10 bits are reserved, they are
+ *			treated by IDT as one's so basically there is no any
+ *			alignment of size limit for DIR address translation.
+ */
+#define IDT_MAX_NR_PORTS	24
+#define IDT_MAX_NR_PARTS	8
+#define IDT_MAX_NR_PEERS	8
+#define IDT_MAX_NR_MWS		29
+#define IDT_PCIE_REGSIZE	4
+#define IDT_TRANS_ALIGN		4
+#define IDT_DIR_SIZE_ALIGN	1
+
+/*
+ * IDT Memory Windows type. Depending on the device settings, IDT supports
+ * Direct Address Translation MW registers and Lookup Table registers
+ * @IDT_MW_DIR:		Direct address translation
+ * @IDT_MW_LUT12:	12-entry lookup table entry
+ * @IDT_MW_LUT24:	24-entry lookup table entry
+ *
+ * NOTE These values are exactly the same as one of the BARSETUP ATRAN field
+ */
+enum idt_mw_type {
+	IDT_MW_DIR = 0x0,
+	IDT_MW_LUT12 = 0x1,
+	IDT_MW_LUT24 = 0x2
+};
+
+/*
+ * IDT PCIe-switch model private data
+ * @name:	Device name
+ * @port_cnt:	Total number of NT endpoint ports
+ * @ports:	Port ids
+ */
+struct idt_89hpes_cfg {
+	char *name;
+	unsigned char port_cnt;
+	unsigned char ports[];
+};
+
+/*
+ * Memory window configuration structure
+ * @type:	Type of the memory window (direct address translation or lookup
+ *		table)
+ *
+ * @bar:	PCIe BAR the memory window referenced to
+ * @idx:	Index of the memory window within the BAR
+ *
+ * @addr_align:	Alignment of translated address
+ * @size_align:	Alignment of memory window size
+ * @size_max:	Maximum size of memory window
+ */
+struct idt_mw_cfg {
+	enum idt_mw_type type;
+
+	unsigned char bar;
+	unsigned char idx;
+
+	u64 addr_align;
+	u64 size_align;
+	u64 size_max;
+};
+
+/*
+ * Description structure of peer IDT NT-functions:
+ * @port:		NT-function port
+ * @part:		NT-function partition
+ *
+ * @mw_cnt:		Number of memory windows supported by NT-function
+ * @mws:		Array of memory windows descriptors
+ */
+struct idt_ntb_peer {
+	unsigned char port;
+	unsigned char part;
+
+	unsigned char mw_cnt;
+	struct idt_mw_cfg *mws;
+};
+
+/*
+ * Description structure of local IDT NT-function:
+ * @ntb:		Linux NTB-device description structure
+ * @swcfg:		Pointer to the structure of local IDT PCIe-switch
+ *			specific cofnfigurations
+ *
+ * @port:		Local NT-function port
+ * @part:		Local NT-function partition
+ *
+ * @peer_cnt:		Number of peers with activated NTB-function
+ * @peers:		Array of peers descripting structures
+ * @port_idx_map:	Map of port number -> peer index
+ * @part_idx_map:	Map of partition number -> peer index
+ *
+ * @mtbl_lock:		Mapping table access lock
+ *
+ * @mw_cnt:		Number of memory windows supported by NT-function
+ * @mws:		Array of memory windows descriptors
+ * @lut_lock:		Lookup table access lock
+ *
+ * @msg_locks:		Message registers mapping table lockers
+ *
+ * @cfgspc:		Virtual address of the memory mapped configuration
+ *			space of the NT-function
+ * @db_mask_lock:	Doorbell mask register lock
+ * @msg_mask_lock:	Message mask register lock
+ * @gasa_lock:		GASA registers access lock
+ *
+ * @dbgfs_info:		DebugFS info node
+ */
+struct idt_ntb_dev {
+	struct ntb_dev ntb;
+	struct idt_89hpes_cfg *swcfg;
+
+	unsigned char port;
+	unsigned char part;
+
+	unsigned char peer_cnt;
+	struct idt_ntb_peer peers[IDT_MAX_NR_PEERS];
+	char port_idx_map[IDT_MAX_NR_PORTS];
+	char part_idx_map[IDT_MAX_NR_PARTS];
+
+	spinlock_t mtbl_lock;
+
+	unsigned char mw_cnt;
+	struct idt_mw_cfg *mws;
+	spinlock_t lut_lock;
+
+	spinlock_t msg_locks[IDT_MSG_CNT];
+
+	void __iomem *cfgspc;
+	spinlock_t db_mask_lock;
+	spinlock_t msg_mask_lock;
+	spinlock_t gasa_lock;
+
+	struct dentry *dbgfs_info;
+};
+#define to_ndev_ntb(__ntb) container_of(__ntb, struct idt_ntb_dev, ntb)
+
+/*
+ * Descriptor of the IDT PCIe-switch BAR resources
+ * @setup:	BAR setup register
+ * @limit:	BAR limit register
+ * @ltbase:	Lower translated base address
+ * @utbase:	Upper translated base address
+ */
+struct idt_ntb_bar {
+	unsigned int setup;
+	unsigned int limit;
+	unsigned int ltbase;
+	unsigned int utbase;
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch message resources
+ * @in:		Inbound message register
+ * @out:	Outbound message register
+ * @src:	Source of inbound message register
+ */
+struct idt_ntb_msg {
+	unsigned int in;
+	unsigned int out;
+	unsigned int src;
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch NT-function specific parameters in the
+ * PCI Configuration Space
+ * @bars:	BARs related registers
+ * @msgs:	Messaging related registers
+ */
+struct idt_ntb_regs {
+	struct idt_ntb_bar bars[IDT_BAR_CNT];
+	struct idt_ntb_msg msgs[IDT_MSG_CNT];
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch port specific parameters in the
+ * Global Configuration Space
+ * @pcicmdsts:	 PCI command/status register
+ * @pcielctlsts: PCIe link control/status
+ *
+ * @ctl:	Port control register
+ * @sts:	Port status register
+ *
+ * @bars:	BARs related registers
+ */
+struct idt_ntb_port {
+	unsigned int pcicmdsts;
+	unsigned int pcielctlsts;
+	unsigned int ntctl;
+
+	unsigned int ctl;
+	unsigned int sts;
+
+	struct idt_ntb_bar bars[IDT_BAR_CNT];
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch partition specific parameters.
+ * @ctl:	Partition control register in the Global Address Space
+ * @sts:	Partition status register in the Global Address Space
+ * @msgctl:	Messages control registers
+ */
+struct idt_ntb_part {
+	unsigned int ctl;
+	unsigned int sts;
+	unsigned int msgctl[IDT_MSG_CNT];
+};
+
+#endif /* NTB_HW_IDT_H */
-- 
GitLab


From e9410ff810f330d41dd69aecdd46fd7d4bb9c983 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 9 May 2017 09:33:17 -0500
Subject: [PATCH 0786/1958] ntb: Add a module option to control affinity of DMA
 channels

The DMA channel(s)/memory used to transfer data to an NTB device
may not be required to be on the same node as the device. Add a
module parameter that allows any candidate channel (aside from
node assocation) and allocated memory to be used.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/test/ntb_perf.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 42756a98a728a..0ec9f56f6999d 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -101,6 +101,10 @@ static bool use_dma; /* default to 0 */
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
 
+static bool on_node = true; /* default to 1 */
+module_param(on_node, bool, 0644);
+MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");
+
 struct perf_mw {
 	phys_addr_t	phys_addr;
 	resource_size_t	phys_size;
@@ -345,6 +349,10 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
 
 static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
 {
+	/* Is the channel required to be on the same node as the device? */
+	if (!on_node)
+		return true;
+
 	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
 }
 
@@ -362,7 +370,7 @@ static int ntb_perf_thread(void *data)
 
 	pr_debug("kthread %s starting...\n", current->comm);
 
-	node = dev_to_node(&pdev->dev);
+	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
 
 	if (use_dma && !pctx->dma_chan) {
 		dma_cap_mask_t dma_mask;
@@ -682,7 +690,8 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 		pr_info("Fix run_order to %u\n", run_order);
 	}
 
-	node = dev_to_node(&perf->ntb->pdev->dev);
+	node = on_node ? dev_to_node(&perf->ntb->pdev->dev)
+		       : NUMA_NO_NODE;
 	atomic_set(&perf->tdone, 0);
 
 	/* launch kernel thread */
@@ -779,8 +788,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
 		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
 
-	node = dev_to_node(&pdev->dev);
-
+	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
 	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
 	if (!perf) {
 		rc = -ENOMEM;
-- 
GitLab


From 0b93a6dbec96be880a3d58a683669b4a75beee1d Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 9 May 2017 09:33:28 -0500
Subject: [PATCH 0787/1958] ntb: Remove debug-fs variables from the context
 structure

The Debug FS entries manage themselves; we don't need to hang onto
them in the context structure.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/test/ntb_perf.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 0ec9f56f6999d..60e0dd320aefe 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -140,9 +140,6 @@ struct perf_ctx {
 	bool			link_is_up;
 	struct delayed_work	link_work;
 	wait_queue_head_t	link_wq;
-	struct dentry		*debugfs_node_dir;
-	struct dentry		*debugfs_run;
-	struct dentry		*debugfs_threads;
 	u8			perf_threads;
 	/* mutex ensures only one set of threads run at once */
 	struct mutex		run_mutex;
@@ -737,6 +734,9 @@ static const struct file_operations ntb_perf_debugfs_run = {
 static int perf_debugfs_setup(struct perf_ctx *perf)
 {
 	struct pci_dev *pdev = perf->ntb->pdev;
+	struct dentry *debugfs_node_dir;
+	struct dentry *debugfs_run;
+	struct dentry *debugfs_threads;
 
 	if (!debugfs_initialized())
 		return -ENODEV;
@@ -747,21 +747,21 @@ static int perf_debugfs_setup(struct perf_ctx *perf)
 			return -ENODEV;
 	}
 
-	perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
-						    perf_debugfs_dir);
-	if (!perf->debugfs_node_dir)
+	debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+					      perf_debugfs_dir);
+	if (!debugfs_node_dir)
 		return -ENODEV;
 
-	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
-						perf->debugfs_node_dir, perf,
-						&ntb_perf_debugfs_run);
-	if (!perf->debugfs_run)
+	debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+					  debugfs_node_dir, perf,
+					  &ntb_perf_debugfs_run);
+	if (!debugfs_run)
 		return -ENODEV;
 
-	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
-						  perf->debugfs_node_dir,
-						  &perf->perf_threads);
-	if (!perf->debugfs_threads)
+	debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+					    debugfs_node_dir,
+					    &perf->perf_threads);
+	if (!debugfs_threads)
 		return -ENODEV;
 
 	return 0;
-- 
GitLab


From 8407dd6c16c0d92432323c0ce8daecd13e424703 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 9 May 2017 09:33:36 -0500
Subject: [PATCH 0788/1958] ntb: Add more debugfs support for ntb_perf testing
 options

The ntb_perf tool uses module parameters to control the
characteristics of its test.  Enable the changing of these
options through debugfs, and eliminating the need to unload
and reload the module to make changes and run additional tests.

Add a new module parameter that forces the DMA channel
selection onto the same node as the NTB device (default: true).

 - seg_order: Size of the NTB memory window; power of 2.
 - run_order: Size of the data buffer; power of 2.
 - use_dma:   Use DMA or memcpy? Default: 0.
 - on_node:   Only use DMA channel(s) on the NTB node. Default: true.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/test/ntb_perf.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 60e0dd320aefe..6fdddf86269ef 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -737,6 +737,10 @@ static int perf_debugfs_setup(struct perf_ctx *perf)
 	struct dentry *debugfs_node_dir;
 	struct dentry *debugfs_run;
 	struct dentry *debugfs_threads;
+	struct dentry *debugfs_seg_order;
+	struct dentry *debugfs_run_order;
+	struct dentry *debugfs_use_dma;
+	struct dentry *debugfs_on_node;
 
 	if (!debugfs_initialized())
 		return -ENODEV;
@@ -764,6 +768,30 @@ static int perf_debugfs_setup(struct perf_ctx *perf)
 	if (!debugfs_threads)
 		return -ENODEV;
 
+	debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
+					       debugfs_node_dir,
+					       &seg_order);
+	if (!debugfs_seg_order)
+		return -ENODEV;
+
+	debugfs_run_order = debugfs_create_u32("run_order", 0600,
+					       debugfs_node_dir,
+					       &run_order);
+	if (!debugfs_run_order)
+		return -ENODEV;
+
+	debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
+					       debugfs_node_dir,
+					       &use_dma);
+	if (!debugfs_use_dma)
+		return -ENODEV;
+
+	debugfs_on_node = debugfs_create_bool("on_node", 0600,
+					      debugfs_node_dir,
+					      &on_node);
+	if (!debugfs_on_node)
+		return -ENODEV;
+
 	return 0;
 }
 
-- 
GitLab


From 32e0f5bfa5aa2a74264160e0990df3af125ca6e2 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Mon, 15 May 2017 10:33:27 -0500
Subject: [PATCH 0789/1958] ntb: Add error path/handling to Debug FS entry
 creation

If a failure occurs when creating Debug FS entries, unroll all of
the work that's been done.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/test/ntb_perf.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 6fdddf86269ef..759f772fa00c6 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -745,6 +745,7 @@ static int perf_debugfs_setup(struct perf_ctx *perf)
 	if (!debugfs_initialized())
 		return -ENODEV;
 
+	/* Assumpion: only one NTB device in the system */
 	if (!perf_debugfs_dir) {
 		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
 		if (!perf_debugfs_dir)
@@ -754,45 +755,50 @@ static int perf_debugfs_setup(struct perf_ctx *perf)
 	debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
 					      perf_debugfs_dir);
 	if (!debugfs_node_dir)
-		return -ENODEV;
+		goto err;
 
 	debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
 					  debugfs_node_dir, perf,
 					  &ntb_perf_debugfs_run);
 	if (!debugfs_run)
-		return -ENODEV;
+		goto err;
 
 	debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
 					    debugfs_node_dir,
 					    &perf->perf_threads);
 	if (!debugfs_threads)
-		return -ENODEV;
+		goto err;
 
 	debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
 					       debugfs_node_dir,
 					       &seg_order);
 	if (!debugfs_seg_order)
-		return -ENODEV;
+		goto err;
 
 	debugfs_run_order = debugfs_create_u32("run_order", 0600,
 					       debugfs_node_dir,
 					       &run_order);
 	if (!debugfs_run_order)
-		return -ENODEV;
+		goto err;
 
 	debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
 					       debugfs_node_dir,
 					       &use_dma);
 	if (!debugfs_use_dma)
-		return -ENODEV;
+		goto err;
 
 	debugfs_on_node = debugfs_create_bool("on_node", 0600,
 					      debugfs_node_dir,
 					      &on_node);
 	if (!debugfs_on_node)
-		return -ENODEV;
+		goto err;
 
 	return 0;
+
+err:
+	debugfs_remove_recursive(perf_debugfs_dir);
+	perf_debugfs_dir = NULL;
+	return -ENODEV;
 }
 
 static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
-- 
GitLab


From 854b1dd9c39d8c8c8647a44de47ef18506ae11f9 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 5 Jun 2017 14:00:51 -0600
Subject: [PATCH 0790/1958] ntb_netdev: set the net_device's parent

At present, ntb_netdev devices end up under /sys/devices/virtual/net
completely unconnected to the ntb trees below them. This patch sets the
parent of the net_device (using SET_NETDEV_DEV) to the client_dev
device. This results in a better connected sysfs path for the network
device:

/sys/devices/pci0000:00/0000:00:03.0/0000:03:00.1/0000:03:00.1/ntb_netdev0/net/eth2

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/net/ntb_netdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 4daf3d0926a82..0250aa9ae2cbc 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -418,6 +418,8 @@ static int ntb_netdev_probe(struct device *client_dev)
 	if (!ndev)
 		return -ENOMEM;
 
+	SET_NETDEV_DEV(ndev, client_dev);
+
 	dev = netdev_priv(ndev);
 	dev->ndev = ndev;
 	dev->pdev = pdev;
-- 
GitLab


From 615d22a51c04856efe62af6e1d5b450aaf5cc2c0 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 6 Jul 2017 20:21:15 +0900
Subject: [PATCH 0791/1958] block: Fix __blkdev_issue_zeroout loop

The BIO issuing loop in __blkdev_issue_zeroout() is allocating BIOs
with a maximum number of bvec (pages) equal to

min(nr_sects, (sector_t)BIO_MAX_PAGES)

This works since the requested number of bvecs will always be limited
to the absolute maximum number supported (BIO_MAX_PAGES), but this is
ineficient as too many bvec entries may be requested due to the
different units being used in the min() operation (number of sectors vs
number of pages).
To fix this, introduce the helper __blkdev_sectors_to_bio_pages() to
correctly calculate the number of bvecs for zeroout BIOs as the issuing
loop progresses. The calculation is done using consistent units and
makes sure that the number of pages return is at least 1 (for cases
where the number of sectors is less that the number of sectors in
a page).

Also remove a trailing space after the bit shift in the internal loop
min() call.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index e8caecd71688e..3fe0aec905972 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -261,6 +261,19 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 	return 0;
 }
 
+/*
+ * Convert a number of 512B sectors to a number of pages.
+ * The result is limited to a number of pages that can fit into a BIO.
+ * Also make sure that the result is always at least 1 (page) for the cases
+ * where nr_sects is lower than the number of sectors in a page.
+ */
+static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
+{
+	sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
+
+	return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
+}
+
 /**
  * __blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
@@ -307,18 +320,18 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 
 	ret = 0;
 	while (nr_sects != 0) {
-		bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
-				gfp_mask);
+		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+			       gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		bio->bi_bdev   = bdev;
 		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 		while (nr_sects != 0) {
-			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
+			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
 			nr_sects -= bi_size >> 9;
 			sector += bi_size >> 9;
-			if (bi_size < (sz << 9))
+			if (bi_size < sz)
 				break;
 		}
 		cond_resched();
-- 
GitLab


From db2d153d7a3d64618659f19e645a3351d3a732f3 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Thu, 6 Jul 2017 18:00:07 +0300
Subject: [PATCH 0792/1958] null_blk: fix error flow for shared tags during
 module_init

In case we use shared tags feature, blk_mq_alloc_tag_set might fail
during module initialization. In that case, fail the load with a
suitable error code. Also move the tagset initialization process after
defining the amount of submission queues.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 71f4422eba815..85c24cace9732 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -844,9 +844,6 @@ static int __init null_init(void)
 		queue_mode = NULL_Q_MQ;
 	}
 
-	if (queue_mode == NULL_Q_MQ && shared_tags)
-		null_init_tag_set(&tag_set);
-
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.",
@@ -858,11 +855,19 @@ static int __init null_init(void)
 	else if (!submit_queues)
 		submit_queues = 1;
 
+	if (queue_mode == NULL_Q_MQ && shared_tags) {
+		ret = null_init_tag_set(&tag_set);
+		if (ret)
+			return ret;
+	}
+
 	mutex_init(&lock);
 
 	null_major = register_blkdev(0, "nullb");
-	if (null_major < 0)
-		return null_major;
+	if (null_major < 0) {
+		ret = null_major;
+		goto err_tagset;
+	}
 
 	if (use_lightnvm) {
 		ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
@@ -891,6 +896,9 @@ static int __init null_init(void)
 	kmem_cache_destroy(ppa_cache);
 err_ppa:
 	unregister_blkdev(null_major, "nullb");
+err_tagset:
+	if (queue_mode == NULL_Q_MQ && shared_tags)
+		blk_mq_free_tag_set(&tag_set);
 	return ret;
 }
 
-- 
GitLab


From 0e2ff11311d1d4c85dd9ad9ba4347775f628e94a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 6 Jul 2017 10:32:20 -0700
Subject: [PATCH 0793/1958] cciss: initialize struct scsi_req
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The changes in "block: Make most scsi_req_init() calls implicit" mean
that every driver that supports the generic scsi ioctls needs to
call scsi_req_init on newly allocated requests, but that commit didn't
add the call to the ccіss driver.  Fix that to avoid crashes when
udev issues SG_IO commands.

Fixes: ca18d6f7 ("block: Make most scsi_req_init() calls implicit")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/cciss.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 02a611993bb4b..678af946be30c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1944,6 +1944,13 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
 	return;
 }
 
+static void cciss_initialize_rq(struct request *rq)
+{
+	struct scsi_request *sreq = blk_mq_rq_to_pdu(rq);
+
+	scsi_req_init(sreq);
+}
+
 /*
  * cciss_add_disk sets up the block device queue for a logical drive
  */
@@ -1956,6 +1963,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 
 	disk->queue->cmd_size = sizeof(struct scsi_request);
 	disk->queue->request_fn = do_cciss_request;
+	disk->queue->initialize_rq_fn = cciss_initialize_rq;
 	disk->queue->queue_lock = &h->lock;
 	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
 	if (blk_init_allocated_queue(disk->queue) < 0)
-- 
GitLab


From e48585dec2bd0183436437995e30d20dfa92f6aa Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 5 Jun 2017 17:57:33 +0200
Subject: [PATCH 0794/1958] rtc: ds1307: factor out century bit handling

The driver has lots of places with chip-specific code what doesn't
necessarily facilitate maintenance.

Let's describe chip-specific differences in century bit handling
in struct chip_desc to improve this.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 73 +++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 46 deletions(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 9b4106a7e8e4e..922675281b7ae 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -136,6 +136,9 @@ struct chip_desc {
 	unsigned		alarm:1;
 	u16			nvram_offset;
 	u16			nvram_size;
+	u8			century_reg;
+	u8			century_enable_bit;
+	u8			century_bit;
 	u16			trickle_charger_reg;
 	u8			trickle_charger_setup;
 	u8			(*do_trickle_setup)(struct ds1307 *, uint32_t,
@@ -151,6 +154,8 @@ static struct chip_desc chips[last_ds_type] = {
 	},
 	[ds_1337] = {
 		.alarm		= 1,
+		.century_reg	= DS1307_REG_MONTH,
+		.century_bit	= DS1337_BIT_CENTURY,
 	},
 	[ds_1338] = {
 		.nvram_offset	= 8,
@@ -158,10 +163,15 @@ static struct chip_desc chips[last_ds_type] = {
 	},
 	[ds_1339] = {
 		.alarm		= 1,
+		.century_reg	= DS1307_REG_MONTH,
+		.century_bit	= DS1337_BIT_CENTURY,
 		.trickle_charger_reg = 0x10,
 		.do_trickle_setup = &do_trickle_setup_ds1339,
 	},
 	[ds_1340] = {
+		.century_reg	= DS1307_REG_HOUR,
+		.century_enable_bit = DS1340_BIT_CENTURY_EN,
+		.century_bit	= DS1340_BIT_CENTURY,
 		.trickle_charger_reg = 0x08,
 	},
 	[ds_1388] = {
@@ -169,6 +179,8 @@ static struct chip_desc chips[last_ds_type] = {
 	},
 	[ds_3231] = {
 		.alarm		= 1,
+		.century_reg	= DS1307_REG_MONTH,
+		.century_bit	= DS1337_BIT_CENTURY,
 	},
 	[rx_8130] = {
 		.alarm		= 1,
@@ -328,6 +340,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 {
 	struct ds1307	*ds1307 = dev_get_drvdata(dev);
 	int		tmp, ret;
+	const struct chip_desc *chip = &chips[ds1307->type];
 
 	/* read the RTC date and time registers all at once */
 	ret = regmap_bulk_read(ds1307->regmap, ds1307->offset, ds1307->regs, 7);
@@ -355,22 +368,9 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 	t->tm_mon = bcd2bin(tmp) - 1;
 	t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
 
-#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
-	switch (ds1307->type) {
-	case ds_1337:
-	case ds_1339:
-	case ds_3231:
-		if (ds1307->regs[DS1307_REG_MONTH] & DS1337_BIT_CENTURY)
-			t->tm_year += 100;
-		break;
-	case ds_1340:
-		if (ds1307->regs[DS1307_REG_HOUR] & DS1340_BIT_CENTURY)
-			t->tm_year += 100;
-		break;
-	default:
-		break;
-	}
-#endif
+	if (ds1307->regs[chip->century_reg] & chip->century_bit &&
+	    IS_ENABLED(CONFIG_RTC_DRV_DS1307_CENTURY))
+		t->tm_year += 100;
 
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -385,6 +385,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 {
 	struct ds1307	*ds1307 = dev_get_drvdata(dev);
+	const struct chip_desc *chip = &chips[ds1307->type];
 	int		result;
 	int		tmp;
 	u8		*buf = ds1307->regs;
@@ -395,24 +396,14 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 		t->tm_hour, t->tm_mday,
 		t->tm_mon, t->tm_year, t->tm_wday);
 
-#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
 	if (t->tm_year < 100)
 		return -EINVAL;
 
-	switch (ds1307->type) {
-	case ds_1337:
-	case ds_1339:
-	case ds_3231:
-	case ds_1340:
-		if (t->tm_year > 299)
-			return -EINVAL;
-	default:
-		if (t->tm_year > 199)
-			return -EINVAL;
-		break;
-	}
+#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
+	if (t->tm_year > (chip->century_bit ? 299 : 199))
+		return -EINVAL;
 #else
-	if (t->tm_year < 100 || t->tm_year > 199)
+	if (t->tm_year > 199)
 		return -EINVAL;
 #endif
 
@@ -427,19 +418,12 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 	tmp = t->tm_year - 100;
 	buf[DS1307_REG_YEAR] = bin2bcd(tmp);
 
-	switch (ds1307->type) {
-	case ds_1337:
-	case ds_1339:
-	case ds_3231:
-		if (t->tm_year > 199)
-			buf[DS1307_REG_MONTH] |= DS1337_BIT_CENTURY;
-		break;
-	case ds_1340:
-		buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN;
-		if (t->tm_year > 199)
-			buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY;
-		break;
-	case mcp794xx:
+	if (chip->century_enable_bit)
+		buf[chip->century_reg] |= chip->century_enable_bit;
+	if (t->tm_year > 199 && chip->century_bit)
+		buf[chip->century_reg] |= chip->century_bit;
+
+	if (ds1307->type == mcp794xx) {
 		/*
 		 * these bits were cleared when preparing the date/time
 		 * values and need to be set again before writing the
@@ -447,9 +431,6 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 		 */
 		buf[DS1307_REG_SECS] |= MCP794XX_BIT_ST;
 		buf[DS1307_REG_WDAY] |= MCP794XX_BIT_VBATEN;
-		break;
-	default:
-		break;
 	}
 
 	dev_dbg(dev, "%s: %7ph\n", "write", buf);
-- 
GitLab


From ac05fba39cc5a959e5d9ca24a059d884db0118a5 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 30 May 2017 09:53:30 +0200
Subject: [PATCH 0795/1958] rtc: gemini: Add optional clock handling

This makes the Gemini optionally take two clock references to
the PCLK and EXTCLK. As we are adding a clock framework to the
Gemini platform we need to make sure that we get the right
references.

Acked-by: Hans Ulli Kroll <ulli.kroll@googlemail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-gemini.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/rtc/rtc-gemini.c b/drivers/rtc/rtc-gemini.c
index 5279390bb42da..cf766e01832ad 100644
--- a/drivers/rtc/rtc-gemini.c
+++ b/drivers/rtc/rtc-gemini.c
@@ -26,6 +26,7 @@
 #include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/clk.h>
 
 #define DRV_NAME        "rtc-gemini"
 
@@ -38,6 +39,8 @@ struct gemini_rtc {
 	struct rtc_device	*rtc_dev;
 	void __iomem		*rtc_base;
 	int			rtc_irq;
+	struct clk		*pclk;
+	struct clk		*extclk;
 };
 
 enum gemini_rtc_offsets {
@@ -127,6 +130,27 @@ static int gemini_rtc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	platform_set_drvdata(pdev, rtc);
 
+	rtc->pclk = devm_clk_get(dev, "PCLK");
+	if (IS_ERR(rtc->pclk)) {
+		dev_err(dev, "could not get PCLK\n");
+	} else {
+		ret = clk_prepare_enable(rtc->pclk);
+		if (ret) {
+			dev_err(dev, "failed to enable PCLK\n");
+			return ret;
+		}
+	}
+	rtc->extclk = devm_clk_get(dev, "EXTCLK");
+	if (IS_ERR(rtc->extclk)) {
+		dev_err(dev, "could not get EXTCLK\n");
+	} else {
+		ret = clk_prepare_enable(rtc->extclk);
+		if (ret) {
+			dev_err(dev, "failed to enable EXTCLK\n");
+			return ret;
+		}
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!res)
 		return -ENODEV;
@@ -156,6 +180,10 @@ static int gemini_rtc_remove(struct platform_device *pdev)
 {
 	struct gemini_rtc *rtc = platform_get_drvdata(pdev);
 
+	if (!IS_ERR(rtc->extclk))
+		clk_disable_unprepare(rtc->extclk);
+	if (!IS_ERR(rtc->pclk))
+		clk_disable_unprepare(rtc->pclk);
 	rtc_device_unregister(rtc->rtc_dev);
 
 	return 0;
-- 
GitLab


From 7f1e988dffbd808ad17f22b6b88a9aa42ebe739a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 30 May 2017 09:53:31 +0200
Subject: [PATCH 0796/1958] rtc: gemini: Augment DT bindings for Faraday

The Gemini RTC is actually a standard IP block from Faraday
Technology called FTRTC010. Rename the bindings, add the
generic compatible string and add definitions for the two
available clocks.

Cc: devicetree@vger.kernel.org
Cc: Po-Yu Chuang <ratbert@faraday-tech.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 .../bindings/rtc/cortina,gemini.txt           | 14 ----------
 .../bindings/rtc/faraday,ftrtc010.txt         | 28 +++++++++++++++++++
 2 files changed, 28 insertions(+), 14 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/rtc/cortina,gemini.txt
 create mode 100644 Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt

diff --git a/Documentation/devicetree/bindings/rtc/cortina,gemini.txt b/Documentation/devicetree/bindings/rtc/cortina,gemini.txt
deleted file mode 100644
index 4ce4e794ddbbf..0000000000000
--- a/Documentation/devicetree/bindings/rtc/cortina,gemini.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-* Cortina Systems Gemini RTC
-
-Gemini SoC real-time clock.
-
-Required properties:
-- compatible : Should be "cortina,gemini-rtc"
-
-Examples:
-
-rtc@45000000 {
-	compatible = "cortina,gemini-rtc";
-	reg = <0x45000000 0x100>;
-	interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
-};
diff --git a/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt
new file mode 100644
index 0000000000000..e3938f5e0b6c2
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt
@@ -0,0 +1,28 @@
+* Faraday Technology FTRTC010 Real Time Clock
+
+This RTC appears in for example the Storlink Gemini family of
+SoCs.
+
+Required properties:
+- compatible : Should be one of:
+  "faraday,ftrtc010"
+  "cortina,gemini-rtc", "faraday,ftrtc010"
+
+Optional properties:
+- clocks: when present should contain clock references to the
+  PCLK and EXTCLK clocks. Faraday calls the later CLK1HZ and
+  says the clock should be 1 Hz, but implementers actually seem
+  to choose different clocks here, like Cortina who chose
+  32768 Hz (a typical low-power clock).
+- clock-names: should name the clocks "PCLK" and "EXTCLK"
+  respectively.
+
+Examples:
+
+rtc@45000000 {
+	compatible = "cortina,gemini-rtc";
+	reg = <0x45000000 0x100>;
+	interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&foo 0>, <&foo 1>;
+	clock-names = "PCLK", "EXTCLK";
+};
-- 
GitLab


From 1d61d2592c1f70d74112ed1ee8f182694dc43c48 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 30 May 2017 09:53:32 +0200
Subject: [PATCH 0797/1958] rtc: gemini/ftrtc010: rename driver and symbols

The Gemini RTC is actually a generic IP block from Faraday
Technology names FTRTC010. Rename the driver file and all
symbols to match this IP name.

The relationship can be clearly seen in the U-Boot driver
posted by Po-Yu Chuang for the Faraday A320 board:
https://lists.denx.de/pipermail/u-boot/2009-September/061326.html

Remove the dependency on ARCH_GEMINI but select the driver
for ARCH_GEMINI so we get a smooth transition. The IP block
is synthsized on different silicon and architectures.

Cc: Po-Yu Chuang <ratbert@faraday-tech.com>
Acked-by: Hans Ulli Kroll <ulli.kroll@googlemail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 MAINTAINERS                                  |  2 +-
 drivers/rtc/Kconfig                          | 10 +--
 drivers/rtc/Makefile                         |  2 +-
 drivers/rtc/{rtc-gemini.c => rtc-ftrtc010.c} | 91 ++++++++++----------
 4 files changed, 53 insertions(+), 52 deletions(-)
 rename drivers/rtc/{rtc-gemini.c => rtc-ftrtc010.c} (62%)

diff --git a/MAINTAINERS b/MAINTAINERS
index f7d568b8f133d..c6f0f412b32ef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1226,7 +1226,7 @@ L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://github.com/ulli-kroll/linux.git
 S:	Maintained
 F:	arch/arm/mach-gemini/
-F:	drivers/rtc/rtc-gemini.c
+F:	drivers/rtc/rtc-ftrtc010.c
 
 ARM/CSR SIRFPRIMA2 MACHINE SUPPORT
 M:	Barry Song <baohua@kernel.org>
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8e1aa67fe533f..0e5044cd84dda 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1495,16 +1495,16 @@ config RTC_DRV_ARMADA38X
 	  This driver can also be built as a module. If so, the module
 	  will be called armada38x-rtc.
 
-config RTC_DRV_GEMINI
-	tristate "Gemini SoC RTC"
-	depends on ARCH_GEMINI || COMPILE_TEST
+config RTC_DRV_FTRTC010
+	tristate "Faraday Technology FTRTC010 RTC"
 	depends on HAS_IOMEM
+	default ARCH_GEMINI
 	help
 	  If you say Y here you will get support for the
-	  RTC found on Gemini SoC's.
+	  Faraday Technolog FTRTC010 found on e.g. Gemini SoC's.
 
 	  This driver can also be built as a module. If so, the module
-	  will be called rtc-gemini.
+	  will be called rtc-ftrtc010.
 
 config RTC_DRV_PS3
 	tristate "PS3 RTC"
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index df89cac1f9ae7..4050fc8b9271b 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -68,7 +68,7 @@ obj-$(CONFIG_RTC_DRV_EFI)	+= rtc-efi.o
 obj-$(CONFIG_RTC_DRV_EM3027)	+= rtc-em3027.o
 obj-$(CONFIG_RTC_DRV_EP93XX)	+= rtc-ep93xx.o
 obj-$(CONFIG_RTC_DRV_FM3130)	+= rtc-fm3130.o
-obj-$(CONFIG_RTC_DRV_GEMINI)	+= rtc-gemini.o
+obj-$(CONFIG_RTC_DRV_FTRTC010)	+= rtc-ftrtc010.o
 obj-$(CONFIG_RTC_DRV_GENERIC)	+= rtc-generic.o
 obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
 obj-$(CONFIG_RTC_DRV_HYM8563)	+= rtc-hym8563.o
diff --git a/drivers/rtc/rtc-gemini.c b/drivers/rtc/rtc-ftrtc010.c
similarity index 62%
rename from drivers/rtc/rtc-gemini.c
rename to drivers/rtc/rtc-ftrtc010.c
index cf766e01832ad..af8d6beae20c6 100644
--- a/drivers/rtc/rtc-gemini.c
+++ b/drivers/rtc/rtc-ftrtc010.c
@@ -1,5 +1,5 @@
 /*
- *  Gemini OnChip RTC
+ *  Faraday Technology FTRTC010 driver
  *
  *  Copyright (C) 2009 Janos Laube <janos.dev@gmail.com>
  *
@@ -28,14 +28,14 @@
 #include <linux/module.h>
 #include <linux/clk.h>
 
-#define DRV_NAME        "rtc-gemini"
+#define DRV_NAME        "rtc-ftrtc010"
 
 MODULE_AUTHOR("Hans Ulli Kroll <ulli.kroll@googlemail.com>");
 MODULE_DESCRIPTION("RTC driver for Gemini SoC");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRV_NAME);
 
-struct gemini_rtc {
+struct ftrtc010_rtc {
 	struct rtc_device	*rtc_dev;
 	void __iomem		*rtc_base;
 	int			rtc_irq;
@@ -43,19 +43,19 @@ struct gemini_rtc {
 	struct clk		*extclk;
 };
 
-enum gemini_rtc_offsets {
-	GEMINI_RTC_SECOND	= 0x00,
-	GEMINI_RTC_MINUTE	= 0x04,
-	GEMINI_RTC_HOUR		= 0x08,
-	GEMINI_RTC_DAYS		= 0x0C,
-	GEMINI_RTC_ALARM_SECOND	= 0x10,
-	GEMINI_RTC_ALARM_MINUTE	= 0x14,
-	GEMINI_RTC_ALARM_HOUR	= 0x18,
-	GEMINI_RTC_RECORD	= 0x1C,
-	GEMINI_RTC_CR		= 0x20
+enum ftrtc010_rtc_offsets {
+	FTRTC010_RTC_SECOND		= 0x00,
+	FTRTC010_RTC_MINUTE		= 0x04,
+	FTRTC010_RTC_HOUR		= 0x08,
+	FTRTC010_RTC_DAYS		= 0x0C,
+	FTRTC010_RTC_ALARM_SECOND	= 0x10,
+	FTRTC010_RTC_ALARM_MINUTE	= 0x14,
+	FTRTC010_RTC_ALARM_HOUR		= 0x18,
+	FTRTC010_RTC_RECORD		= 0x1C,
+	FTRTC010_RTC_CR			= 0x20,
 };
 
-static irqreturn_t gemini_rtc_interrupt(int irq, void *dev)
+static irqreturn_t ftrtc010_rtc_interrupt(int irq, void *dev)
 {
 	return IRQ_HANDLED;
 }
@@ -69,18 +69,18 @@ static irqreturn_t gemini_rtc_interrupt(int irq, void *dev)
  * the same thing, without the rtc-lib.c calls.
  */
 
-static int gemini_rtc_read_time(struct device *dev, struct rtc_time *tm)
+static int ftrtc010_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-	struct gemini_rtc *rtc = dev_get_drvdata(dev);
+	struct ftrtc010_rtc *rtc = dev_get_drvdata(dev);
 
 	unsigned int  days, hour, min, sec;
 	unsigned long offset, time;
 
-	sec  = readl(rtc->rtc_base + GEMINI_RTC_SECOND);
-	min  = readl(rtc->rtc_base + GEMINI_RTC_MINUTE);
-	hour = readl(rtc->rtc_base + GEMINI_RTC_HOUR);
-	days = readl(rtc->rtc_base + GEMINI_RTC_DAYS);
-	offset = readl(rtc->rtc_base + GEMINI_RTC_RECORD);
+	sec  = readl(rtc->rtc_base + FTRTC010_RTC_SECOND);
+	min  = readl(rtc->rtc_base + FTRTC010_RTC_MINUTE);
+	hour = readl(rtc->rtc_base + FTRTC010_RTC_HOUR);
+	days = readl(rtc->rtc_base + FTRTC010_RTC_DAYS);
+	offset = readl(rtc->rtc_base + FTRTC010_RTC_RECORD);
 
 	time = offset + days * 86400 + hour * 3600 + min * 60 + sec;
 
@@ -89,9 +89,9 @@ static int gemini_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	return 0;
 }
 
-static int gemini_rtc_set_time(struct device *dev, struct rtc_time *tm)
+static int ftrtc010_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
-	struct gemini_rtc *rtc = dev_get_drvdata(dev);
+	struct ftrtc010_rtc *rtc = dev_get_drvdata(dev);
 	unsigned int sec, min, hour, day;
 	unsigned long offset, time;
 
@@ -100,27 +100,27 @@ static int gemini_rtc_set_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_tm_to_time(tm, &time);
 
-	sec = readl(rtc->rtc_base + GEMINI_RTC_SECOND);
-	min = readl(rtc->rtc_base + GEMINI_RTC_MINUTE);
-	hour = readl(rtc->rtc_base + GEMINI_RTC_HOUR);
-	day = readl(rtc->rtc_base + GEMINI_RTC_DAYS);
+	sec = readl(rtc->rtc_base + FTRTC010_RTC_SECOND);
+	min = readl(rtc->rtc_base + FTRTC010_RTC_MINUTE);
+	hour = readl(rtc->rtc_base + FTRTC010_RTC_HOUR);
+	day = readl(rtc->rtc_base + FTRTC010_RTC_DAYS);
 
 	offset = time - (day * 86400 + hour * 3600 + min * 60 + sec);
 
-	writel(offset, rtc->rtc_base + GEMINI_RTC_RECORD);
-	writel(0x01, rtc->rtc_base + GEMINI_RTC_CR);
+	writel(offset, rtc->rtc_base + FTRTC010_RTC_RECORD);
+	writel(0x01, rtc->rtc_base + FTRTC010_RTC_CR);
 
 	return 0;
 }
 
-static const struct rtc_class_ops gemini_rtc_ops = {
-	.read_time     = gemini_rtc_read_time,
-	.set_time      = gemini_rtc_set_time,
+static const struct rtc_class_ops ftrtc010_rtc_ops = {
+	.read_time     = ftrtc010_rtc_read_time,
+	.set_time      = ftrtc010_rtc_set_time,
 };
 
-static int gemini_rtc_probe(struct platform_device *pdev)
+static int ftrtc010_rtc_probe(struct platform_device *pdev)
 {
-	struct gemini_rtc *rtc;
+	struct ftrtc010_rtc *rtc;
 	struct device *dev = &pdev->dev;
 	struct resource *res;
 	int ret;
@@ -166,19 +166,19 @@ static int gemini_rtc_probe(struct platform_device *pdev)
 	if (!rtc->rtc_base)
 		return -ENOMEM;
 
-	ret = devm_request_irq(dev, rtc->rtc_irq, gemini_rtc_interrupt,
+	ret = devm_request_irq(dev, rtc->rtc_irq, ftrtc010_rtc_interrupt,
 			       IRQF_SHARED, pdev->name, dev);
 	if (unlikely(ret))
 		return ret;
 
 	rtc->rtc_dev = rtc_device_register(pdev->name, dev,
-					   &gemini_rtc_ops, THIS_MODULE);
+					   &ftrtc010_rtc_ops, THIS_MODULE);
 	return PTR_ERR_OR_ZERO(rtc->rtc_dev);
 }
 
-static int gemini_rtc_remove(struct platform_device *pdev)
+static int ftrtc010_rtc_remove(struct platform_device *pdev)
 {
-	struct gemini_rtc *rtc = platform_get_drvdata(pdev);
+	struct ftrtc010_rtc *rtc = platform_get_drvdata(pdev);
 
 	if (!IS_ERR(rtc->extclk))
 		clk_disable_unprepare(rtc->extclk);
@@ -189,19 +189,20 @@ static int gemini_rtc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id gemini_rtc_dt_match[] = {
+static const struct of_device_id ftrtc010_rtc_dt_match[] = {
 	{ .compatible = "cortina,gemini-rtc" },
+	{ .compatible = "faraday,ftrtc010" },
 	{ }
 };
-MODULE_DEVICE_TABLE(of, gemini_rtc_dt_match);
+MODULE_DEVICE_TABLE(of, ftrtc010_rtc_dt_match);
 
-static struct platform_driver gemini_rtc_driver = {
+static struct platform_driver ftrtc010_rtc_driver = {
 	.driver		= {
 		.name	= DRV_NAME,
-		.of_match_table = gemini_rtc_dt_match,
+		.of_match_table = ftrtc010_rtc_dt_match,
 	},
-	.probe		= gemini_rtc_probe,
-	.remove		= gemini_rtc_remove,
+	.probe		= ftrtc010_rtc_probe,
+	.remove		= ftrtc010_rtc_remove,
 };
 
-module_platform_driver_probe(gemini_rtc_driver, gemini_rtc_probe);
+module_platform_driver_probe(ftrtc010_rtc_driver, ftrtc010_rtc_probe);
-- 
GitLab


From d0a67c372df410b579197ea818596001fe20070d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 Jun 2017 11:29:00 +0300
Subject: [PATCH 0798/1958] rtc: rtc-nuc900: fix loop timeout test

We should change this post-op to a pre-op because we want the loop to
exit with "timeout" set to zero.

Fixes: 0a89b55364e0 ("nuc900/rtc: change the waiting for device ready implement")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-nuc900.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index b1b6b3041bfbc..4ed81117cf5f3 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -93,7 +93,7 @@ static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc)
 	__raw_writel(AERPOWERON, nuc900_rtc->rtc_reg + REG_RTC_AER);
 
 	while (!(__raw_readl(nuc900_rtc->rtc_reg + REG_RTC_AER) & AERRWENB)
-								&& timeout--)
+								&& --timeout)
 		mdelay(1);
 
 	if (!timeout)
-- 
GitLab


From c35c4195f919a077ce106ec84cfcecbb0b67db30 Mon Sep 17 00:00:00 2001
From: Kirill Esipov <yesipov@gmail.com>
Date: Wed, 28 Jun 2017 14:29:09 +0300
Subject: [PATCH 0799/1958] rtc: ds3232: add temperature support

DS3232/DS3234 has the temperature registers with a resolution of 0.25
degree celsius. This enables to get the value through hwmon.

	# cat /sys/class/hwmon/hwmon0/temp1_input
	37250

Signed-off-by: Kirill Esipov <yesipov@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/Kconfig      |   8 +++
 drivers/rtc/rtc-ds3232.c | 119 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0e5044cd84dda..d0e4b4a1c2a1e 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -802,6 +802,14 @@ config RTC_DRV_DS3232
 	  This driver can also be built as a module.  If so, the module
 	  will be called rtc-ds3232.
 
+config RTC_DRV_DS3232_HWMON
+	bool "HWMON support for Dallas/Maxim DS3232/DS3234"
+	depends on RTC_DRV_DS3232 && HWMON && !(RTC_DRV_DS3232=y && HWMON=m)
+	default y
+	help
+	  Say Y here if you want to expose temperature sensor data on
+	  rtc-ds3232
+
 config RTC_DRV_PCF2127
 	tristate "NXP PCF2127"
 	depends on RTC_I2C_AND_SPI
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index deff431a37c44..0550f7ba464f4 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -22,6 +22,7 @@
 #include <linux/bcd.h>
 #include <linux/slab.h>
 #include <linux/regmap.h>
+#include <linux/hwmon.h>
 
 #define DS3232_REG_SECONDS      0x00
 #define DS3232_REG_MINUTES      0x01
@@ -46,6 +47,8 @@
 #       define DS3232_REG_SR_A2F     0x02
 #       define DS3232_REG_SR_A1F     0x01
 
+#define DS3232_REG_TEMPERATURE	0x11
+
 struct ds3232 {
 	struct device *dev;
 	struct regmap *regmap;
@@ -275,6 +278,120 @@ static int ds3232_update_alarm(struct device *dev, unsigned int enabled)
 	return ret;
 }
 
+/*
+ * Temperature sensor support for ds3232/ds3234 devices.
+ * A user-initiated temperature conversion is not started by this function,
+ * so the temperature is updated once every 64 seconds.
+ */
+static int ds3232_hwmon_read_temp(struct device *dev, long int *mC)
+{
+	struct ds3232 *ds3232 = dev_get_drvdata(dev);
+	u8 temp_buf[2];
+	s16 temp;
+	int ret;
+
+	ret = regmap_bulk_read(ds3232->regmap, DS3232_REG_TEMPERATURE, temp_buf,
+			       sizeof(temp_buf));
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Temperature is represented as a 10-bit code with a resolution of
+	 * 0.25 degree celsius and encoded in two's complement format.
+	 */
+	temp = (temp_buf[0] << 8) | temp_buf[1];
+	temp >>= 6;
+	*mC = temp * 250;
+
+	return 0;
+}
+
+static umode_t ds3232_hwmon_is_visible(const void *data,
+				       enum hwmon_sensor_types type,
+				       u32 attr, int channel)
+{
+	if (type != hwmon_temp)
+		return 0;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		return 0444;
+	default:
+		return 0;
+	}
+}
+
+static int ds3232_hwmon_read(struct device *dev,
+			     enum hwmon_sensor_types type,
+			     u32 attr, int channel, long *temp)
+{
+	int err;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		err = ds3232_hwmon_read_temp(dev, temp);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static u32 ds3232_hwmon_chip_config[] = {
+	HWMON_C_REGISTER_TZ,
+	0
+};
+
+static const struct hwmon_channel_info ds3232_hwmon_chip = {
+	.type = hwmon_chip,
+	.config = ds3232_hwmon_chip_config,
+};
+
+static u32 ds3232_hwmon_temp_config[] = {
+	HWMON_T_INPUT,
+	0
+};
+
+static const struct hwmon_channel_info ds3232_hwmon_temp = {
+	.type = hwmon_temp,
+	.config = ds3232_hwmon_temp_config,
+};
+
+static const struct hwmon_channel_info *ds3232_hwmon_info[] = {
+	&ds3232_hwmon_chip,
+	&ds3232_hwmon_temp,
+	NULL
+};
+
+static const struct hwmon_ops ds3232_hwmon_hwmon_ops = {
+	.is_visible = ds3232_hwmon_is_visible,
+	.read = ds3232_hwmon_read,
+};
+
+static const struct hwmon_chip_info ds3232_hwmon_chip_info = {
+	.ops = &ds3232_hwmon_hwmon_ops,
+	.info = ds3232_hwmon_info,
+};
+
+static void ds3232_hwmon_register(struct device *dev, const char *name)
+{
+	struct ds3232 *ds3232 = dev_get_drvdata(dev);
+	struct device *hwmon_dev;
+
+	if (!IS_ENABLED(CONFIG_RTC_DRV_DS3232_HWMON))
+		return;
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, name, ds3232,
+							&ds3232_hwmon_chip_info,
+							NULL);
+	if (IS_ERR(hwmon_dev)) {
+		dev_err(dev, "unable to register hwmon device %ld\n",
+			PTR_ERR(hwmon_dev));
+	}
+}
+
 static int ds3232_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct ds3232 *ds3232 = dev_get_drvdata(dev);
@@ -366,6 +483,8 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq,
 	if (ds3232->irq > 0)
 		device_init_wakeup(dev, 1);
 
+	ds3232_hwmon_register(dev, name);
+
 	ds3232->rtc = devm_rtc_device_register(dev, name, &ds3232_rtc_ops,
 						THIS_MODULE);
 	if (IS_ERR(ds3232->rtc))
-- 
GitLab


From 300a7735becf55f7fd18f8cd3dc3b945a0cab712 Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Date: Thu, 8 Jun 2017 12:36:54 +0200
Subject: [PATCH 0800/1958] rtc: ds1307: add ds1308 variant

The ds1308 variant is very similar to the already supported ds1338
variant, it have more debug registers and a square wave clock output.

Signed-off-by: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 922675281b7ae..57ea25400ecd4 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -34,6 +34,7 @@
  */
 enum ds_type {
 	ds_1307,
+	ds_1308,
 	ds_1337,
 	ds_1338,
 	ds_1339,
@@ -152,6 +153,10 @@ static struct chip_desc chips[last_ds_type] = {
 		.nvram_offset	= 8,
 		.nvram_size	= 56,
 	},
+	[ds_1308] = {
+		.nvram_offset	= 8,
+		.nvram_size	= 56,
+	},
 	[ds_1337] = {
 		.alarm		= 1,
 		.century_reg	= DS1307_REG_MONTH,
@@ -198,6 +203,7 @@ static struct chip_desc chips[last_ds_type] = {
 
 static const struct i2c_device_id ds1307_id[] = {
 	{ "ds1307", ds_1307 },
+	{ "ds1308", ds_1308 },
 	{ "ds1337", ds_1337 },
 	{ "ds1338", ds_1338 },
 	{ "ds1339", ds_1339 },
@@ -222,6 +228,10 @@ static const struct of_device_id ds1307_of_match[] = {
 		.compatible = "dallas,ds1307",
 		.data = (void *)ds_1307
 	},
+	{
+		.compatible = "dallas,ds1308",
+		.data = (void *)ds_1308
+	},
 	{
 		.compatible = "dallas,ds1337",
 		.data = (void *)ds_1337
@@ -282,6 +292,7 @@ MODULE_DEVICE_TABLE(of, ds1307_of_match);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id ds1307_acpi_ids[] = {
 	{ .id = "DS1307", .driver_data = ds_1307 },
+	{ .id = "DS1308", .driver_data = ds_1308 },
 	{ .id = "DS1337", .driver_data = ds_1337 },
 	{ .id = "DS1338", .driver_data = ds_1338 },
 	{ .id = "DS1339", .driver_data = ds_1339 },
@@ -1558,6 +1569,7 @@ static int ds1307_probe(struct i2c_client *client,
 			goto read_rtc;
 		}
 		break;
+	case ds_1308:
 	case ds_1338:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1307_BIT_CH)
-- 
GitLab


From d2be279bcd8055ddfd92cc5f5d305eb3651e059b Mon Sep 17 00:00:00 2001
From: Amelie Delaunay <amelie.delaunay@st.com>
Date: Thu, 6 Jul 2017 10:47:44 +0200
Subject: [PATCH 0801/1958] dt-bindings: rtc: stm32: add support for STM32H7

This patch documents support for STM32H7 Real Time Clock.
It introduces a new compatible and rework clock definitions.
On STM32H7 we have a 'pclk' clock for register access, in addition to
the 'rtc_ck' clock.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Amelie Delaunay <amelie.delaunay@st.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 .../devicetree/bindings/rtc/st,stm32-rtc.txt  | 32 ++++++++++++++++---
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
index e2837b951237b..0a4c371a9b7a8 100644
--- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
+++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
@@ -1,17 +1,25 @@
 STM32 Real Time Clock
 
 Required properties:
-- compatible: "st,stm32-rtc".
+- compatible: can be either "st,stm32-rtc" or "st,stm32h7-rtc", depending on
+  the device is compatible with stm32(f4/f7) or stm32h7.
 - reg: address range of rtc register set.
-- clocks: reference to the clock entry ck_rtc.
+- clocks: can use up to two clocks, depending on part used:
+  - "rtc_ck": RTC clock source.
+    It is required on stm32(f4/f7) and stm32h7.
+  - "pclk": RTC APB interface clock.
+    It is not present on stm32(f4/f7).
+    It is required on stm32h7.
+- clock-names: must be "rtc_ck" and "pclk".
+    It is required only on stm32h7.
 - interrupt-parent: phandle for the interrupt controller.
 - interrupts: rtc alarm interrupt.
 - st,syscfg: phandle for pwrcfg, mandatory to disable/enable backup domain
   (RTC registers) write protection.
 
-Optional properties (to override default ck_rtc parent clock):
-- assigned-clocks: reference to the ck_rtc clock entry.
-- assigned-clock-parents: phandle of the new parent clock of ck_rtc.
+Optional properties (to override default rtc_ck parent clock):
+- assigned-clocks: reference to the rtc_ck clock entry.
+- assigned-clock-parents: phandle of the new parent clock of rtc_ck.
 
 Example:
 
@@ -25,3 +33,17 @@ Example:
 		interrupts = <17 1>;
 		st,syscfg = <&pwrcfg>;
 	};
+
+	rtc: rtc@58004000 {
+		compatible = "st,stm32h7-rtc";
+		reg = <0x58004000 0x400>;
+		clocks = <&rcc RTCAPB_CK>, <&rcc RTC_CK>;
+		clock-names = "pclk", "rtc_ck";
+		assigned-clocks = <&rcc RTC_CK>;
+		assigned-clock-parents = <&rcc LSE_CK>;
+		interrupt-parent = <&exti>;
+		interrupts = <17 1>;
+		interrupt-names = "alarm";
+		st,syscfg = <&pwrcfg>;
+		status = "disabled";
+	};
-- 
GitLab


From 9a6757eadc14f01385fd41fe3906dc22dcdb919e Mon Sep 17 00:00:00 2001
From: Amelie Delaunay <amelie.delaunay@st.com>
Date: Thu, 6 Jul 2017 10:47:45 +0200
Subject: [PATCH 0802/1958] rtc: stm32: add STM32H7 RTC support

This patch adds support for STM32H7 RTC. On STM32H7, the RTC bus interface
clock (APB clock) needs to be enabled.

Signed-off-by: Amelie Delaunay <amelie.delaunay@st.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-stm32.c | 82 ++++++++++++++++++++++++++++++++---------
 1 file changed, 65 insertions(+), 17 deletions(-)

diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c
index bd57eb1029e11..3a5c3d7d0c77a 100644
--- a/drivers/rtc/rtc-stm32.c
+++ b/drivers/rtc/rtc-stm32.c
@@ -94,11 +94,17 @@
 /* STM32_PWR_CR bit field */
 #define PWR_CR_DBP			BIT(8)
 
+struct stm32_rtc_data {
+	bool has_pclk;
+};
+
 struct stm32_rtc {
 	struct rtc_device *rtc_dev;
 	void __iomem *base;
 	struct regmap *dbp;
-	struct clk *ck_rtc;
+	struct stm32_rtc_data *data;
+	struct clk *pclk;
+	struct clk *rtc_ck;
 	int irq_alarm;
 };
 
@@ -122,9 +128,9 @@ static int stm32_rtc_enter_init_mode(struct stm32_rtc *rtc)
 		writel_relaxed(isr, rtc->base + STM32_RTC_ISR);
 
 		/*
-		 * It takes around 2 ck_rtc clock cycles to enter in
+		 * It takes around 2 rtc_ck clock cycles to enter in
 		 * initialization phase mode (and have INITF flag set). As
-		 * slowest ck_rtc frequency may be 32kHz and highest should be
+		 * slowest rtc_ck frequency may be 32kHz and highest should be
 		 * 1MHz, we poll every 10 us with a timeout of 100ms.
 		 */
 		return readl_relaxed_poll_timeout_atomic(
@@ -153,7 +159,7 @@ static int stm32_rtc_wait_sync(struct stm32_rtc *rtc)
 
 	/*
 	 * Wait for RSF to be set to ensure the calendar registers are
-	 * synchronised, it takes around 2 ck_rtc clock cycles
+	 * synchronised, it takes around 2 rtc_ck clock cycles
 	 */
 	return readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
 						 isr,
@@ -456,7 +462,7 @@ static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	/*
 	 * Poll Alarm write flag to be sure that Alarm update is allowed: it
-	 * takes around 2 ck_rtc clock cycles
+	 * takes around 2 rtc_ck clock cycles
 	 */
 	ret = readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
 						isr,
@@ -490,8 +496,17 @@ static const struct rtc_class_ops stm32_rtc_ops = {
 	.alarm_irq_enable = stm32_rtc_alarm_irq_enable,
 };
 
+static const struct stm32_rtc_data stm32_rtc_data = {
+	.has_pclk = false,
+};
+
+static const struct stm32_rtc_data stm32h7_rtc_data = {
+	.has_pclk = true,
+};
+
 static const struct of_device_id stm32_rtc_of_match[] = {
-	{ .compatible = "st,stm32-rtc" },
+	{ .compatible = "st,stm32-rtc", .data = &stm32_rtc_data },
+	{ .compatible = "st,stm32h7-rtc", .data = &stm32h7_rtc_data },
 	{}
 };
 MODULE_DEVICE_TABLE(of, stm32_rtc_of_match);
@@ -503,7 +518,7 @@ static int stm32_rtc_init(struct platform_device *pdev,
 	unsigned int rate;
 	int ret = 0;
 
-	rate = clk_get_rate(rtc->ck_rtc);
+	rate = clk_get_rate(rtc->rtc_ck);
 
 	/* Find prediv_a and prediv_s to obtain the 1Hz calendar clock */
 	pred_a_max = STM32_RTC_PRER_PRED_A >> STM32_RTC_PRER_PRED_A_SHIFT;
@@ -524,7 +539,7 @@ static int stm32_rtc_init(struct platform_device *pdev,
 		pred_a = pred_a_max;
 		pred_s = (rate / (pred_a + 1)) - 1;
 
-		dev_warn(&pdev->dev, "ck_rtc is %s\n",
+		dev_warn(&pdev->dev, "rtc_ck is %s\n",
 			 (rate < ((pred_a + 1) * (pred_s + 1))) ?
 			 "fast" : "slow");
 	}
@@ -561,6 +576,7 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 {
 	struct stm32_rtc *rtc;
 	struct resource *res;
+	const struct of_device_id *match;
 	int ret;
 
 	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
@@ -579,15 +595,34 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 		return PTR_ERR(rtc->dbp);
 	}
 
-	rtc->ck_rtc = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(rtc->ck_rtc)) {
-		dev_err(&pdev->dev, "no ck_rtc clock");
-		return PTR_ERR(rtc->ck_rtc);
+	match = of_match_device(stm32_rtc_of_match, &pdev->dev);
+	rtc->data = (struct stm32_rtc_data *)match->data;
+
+	if (!rtc->data->has_pclk) {
+		rtc->pclk = NULL;
+		rtc->rtc_ck = devm_clk_get(&pdev->dev, NULL);
+	} else {
+		rtc->pclk = devm_clk_get(&pdev->dev, "pclk");
+		if (IS_ERR(rtc->pclk)) {
+			dev_err(&pdev->dev, "no pclk clock");
+			return PTR_ERR(rtc->pclk);
+		}
+		rtc->rtc_ck = devm_clk_get(&pdev->dev, "rtc_ck");
+	}
+	if (IS_ERR(rtc->rtc_ck)) {
+		dev_err(&pdev->dev, "no rtc_ck clock");
+		return PTR_ERR(rtc->rtc_ck);
+	}
+
+	if (rtc->data->has_pclk) {
+		ret = clk_prepare_enable(rtc->pclk);
+		if (ret)
+			return ret;
 	}
 
-	ret = clk_prepare_enable(rtc->ck_rtc);
+	ret = clk_prepare_enable(rtc->rtc_ck);
 	if (ret)
-		return ret;
+		goto err;
 
 	regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, PWR_CR_DBP);
 
@@ -595,7 +630,7 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 	 * After a system reset, RTC_ISR.INITS flag can be read to check if
 	 * the calendar has been initalized or not. INITS flag is reset by a
 	 * power-on reset (no vbat, no power-supply). It is not reset if
-	 * ck_rtc parent clock has changed (so RTC prescalers need to be
+	 * rtc_ck parent clock has changed (so RTC prescalers need to be
 	 * changed). That's why we cannot rely on this flag to know if RTC
 	 * init has to be done.
 	 */
@@ -646,7 +681,9 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 
 	return 0;
 err:
-	clk_disable_unprepare(rtc->ck_rtc);
+	if (rtc->data->has_pclk)
+		clk_disable_unprepare(rtc->pclk);
+	clk_disable_unprepare(rtc->rtc_ck);
 
 	regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, 0);
 
@@ -667,7 +704,9 @@ static int stm32_rtc_remove(struct platform_device *pdev)
 	writel_relaxed(cr, rtc->base + STM32_RTC_CR);
 	stm32_rtc_wpr_lock(rtc);
 
-	clk_disable_unprepare(rtc->ck_rtc);
+	clk_disable_unprepare(rtc->rtc_ck);
+	if (rtc->data->has_pclk)
+		clk_disable_unprepare(rtc->pclk);
 
 	/* Enable backup domain write protection */
 	regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, 0);
@@ -682,6 +721,9 @@ static int stm32_rtc_suspend(struct device *dev)
 {
 	struct stm32_rtc *rtc = dev_get_drvdata(dev);
 
+	if (rtc->data->has_pclk)
+		clk_disable_unprepare(rtc->pclk);
+
 	if (device_may_wakeup(dev))
 		return enable_irq_wake(rtc->irq_alarm);
 
@@ -693,6 +735,12 @@ static int stm32_rtc_resume(struct device *dev)
 	struct stm32_rtc *rtc = dev_get_drvdata(dev);
 	int ret = 0;
 
+	if (rtc->data->has_pclk) {
+		ret = clk_prepare_enable(rtc->pclk);
+		if (ret)
+			return ret;
+	}
+
 	ret = stm32_rtc_wait_sync(rtc);
 	if (ret < 0)
 		return ret;
-- 
GitLab


From f59627810e18d4435051d982b5d05cab18c6e653 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 22 Jun 2017 20:03:51 +0100
Subject: [PATCH 0803/1958] Btrfs: incremental send, fix invalid path for link
 commands

In some scenarios an incremental send stream can contain link commands
with an invalid target path. Such scenarios happen after moving some
directory inode A, renaming a regular file inode B into the old name of
inode A and finally creating a new hard link for inode B at directory
inode A.

Consider the following example scenario where this issue happens.

Parent snapshot:

  .                                                      (ino 256)
  |
  |--- dir1/                                             (ino 257)
  |      |--- dir2/                                      (ino 258)
  |             |--- dir3/                               (ino 259)
  |                   |--- file1                         (ino 261)
  |                   |--- dir4/                         (ino 262)
  |
  |--- dir5/                                             (ino 260)

Send snapshot:

  .                                                      (ino 256)
  |
  |--- dir1/                                             (ino 257)
         |--- dir2/                                      (ino 258)
         |      |--- dir3/                               (ino 259)
         |            |--- dir4                          (ino 261)
         |
         |--- dir6/                                      (ino 263)
                |--- dir44/                              (ino 262)
                       |--- file11                       (ino 261)
                       |--- dir55/                       (ino 260)

When attempting to apply the corresponding incremental send stream, a
link command contains an invalid target path which makes the receiver
fail. The following is the verbose output of the btrfs receive command:

  receiving snapshot mysnap2 uuid=90076fe6-5ba6-e64a-9321-9279670ed16b (...)
  utimes
  utimes dir1
  utimes dir1/dir2/dir3
  utimes
  rename dir1/dir2/dir3/dir4 -> o262-7-0
  link dir1/dir2/dir3/dir4 -> dir1/dir2/dir3/file1
  link dir1/dir2/dir3/dir4/file11 -> dir1/dir2/dir3/file1
  ERROR: link dir1/dir2/dir3/dir4/file11 -> dir1/dir2/dir3/file1 failed: Not a directory

The following steps happen during the computation of the incremental send
stream the lead to this issue:

1) When processing inode 261, we orphanize inode 262 due to a name/location
   collision with one of the new hard links for inode 261 (created in the
   second step below).

2) We create one of the 2 new hard links for inode 261, the one whose
   location is at "dir1/dir2/dir3/dir4".

3) We then attempt to create the other new hard link for inode 261, which
   has inode 262 as its parent directory. Because the path for this new
   hard link was computed before we started processing the new references
   (hard links), it reflects the old name/location of inode 262, that is,
   it does not account for the orphanization step that happened when
   we started processing the new references for inode 261, whence it is
   no longer valid, causing the receiver to fail.

So fix this issue by recomputing the full path of new references if we
ended up orphanizing other inodes which are directories.

A test case for fstests follows soon.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/send.c | 81 +++++++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e937c10b82875..7eaccfb72b479 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1856,7 +1856,7 @@ static int is_first_ref(struct btrfs_root *root,
  */
 static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 			      const char *name, int name_len,
-			      u64 *who_ino, u64 *who_gen)
+			      u64 *who_ino, u64 *who_gen, u64 *who_mode)
 {
 	int ret = 0;
 	u64 gen;
@@ -1905,7 +1905,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 	if (other_inode > sctx->send_progress ||
 	    is_waiting_for_move(sctx, other_inode)) {
 		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
-				who_gen, NULL, NULL, NULL, NULL);
+				who_gen, who_mode, NULL, NULL, NULL);
 		if (ret < 0)
 			goto out;
 
@@ -3683,6 +3683,36 @@ static int wait_for_parent_move(struct send_ctx *sctx,
 	return ret;
 }
 
+static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
+{
+	int ret;
+	struct fs_path *new_path;
+
+	/*
+	 * Our reference's name member points to its full_path member string, so
+	 * we use here a new path.
+	 */
+	new_path = fs_path_alloc();
+	if (!new_path)
+		return -ENOMEM;
+
+	ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
+	if (ret < 0) {
+		fs_path_free(new_path);
+		return ret;
+	}
+	ret = fs_path_add(new_path, ref->name, ref->name_len);
+	if (ret < 0) {
+		fs_path_free(new_path);
+		return ret;
+	}
+
+	fs_path_free(ref->full_path);
+	set_ref_path(ref, new_path);
+
+	return 0;
+}
+
 /*
  * This does all the move/link/unlink/rmdir magic.
  */
@@ -3696,10 +3726,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 	struct fs_path *valid_path = NULL;
 	u64 ow_inode = 0;
 	u64 ow_gen;
+	u64 ow_mode;
 	int did_overwrite = 0;
 	int is_orphan = 0;
 	u64 last_dir_ino_rm = 0;
 	bool can_rename = true;
+	bool orphanized_dir = false;
 	bool orphanized_ancestor = false;
 
 	btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
@@ -3798,7 +3830,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 		 */
 		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
 				cur->name, cur->name_len,
-				&ow_inode, &ow_gen);
+				&ow_inode, &ow_gen, &ow_mode);
 		if (ret < 0)
 			goto out;
 		if (ret) {
@@ -3815,6 +3847,8 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 						cur->full_path);
 				if (ret < 0)
 					goto out;
+				if (S_ISDIR(ow_mode))
+					orphanized_dir = true;
 
 				/*
 				 * If ow_inode has its rename operation delayed
@@ -3920,6 +3954,18 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 				if (ret < 0)
 					goto out;
 			} else {
+				/*
+				 * We might have previously orphanized an inode
+				 * which is an ancestor of our current inode,
+				 * so our reference's full path, which was
+				 * computed before any such orphanizations, must
+				 * be updated.
+				 */
+				if (orphanized_dir) {
+					ret = update_ref_path(sctx, cur);
+					if (ret < 0)
+						goto out;
+				}
 				ret = send_link(sctx, cur->full_path,
 						valid_path);
 				if (ret < 0)
@@ -3990,34 +4036,9 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 				 * ancestor inode.
 				 */
 				if (orphanized_ancestor) {
-					struct fs_path *new_path;
-
-					/*
-					 * Our reference's name member points to
-					 * its full_path member string, so we
-					 * use here a new path.
-					 */
-					new_path = fs_path_alloc();
-					if (!new_path) {
-						ret = -ENOMEM;
-						goto out;
-					}
-					ret = get_cur_path(sctx, cur->dir,
-							   cur->dir_gen,
-							   new_path);
-					if (ret < 0) {
-						fs_path_free(new_path);
-						goto out;
-					}
-					ret = fs_path_add(new_path,
-							  cur->name,
-							  cur->name_len);
-					if (ret < 0) {
-						fs_path_free(new_path);
+					ret = update_ref_path(sctx, cur);
+					if (ret < 0)
 						goto out;
-					}
-					fs_path_free(cur->full_path);
-					set_ref_path(cur, new_path);
 				}
 				ret = send_unlink(sctx, cur->full_path);
 				if (ret < 0)
-- 
GitLab


From 24e52b11e0ca788513b945a87b57cc0522a92933 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 6 Jul 2017 15:31:46 +0100
Subject: [PATCH 0804/1958] Btrfs: incremental send, fix invalid memory access

When doing an incremental send, while processing an extent that changed
between the parent and send snapshots and that extent was an inline extent
in the parent snapshot, it's possible to access a memory region beyond
the end of leaf if the inline extent is very small and it is the first
item in a leaf.

An example scenario is described below.

The send snapshot has the following leaf:

 leaf 33865728 items 33 free space 773 generation 46 owner 5
 fs uuid ab7090d8-dafd-4fb9-9246-723b6d2e2fb7
 chunk uuid 2d16478c-c704-4ab9-b574-68bff2281b1f
        (...)
        item 14 key (335 EXTENT_DATA 0) itemoff 3052 itemsize 53
                generation 36 type 1 (regular)
                extent data disk byte 12791808 nr 4096
                extent data offset 0 nr 4096 ram 4096
                extent compression 0 (none)
        item 15 key (335 EXTENT_DATA 8192) itemoff 2999 itemsize 53
                generation 36 type 1 (regular)
                extent data disk byte 138170368 nr 225280
                extent data offset 0 nr 225280 ram 225280
                extent compression 0 (none)
        (...)

And the parent snapshot has the following leaf:

 leaf 31272960 items 17 free space 17 generation 31 owner 5
 fs uuid ab7090d8-dafd-4fb9-9246-723b6d2e2fb7
 chunk uuid 2d16478c-c704-4ab9-b574-68bff2281b1f
        item 0 key (335 EXTENT_DATA 0) itemoff 3951 itemsize 44
                generation 31 type 0 (inline)
                inline extent data size 23 ram_bytes 613 compression 1 (zlib)
        (...)

When computing the send stream, it is detected that the extent of inode
335, at file offset 0, and at fs/btrfs/send.c:is_extent_unchanged() we
grab the leaf from the parent snapshot and access the inline extent item.
However, before jumping to the 'out' label, we access the 'offset' and
'disk_bytenr' fields of the extent item, which should not be done for
inline extents since the inlined data starts at the offset of the
'disk_bytenr' field and can be very small. For example accessing the
'offset' field of the file extent item results in the following trace:

[  599.705368] general protection fault: 0000 [#1] PREEMPT SMP
[  599.706296] Modules linked in: btrfs psmouse i2c_piix4 ppdev acpi_cpufreq serio_raw parport_pc i2c_core evdev tpm_tis tpm_tis_core sg pcspkr parport tpm button su$
[  599.709340] CPU: 7 PID: 5283 Comm: btrfs Not tainted 4.10.0-rc8-btrfs-next-46+ #1
[  599.709340] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
[  599.709340] task: ffff88023eedd040 task.stack: ffffc90006658000
[  599.709340] RIP: 0010:read_extent_buffer+0xdb/0xf4 [btrfs]
[  599.709340] RSP: 0018:ffffc9000665ba00 EFLAGS: 00010286
[  599.709340] RAX: db73880000000000 RBX: 0000000000000000 RCX: 0000000000000001
[  599.709340] RDX: ffffc9000665ba60 RSI: db73880000000000 RDI: ffffc9000665ba5f
[  599.709340] RBP: ffffc9000665ba30 R08: 0000000000000001 R09: ffff88020dc5e098
[  599.709340] R10: 0000000000001000 R11: 0000160000000000 R12: 6db6db6db6db6db7
[  599.709340] R13: ffff880000000000 R14: 0000000000000000 R15: ffff88020dc5e088
[  599.709340] FS:  00007f519555a8c0(0000) GS:ffff88023f3c0000(0000) knlGS:0000000000000000
[  599.709340] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  599.709340] CR2: 00007f1411afd000 CR3: 0000000235f8e000 CR4: 00000000000006e0
[  599.709340] Call Trace:
[  599.709340]  btrfs_get_token_64+0x93/0xce [btrfs]
[  599.709340]  ? printk+0x48/0x50
[  599.709340]  btrfs_get_64+0xb/0xd [btrfs]
[  599.709340]  process_extent+0x3a1/0x1106 [btrfs]
[  599.709340]  ? btree_read_extent_buffer_pages+0x5/0xef [btrfs]
[  599.709340]  changed_cb+0xb03/0xb3d [btrfs]
[  599.709340]  ? btrfs_get_token_32+0x7a/0xcc [btrfs]
[  599.709340]  btrfs_compare_trees+0x432/0x53d [btrfs]
[  599.709340]  ? process_extent+0x1106/0x1106 [btrfs]
[  599.709340]  btrfs_ioctl_send+0x960/0xe26 [btrfs]
[  599.709340]  btrfs_ioctl+0x181b/0x1fed [btrfs]
[  599.709340]  ? trace_hardirqs_on_caller+0x150/0x1ac
[  599.709340]  vfs_ioctl+0x21/0x38
[  599.709340]  ? vfs_ioctl+0x21/0x38
[  599.709340]  do_vfs_ioctl+0x611/0x645
[  599.709340]  ? rcu_read_unlock+0x5b/0x5d
[  599.709340]  ? __fget+0x6d/0x79
[  599.709340]  SyS_ioctl+0x57/0x7b
[  599.709340]  entry_SYSCALL_64_fastpath+0x18/0xad
[  599.709340] RIP: 0033:0x7f51945eec47
[  599.709340] RSP: 002b:00007ffc21c13e98 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
[  599.709340] RAX: ffffffffffffffda RBX: ffffffff81096459 RCX: 00007f51945eec47
[  599.709340] RDX: 00007ffc21c13f20 RSI: 0000000040489426 RDI: 0000000000000004
[  599.709340] RBP: ffffc9000665bf98 R08: 00007f519450d700 R09: 00007f519450d700
[  599.709340] R10: 00007f519450d9d0 R11: 0000000000000202 R12: 0000000000000046
[  599.709340] R13: ffffc9000665bf78 R14: 0000000000000000 R15: 00007f5195574040
[  599.709340]  ? trace_hardirqs_off_caller+0x43/0xb1
[  599.709340] Code: 29 f0 49 39 d8 4c 0f 47 c3 49 03 81 58 01 00 00 44 89 c1 4c 01 c2 4c 29 c3 48 c1 f8 03 49 0f af c4 48 c1 e0 0c 4c 01 e8 48 01 c6 <f3> a4 31 f6 4$
[  599.709340] RIP: read_extent_buffer+0xdb/0xf4 [btrfs] RSP: ffffc9000665ba00
[  599.762057] ---[ end trace fe00d7af61b9f49e ]---

This is because the 'offset' field starts at an offset of 37 bytes
(offsetof(struct btrfs_file_extent_item, offset)), has a length of 8
bytes and therefore attemping to read it causes a 1 byte access beyond
the end of the leaf, as the first item's content in a leaf is located
at the tail of the leaf, the item size is 44 bytes and the offset of
that field plus its length (37 + 8 = 45) goes beyond the item's size
by 1 byte.

So fix this by accessing the 'offset' and 'disk_bytenr' fields after
jumping to the 'out' label if we are processing an inline extent. We
move the reading operation of the 'disk_bytenr' field too because we
have the same problem as for the 'offset' field explained above when
the inline data is less then 8 bytes. The access to the 'generation'
field is also moved but just for the sake of grouping access to all
the fields.

Fixes: e1cbfd7bf6da ("Btrfs: send, fix file hole not being preserved due to inline extent")
Cc: <stable@vger.kernel.org>  # v4.12+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/send.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7eaccfb72b479..b082210df9c8b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5270,15 +5270,12 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 			goto out;
 		}
 
-		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
 		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
 			right_len = btrfs_file_extent_inline_len(eb, slot, ei);
 			right_len = PAGE_ALIGN(right_len);
 		} else {
 			right_len = btrfs_file_extent_num_bytes(eb, ei);
 		}
-		right_offset = btrfs_file_extent_offset(eb, ei);
-		right_gen = btrfs_file_extent_generation(eb, ei);
 
 		/*
 		 * Are we at extent 8? If yes, we know the extent is changed.
@@ -5303,6 +5300,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 			goto out;
 		}
 
+		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
+		right_offset = btrfs_file_extent_offset(eb, ei);
+		right_gen = btrfs_file_extent_generation(eb, ei);
+
 		left_offset_fixed = left_offset;
 		if (key.offset < ekey->offset) {
 			/* Fix the right offset for 2a and 7. */
-- 
GitLab


From 4edfc5406ac9d3b59f770dd7436fdcfd5d593216 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 1 Jul 2017 12:15:22 +0200
Subject: [PATCH 0805/1958] platform/x86: silead_dmi: Add touchscreen info for
 I.T.Works TW891 2-in-1

Add touchscreen info for I.T.Works TW891 2-in-1.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/silead_dmi.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/platform/x86/silead_dmi.c b/drivers/platform/x86/silead_dmi.c
index 3cd3bdfe51df3..f9e840aa5592d 100644
--- a/drivers/platform/x86/silead_dmi.c
+++ b/drivers/platform/x86/silead_dmi.c
@@ -122,6 +122,20 @@ static const struct silead_ts_dmi_data pov_mobii_wintab_p800w_data = {
 	.properties	= pov_mobii_wintab_p800w_props,
 };
 
+static const struct property_entry itworks_tw891_props[] = {
+	PROPERTY_ENTRY_U32("touchscreen-size-x", 1600),
+	PROPERTY_ENTRY_U32("touchscreen-size-y", 890),
+	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+	PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-itworks-tw891.fw"),
+	{ }
+};
+
+static const struct silead_ts_dmi_data itworks_tw891_data = {
+	.acpi_name	= "MSSL1680:00",
+	.properties	= itworks_tw891_props,
+};
+
 static const struct dmi_system_id silead_ts_dmi_table[] = {
 	{
 		/* CUBE iwork8 Air */
@@ -187,6 +201,14 @@ static const struct dmi_system_id silead_ts_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_DATE, "08/22/2014"),
 		},
 	},
+	{
+		/* I.T.Works TW891 */
+		.driver_data = (void *)&itworks_tw891_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TW891"),
+		},
+	},
 	{ },
 };
 
-- 
GitLab


From 12f66e4a0f7b5624901ba4301210e026c9ddf78d Mon Sep 17 00:00:00 2001
From: Jiang Yi <jiangyilism@gmail.com>
Date: Fri, 2 Jun 2017 11:45:09 +0800
Subject: [PATCH 0806/1958] target: reject COMPARE_AND_WRITE if emulate_caw is
 not set

In struct se_dev_attrib, there is a field emulate_caw exposed
as a /sys/kernel/config/target/core/$HBA/$DEV/attrib/.

If this field is set zero, it means the corresponding struct se_device
does not support the scsi cmd COMPARE_AND_WRITE

In function sbc_parse_cdb(), go ahead and reject scsi COMPARE_AND_WRITE
if emulate_caw is not set, because it has been explicitly disabled
from user-space.

(Make pr_err ratelimited - nab)

Signed-off-by: Jiang Yi <jiangyilism@gmail.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4316f7b65fb76..ff4a6ec30adf5 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1005,6 +1005,12 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		break;
 	}
 	case COMPARE_AND_WRITE:
+		if (!dev->dev_attrib.emulate_caw) {
+			pr_err_ratelimited("se_device %s/%s (vpd_unit_serial %s) reject"
+				" COMPARE_AND_WRITE\n", dev->se_hba->backend->ops->name,
+				dev->dev_group.cg_item.ci_name, dev->t10_wwn.unit_serial);
+			return TCM_UNSUPPORTED_SCSI_OPCODE;
+		}
 		sectors = cdb[13];
 		/*
 		 * Currently enforce COMPARE_AND_WRITE for a single sector
-- 
GitLab


From eeb64d239ea664592ff8f1bce5546209a6593df5 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 3 Jun 2017 06:41:03 -0700
Subject: [PATCH 0807/1958] target: Add support for TMR percpu reference
 counting

This patch introduces TMR percpu reference counting using
se_lun->lun_ref in transport_lookup_tmr_lun(), following
how existing non TMR per se_lun reference counting works
within transport_lookup_cmd_lun().

It also adds explicit transport_lun_remove_cmd() calls to
drop the reference in the three tmr related locations that
invoke transport_cmd_check_stop_to_fabric();

   - target_tmr_work() during normal ->queue_tm_rsp()
   - target_complete_tmr_failure() during error ->queue_tm_rsp()
   - transport_generic_handle_tmr() during early failure

Also, note the exception paths in transport_generic_free_cmd()
and transport_cmd_finish_abort() already check SCF_SE_LUN_CMD,
and will invoke transport_lun_remove_cmd() when necessary.

Reviewed-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Reviewed-by: Quinn Tran <quinn.tran@cavium.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c    | 14 ++++++++++----
 drivers/target/target_core_transport.c |  3 +++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index e4771cec108cc..bf7a57953fa4a 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -168,11 +168,20 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
 	rcu_read_lock();
 	deve = target_nacl_find_deve(nacl, unpacked_lun);
 	if (deve) {
-		se_cmd->se_lun = rcu_dereference(deve->se_lun);
 		se_lun = rcu_dereference(deve->se_lun);
+
+		if (!percpu_ref_tryget_live(&se_lun->lun_ref)) {
+			se_lun = NULL;
+			goto out_unlock;
+		}
+
+		se_cmd->se_lun = rcu_dereference(deve->se_lun);
 		se_cmd->pr_res_key = deve->pr_res_key;
 		se_cmd->orig_fe_lun = unpacked_lun;
+		se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
+		se_cmd->lun_ref_active = true;
 	}
+out_unlock:
 	rcu_read_unlock();
 
 	if (!se_lun) {
@@ -182,9 +191,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
 			unpacked_lun);
 		return -ENODEV;
 	}
-	/*
-	 * XXX: Add percpu se_lun->lun_ref reference count for TMR
-	 */
 	se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev);
 	se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev);
 
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index f1b3a46bdcaff..97a01f48068bc 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1590,6 +1590,7 @@ static void target_complete_tmr_failure(struct work_struct *work)
 	se_cmd->se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST;
 	se_cmd->se_tfo->queue_tm_rsp(se_cmd);
 
+	transport_lun_remove_cmd(se_cmd);
 	transport_cmd_check_stop_to_fabric(se_cmd);
 }
 
@@ -3201,6 +3202,7 @@ static void target_tmr_work(struct work_struct *work)
 	cmd->se_tfo->queue_tm_rsp(cmd);
 
 check_stop:
+	transport_lun_remove_cmd(cmd);
 	transport_cmd_check_stop_to_fabric(cmd);
 }
 
@@ -3223,6 +3225,7 @@ int transport_generic_handle_tmr(
 		pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d"
 			"ref_tag: %llu tag: %llu\n", cmd->se_tmr_req->function,
 			cmd->se_tmr_req->ref_task_tag, cmd->tag);
+		transport_lun_remove_cmd(cmd);
 		transport_cmd_check_stop_to_fabric(cmd);
 		return 0;
 	}
-- 
GitLab


From 5465e7d3b99bbaa823ae4f8e538543e7d6cdc530 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 3 Jun 2017 06:55:50 -0700
Subject: [PATCH 0808/1958] target: Add TARGET_SCF_LOOKUP_LUN_FROM_TAG support
 for ABORT_TASK

This patch introduces support in target_submit_tmr() for locating a
unpacked_lun from an existing se_cmd->tag during ABORT_TASK.

When TARGET_SCF_LOOKUP_LUN_FROM_TAG is set, target_submit_tmr()
will do the extra lookup via target_lookup_lun_from_tag() and
subsequently invoke transport_lookup_tmr_lun() so a proper
percpu se_lun->lun_ref is taken before workqueue dispatch into
se_device->tmr_wq happens.

Aside from the extra target_lookup_lun_from_tag(), the existing
code-path remains unchanged.

Reviewed-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Reviewed-by: Quinn Tran <quinn.tran@cavium.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 53 +++++++++++++++++++++-----
 include/target/target_core_base.h      |  3 +-
 2 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 97a01f48068bc..e045803921f87 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1594,6 +1594,29 @@ static void target_complete_tmr_failure(struct work_struct *work)
 	transport_cmd_check_stop_to_fabric(se_cmd);
 }
 
+static bool target_lookup_lun_from_tag(struct se_session *se_sess, u64 tag,
+				       u64 *unpacked_lun)
+{
+	struct se_cmd *se_cmd;
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
+	list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) {
+		if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
+			continue;
+
+		if (se_cmd->tag == tag) {
+			*unpacked_lun = se_cmd->orig_fe_lun;
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+	return ret;
+}
+
 /**
  * target_submit_tmr - lookup unpacked lun and submit uninitialized se_cmd
  *                     for TMR CDBs
@@ -1641,19 +1664,31 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
 		core_tmr_release_req(se_cmd->se_tmr_req);
 		return ret;
 	}
+	/*
+	 * If this is ABORT_TASK with no explicit fabric provided LUN,
+	 * go ahead and search active session tags for a match to figure
+	 * out unpacked_lun for the original se_cmd.
+	 */
+	if (tm_type == TMR_ABORT_TASK && (flags & TARGET_SCF_LOOKUP_LUN_FROM_TAG)) {
+		if (!target_lookup_lun_from_tag(se_sess, tag, &unpacked_lun))
+			goto failure;
+	}
 
 	ret = transport_lookup_tmr_lun(se_cmd, unpacked_lun);
-	if (ret) {
-		/*
-		 * For callback during failure handling, push this work off
-		 * to process context with TMR_LUN_DOES_NOT_EXIST status.
-		 */
-		INIT_WORK(&se_cmd->work, target_complete_tmr_failure);
-		schedule_work(&se_cmd->work);
-		return 0;
-	}
+	if (ret)
+		goto failure;
+
 	transport_generic_handle_tmr(se_cmd);
 	return 0;
+
+	/*
+	 * For callback during failure handling, push this work off
+	 * to process context with TMR_LUN_DOES_NOT_EXIST status.
+	 */
+failure:
+	INIT_WORK(&se_cmd->work, target_complete_tmr_failure);
+	schedule_work(&se_cmd->work);
+	return 0;
 }
 EXPORT_SYMBOL(target_submit_tmr);
 
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 47d9f381209fc..f835528e424e7 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -188,7 +188,8 @@ enum target_sc_flags_table {
 	TARGET_SCF_BIDI_OP		= 0x01,
 	TARGET_SCF_ACK_KREF		= 0x02,
 	TARGET_SCF_UNKNOWN_SIZE		= 0x04,
-	TARGET_SCF_USE_CPUID	= 0x08,
+	TARGET_SCF_USE_CPUID		= 0x08,
+	TARGET_SCF_LOOKUP_LUN_FROM_TAG	= 0x10,
 };
 
 /* fabric independent task management function values */
-- 
GitLab


From eb5ae2335a84cccf45ec01602bc300c3e70486d0 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 3 Jun 2017 07:07:21 -0700
Subject: [PATCH 0809/1958] qla2xxx: Convert QLA_TGT_ABTS to
 TARGET_SCF_LOOKUP_LUN_FROM_TAG

Following Himanshu's earlier patch to drop the redundant tag
lookup within __qlt_24xx_handle_abts(), go ahead and drop this
now QLA_TGT_ABTS can use TARGET_SCF_LOOKUP_LUN_FROM_TAG and
have target_submit_tmr() do this from common code.

Reviewed-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Reviewed-by: Quinn Tran <quinn.tran@cavium.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c  | 39 +++++++-----------------------
 drivers/scsi/qla2xxx/tcm_qla2xxx.c |  4 ++-
 2 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0e03ca2ab3e52..401e245477d4c 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1847,38 +1847,13 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 	struct abts_recv_from_24xx *abts, struct fc_port *sess)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct se_session *se_sess = sess->se_sess;
 	struct qla_tgt_mgmt_cmd *mcmd;
-	struct se_cmd *se_cmd;
-	u32 lun = 0;
 	int rc;
-	bool found_lun = false;
-	unsigned long flags;
-
-	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
-	list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) {
-		struct qla_tgt_cmd *cmd =
-			container_of(se_cmd, struct qla_tgt_cmd, se_cmd);
-		if (se_cmd->tag == abts->exchange_addr_to_abort) {
-			lun = cmd->unpacked_lun;
-			found_lun = true;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
-	/* cmd not in LIO lists, look in qla list */
-	if (!found_lun) {
-		if (abort_cmd_for_tag(vha, abts->exchange_addr_to_abort)) {
-			/* send TASK_ABORT response immediately */
-			qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_CMPL, false);
-			return 0;
-		} else {
-			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf081,
-			    "unable to find cmd in driver or LIO for tag 0x%x\n",
-			    abts->exchange_addr_to_abort);
-			return -ENOENT;
-		}
+	if (abort_cmd_for_tag(vha, abts->exchange_addr_to_abort)) {
+		/* send TASK_ABORT response immediately */
+		qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_CMPL, false);
+		return 0;
 	}
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00f,
@@ -1899,7 +1874,11 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 	mcmd->reset_count = vha->hw->chip_reset;
 	mcmd->tmr_func = QLA_TGT_ABTS;
 
-	rc = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, mcmd->tmr_func,
+	/*
+	 * LUN is looked up by target-core internally based on the passed
+	 * abts->exchange_addr_to_abort tag.
+	 */
+	rc = ha->tgt.tgt_ops->handle_tmr(mcmd, 0, mcmd->tmr_func,
 	    abts->exchange_addr_to_abort);
 	if (rc != 0) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf052,
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 7443e4efa3aed..75aeb9fdfd06e 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -601,11 +601,13 @@ static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun,
 	struct fc_port *sess = mcmd->sess;
 	struct se_cmd *se_cmd = &mcmd->se_cmd;
 	int transl_tmr_func = 0;
+	int flags = TARGET_SCF_ACK_KREF;
 
 	switch (tmr_func) {
 	case QLA_TGT_ABTS:
 		pr_debug("%ld: ABTS received\n", sess->vha->host_no);
 		transl_tmr_func = TMR_ABORT_TASK;
+		flags |= TARGET_SCF_LOOKUP_LUN_FROM_TAG;
 		break;
 	case QLA_TGT_2G_ABORT_TASK:
 		pr_debug("%ld: 2G Abort Task received\n", sess->vha->host_no);
@@ -638,7 +640,7 @@ static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun,
 	}
 
 	return target_submit_tmr(se_cmd, sess->se_sess, NULL, lun, mcmd,
-	    transl_tmr_func, GFP_ATOMIC, tag, TARGET_SCF_ACK_KREF);
+	    transl_tmr_func, GFP_ATOMIC, tag, flags);
 }
 
 static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
-- 
GitLab


From 3e182db787714b373d4b1a1fd7dba4a581e8e406 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:22 -0700
Subject: [PATCH 0810/1958] target: Use symbolic value for WRITE_VERIFY_16

Now that a symbolic value has been introduced for WRITE_VERIFY_16,
use it. This patch does not change any functionality.

References: commit c2d26f18dcbc ("target: Add WRITE_VERIFY_16")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index bf7a57953fa4a..6403f44e61041 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1131,7 +1131,7 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 	case WRITE_16:
 	case WRITE_VERIFY:
 	case WRITE_VERIFY_12:
-	case 0x8e: /* WRITE_VERIFY_16 */
+	case WRITE_VERIFY_16:
 	case COMPARE_AND_WRITE:
 	case XDWRITEREAD_10:
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-- 
GitLab


From 9f2f342892e15f8600939ec8d06caf963ccff880 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:23 -0700
Subject: [PATCH 0811/1958] target: Remove se_device.dev_list

The last user of se_device.dev_list was removed through commit
0fd97ccf45be ("target: kill struct se_subsystem_dev"). Hence
also remove se_device.dev_list.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 1 -
 include/target/target_core_base.h   | 2 --
 2 files changed, 3 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 6403f44e61041..1f54b397bd574 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -767,7 +767,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 	dev->prot_length = sizeof(struct t10_pi_tuple);
 	dev->hba_index = hba->hba_index;
 
-	INIT_LIST_HEAD(&dev->dev_list);
 	INIT_LIST_HEAD(&dev->dev_sep_list);
 	INIT_LIST_HEAD(&dev->dev_tmr_list);
 	INIT_LIST_HEAD(&dev->delayed_cmd_list);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index f835528e424e7..a3af69fcf75e4 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -816,8 +816,6 @@ struct se_device {
 	unsigned char		udev_path[SE_UDEV_PATH_LEN];
 	/* Pointer to template of function pointers for transport */
 	const struct target_backend_ops *transport;
-	/* Linked list for struct se_hba struct se_device list */
-	struct list_head	dev_list;
 	struct se_lun		xcopy_lun;
 	/* Protection Information */
 	int			prot_length;
-- 
GitLab


From f2b72d6a8eed0eb02e6346886514a27df1efe827 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:24 -0700
Subject: [PATCH 0812/1958] target: Fix transport_init_se_cmd()

Avoid that aborting a command before it has been submitted onto
a workqueue triggers the following warning:

INFO: trying to register non-static key.
the code is fine but needs lockdep annotation.
turning off the locking correctness validator.
CPU: 3 PID: 46 Comm: kworker/u8:1 Not tainted 4.12.0-rc2-dbg+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Workqueue: tmr-iblock target_tmr_work [target_core_mod]
Call Trace:
 dump_stack+0x86/0xcf
 register_lock_class+0xe8/0x570
 __lock_acquire+0xa1/0x11d0
 lock_acquire+0x59/0x80
 flush_work+0x42/0x2b0
 __cancel_work_timer+0x10c/0x180
 cancel_work_sync+0xb/0x10
 core_tmr_lun_reset+0x352/0x740 [target_core_mod]
 target_tmr_work+0xd6/0x130 [target_core_mod]
 process_one_work+0x1ca/0x3f0
 worker_thread+0x49/0x3b0
 kthread+0x109/0x140
 ret_from_fork+0x31/0x40

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index e045803921f87..d601616b9f12c 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1239,6 +1239,7 @@ void transport_init_se_cmd(
 	init_completion(&cmd->t_transport_stop_comp);
 	init_completion(&cmd->cmd_wait_comp);
 	spin_lock_init(&cmd->t_state_lock);
+	INIT_WORK(&cmd->work, NULL);
 	kref_init(&cmd->cmd_kref);
 
 	cmd->se_tfo = tfo;
-- 
GitLab


From a85d667e58bddf73be84d1981b41eaac985ed216 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:27 -0700
Subject: [PATCH 0813/1958] target: Use {get,put}_unaligned_be*() instead of
 open coding these functions

Introduce the function get_unaligned_be24(). Use {get,put}_unaligned_be*()
where appropriate. This patch does not change any functionality.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c       |  8 +-
 drivers/target/target_core_device.c     |  8 +-
 drivers/target/target_core_fabric_lib.c |  6 +-
 drivers/target/target_core_pr.c         | 99 +++++--------------------
 drivers/target/target_core_pscsi.c      | 20 ++---
 drivers/target/target_core_sbc.c        | 59 ++++-----------
 drivers/target/target_core_spc.c        | 42 +++++------
 drivers/target/target_core_xcopy.c      |  4 +-
 include/target/target_core_backend.h    |  8 ++
 9 files changed, 83 insertions(+), 171 deletions(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index fc4a9c303d559..a91b7c25ffd41 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -205,8 +205,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
 		/*
 		 * TARGET PORT GROUP
 		 */
-		buf[off++] = ((tg_pt_gp->tg_pt_gp_id >> 8) & 0xff);
-		buf[off++] = (tg_pt_gp->tg_pt_gp_id & 0xff);
+		put_unaligned_be16(tg_pt_gp->tg_pt_gp_id, &buf[off]);
+		off += 2;
 
 		off++; /* Skip over Reserved */
 		/*
@@ -235,8 +235,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
 			/*
 			 * Set RELATIVE TARGET PORT IDENTIFIER
 			 */
-			buf[off++] = ((lun->lun_rtpi >> 8) & 0xff);
-			buf[off++] = (lun->lun_rtpi & 0xff);
+			put_unaligned_be16(lun->lun_rtpi, &buf[off]);
+			off += 2;
 			rd_len += 4;
 		}
 		spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 1f54b397bd574..11c80c47b9d3e 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1091,19 +1091,19 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 	      TRANSPORT_FLAG_PASSTHROUGH_PGR)) {
 		if (cdb[0] == PERSISTENT_RESERVE_IN) {
 			cmd->execute_cmd = target_scsi3_emulate_pr_in;
-			size = (cdb[7] << 8) + cdb[8];
+			size = get_unaligned_be16(&cdb[7]);
 			return target_cmd_size_check(cmd, size);
 		}
 		if (cdb[0] == PERSISTENT_RESERVE_OUT) {
 			cmd->execute_cmd = target_scsi3_emulate_pr_out;
-			size = (cdb[7] << 8) + cdb[8];
+			size = get_unaligned_be16(&cdb[7]);
 			return target_cmd_size_check(cmd, size);
 		}
 
 		if (cdb[0] == RELEASE || cdb[0] == RELEASE_10) {
 			cmd->execute_cmd = target_scsi2_reservation_release;
 			if (cdb[0] == RELEASE_10)
-				size = (cdb[7] << 8) | cdb[8];
+				size = get_unaligned_be16(&cdb[7]);
 			else
 				size = cmd->data_length;
 			return target_cmd_size_check(cmd, size);
@@ -1111,7 +1111,7 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 		if (cdb[0] == RESERVE || cdb[0] == RESERVE_10) {
 			cmd->execute_cmd = target_scsi2_reservation_reserve;
 			if (cdb[0] == RESERVE_10)
-				size = (cdb[7] << 8) | cdb[8];
+				size = get_unaligned_be16(&cdb[7]);
 			else
 				size = cmd->data_length;
 			return target_cmd_size_check(cmd, size);
diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c
index cb6497ce4b61a..508da345b73fd 100644
--- a/drivers/target/target_core_fabric_lib.c
+++ b/drivers/target/target_core_fabric_lib.c
@@ -34,6 +34,7 @@
 #include <linux/ctype.h>
 #include <linux/spinlock.h>
 #include <linux/export.h>
+#include <asm/unaligned.h>
 
 #include <scsi/scsi_proto.h>
 
@@ -216,8 +217,7 @@ static int iscsi_get_pr_transport_id(
 	if (padding != 0)
 		len += padding;
 
-	buf[2] = ((len >> 8) & 0xff);
-	buf[3] = (len & 0xff);
+	put_unaligned_be16(len, &buf[2]);
 	/*
 	 * Increment value for total payload + header length for
 	 * full status descriptor
@@ -306,7 +306,7 @@ static char *iscsi_parse_pr_out_transport_id(
 	 */
 	if (out_tid_len) {
 		/* The shift works thanks to integer promotion rules */
-		add_len = (buf[2] << 8) | buf[3];
+		add_len = get_unaligned_be16(&buf[2]);
 
 		tid_len = strlen(&buf[4]);
 		tid_len += 4; /* Add four bytes for iSCSI Transport ID header */
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 129ca572673ce..9921d4d6bb418 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -1562,10 +1562,7 @@ core_scsi3_decode_spec_i_port(
 	 * first extract TransportID Parameter Data Length, and make sure
 	 * the value matches up to the SCSI expected data transfer length.
 	 */
-	tpdl = (buf[24] & 0xff) << 24;
-	tpdl |= (buf[25] & 0xff) << 16;
-	tpdl |= (buf[26] & 0xff) << 8;
-	tpdl |= buf[27] & 0xff;
+	tpdl = get_unaligned_be32(&buf[24]);
 
 	if ((tpdl + 28) != cmd->data_length) {
 		pr_err("SPC-3 PR: Illegal tpdl: %u + 28 byte header"
@@ -3221,12 +3218,8 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
 		goto out_put_pr_reg;
 	}
 
-	rtpi = (buf[18] & 0xff) << 8;
-	rtpi |= buf[19] & 0xff;
-	tid_len = (buf[20] & 0xff) << 24;
-	tid_len |= (buf[21] & 0xff) << 16;
-	tid_len |= (buf[22] & 0xff) << 8;
-	tid_len |= buf[23] & 0xff;
+	rtpi = get_unaligned_be16(&buf[18]);
+	tid_len = get_unaligned_be32(&buf[20]);
 	transport_kunmap_data_sg(cmd);
 	buf = NULL;
 
@@ -3552,16 +3545,6 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
 	return ret;
 }
 
-static unsigned long long core_scsi3_extract_reservation_key(unsigned char *cdb)
-{
-	unsigned int __v1, __v2;
-
-	__v1 = (cdb[0] << 24) | (cdb[1] << 16) | (cdb[2] << 8) | cdb[3];
-	__v2 = (cdb[4] << 24) | (cdb[5] << 16) | (cdb[6] << 8) | cdb[7];
-
-	return ((unsigned long long)__v2) | (unsigned long long)__v1 << 32;
-}
-
 /*
  * See spc4r17 section 6.14 Table 170
  */
@@ -3619,8 +3602,8 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	/*
 	 * From PERSISTENT_RESERVE_OUT parameter list (payload)
 	 */
-	res_key = core_scsi3_extract_reservation_key(&buf[0]);
-	sa_res_key = core_scsi3_extract_reservation_key(&buf[8]);
+	res_key = get_unaligned_be64(&buf[0]);
+	sa_res_key = get_unaligned_be64(&buf[8]);
 	/*
 	 * REGISTER_AND_MOVE uses a different SA parameter list containing
 	 * SCSI TransportIDs.
@@ -3734,10 +3717,7 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((dev->t10_pr.pr_generation >> 24) & 0xff);
-	buf[1] = ((dev->t10_pr.pr_generation >> 16) & 0xff);
-	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
-	buf[3] = (dev->t10_pr.pr_generation & 0xff);
+	put_unaligned_be32(dev->t10_pr.pr_generation, buf);
 
 	spin_lock(&dev->t10_pr.registration_lock);
 	list_for_each_entry(pr_reg, &dev->t10_pr.registration_list,
@@ -3749,23 +3729,13 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
 		if ((add_len + 8) > (cmd->data_length - 8))
 			break;
 
-		buf[off++] = ((pr_reg->pr_res_key >> 56) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 48) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 40) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 32) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 24) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 16) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 8) & 0xff);
-		buf[off++] = (pr_reg->pr_res_key & 0xff);
-
+		put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
+		off += 8;
 		add_len += 8;
 	}
 	spin_unlock(&dev->t10_pr.registration_lock);
 
-	buf[4] = ((add_len >> 24) & 0xff);
-	buf[5] = ((add_len >> 16) & 0xff);
-	buf[6] = ((add_len >> 8) & 0xff);
-	buf[7] = (add_len & 0xff);
+	put_unaligned_be32(add_len, &buf[4]);
 
 	transport_kunmap_data_sg(cmd);
 
@@ -3796,10 +3766,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((dev->t10_pr.pr_generation >> 24) & 0xff);
-	buf[1] = ((dev->t10_pr.pr_generation >> 16) & 0xff);
-	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
-	buf[3] = (dev->t10_pr.pr_generation & 0xff);
+	put_unaligned_be32(dev->t10_pr.pr_generation, &buf[0]);
 
 	spin_lock(&dev->dev_reservation_lock);
 	pr_reg = dev->dev_pr_res_holder;
@@ -3807,10 +3774,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 		/*
 		 * Set the hardcoded Additional Length
 		 */
-		buf[4] = ((add_len >> 24) & 0xff);
-		buf[5] = ((add_len >> 16) & 0xff);
-		buf[6] = ((add_len >> 8) & 0xff);
-		buf[7] = (add_len & 0xff);
+		put_unaligned_be32(add_len, &buf[4]);
 
 		if (cmd->data_length < 22)
 			goto err;
@@ -3837,14 +3801,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 		else
 			pr_res_key = pr_reg->pr_res_key;
 
-		buf[8] = ((pr_res_key >> 56) & 0xff);
-		buf[9] = ((pr_res_key >> 48) & 0xff);
-		buf[10] = ((pr_res_key >> 40) & 0xff);
-		buf[11] = ((pr_res_key >> 32) & 0xff);
-		buf[12] = ((pr_res_key >> 24) & 0xff);
-		buf[13] = ((pr_res_key >> 16) & 0xff);
-		buf[14] = ((pr_res_key >> 8) & 0xff);
-		buf[15] = (pr_res_key & 0xff);
+		put_unaligned_be64(pr_res_key, &buf[8]);
 		/*
 		 * Set the SCOPE and TYPE
 		 */
@@ -3882,8 +3839,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((add_len >> 8) & 0xff);
-	buf[1] = (add_len & 0xff);
+	put_unaligned_be16(add_len, &buf[0]);
 	buf[2] |= 0x10; /* CRH: Compatible Reservation Hanlding bit. */
 	buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */
 	buf[2] |= 0x04; /* ATP_C: All Target Ports Capable bit */
@@ -3947,10 +3903,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((dev->t10_pr.pr_generation >> 24) & 0xff);
-	buf[1] = ((dev->t10_pr.pr_generation >> 16) & 0xff);
-	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
-	buf[3] = (dev->t10_pr.pr_generation & 0xff);
+	put_unaligned_be32(dev->t10_pr.pr_generation, &buf[0]);
 
 	spin_lock(&dev->dev_reservation_lock);
 	if (dev->dev_pr_res_holder) {
@@ -3992,14 +3945,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		/*
 		 * Set RESERVATION KEY
 		 */
-		buf[off++] = ((pr_reg->pr_res_key >> 56) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 48) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 40) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 32) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 24) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 16) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 8) & 0xff);
-		buf[off++] = (pr_reg->pr_res_key & 0xff);
+		put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
+		off += 8;
 		off += 4; /* Skip Over Reserved area */
 
 		/*
@@ -4041,8 +3988,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		if (!pr_reg->pr_reg_all_tg_pt) {
 			u16 sep_rtpi = pr_reg->tg_pt_sep_rtpi;
 
-			buf[off++] = ((sep_rtpi >> 8) & 0xff);
-			buf[off++] = (sep_rtpi & 0xff);
+			put_unaligned_be16(sep_rtpi, &buf[off]);
+			off += 2;
 		} else
 			off += 2; /* Skip over RELATIVE TARGET PORT IDENTIFIER */
 
@@ -4062,10 +4009,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		/*
 		 * Set the ADDITIONAL DESCRIPTOR LENGTH
 		 */
-		buf[off++] = ((desc_len >> 24) & 0xff);
-		buf[off++] = ((desc_len >> 16) & 0xff);
-		buf[off++] = ((desc_len >> 8) & 0xff);
-		buf[off++] = (desc_len & 0xff);
+		put_unaligned_be32(desc_len, &buf[off]);
 		/*
 		 * Size of full desctipor header minus TransportID
 		 * containing $FABRIC_MOD specific) initiator device/port
@@ -4082,10 +4026,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 	/*
 	 * Set ADDITIONAL_LENGTH
 	 */
-	buf[4] = ((add_len >> 24) & 0xff);
-	buf[5] = ((add_len >> 16) & 0xff);
-	buf[6] = ((add_len >> 8) & 0xff);
-	buf[7] = (add_len & 0xff);
+	put_unaligned_be32(add_len, &buf[4]);
 
 	transport_kunmap_data_sg(cmd);
 
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 3e4abb13f8ea4..e0be4aa383283 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -168,7 +168,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
 	/*
 	 * If MODE_SENSE still returns zero, set the default value to 1024.
 	 */
-	sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]);
+	sdev->sector_size = get_unaligned_be24(&buf[9]);
 out_free:
 	if (!sdev->sector_size)
 		sdev->sector_size = 1024;
@@ -209,8 +209,7 @@ pscsi_get_inquiry_vpd_serial(struct scsi_device *sdev, struct t10_wwn *wwn)
 	cdb[0] = INQUIRY;
 	cdb[1] = 0x01; /* Query VPD */
 	cdb[2] = 0x80; /* Unit Serial Number */
-	cdb[3] = (INQUIRY_VPD_SERIAL_LEN >> 8) & 0xff;
-	cdb[4] = (INQUIRY_VPD_SERIAL_LEN & 0xff);
+	put_unaligned_be16(INQUIRY_VPD_SERIAL_LEN, &cdb[3]);
 
 	ret = scsi_execute_req(sdev, cdb, DMA_FROM_DEVICE, buf,
 			      INQUIRY_VPD_SERIAL_LEN, NULL, HZ, 1, NULL);
@@ -245,8 +244,7 @@ pscsi_get_inquiry_vpd_device_ident(struct scsi_device *sdev,
 	cdb[0] = INQUIRY;
 	cdb[1] = 0x01; /* Query VPD */
 	cdb[2] = 0x83; /* Device Identifier */
-	cdb[3] = (INQUIRY_VPD_DEVICE_IDENTIFIER_LEN >> 8) & 0xff;
-	cdb[4] = (INQUIRY_VPD_DEVICE_IDENTIFIER_LEN & 0xff);
+	put_unaligned_be16(INQUIRY_VPD_DEVICE_IDENTIFIER_LEN, &cdb[3]);
 
 	ret = scsi_execute_req(sdev, cdb, DMA_FROM_DEVICE, buf,
 			      INQUIRY_VPD_DEVICE_IDENTIFIER_LEN,
@@ -254,7 +252,7 @@ pscsi_get_inquiry_vpd_device_ident(struct scsi_device *sdev,
 	if (ret)
 		goto out;
 
-	page_len = (buf[2] << 8) | buf[3];
+	page_len = get_unaligned_be16(&buf[2]);
 	while (page_len > 0) {
 		/* Grab a pointer to the Identification descriptor */
 		page_83 = &buf[off];
@@ -669,19 +667,17 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 		}
 
 		if (cdb[0] == MODE_SELECT)
-			bdl = (buf[3]);
+			bdl = buf[3];
 		else
-			bdl = (buf[6] << 8) | (buf[7]);
+			bdl = get_unaligned_be16(&buf[6]);
 
 		if (!bdl)
 			goto after_mode_select;
 
 		if (cdb[0] == MODE_SELECT)
-			blocksize = (buf[9] << 16) | (buf[10] << 8) |
-					(buf[11]);
+			blocksize = get_unaligned_be24(&buf[9]);
 		else
-			blocksize = (buf[13] << 16) | (buf[14] << 8) |
-					(buf[15]);
+			blocksize = get_unaligned_be24(&buf[13]);
 
 		sd->sector_size = blocksize;
 	}
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index ff4a6ec30adf5..ca42fba882dbd 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -71,14 +71,8 @@ sbc_emulate_readcapacity(struct se_cmd *cmd)
 	else
 		blocks = (u32)blocks_long;
 
-	buf[0] = (blocks >> 24) & 0xff;
-	buf[1] = (blocks >> 16) & 0xff;
-	buf[2] = (blocks >> 8) & 0xff;
-	buf[3] = blocks & 0xff;
-	buf[4] = (dev->dev_attrib.block_size >> 24) & 0xff;
-	buf[5] = (dev->dev_attrib.block_size >> 16) & 0xff;
-	buf[6] = (dev->dev_attrib.block_size >> 8) & 0xff;
-	buf[7] = dev->dev_attrib.block_size & 0xff;
+	put_unaligned_be32(blocks, &buf[0]);
+	put_unaligned_be32(dev->dev_attrib.block_size, &buf[4]);
 
 	rbuf = transport_kmap_data_sg(cmd);
 	if (rbuf) {
@@ -102,18 +96,8 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
 	unsigned long long blocks = dev->transport->get_blocks(dev);
 
 	memset(buf, 0, sizeof(buf));
-	buf[0] = (blocks >> 56) & 0xff;
-	buf[1] = (blocks >> 48) & 0xff;
-	buf[2] = (blocks >> 40) & 0xff;
-	buf[3] = (blocks >> 32) & 0xff;
-	buf[4] = (blocks >> 24) & 0xff;
-	buf[5] = (blocks >> 16) & 0xff;
-	buf[6] = (blocks >> 8) & 0xff;
-	buf[7] = blocks & 0xff;
-	buf[8] = (dev->dev_attrib.block_size >> 24) & 0xff;
-	buf[9] = (dev->dev_attrib.block_size >> 16) & 0xff;
-	buf[10] = (dev->dev_attrib.block_size >> 8) & 0xff;
-	buf[11] = dev->dev_attrib.block_size & 0xff;
+	put_unaligned_be64(blocks, &buf[0]);
+	put_unaligned_be32(dev->dev_attrib.block_size, &buf[8]);
 	/*
 	 * Set P_TYPE and PROT_EN bits for DIF support
 	 */
@@ -134,8 +118,8 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
 
 	if (dev->transport->get_alignment_offset_lbas) {
 		u16 lalba = dev->transport->get_alignment_offset_lbas(dev);
-		buf[14] = (lalba >> 8) & 0x3f;
-		buf[15] = lalba & 0xff;
+
+		put_unaligned_be16(lalba, &buf[14]);
 	}
 
 	/*
@@ -262,18 +246,17 @@ static inline u32 transport_get_sectors_6(unsigned char *cdb)
 
 static inline u32 transport_get_sectors_10(unsigned char *cdb)
 {
-	return (u32)(cdb[7] << 8) + cdb[8];
+	return get_unaligned_be16(&cdb[7]);
 }
 
 static inline u32 transport_get_sectors_12(unsigned char *cdb)
 {
-	return (u32)(cdb[6] << 24) + (cdb[7] << 16) + (cdb[8] << 8) + cdb[9];
+	return get_unaligned_be32(&cdb[6]);
 }
 
 static inline u32 transport_get_sectors_16(unsigned char *cdb)
 {
-	return (u32)(cdb[10] << 24) + (cdb[11] << 16) +
-		    (cdb[12] << 8) + cdb[13];
+	return get_unaligned_be32(&cdb[10]);
 }
 
 /*
@@ -281,29 +264,23 @@ static inline u32 transport_get_sectors_16(unsigned char *cdb)
  */
 static inline u32 transport_get_sectors_32(unsigned char *cdb)
 {
-	return (u32)(cdb[28] << 24) + (cdb[29] << 16) +
-		    (cdb[30] << 8) + cdb[31];
+	return get_unaligned_be32(&cdb[28]);
 
 }
 
 static inline u32 transport_lba_21(unsigned char *cdb)
 {
-	return ((cdb[1] & 0x1f) << 16) | (cdb[2] << 8) | cdb[3];
+	return get_unaligned_be24(&cdb[1]) & 0x1fffff;
 }
 
 static inline u32 transport_lba_32(unsigned char *cdb)
 {
-	return (cdb[2] << 24) | (cdb[3] << 16) | (cdb[4] << 8) | cdb[5];
+	return get_unaligned_be32(&cdb[2]);
 }
 
 static inline unsigned long long transport_lba_64(unsigned char *cdb)
 {
-	unsigned int __v1, __v2;
-
-	__v1 = (cdb[2] << 24) | (cdb[3] << 16) | (cdb[4] << 8) | cdb[5];
-	__v2 = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9];
-
-	return ((unsigned long long)__v2) | (unsigned long long)__v1 << 32;
+	return get_unaligned_be64(&cdb[2]);
 }
 
 /*
@@ -311,12 +288,7 @@ static inline unsigned long long transport_lba_64(unsigned char *cdb)
  */
 static inline unsigned long long transport_lba_64_ext(unsigned char *cdb)
 {
-	unsigned int __v1, __v2;
-
-	__v1 = (cdb[12] << 24) | (cdb[13] << 16) | (cdb[14] << 8) | cdb[15];
-	__v2 = (cdb[16] << 24) | (cdb[17] << 16) | (cdb[18] << 8) | cdb[19];
-
-	return ((unsigned long long)__v2) | (unsigned long long)__v1 << 32;
+	return get_unaligned_be64(&cdb[12]);
 }
 
 static sense_reason_t
@@ -1051,8 +1023,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 				cmd->t_task_cdb[1] & 0x1f);
 			return TCM_INVALID_CDB_FIELD;
 		}
-		size = (cdb[10] << 24) | (cdb[11] << 16) |
-		       (cdb[12] << 8) | cdb[13];
+		size = get_unaligned_be32(&cdb[10]);
 		break;
 	case SYNCHRONIZE_CACHE:
 	case SYNCHRONIZE_CACHE_16:
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 2a91ed3ef3801..f59ac76710318 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -287,8 +287,8 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		/* Skip over Obsolete field in RTPI payload
 		 * in Table 472 */
 		off += 2;
-		buf[off++] = ((lun->lun_rtpi >> 8) & 0xff);
-		buf[off++] = (lun->lun_rtpi & 0xff);
+		put_unaligned_be16(lun->lun_rtpi, &buf[off]);
+		off += 2;
 		len += 8; /* Header size + Designation descriptor */
 		/*
 		 * Target port group identifier, see spc4r17
@@ -316,8 +316,8 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		off++; /* Skip over Reserved */
 		buf[off++] = 4; /* DESIGNATOR LENGTH */
 		off += 2; /* Skip over Reserved Field */
-		buf[off++] = ((tg_pt_gp_id >> 8) & 0xff);
-		buf[off++] = (tg_pt_gp_id & 0xff);
+		put_unaligned_be16(tg_pt_gp_id, &buf[off]);
+		off += 2;
 		len += 8; /* Header size + Designation descriptor */
 		/*
 		 * Logical Unit Group identifier, see spc4r17
@@ -343,8 +343,8 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		off++; /* Skip over Reserved */
 		buf[off++] = 4; /* DESIGNATOR LENGTH */
 		off += 2; /* Skip over Reserved Field */
-		buf[off++] = ((lu_gp_id >> 8) & 0xff);
-		buf[off++] = (lu_gp_id & 0xff);
+		put_unaligned_be16(lu_gp_id, &buf[off]);
+		off += 2;
 		len += 8; /* Header size + Designation descriptor */
 		/*
 		 * SCSI name string designator, see spc4r17
@@ -431,8 +431,7 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		/* Header size + Designation descriptor */
 		len += (scsi_target_len + 4);
 	}
-	buf[2] = ((len >> 8) & 0xff);
-	buf[3] = (len & 0xff); /* Page Length for VPD 0x83 */
+	put_unaligned_be16(len, &buf[2]); /* Page Length for VPD 0x83 */
 	return 0;
 }
 EXPORT_SYMBOL(spc_emulate_evpd_83);
@@ -1288,7 +1287,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		cmd->execute_cmd = spc_emulate_modeselect;
 		break;
 	case MODE_SELECT_10:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		cmd->execute_cmd = spc_emulate_modeselect;
 		break;
 	case MODE_SENSE:
@@ -1296,25 +1295,25 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		cmd->execute_cmd = spc_emulate_modesense;
 		break;
 	case MODE_SENSE_10:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		cmd->execute_cmd = spc_emulate_modesense;
 		break;
 	case LOG_SELECT:
 	case LOG_SENSE:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		break;
 	case PERSISTENT_RESERVE_IN:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		cmd->execute_cmd = target_scsi3_emulate_pr_in;
 		break;
 	case PERSISTENT_RESERVE_OUT:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		cmd->execute_cmd = target_scsi3_emulate_pr_out;
 		break;
 	case RELEASE:
 	case RELEASE_10:
 		if (cdb[0] == RELEASE_10)
-			*size = (cdb[7] << 8) | cdb[8];
+			*size = get_unaligned_be16(&cdb[7]);
 		else
 			*size = cmd->data_length;
 
@@ -1327,7 +1326,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		 * Assume the passthrough or $FABRIC_MOD will tell us about it.
 		 */
 		if (cdb[0] == RESERVE_10)
-			*size = (cdb[7] << 8) | cdb[8];
+			*size = get_unaligned_be16(&cdb[7]);
 		else
 			*size = cmd->data_length;
 
@@ -1338,7 +1337,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		cmd->execute_cmd = spc_emulate_request_sense;
 		break;
 	case INQUIRY:
-		*size = (cdb[3] << 8) + cdb[4];
+		*size = get_unaligned_be16(&cdb[3]);
 
 		/*
 		 * Do implicit HEAD_OF_QUEUE processing for INQUIRY.
@@ -1349,7 +1348,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		break;
 	case SECURITY_PROTOCOL_IN:
 	case SECURITY_PROTOCOL_OUT:
-		*size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9];
+		*size = get_unaligned_be32(&cdb[6]);
 		break;
 	case EXTENDED_COPY:
 		*size = get_unaligned_be32(&cdb[10]);
@@ -1361,19 +1360,18 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		break;
 	case READ_ATTRIBUTE:
 	case WRITE_ATTRIBUTE:
-		*size = (cdb[10] << 24) | (cdb[11] << 16) |
-		       (cdb[12] << 8) | cdb[13];
+		*size = get_unaligned_be32(&cdb[10]);
 		break;
 	case RECEIVE_DIAGNOSTIC:
 	case SEND_DIAGNOSTIC:
-		*size = (cdb[3] << 8) | cdb[4];
+		*size = get_unaligned_be16(&cdb[3]);
 		break;
 	case WRITE_BUFFER:
-		*size = (cdb[6] << 16) + (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be24(&cdb[6]);
 		break;
 	case REPORT_LUNS:
 		cmd->execute_cmd = spc_emulate_report_luns;
-		*size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9];
+		*size = get_unaligned_be32(&cdb[6]);
 		/*
 		 * Do implicit HEAD_OF_QUEUE processing for REPORT_LUNS
 		 * See spc4r17 section 5.3
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index cac5a20a4de07..f12cf0c12531f 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -311,9 +311,7 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op
 		(unsigned long long)xop->dst_lba);
 
 	if (dc != 0) {
-		xop->dbl = (desc[29] & 0xff) << 16;
-		xop->dbl |= (desc[30] & 0xff) << 8;
-		xop->dbl |= desc[31] & 0xff;
+		xop->dbl = get_unaligned_be24(&desc[29]);
 
 		pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl);
 	}
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index e475531565fdf..b76071161cdc7 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -2,6 +2,7 @@
 #define TARGET_CORE_BACKEND_H
 
 #include <linux/types.h>
+#include <asm/unaligned.h>
 #include <target/target_core_base.h>
 
 #define TRANSPORT_FLAG_PASSTHROUGH		0x1
@@ -109,4 +110,11 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
 				       struct request_queue *q);
 
+
+/* Only use get_unaligned_be24() if reading p - 1 is allowed. */
+static inline uint32_t get_unaligned_be24(const uint8_t *const p)
+{
+	return get_unaligned_be32(p - 1) & 0xffffffU;
+}
+
 #endif /* TARGET_CORE_BACKEND_H */
-- 
GitLab


From d877d7275be34ad70ce92bcbb4bb36cec77ed004 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:29 -0700
Subject: [PATCH 0814/1958] target: Fix a deadlock between the XCOPY code and
 iSCSI session shutdown

Move the code for parsing an XCOPY command from the context of
the iSCSI receiver thread to the context of the XCOPY workqueue.
Keep the simple XCOPY checks in the context of the iSCSI receiver
thread. Move the code for allocating and freeing struct xcopy_op
from the code that parses an XCOPY command to its caller.

This patch fixes the following deadlock:

======================================================
[ INFO: possible circular locking dependency detected ]
4.10.0-rc7-dbg+ #1 Not tainted
-------------------------------------------------------
rmdir/13321 is trying to acquire lock:
 (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02cb47d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]

but task is already holding lock:
 (&sb->s_type->i_mutex_key#14){++++++}, at: [<ffffffff811c6e20>] vfs_rmdir+0x50/0x140

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:
-> #1 (&sb->s_type->i_mutex_key#14){++++++}:
 lock_acquire+0x71/0x90
 down_write+0x3f/0x70
 configfs_depend_item+0x3a/0xb0 [configfs]
 target_depend_item+0x13/0x20 [target_core_mod]
 target_xcopy_locate_se_dev_e4+0xdd/0x1a0 [target_core_mod]
 target_do_xcopy+0x34b/0x970 [target_core_mod]
 __target_execute_cmd+0x22/0xa0 [target_core_mod]
 target_execute_cmd+0x233/0x2c0 [target_core_mod]
 iscsit_execute_cmd+0x208/0x270 [iscsi_target_mod]
 iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
 iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
 iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
 kthread+0x102/0x140
 ret_from_fork+0x31/0x40

-> #0 (&sess->cmdsn_mutex){+.+.+.}:
 __lock_acquire+0x10e6/0x1260
 lock_acquire+0x71/0x90
 mutex_lock_nested+0x5f/0x670
 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
 iscsit_close_session+0xac/0x200 [iscsi_target_mod]
 lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
 target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
 core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
 target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
 config_item_release+0x5a/0xc0 [configfs]
 config_item_put+0x1d/0x1f [configfs]
 configfs_rmdir+0x1a6/0x300 [configfs]
 vfs_rmdir+0xb7/0x140
 do_rmdir+0x1f4/0x200
 SyS_rmdir+0x11/0x20
 entry_SYSCALL_64_fastpath+0x23/0xc6

other info that might help us debug this:

 Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(&sb->s_type->i_mutex_key#14);
                               lock(&sess->cmdsn_mutex);
                               lock(&sb->s_type->i_mutex_key#14);
  lock(&sess->cmdsn_mutex);

 *** DEADLOCK ***

3 locks held by rmdir/13321:
 #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff811e1aff>] mnt_want_write+0x1f/0x50
 #1:  (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cc8ce>] do_rmdir+0x15e/0x200
 #2:  (&sb->s_type->i_mutex_key#14){++++++}, at: [<ffffffff811c6e20>] vfs_rmdir+0x50/0x140

stack backtrace:
CPU: 2 PID: 13321 Comm: rmdir Not tainted 4.10.0-rc7-dbg+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0x86/0xc3
 print_circular_bug+0x1c7/0x220
 __lock_acquire+0x10e6/0x1260
 lock_acquire+0x71/0x90
 mutex_lock_nested+0x5f/0x670
 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
 iscsit_close_session+0xac/0x200 [iscsi_target_mod]
 lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
 target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
 core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
 target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
 config_item_release+0x5a/0xc0 [configfs]
 config_item_put+0x1d/0x1f [configfs]
 configfs_rmdir+0x1a6/0x300 [configfs]
 vfs_rmdir+0xb7/0x140
 do_rmdir+0x1f4/0x200
 SyS_rmdir+0x11/0x20
 entry_SYSCALL_64_fastpath+0x23/0xc6

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_xcopy.c | 110 ++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 41 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index f12cf0c12531f..56738a41e3461 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -40,6 +40,8 @@
 
 static struct workqueue_struct *xcopy_wq = NULL;
 
+static sense_reason_t target_parse_xcopy_cmd(struct xcopy_op *xop);
+
 static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 {
 	int off = 0;
@@ -779,13 +781,24 @@ static int target_xcopy_write_destination(
 static void target_xcopy_do_work(struct work_struct *work)
 {
 	struct xcopy_op *xop = container_of(work, struct xcopy_op, xop_work);
-	struct se_device *src_dev = xop->src_dev, *dst_dev = xop->dst_dev;
 	struct se_cmd *ec_cmd = xop->xop_se_cmd;
-	sector_t src_lba = xop->src_lba, dst_lba = xop->dst_lba, end_lba;
+	struct se_device *src_dev, *dst_dev;
+	sector_t src_lba, dst_lba, end_lba;
 	unsigned int max_sectors;
-	int rc;
-	unsigned short nolb = xop->nolb, cur_nolb, max_nolb, copied_nolb = 0;
+	int rc = 0;
+	unsigned short nolb, cur_nolb, max_nolb, copied_nolb = 0;
+
+	if (target_parse_xcopy_cmd(xop) != TCM_NO_SENSE)
+		goto err_free;
 
+	if (WARN_ON_ONCE(!xop->src_dev) || WARN_ON_ONCE(!xop->dst_dev))
+		goto err_free;
+
+	src_dev = xop->src_dev;
+	dst_dev = xop->dst_dev;
+	src_lba = xop->src_lba;
+	dst_lba = xop->dst_lba;
+	nolb = xop->nolb;
 	end_lba = src_lba + nolb;
 	/*
 	 * Break up XCOPY I/O into hw_max_sectors sized I/O based on the
@@ -853,6 +866,8 @@ static void target_xcopy_do_work(struct work_struct *work)
 
 out:
 	xcopy_pt_undepend_remotedev(xop);
+
+err_free:
 	kfree(xop);
 	/*
 	 * Don't override an error scsi status if it has already been set
@@ -865,48 +880,22 @@ static void target_xcopy_do_work(struct work_struct *work)
 	target_complete_cmd(ec_cmd, ec_cmd->scsi_status);
 }
 
-sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
+/*
+ * Returns TCM_NO_SENSE upon success or a sense code != TCM_NO_SENSE if parsing
+ * fails.
+ */
+static sense_reason_t target_parse_xcopy_cmd(struct xcopy_op *xop)
 {
-	struct se_device *dev = se_cmd->se_dev;
-	struct xcopy_op *xop = NULL;
+	struct se_cmd *se_cmd = xop->xop_se_cmd;
 	unsigned char *p = NULL, *seg_desc;
-	unsigned int list_id, list_id_usage, sdll, inline_dl, sa;
+	unsigned int list_id, list_id_usage, sdll, inline_dl;
 	sense_reason_t ret = TCM_INVALID_PARAMETER_LIST;
 	int rc;
 	unsigned short tdll;
 
-	if (!dev->dev_attrib.emulate_3pc) {
-		pr_err("EXTENDED_COPY operation explicitly disabled\n");
-		return TCM_UNSUPPORTED_SCSI_OPCODE;
-	}
-
-	sa = se_cmd->t_task_cdb[1] & 0x1f;
-	if (sa != 0x00) {
-		pr_err("EXTENDED_COPY(LID4) not supported\n");
-		return TCM_UNSUPPORTED_SCSI_OPCODE;
-	}
-
-	if (se_cmd->data_length == 0) {
-		target_complete_cmd(se_cmd, SAM_STAT_GOOD);
-		return TCM_NO_SENSE;
-	}
-	if (se_cmd->data_length < XCOPY_HDR_LEN) {
-		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
-				se_cmd->data_length, XCOPY_HDR_LEN);
-		return TCM_PARAMETER_LIST_LENGTH_ERROR;
-	}
-
-	xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
-	if (!xop) {
-		pr_err("Unable to allocate xcopy_op\n");
-		return TCM_OUT_OF_RESOURCES;
-	}
-	xop->xop_se_cmd = se_cmd;
-
 	p = transport_kmap_data_sg(se_cmd);
 	if (!p) {
 		pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n");
-		kfree(xop);
 		return TCM_OUT_OF_RESOURCES;
 	}
 
@@ -975,18 +964,57 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 	pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc,
 				rc * XCOPY_TARGET_DESC_LEN);
 	transport_kunmap_data_sg(se_cmd);
-
-	INIT_WORK(&xop->xop_work, target_xcopy_do_work);
-	queue_work(xcopy_wq, &xop->xop_work);
 	return TCM_NO_SENSE;
 
 out:
 	if (p)
 		transport_kunmap_data_sg(se_cmd);
-	kfree(xop);
 	return ret;
 }
 
+sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
+{
+	struct se_device *dev = se_cmd->se_dev;
+	struct xcopy_op *xop;
+	unsigned int sa;
+
+	if (!dev->dev_attrib.emulate_3pc) {
+		pr_err("EXTENDED_COPY operation explicitly disabled\n");
+		return TCM_UNSUPPORTED_SCSI_OPCODE;
+	}
+
+	sa = se_cmd->t_task_cdb[1] & 0x1f;
+	if (sa != 0x00) {
+		pr_err("EXTENDED_COPY(LID4) not supported\n");
+		return TCM_UNSUPPORTED_SCSI_OPCODE;
+	}
+
+	if (se_cmd->data_length == 0) {
+		target_complete_cmd(se_cmd, SAM_STAT_GOOD);
+		return TCM_NO_SENSE;
+	}
+	if (se_cmd->data_length < XCOPY_HDR_LEN) {
+		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
+				se_cmd->data_length, XCOPY_HDR_LEN);
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
+	}
+
+	xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
+	if (!xop)
+		goto err;
+	xop->xop_se_cmd = se_cmd;
+	INIT_WORK(&xop->xop_work, target_xcopy_do_work);
+	if (WARN_ON_ONCE(!queue_work(xcopy_wq, &xop->xop_work)))
+		goto free;
+	return TCM_NO_SENSE;
+
+free:
+	kfree(xop);
+
+err:
+	return TCM_OUT_OF_RESOURCES;
+}
+
 static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd)
 {
 	unsigned char *p;
-- 
GitLab


From 13fdd4458ed1b808946fd7baba657b6a51d3c72b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:35 -0700
Subject: [PATCH 0815/1958] IB/srpt: Make a debug statement in srpt_abort_cmd()
 more informative

Do not only report the state of the I/O context before srpt_abort_cmd()
was called but also the new state assigned by srpt_abort_cmd()

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1ced0731c1400..402275be0931e 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1157,8 +1157,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
 	}
 	spin_unlock_irqrestore(&ioctx->spinlock, flags);
 
-	pr_debug("Aborting cmd with state %d and tag %lld\n", state,
-		 ioctx->cmd.tag);
+	pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state,
+		 ioctx->state, ioctx->cmd.tag);
 
 	switch (state) {
 	case SRPT_STATE_NEW:
-- 
GitLab


From 9f4ab18ac51dc87345a9cbd2527e6acf7a0a9335 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:36 -0700
Subject: [PATCH 0816/1958] xen/scsiback: Fix a TMR related use-after-free

scsiback_release_cmd() must not dereference se_cmd->se_tmr_req
because that memory is freed by target_free_cmd_mem() before
scsiback_release_cmd() is called. Fix this use-after-free by
inlining struct scsiback_tmr into struct vscsibk_pend.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Cc: xen-devel@lists.xenproject.org
Cc: <stable@vger.kernel.org> # 3.18+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/xen/xen-scsiback.c | 33 +++++++++------------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index d6950e0802b7d..980f328173054 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -134,9 +134,7 @@ struct vscsibk_pend {
 	struct page *pages[VSCSI_MAX_GRANTS];
 
 	struct se_cmd se_cmd;
-};
 
-struct scsiback_tmr {
 	atomic_t tmr_complete;
 	wait_queue_head_t tmr_wait;
 };
@@ -599,26 +597,20 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	struct scsiback_tpg *tpg = pending_req->v2p->tpg;
 	struct scsiback_nexus *nexus = tpg->tpg_nexus;
 	struct se_cmd *se_cmd = &pending_req->se_cmd;
-	struct scsiback_tmr *tmr;
 	u64 unpacked_lun = pending_req->v2p->lun;
 	int rc, err = FAILED;
 
-	tmr = kzalloc(sizeof(struct scsiback_tmr), GFP_KERNEL);
-	if (!tmr) {
-		target_put_sess_cmd(se_cmd);
-		goto err;
-	}
-
-	init_waitqueue_head(&tmr->tmr_wait);
+	init_waitqueue_head(&pending_req->tmr_wait);
 
 	rc = target_submit_tmr(&pending_req->se_cmd, nexus->tvn_se_sess,
 			       &pending_req->sense_buffer[0],
-			       unpacked_lun, tmr, act, GFP_KERNEL,
+			       unpacked_lun, NULL, act, GFP_KERNEL,
 			       tag, TARGET_SCF_ACK_KREF);
 	if (rc)
 		goto err;
 
-	wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));
+	wait_event(pending_req->tmr_wait,
+		   atomic_read(&pending_req->tmr_complete));
 
 	err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
 		SUCCESS : FAILED;
@@ -626,9 +618,8 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
 	transport_generic_free_cmd(&pending_req->se_cmd, 1);
 	return;
+
 err:
-	if (tmr)
-		kfree(tmr);
 	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
 }
 
@@ -1389,12 +1380,6 @@ static int scsiback_check_stop_free(struct se_cmd *se_cmd)
 static void scsiback_release_cmd(struct se_cmd *se_cmd)
 {
 	struct se_session *se_sess = se_cmd->se_sess;
-	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
-
-	if (se_tmr && se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) {
-		struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
-		kfree(tmr);
-	}
 
 	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
 }
@@ -1455,11 +1440,11 @@ static int scsiback_queue_status(struct se_cmd *se_cmd)
 
 static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
 {
-	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
-	struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
+	struct vscsibk_pend *pending_req = container_of(se_cmd,
+				struct vscsibk_pend, se_cmd);
 
-	atomic_set(&tmr->tmr_complete, 1);
-	wake_up(&tmr->tmr_wait);
+	atomic_set(&pending_req->tmr_complete, 1);
+	wake_up(&pending_req->tmr_wait);
 }
 
 static void scsiback_aborted_task(struct se_cmd *se_cmd)
-- 
GitLab


From e3eac12442c2678d64b655e9768bda1be65e0b68 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:37 -0700
Subject: [PATCH 0817/1958] xen/scsiback: Replace a waitqueue and a counter by
 a completion

This patch simplifies the implementation of the scsiback driver
but does not change its behavior.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Cc: xen-devel@lists.xenproject.org
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/xen/xen-scsiback.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 980f328173054..4cb33a0916a86 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -135,8 +135,7 @@ struct vscsibk_pend {
 
 	struct se_cmd se_cmd;
 
-	atomic_t tmr_complete;
-	wait_queue_head_t tmr_wait;
+	struct completion tmr_done;
 };
 
 #define VSCSI_DEFAULT_SESSION_TAGS	128
@@ -600,7 +599,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	u64 unpacked_lun = pending_req->v2p->lun;
 	int rc, err = FAILED;
 
-	init_waitqueue_head(&pending_req->tmr_wait);
+	init_completion(&pending_req->tmr_done);
 
 	rc = target_submit_tmr(&pending_req->se_cmd, nexus->tvn_se_sess,
 			       &pending_req->sense_buffer[0],
@@ -609,8 +608,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	if (rc)
 		goto err;
 
-	wait_event(pending_req->tmr_wait,
-		   atomic_read(&pending_req->tmr_complete));
+	wait_for_completion(&pending_req->tmr_done);
 
 	err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
 		SUCCESS : FAILED;
@@ -1443,8 +1441,7 @@ static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
 	struct vscsibk_pend *pending_req = container_of(se_cmd,
 				struct vscsibk_pend, se_cmd);
 
-	atomic_set(&pending_req->tmr_complete, 1);
-	wake_up(&pending_req->tmr_wait);
+	complete(&pending_req->tmr_done);
 }
 
 static void scsiback_aborted_task(struct se_cmd *se_cmd)
-- 
GitLab


From af90e84d1f535827f4c593436a807b1efa9f6f2b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:38 -0700
Subject: [PATCH 0818/1958] xen/scsiback: Make TMF processing slightly faster

Target drivers must guarantee that struct se_cmd and struct se_tmr_req
exist as long as target_tmr_work() is in progress. Since the last
access by the LIO core is a call to .check_stop_free() and since the
Xen scsiback .check_stop_free() drops a reference to the TMF, it is
already guaranteed that the struct se_cmd that corresponds to the TMF
exists as long as target_tmr_work() is in progress. Hence change the
second argument of transport_generic_free_cmd() from 1 into 0.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Cc: xen-devel@lists.xenproject.org
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/xen/xen-scsiback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 4cb33a0916a86..7bc88fd43cfc8 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -614,7 +614,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 		SUCCESS : FAILED;
 
 	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
-	transport_generic_free_cmd(&pending_req->se_cmd, 1);
+	transport_generic_free_cmd(&pending_req->se_cmd, 0);
 	return;
 
 err:
-- 
GitLab


From 03db016a1bf2d35f41c08aad2ca4f4f18eeda4be Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 2 Jun 2017 23:33:56 -0700
Subject: [PATCH 0819/1958] iscsi-target: Kill left-over
 iscsi_target_do_cleanup

With commit 25cdda95fda7 in place to address the initial login
PDU asynchronous socket close OOPs, go ahead and kill off the
left-over iscsi_target_do_cleanup() and ->login_cleanup_work.

Reported-by: Mike Christie <mchristi@redhat.com>
Cc: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_nego.c | 24 ------------------------
 include/target/iscsi/iscsi_target_core.h |  1 -
 2 files changed, 25 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 6f88b31242b05..96df63f1f795e 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -655,28 +655,6 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 	iscsit_deaccess_np(np, tpg, tpg_np);
 }
 
-static void iscsi_target_do_cleanup(struct work_struct *work)
-{
-	struct iscsi_conn *conn = container_of(work,
-				struct iscsi_conn, login_cleanup_work.work);
-	struct sock *sk = conn->sock->sk;
-	struct iscsi_login *login = conn->login;
-	struct iscsi_np *np = login->np;
-	struct iscsi_portal_group *tpg = conn->tpg;
-	struct iscsi_tpg_np *tpg_np = conn->tpg_np;
-
-	pr_debug("Entering iscsi_target_do_cleanup\n");
-
-	cancel_delayed_work_sync(&conn->login_work);
-	conn->orig_state_change(sk);
-
-	iscsi_target_restore_sock_callbacks(conn);
-	iscsi_target_login_drop(conn, login);
-	iscsit_deaccess_np(np, tpg, tpg_np);
-
-	pr_debug("iscsi_target_do_cleanup done()\n");
-}
-
 static void iscsi_target_sk_state_change(struct sock *sk)
 {
 	struct iscsi_conn *conn;
@@ -1082,7 +1060,6 @@ int iscsi_target_locate_portal(
 	int sessiontype = 0, ret = 0, tag_num, tag_size;
 
 	INIT_DELAYED_WORK(&conn->login_work, iscsi_target_do_login_rx);
-	INIT_DELAYED_WORK(&conn->login_cleanup_work, iscsi_target_do_cleanup);
 	iscsi_target_set_sock_callbacks(conn);
 
 	login->np = np;
@@ -1331,7 +1308,6 @@ int iscsi_target_start_negotiation(
 
 	if (ret < 0) {
 		cancel_delayed_work_sync(&conn->login_work);
-		cancel_delayed_work_sync(&conn->login_cleanup_work);
 		iscsi_target_restore_sock_callbacks(conn);
 		iscsi_remove_failed_auth_entry(conn);
 	}
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 5f17fb770477b..7948fc68286b3 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -560,7 +560,6 @@ struct iscsi_conn {
 #define LOGIN_FLAGS_INITIAL_PDU		8
 	unsigned long		login_flags;
 	struct delayed_work	login_work;
-	struct delayed_work	login_cleanup_work;
 	struct iscsi_login	*login;
 	struct timer_list	nopin_timer;
 	struct timer_list	nopin_response_timer;
-- 
GitLab


From c00e6220231542c6409780a3e9bfa44be7d94f3a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:28 -0700
Subject: [PATCH 0820/1958] target: Introduce a function that shows the command
 state

Introduce target_show_cmd() and use it where appropriate. If
transport_wait_for_tasks() takes too long, make it show the
state of the command it is waiting for.

(Add missing brackets around multi-line conditions - nab)

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_tmr.c       |  18 +---
 drivers/target/target_core_transport.c | 122 ++++++++++++++++++++++---
 include/target/target_core_fabric.h    |   1 +
 3 files changed, 114 insertions(+), 27 deletions(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 13f47bf4d16b1..e22847bd79b95 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -355,20 +355,10 @@ static void core_tmr_drain_state_list(
 		cmd = list_entry(drain_task_list.next, struct se_cmd, state_list);
 		list_del_init(&cmd->state_list);
 
-		pr_debug("LUN_RESET: %s cmd: %p"
-			" ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d"
-			"cdb: 0x%02x\n",
-			(preempt_and_abort_list) ? "Preempt" : "", cmd,
-			cmd->tag, 0,
-			cmd->se_tfo->get_cmd_state(cmd), cmd->t_state,
-			cmd->t_task_cdb[0]);
-		pr_debug("LUN_RESET: ITT[0x%08llx] - pr_res_key: 0x%016Lx"
-			" -- CMD_T_ACTIVE: %d"
-			" CMD_T_STOP: %d CMD_T_SENT: %d\n",
-			cmd->tag, cmd->pr_res_key,
-			(cmd->transport_state & CMD_T_ACTIVE) != 0,
-			(cmd->transport_state & CMD_T_STOP) != 0,
-			(cmd->transport_state & CMD_T_SENT) != 0);
+		target_show_cmd("LUN_RESET: ", cmd);
+		pr_debug("LUN_RESET: ITT[0x%08llx] - %s pr_res_key: 0x%016Lx\n",
+			 cmd->tag, (preempt_and_abort_list) ? "preempt" : "",
+			 cmd->pr_res_key);
 
 		/*
 		 * If the command may be queued onto a workqueue cancel it now.
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index d601616b9f12c..a5ecec8f39969 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1704,15 +1704,9 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	if (transport_check_aborted_status(cmd, 1))
 		return;
 
-	pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx"
-		" CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]);
-	pr_debug("-----[ i_state: %d t_state: %d sense_reason: %d\n",
-		cmd->se_tfo->get_cmd_state(cmd),
-		cmd->t_state, sense_reason);
-	pr_debug("-----[ CMD_T_ACTIVE: %d CMD_T_STOP: %d CMD_T_SENT: %d\n",
-		(cmd->transport_state & CMD_T_ACTIVE) != 0,
-		(cmd->transport_state & CMD_T_STOP) != 0,
-		(cmd->transport_state & CMD_T_SENT) != 0);
+	pr_debug("-----[ Storage Engine Exception; sense_reason %d\n",
+		 sense_reason);
+	target_show_cmd("-----[ ", cmd);
 
 	/*
 	 * For SAM Task Attribute emulation for failed struct se_cmd
@@ -2705,6 +2699,108 @@ int target_put_sess_cmd(struct se_cmd *se_cmd)
 }
 EXPORT_SYMBOL(target_put_sess_cmd);
 
+static const char *data_dir_name(enum dma_data_direction d)
+{
+	switch (d) {
+	case DMA_BIDIRECTIONAL:	return "BIDI";
+	case DMA_TO_DEVICE:	return "WRITE";
+	case DMA_FROM_DEVICE:	return "READ";
+	case DMA_NONE:		return "NONE";
+	}
+
+	return "(?)";
+}
+
+static const char *cmd_state_name(enum transport_state_table t)
+{
+	switch (t) {
+	case TRANSPORT_NO_STATE:	return "NO_STATE";
+	case TRANSPORT_NEW_CMD:		return "NEW_CMD";
+	case TRANSPORT_WRITE_PENDING:	return "WRITE_PENDING";
+	case TRANSPORT_PROCESSING:	return "PROCESSING";
+	case TRANSPORT_COMPLETE:	return "COMPLETE";
+	case TRANSPORT_ISTATE_PROCESSING:
+					return "ISTATE_PROCESSING";
+	case TRANSPORT_COMPLETE_QF_WP:	return "COMPLETE_QF_WP";
+	case TRANSPORT_COMPLETE_QF_OK:	return "COMPLETE_QF_OK";
+	case TRANSPORT_COMPLETE_QF_ERR:	return "COMPLETE_QF_ERR";
+	}
+
+	return "(?)";
+}
+
+static void target_append_str(char **str, const char *txt)
+{
+	char *prev = *str;
+
+	*str = *str ? kasprintf(GFP_ATOMIC, "%s,%s", *str, txt) :
+		kstrdup(txt, GFP_ATOMIC);
+	kfree(prev);
+}
+
+/*
+ * Convert a transport state bitmask into a string. The caller is
+ * responsible for freeing the returned pointer.
+ */
+static char *target_ts_to_str(u32 ts)
+{
+	char *str = NULL;
+
+	if (ts & CMD_T_ABORTED)
+		target_append_str(&str, "aborted");
+	if (ts & CMD_T_ACTIVE)
+		target_append_str(&str, "active");
+	if (ts & CMD_T_COMPLETE)
+		target_append_str(&str, "complete");
+	if (ts & CMD_T_SENT)
+		target_append_str(&str, "sent");
+	if (ts & CMD_T_STOP)
+		target_append_str(&str, "stop");
+	if (ts & CMD_T_FABRIC_STOP)
+		target_append_str(&str, "fabric_stop");
+
+	return str;
+}
+
+static const char *target_tmf_name(enum tcm_tmreq_table tmf)
+{
+	switch (tmf) {
+	case TMR_ABORT_TASK:		return "ABORT_TASK";
+	case TMR_ABORT_TASK_SET:	return "ABORT_TASK_SET";
+	case TMR_CLEAR_ACA:		return "CLEAR_ACA";
+	case TMR_CLEAR_TASK_SET:	return "CLEAR_TASK_SET";
+	case TMR_LUN_RESET:		return "LUN_RESET";
+	case TMR_TARGET_WARM_RESET:	return "TARGET_WARM_RESET";
+	case TMR_TARGET_COLD_RESET:	return "TARGET_COLD_RESET";
+	case TMR_UNKNOWN:		break;
+	}
+	return "(?)";
+}
+
+void target_show_cmd(const char *pfx, struct se_cmd *cmd)
+{
+	char *ts_str = target_ts_to_str(cmd->transport_state);
+	const u8 *cdb = cmd->t_task_cdb;
+	struct se_tmr_req *tmf = cmd->se_tmr_req;
+
+	if (!(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
+		pr_debug("%scmd %#02x:%#02x with tag %#llx dir %s i_state %d t_state %s len %d refcnt %d transport_state %s\n",
+			 pfx, cdb[0], cdb[1], cmd->tag,
+			 data_dir_name(cmd->data_direction),
+			 cmd->se_tfo->get_cmd_state(cmd),
+			 cmd_state_name(cmd->t_state), cmd->data_length,
+			 kref_read(&cmd->cmd_kref), ts_str);
+	} else {
+		pr_debug("%stmf %s with tag %#llx ref_task_tag %#llx i_state %d t_state %s refcnt %d transport_state %s\n",
+			 pfx, target_tmf_name(tmf->function), cmd->tag,
+			 tmf->ref_task_tag, cmd->se_tfo->get_cmd_state(cmd),
+			 cmd_state_name(cmd->t_state),
+			 kref_read(&cmd->cmd_kref), ts_str);
+	}
+	kfree(ts_str);
+}
+EXPORT_SYMBOL(target_show_cmd);
+
 /* target_sess_cmd_list_set_waiting - Flag all commands in
  *         sess_cmd_list to complete cmd_wait_comp.  Set
  *         sess_tearing_down so no more commands are queued.
@@ -2849,13 +2945,13 @@ __transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop,
 
 	cmd->transport_state |= CMD_T_STOP;
 
-	pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d,"
-		 " t_state: %d, CMD_T_STOP\n", cmd, cmd->tag,
-		 cmd->se_tfo->get_cmd_state(cmd), cmd->t_state);
+	target_show_cmd("wait_for_tasks: Stopping ", cmd);
 
 	spin_unlock_irqrestore(&cmd->t_state_lock, *flags);
 
-	wait_for_completion(&cmd->t_transport_stop_comp);
+	while (!wait_for_completion_timeout(&cmd->t_transport_stop_comp,
+					    180 * HZ))
+		target_show_cmd("wait for tasks: ", cmd);
 
 	spin_lock_irqsave(&cmd->t_state_lock, *flags);
 	cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP);
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index d7dd1427fe0de..33d2e3e5773cf 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -160,6 +160,7 @@ int	target_get_sess_cmd(struct se_cmd *, bool);
 int	target_put_sess_cmd(struct se_cmd *);
 void	target_sess_cmd_list_set_waiting(struct se_session *);
 void	target_wait_for_sess_cmds(struct se_session *);
+void	target_show_cmd(const char *pfx, struct se_cmd *cmd);
 
 int	core_alua_check_nonop_delay(struct se_cmd *);
 
-- 
GitLab


From f33f79f3d0e5caf04dd889cd7cf636261970f009 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Fri, 7 Jul 2017 12:08:29 +0800
Subject: [PATCH 0821/1958] ALSA: hda/realtek - change the location for one of
 two front microphones

On this Lenovo machine, there are two front mics, and both of them are
assigned the same name "Mic", but pulseaudio can't support two mics
with the same name, as a workaround, we change the location for one of
them, then the driver will assign "Front Mic" and "Mic" for them.

Cc: stable@vger.kernel.org
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index cd6987b5c6d9d..e2116d2ea529c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5179,6 +5179,7 @@ enum {
 	ALC233_FIXUP_ASUS_MIC_NO_PRESENCE,
 	ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE,
 	ALC233_FIXUP_LENOVO_MULTI_CODECS,
+	ALC294_FIXUP_LENOVO_MIC_LOCATION,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5962,6 +5963,18 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc233_alc662_fixup_lenovo_dual_codecs,
 	},
+	[ALC294_FIXUP_LENOVO_MIC_LOCATION] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			/* Change the mic location from front to right, otherwise there are
+			   two front mics with the same name, pulseaudio can't handle them.
+			   This is just a temporary workaround, after applying this fixup,
+			   there will be one "Front Mic" and one "Mic" in this machine.
+			 */
+			{ 0x1a, 0x04a19040 },
+			{ }
+		},
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6143,6 +6156,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+	SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
-- 
GitLab


From 4d3895d5ea43cf40fd707692263c6f0988fe8d70 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:39 -0700
Subject: [PATCH 0822/1958] target/tcm_loop: Merge struct tcm_loop_cmd and
 struct tcm_loop_tmr

This patch simplifies the tcm_loop implementation but does not
change any functionality.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/loopback/tcm_loop.c | 22 ++++++++--------------
 drivers/target/loopback/tcm_loop.h |  7 ++-----
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 5091b31b3e56b..79776b447b15c 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -221,7 +221,6 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 	struct se_portal_group *se_tpg;
 	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_cmd *tl_cmd = NULL;
-	struct tcm_loop_tmr *tl_tmr = NULL;
 	int ret = TMR_FUNCTION_FAILED, rc;
 
 	/*
@@ -240,12 +239,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 		return ret;
 	}
 
-	tl_tmr = kzalloc(sizeof(struct tcm_loop_tmr), GFP_KERNEL);
-	if (!tl_tmr) {
-		pr_err("Unable to allocate memory for tl_tmr\n");
-		goto release;
-	}
-	init_waitqueue_head(&tl_tmr->tl_tmr_wait);
+	init_waitqueue_head(&tl_cmd->tl_tmr_wait);
 
 	se_cmd = &tl_cmd->tl_se_cmd;
 	se_tpg = &tl_tpg->tl_se_tpg;
@@ -257,7 +251,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 				DMA_NONE, TCM_SIMPLE_TAG,
 				&tl_cmd->tl_sense_buf[0]);
 
-	rc = core_tmr_alloc_req(se_cmd, tl_tmr, tmr, GFP_KERNEL);
+	rc = core_tmr_alloc_req(se_cmd, NULL, tmr, GFP_KERNEL);
 	if (rc < 0)
 		goto release;
 
@@ -276,7 +270,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 	 * tcm_loop_queue_tm_rsp() to wake us up.
 	 */
 	transport_generic_handle_tmr(se_cmd);
-	wait_event(tl_tmr->tl_tmr_wait, atomic_read(&tl_tmr->tmr_complete));
+	wait_event(tl_cmd->tl_tmr_wait, atomic_read(&tl_cmd->tmr_complete));
 	/*
 	 * The TMR LUN_RESET has completed, check the response status and
 	 * then release allocations.
@@ -287,7 +281,6 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 		transport_generic_free_cmd(se_cmd, 1);
 	else
 		kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
-	kfree(tl_tmr);
 	return ret;
 }
 
@@ -669,14 +662,15 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd)
 
 static void tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd)
 {
-	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
-	struct tcm_loop_tmr *tl_tmr = se_tmr->fabric_tmr_ptr;
+	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
+				struct tcm_loop_cmd, tl_se_cmd);
+
 	/*
 	 * The SCSI EH thread will be sleeping on se_tmr->tl_tmr_wait, go ahead
 	 * and wake up the wait_queue_head_t in tcm_loop_device_reset()
 	 */
-	atomic_set(&tl_tmr->tmr_complete, 1);
-	wake_up(&tl_tmr->tl_tmr_wait);
+	atomic_set(&tl_cmd->tmr_complete, 1);
+	wake_up(&tl_cmd->tl_tmr_wait);
 }
 
 static void tcm_loop_aborted_task(struct se_cmd *se_cmd)
diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index a8a230b4e6b53..21340781568be 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h
@@ -16,13 +16,10 @@ struct tcm_loop_cmd {
 	/* The TCM I/O descriptor that is accessed via container_of() */
 	struct se_cmd tl_se_cmd;
 	struct work_struct work;
-	/* Sense buffer that will be mapped into outgoing status */
-	unsigned char tl_sense_buf[TRANSPORT_SENSE_BUFFER];
-};
-
-struct tcm_loop_tmr {
 	atomic_t tmr_complete;
 	wait_queue_head_t tl_tmr_wait;
+	/* Sense buffer that will be mapped into outgoing status */
+	unsigned char tl_sense_buf[TRANSPORT_SENSE_BUFFER];
 };
 
 struct tcm_loop_nexus {
-- 
GitLab


From d17203c41185a05ecd4d1fc647f16b17ab1b27ae Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:40 -0700
Subject: [PATCH 0823/1958] target/tcm_loop: Replace a waitqueue and a counter
 by a completion

This patch simplifies the implementation of the tcm_loop driver
but does not change its behavior.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/loopback/tcm_loop.c | 12 ++++--------
 drivers/target/loopback/tcm_loop.h |  3 +--
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 79776b447b15c..27f9127471134 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -239,7 +239,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 		return ret;
 	}
 
-	init_waitqueue_head(&tl_cmd->tl_tmr_wait);
+	init_completion(&tl_cmd->tmr_done);
 
 	se_cmd = &tl_cmd->tl_se_cmd;
 	se_tpg = &tl_tpg->tl_se_tpg;
@@ -270,7 +270,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 	 * tcm_loop_queue_tm_rsp() to wake us up.
 	 */
 	transport_generic_handle_tmr(se_cmd);
-	wait_event(tl_cmd->tl_tmr_wait, atomic_read(&tl_cmd->tmr_complete));
+	wait_for_completion(&tl_cmd->tmr_done);
 	/*
 	 * The TMR LUN_RESET has completed, check the response status and
 	 * then release allocations.
@@ -665,12 +665,8 @@ static void tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd)
 	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
 				struct tcm_loop_cmd, tl_se_cmd);
 
-	/*
-	 * The SCSI EH thread will be sleeping on se_tmr->tl_tmr_wait, go ahead
-	 * and wake up the wait_queue_head_t in tcm_loop_device_reset()
-	 */
-	atomic_set(&tl_cmd->tmr_complete, 1);
-	wake_up(&tl_cmd->tl_tmr_wait);
+	/* Wake up tcm_loop_issue_tmr(). */
+	complete(&tl_cmd->tmr_done);
 }
 
 static void tcm_loop_aborted_task(struct se_cmd *se_cmd)
diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index 21340781568be..3acc43c05117a 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h
@@ -16,8 +16,7 @@ struct tcm_loop_cmd {
 	/* The TCM I/O descriptor that is accessed via container_of() */
 	struct se_cmd tl_se_cmd;
 	struct work_struct work;
-	atomic_t tmr_complete;
-	wait_queue_head_t tl_tmr_wait;
+	struct completion tmr_done;
 	/* Sense buffer that will be mapped into outgoing status */
 	unsigned char tl_sense_buf[TRANSPORT_SENSE_BUFFER];
 };
-- 
GitLab


From 75f141aaf48e13812b4fee914a66f6fce28b543f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:41 -0700
Subject: [PATCH 0824/1958] target/tcm_loop: Use target_submit_tmr() instead of
 open-coding this function

Use target_submit_tmr() instead of open-coding this function. The
only functional change is that TMFs are now added to sess_cmd_list,
something the current code does not do. This behavior change is a
bug fix because it makes LUN RESETs wait for other TMFs that are in
progress for the same LUN.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/loopback/tcm_loop.c | 32 +++---------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 27f9127471134..8cf556b4c5ca7 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -218,7 +218,6 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 {
 	struct se_cmd *se_cmd = NULL;
 	struct se_session *se_sess;
-	struct se_portal_group *se_tpg;
 	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_cmd *tl_cmd = NULL;
 	int ret = TMR_FUNCTION_FAILED, rc;
@@ -242,40 +241,15 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 	init_completion(&tl_cmd->tmr_done);
 
 	se_cmd = &tl_cmd->tl_se_cmd;
-	se_tpg = &tl_tpg->tl_se_tpg;
 	se_sess = tl_tpg->tl_nexus->se_sess;
-	/*
-	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
-	 */
-	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, 0,
-				DMA_NONE, TCM_SIMPLE_TAG,
-				&tl_cmd->tl_sense_buf[0]);
 
-	rc = core_tmr_alloc_req(se_cmd, NULL, tmr, GFP_KERNEL);
+	rc = target_submit_tmr(se_cmd, se_sess, tl_cmd->tl_sense_buf, lun,
+			       NULL, tmr, GFP_KERNEL, task, 0 /*flags*/);
 	if (rc < 0)
 		goto release;
-
-	if (tmr == TMR_ABORT_TASK)
-		se_cmd->se_tmr_req->ref_task_tag = task;
-
-	/*
-	 * Locate the underlying TCM struct se_lun
-	 */
-	if (transport_lookup_tmr_lun(se_cmd, lun) < 0) {
-		ret = TMR_LUN_DOES_NOT_EXIST;
-		goto release;
-	}
-	/*
-	 * Queue the TMR to TCM Core and sleep waiting for
-	 * tcm_loop_queue_tm_rsp() to wake us up.
-	 */
-	transport_generic_handle_tmr(se_cmd);
 	wait_for_completion(&tl_cmd->tmr_done);
-	/*
-	 * The TMR LUN_RESET has completed, check the response status and
-	 * then release allocations.
-	 */
 	ret = se_cmd->se_tmr_req->response;
+
 release:
 	if (se_cmd)
 		transport_generic_free_cmd(se_cmd, 1);
-- 
GitLab


From 4c1f0e65397f4e5768b955c32489d5b4b6b92a90 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:42 -0700
Subject: [PATCH 0825/1958] target/tcm_loop: Make TMF processing slightly
 faster

Target drivers must guarantee that struct se_cmd and struct se_tmr_req
exist as long as target_tmr_work() is in progress. This is why the
tcm_loop driver today passes 1 as second argument to
transport_generic_free_cmd() from inside the TMF code. Instead of
making the TMF code wait, make the TMF code obtain two references
(SCF_ACK_KREF) and drop one reference from inside the .check_stop_free()
callback.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/loopback/tcm_loop.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 8cf556b4c5ca7..b6a913e38b301 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -51,19 +51,7 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd);
  */
 static int tcm_loop_check_stop_free(struct se_cmd *se_cmd)
 {
-	/*
-	 * Do not release struct se_cmd's containing a valid TMR
-	 * pointer.  These will be released directly in tcm_loop_device_reset()
-	 * with transport_generic_free_cmd().
-	 */
-	if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
-		return 0;
-	/*
-	 * Release the struct se_cmd, which will make a callback to release
-	 * struct tcm_loop_cmd * in tcm_loop_deallocate_core_cmd()
-	 */
-	transport_generic_free_cmd(se_cmd, 0);
-	return 1;
+	return transport_generic_free_cmd(se_cmd, 0);
 }
 
 static void tcm_loop_release_cmd(struct se_cmd *se_cmd)
@@ -244,18 +232,23 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 	se_sess = tl_tpg->tl_nexus->se_sess;
 
 	rc = target_submit_tmr(se_cmd, se_sess, tl_cmd->tl_sense_buf, lun,
-			       NULL, tmr, GFP_KERNEL, task, 0 /*flags*/);
+			       NULL, tmr, GFP_KERNEL, task,
+			       TARGET_SCF_ACK_KREF);
 	if (rc < 0)
 		goto release;
 	wait_for_completion(&tl_cmd->tmr_done);
 	ret = se_cmd->se_tmr_req->response;
+	target_put_sess_cmd(se_cmd);
+
+out:
+	return ret;
 
 release:
 	if (se_cmd)
-		transport_generic_free_cmd(se_cmd, 1);
+		transport_generic_free_cmd(se_cmd, 0);
 	else
 		kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
-	return ret;
+	goto out;
 }
 
 static int tcm_loop_abort_task(struct scsi_cmnd *sc)
-- 
GitLab


From 4412a67131a037fa1d032bcd50270e9d336a775d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:43 -0700
Subject: [PATCH 0826/1958] target/iscsi: Remove second argument of
 __iscsit_free_cmd()

Initialize .data_direction to DMA_NONE in iscsit_allocate_cmd()
such that the second argument of __iscsit_free_cmd() can be left
out. Note: this patch causes the first part of __iscsit_free_cmd()
no longer to be skipped for TMFs. That's fine since no data
segments are associated with TMFs.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c      |  4 +---
 drivers/target/iscsi/iscsi_target_util.c | 29 +++++++++++-------------
 drivers/target/iscsi/iscsi_target_util.h |  2 +-
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 3fdca2cdd8da9..735d4ff2d0cec 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -488,15 +488,13 @@ EXPORT_SYMBOL(iscsit_queue_rsp);
 
 void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
-	bool scsi_cmd = (cmd->iscsi_opcode == ISCSI_OP_SCSI_CMD);
-
 	spin_lock_bh(&conn->cmd_lock);
 	if (!list_empty(&cmd->i_conn_node) &&
 	    !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP))
 		list_del_init(&cmd->i_conn_node);
 	spin_unlock_bh(&conn->cmd_lock);
 
-	__iscsit_free_cmd(cmd, scsi_cmd, true);
+	__iscsit_free_cmd(cmd, true);
 }
 EXPORT_SYMBOL(iscsit_aborted_task);
 
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 7d3e2fcc26a0d..41b9e7cc08b86 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -167,6 +167,7 @@ struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, int state)
 
 	cmd->se_cmd.map_tag = tag;
 	cmd->conn = conn;
+	cmd->data_direction = DMA_NONE;
 	INIT_LIST_HEAD(&cmd->i_conn_node);
 	INIT_LIST_HEAD(&cmd->datain_list);
 	INIT_LIST_HEAD(&cmd->cmd_r2t_list);
@@ -711,19 +712,16 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd)
 }
 EXPORT_SYMBOL(iscsit_release_cmd);
 
-void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd,
-		       bool check_queues)
+void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool check_queues)
 {
 	struct iscsi_conn *conn = cmd->conn;
 
-	if (scsi_cmd) {
-		if (cmd->data_direction == DMA_TO_DEVICE) {
-			iscsit_stop_dataout_timer(cmd);
-			iscsit_free_r2ts_from_list(cmd);
-		}
-		if (cmd->data_direction == DMA_FROM_DEVICE)
-			iscsit_free_all_datain_reqs(cmd);
+	if (cmd->data_direction == DMA_TO_DEVICE) {
+		iscsit_stop_dataout_timer(cmd);
+		iscsit_free_r2ts_from_list(cmd);
 	}
+	if (cmd->data_direction == DMA_FROM_DEVICE)
+		iscsit_free_all_datain_reqs(cmd);
 
 	if (conn && check_queues) {
 		iscsit_remove_cmd_from_immediate_queue(cmd, conn);
@@ -738,23 +736,22 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
 {
 	struct se_cmd *se_cmd = NULL;
 	int rc;
-	bool op_scsi = false;
+
 	/*
 	 * Determine if a struct se_cmd is associated with
 	 * this struct iscsi_cmd.
 	 */
 	switch (cmd->iscsi_opcode) {
 	case ISCSI_OP_SCSI_CMD:
-		op_scsi = true;
 		/*
 		 * Fallthrough
 		 */
 	case ISCSI_OP_SCSI_TMFUNC:
 		se_cmd = &cmd->se_cmd;
-		__iscsit_free_cmd(cmd, op_scsi, shutdown);
+		__iscsit_free_cmd(cmd, shutdown);
 		rc = transport_generic_free_cmd(se_cmd, shutdown);
 		if (!rc && shutdown && se_cmd->se_sess) {
-			__iscsit_free_cmd(cmd, op_scsi, shutdown);
+			__iscsit_free_cmd(cmd, shutdown);
 			target_put_sess_cmd(se_cmd);
 		}
 		break;
@@ -766,18 +763,18 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
 		 */
 		if (cmd->se_cmd.se_tfo != NULL) {
 			se_cmd = &cmd->se_cmd;
-			__iscsit_free_cmd(cmd, true, shutdown);
+			__iscsit_free_cmd(cmd, shutdown);
 
 			rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown);
 			if (!rc && shutdown && se_cmd->se_sess) {
-				__iscsit_free_cmd(cmd, true, shutdown);
+				__iscsit_free_cmd(cmd, shutdown);
 				target_put_sess_cmd(se_cmd);
 			}
 			break;
 		}
 		/* Fall-through */
 	default:
-		__iscsit_free_cmd(cmd, false, shutdown);
+		__iscsit_free_cmd(cmd, shutdown);
 		iscsit_release_cmd(cmd);
 		break;
 	}
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
index 9e4197af8708e..425160565d0c3 100644
--- a/drivers/target/iscsi/iscsi_target_util.h
+++ b/drivers/target/iscsi/iscsi_target_util.h
@@ -37,7 +37,7 @@ extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_co
 extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *);
 extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *);
 extern void iscsit_release_cmd(struct iscsi_cmd *);
-extern void __iscsit_free_cmd(struct iscsi_cmd *, bool, bool);
+extern void __iscsit_free_cmd(struct iscsi_cmd *, bool);
 extern void iscsit_free_cmd(struct iscsi_cmd *, bool);
 extern int iscsit_check_session_usage_count(struct iscsi_session *);
 extern void iscsit_dec_session_usage_count(struct iscsi_session *);
-- 
GitLab


From d1c26857cdec3e3bdb5cf7179411f6ce8cc0834c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:44 -0700
Subject: [PATCH 0827/1958] target/iscsi: Simplify iscsit_free_cmd()

Since .se_tfo is only set if a command has been submitted to
the LIO core, check .se_tfo instead of .iscsi_opcode. Since
__iscsit_free_cmd() only affects SCSI commands but not TMFs,
calling that function for TMFs does not change behavior. This
patch does not change the behavior of iscsit_free_cmd().

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_util.c | 39 +++---------------------
 1 file changed, 4 insertions(+), 35 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 41b9e7cc08b86..1e36f83b59616 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -734,49 +734,18 @@ void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool check_queues)
 
 void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
 {
-	struct se_cmd *se_cmd = NULL;
+	struct se_cmd *se_cmd = cmd->se_cmd.se_tfo ? &cmd->se_cmd : NULL;
 	int rc;
 
-	/*
-	 * Determine if a struct se_cmd is associated with
-	 * this struct iscsi_cmd.
-	 */
-	switch (cmd->iscsi_opcode) {
-	case ISCSI_OP_SCSI_CMD:
-		/*
-		 * Fallthrough
-		 */
-	case ISCSI_OP_SCSI_TMFUNC:
-		se_cmd = &cmd->se_cmd;
-		__iscsit_free_cmd(cmd, shutdown);
+	__iscsit_free_cmd(cmd, shutdown);
+	if (se_cmd) {
 		rc = transport_generic_free_cmd(se_cmd, shutdown);
 		if (!rc && shutdown && se_cmd->se_sess) {
 			__iscsit_free_cmd(cmd, shutdown);
 			target_put_sess_cmd(se_cmd);
 		}
-		break;
-	case ISCSI_OP_REJECT:
-		/*
-		 * Handle special case for REJECT when iscsi_add_reject*() has
-		 * overwritten the original iscsi_opcode assignment, and the
-		 * associated cmd->se_cmd needs to be released.
-		 */
-		if (cmd->se_cmd.se_tfo != NULL) {
-			se_cmd = &cmd->se_cmd;
-			__iscsit_free_cmd(cmd, shutdown);
-
-			rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown);
-			if (!rc && shutdown && se_cmd->se_sess) {
-				__iscsit_free_cmd(cmd, shutdown);
-				target_put_sess_cmd(se_cmd);
-			}
-			break;
-		}
-		/* Fall-through */
-	default:
-		__iscsit_free_cmd(cmd, shutdown);
+	} else {
 		iscsit_release_cmd(cmd);
-		break;
 	}
 }
 EXPORT_SYMBOL(iscsit_free_cmd);
-- 
GitLab


From 8fa4011e0dd9423f1226f2f9769a1cf25c264468 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 23 May 2017 16:48:45 -0700
Subject: [PATCH 0828/1958] target/iscsi: Remove dead code from
 iscsit_process_scsi_cmd()

If an iSCSI command is rejected before iscsit_process_scsi_cmd()
is called, .reject_reason is set but iscsit_process_scsi_cmd() is
not called. This means that the "if (cmd->reject_reason) ..." code
in this function can be removed without changing the behavior of
this function.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 735d4ff2d0cec..74e4975dd1b1e 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1249,12 +1249,8 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	 * execution.  These exceptions are processed in CmdSN order using
 	 * iscsit_check_received_cmdsn() in iscsit_get_immediate_data() below.
 	 */
-	if (cmd->sense_reason) {
-		if (cmd->reject_reason)
-			return 0;
-
+	if (cmd->sense_reason)
 		return 1;
-	}
 	/*
 	 * Call directly into transport_generic_new_cmd() to perform
 	 * the backend memory allocation.
-- 
GitLab


From e9447a46e9fba006ff9b0f4e40a4e38bf2d788db Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 15:45:49 -0500
Subject: [PATCH 0829/1958] ibmvscsis: Use tpgt passed in by user

ibmvscsis always returned 0 for the tpg/tag, since it did not
parse the value passed in by the user.

When functions like ALUA members exports the value, it will
be incorrect because targetcli/rtslib starts the tpg numbering
at 1.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 35710524d059d..522d547d9fea1 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3914,8 +3914,16 @@ static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
 {
 	struct ibmvscsis_tport *tport =
 		container_of(wwn, struct ibmvscsis_tport, tport_wwn);
+	u16 tpgt;
 	int rc;
 
+	if (strstr(name, "tpgt_") != name)
+		return ERR_PTR(-EINVAL);
+	rc = kstrtou16(name + 5, 0, &tpgt);
+	if (rc)
+		return ERR_PTR(rc);
+	tport->tport_tpgt = tpgt;
+
 	tport->releasing = false;
 
 	rc = core_tpg_register(&tport->tport_wwn, &tport->se_tpg,
-- 
GitLab


From 9a8bb60650b3d6994bd19a3200941f029c95a7a0 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 09:28:48 -0500
Subject: [PATCH 0830/1958] tcmu: Support emulate_write_cache

This will enable the toggling of write_cache in tcmu through targetcli-fb

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Reviewed-By: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index beb5f098f32d6..0c797cc69d9eb 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1290,6 +1290,8 @@ static int tcmu_configure_device(struct se_device *dev)
 	/* Other attributes can be configured in userspace */
 	if (!dev->dev_attrib.hw_max_sectors)
 		dev->dev_attrib.hw_max_sectors = 128;
+	if (!dev->dev_attrib.emulate_write_cache)
+		dev->dev_attrib.emulate_write_cache = 0;
 	dev->dev_attrib.hw_queue_depth = 128;
 
 	/*
@@ -1546,6 +1548,32 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag
 }
 CONFIGFS_ATTR(tcmu_, cmd_time_out);
 
+static ssize_t tcmu_emulate_write_cache_show(struct config_item *item,
+					     char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+
+	return snprintf(page, PAGE_SIZE, "%i\n", da->emulate_write_cache);
+}
+
+static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
+					      const char *page, size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+	int val;
+	int ret;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	da->emulate_write_cache = val;
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, emulate_write_cache);
+
 static struct configfs_attribute **tcmu_attrs;
 
 static struct target_backend_ops tcmu_ops = {
@@ -1682,6 +1710,8 @@ static int __init tcmu_module_init(void)
 		tcmu_attrs[i] = passthrough_attrib_attrs[i];
 	}
 	tcmu_attrs[i] = &tcmu_attr_cmd_time_out;
+	i++;
+	tcmu_attrs[i] = &tcmu_attr_emulate_write_cache;
 	tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
 
 	ret = transport_backend_register(&tcmu_ops);
-- 
GitLab


From 1068be7bd4b05ca41a6a8de724f52a9c87861412 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 09:28:49 -0500
Subject: [PATCH 0831/1958] tcmu: Add netlink for device reconfiguration

This gives tcmu the ability to handle events that can cause
reconfiguration, such as resize, path changes, write_cache, etc...

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Reviewed-By: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c     | 12 ++++++++++++
 include/uapi/linux/target_core_user.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 0c797cc69d9eb..ae918222284b7 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1562,6 +1562,7 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 {
 	struct se_dev_attrib *da = container_of(to_config_group(item),
 					struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
 	int val;
 	int ret;
 
@@ -1570,6 +1571,17 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 		return ret;
 
 	da->emulate_write_cache = val;
+
+	/* Check if device has been configured before */
+	if (tcmu_dev_configured(udev)) {
+		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
+					 udev->uio_info.name,
+					 udev->uio_info.uio_dev->minor);
+		if (ret) {
+			pr_err("Unable to reconfigure device\n");
+			return ret;
+		}
+	}
 	return count;
 }
 CONFIGFS_ATTR(tcmu_, emulate_write_cache);
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index af17b4154ef60..403a61faada0e 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -130,6 +130,7 @@ enum tcmu_genl_cmd {
 	TCMU_CMD_UNSPEC,
 	TCMU_CMD_ADDED_DEVICE,
 	TCMU_CMD_REMOVED_DEVICE,
+	TCMU_CMD_RECONFIG_DEVICE,
 	__TCMU_CMD_MAX,
 };
 #define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
-- 
GitLab


From 801fc54d5d943e8a6a6bc26bc94fb9b90938ff68 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 09:28:50 -0500
Subject: [PATCH 0832/1958] tcmu: Make dev_size configurable via userspace

Allow tcmu backstores to be able to set the device size
after it has been configured via set attribute.

Part of support in userspace to support certain backstores
changing device size.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Reviewed-By: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 59 ++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index ae918222284b7..c8c84b71dc91d 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1548,6 +1548,44 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag
 }
 CONFIGFS_ATTR(tcmu_, cmd_time_out);
 
+static ssize_t tcmu_dev_size_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+
+	return snprintf(page, PAGE_SIZE, "%zu\n", udev->dev_size);
+}
+
+static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page,
+				   size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(page, 0, &val);
+	if (ret < 0)
+		return ret;
+	udev->dev_size = val;
+
+	/* Check if device has been configured before */
+	if (tcmu_dev_configured(udev)) {
+		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
+					 udev->uio_info.name,
+					 udev->uio_info.uio_dev->minor);
+		if (ret) {
+			pr_err("Unable to reconfigure device\n");
+			return ret;
+		}
+	}
+
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, dev_size);
+
 static ssize_t tcmu_emulate_write_cache_show(struct config_item *item,
 					     char *page)
 {
@@ -1586,6 +1624,13 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 }
 CONFIGFS_ATTR(tcmu_, emulate_write_cache);
 
+struct configfs_attribute *tcmu_attrib_attrs[] = {
+	&tcmu_attr_cmd_time_out,
+	&tcmu_attr_dev_size,
+	&tcmu_attr_emulate_write_cache,
+	NULL,
+};
+
 static struct configfs_attribute **tcmu_attrs;
 
 static struct target_backend_ops tcmu_ops = {
@@ -1685,7 +1730,7 @@ static int unmap_thread_fn(void *data)
 
 static int __init tcmu_module_init(void)
 {
-	int ret, i, len = 0;
+	int ret, i, k, len = 0;
 
 	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
 
@@ -1710,7 +1755,10 @@ static int __init tcmu_module_init(void)
 	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
 		len += sizeof(struct configfs_attribute *);
 	}
-	len += sizeof(struct configfs_attribute *) * 2;
+	for (i = 0; tcmu_attrib_attrs[i] != NULL; i++) {
+		len += sizeof(struct configfs_attribute *);
+	}
+	len += sizeof(struct configfs_attribute *);
 
 	tcmu_attrs = kzalloc(len, GFP_KERNEL);
 	if (!tcmu_attrs) {
@@ -1721,9 +1769,10 @@ static int __init tcmu_module_init(void)
 	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
 		tcmu_attrs[i] = passthrough_attrib_attrs[i];
 	}
-	tcmu_attrs[i] = &tcmu_attr_cmd_time_out;
-	i++;
-	tcmu_attrs[i] = &tcmu_attr_emulate_write_cache;
+	for (k = 0; tcmu_attrib_attrs[k] != NULL; k++) {
+		tcmu_attrs[i] = tcmu_attrib_attrs[k];
+		i++;
+	}
 	tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
 
 	ret = transport_backend_register(&tcmu_ops);
-- 
GitLab


From ee01825220f01c0befea25f08325962fa9374ee2 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 09:28:51 -0500
Subject: [PATCH 0833/1958] tcmu: Make dev_config configurable

This allows for userspace to change the device path after
it has been created. Thus giving the user the ability to change
the path. The use case for this is to allow for virtual optical
to have media change.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Reviewed-By: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 41 +++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index c8c84b71dc91d..7c64757318952 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1548,6 +1548,46 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag
 }
 CONFIGFS_ATTR(tcmu_, cmd_time_out);
 
+static ssize_t tcmu_dev_path_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+
+	return snprintf(page, PAGE_SIZE, "%s\n", udev->dev_config);
+}
+
+static ssize_t tcmu_dev_path_store(struct config_item *item, const char *page,
+				   size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+	char *copy = NULL;
+	int ret;
+
+	copy = kstrdup(page, GFP_KERNEL);
+	if (!copy) {
+		kfree(copy);
+		return -EINVAL;
+	}
+	strlcpy(udev->dev_config, copy, TCMU_CONFIG_LEN);
+
+	/* Check if device has been configured before */
+	if (tcmu_dev_configured(udev)) {
+		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
+					 udev->uio_info.name,
+					 udev->uio_info.uio_dev->minor);
+		if (ret) {
+			pr_err("Unable to reconfigure device\n");
+			return ret;
+		}
+	}
+
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, dev_path);
+
 static ssize_t tcmu_dev_size_show(struct config_item *item, char *page)
 {
 	struct se_dev_attrib *da = container_of(to_config_group(item),
@@ -1626,6 +1666,7 @@ CONFIGFS_ATTR(tcmu_, emulate_write_cache);
 
 struct configfs_attribute *tcmu_attrib_attrs[] = {
 	&tcmu_attr_cmd_time_out,
+	&tcmu_attr_dev_path,
 	&tcmu_attr_dev_size,
 	&tcmu_attr_emulate_write_cache,
 	NULL,
-- 
GitLab


From 8a45885c1514cdae2ee64b5ac03ffc00a1a8a9d7 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 09:28:52 -0500
Subject: [PATCH 0834/1958] tcmu: Add Type of reconfig into netlink

This patch adds more info about the attribute being changed,
so that usersapce can easily figure out what is happening.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Reviewed-By: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c     | 20 ++++++++++++++------
 include/uapi/linux/target_core_user.h |  8 ++++++++
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 7c64757318952..afc1fd6bacaf1 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1176,7 +1176,8 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	return 0;
 }
 
-static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int minor)
+static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name,
+			      int minor, int type)
 {
 	struct sk_buff *skb;
 	void *msg_header;
@@ -1198,6 +1199,10 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int mino
 	if (ret < 0)
 		goto free_skb;
 
+	ret = nla_put_u32(skb, TCMU_ATTR_TYPE, type);
+	if (ret < 0)
+		goto free_skb;
+
 	genlmsg_end(skb, msg_header);
 
 	ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0,
@@ -1301,7 +1306,7 @@ static int tcmu_configure_device(struct se_device *dev)
 	kref_get(&udev->kref);
 
 	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
-				 udev->uio_info.uio_dev->minor);
+				 udev->uio_info.uio_dev->minor, NO_RECONFIG);
 	if (ret)
 		goto err_netlink;
 
@@ -1383,7 +1388,7 @@ static void tcmu_free_device(struct se_device *dev)
 
 	if (tcmu_dev_configured(udev)) {
 		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
-				   udev->uio_info.uio_dev->minor);
+				   udev->uio_info.uio_dev->minor, NO_RECONFIG);
 
 		uio_unregister_device(&udev->uio_info);
 	}
@@ -1577,7 +1582,8 @@ static ssize_t tcmu_dev_path_store(struct config_item *item, const char *page,
 	if (tcmu_dev_configured(udev)) {
 		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
 					 udev->uio_info.name,
-					 udev->uio_info.uio_dev->minor);
+					 udev->uio_info.uio_dev->minor,
+					 CONFIG_PATH);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
 			return ret;
@@ -1615,7 +1621,8 @@ static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page,
 	if (tcmu_dev_configured(udev)) {
 		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
 					 udev->uio_info.name,
-					 udev->uio_info.uio_dev->minor);
+					 udev->uio_info.uio_dev->minor,
+					 CONFIG_SIZE);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
 			return ret;
@@ -1654,7 +1661,8 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 	if (tcmu_dev_configured(udev)) {
 		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
 					 udev->uio_info.name,
-					 udev->uio_info.uio_dev->minor);
+					 udev->uio_info.uio_dev->minor,
+					 CONFIG_WRITECACHE);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
 			return ret;
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 403a61faada0e..5b00e3500005b 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -139,8 +139,16 @@ enum tcmu_genl_attr {
 	TCMU_ATTR_UNSPEC,
 	TCMU_ATTR_DEVICE,
 	TCMU_ATTR_MINOR,
+	TCMU_ATTR_TYPE,
 	__TCMU_ATTR_MAX,
 };
 #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
 
+enum tcmu_reconfig_types {
+	NO_RECONFIG,
+	CONFIG_PATH,
+	CONFIG_SIZE,
+	CONFIG_WRITECACHE,
+};
+
 #endif
-- 
GitLab


From 1d6ef276594a781686058802996e09c8550fd767 Mon Sep 17 00:00:00 2001
From: Jiang Yi <jiangyilism@gmail.com>
Date: Sun, 25 Jun 2017 12:28:50 -0700
Subject: [PATCH 0835/1958] target: Fix COMPARE_AND_WRITE caw_sem leak during
 se_cmd quiesce

This patch addresses a COMPARE_AND_WRITE se_device->caw_sem leak,
that would be triggered during normal se_cmd shutdown or abort
via __transport_wait_for_tasks().

This would occur because target_complete_cmd() would catch this
early and do complete_all(&cmd->t_transport_stop_comp), but since
target_complete_ok_work() or target_complete_failure_work() are
never called to invoke se_cmd->transport_complete_callback(),
the COMPARE_AND_WRITE specific callbacks never release caw_sem.

To address this special case, go ahead and release caw_sem
directly from target_complete_cmd().

(Remove '&& success' from check, to release caw_sem regardless
 of scsi_status - nab)

Signed-off-by: Jiang Yi <jiangyilism@gmail.com>
Cc: <stable@vger.kernel.org> # 3.14+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index a5ecec8f39969..28de421e3220c 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -730,6 +730,15 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 	if (cmd->transport_state & CMD_T_ABORTED ||
 	    cmd->transport_state & CMD_T_STOP) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+		/*
+		 * If COMPARE_AND_WRITE was stopped by __transport_wait_for_tasks(),
+		 * release se_device->caw_sem obtained by sbc_compare_and_write()
+		 * since target_complete_ok_work() or target_complete_failure_work()
+		 * won't be called to invoke the normal CAW completion callbacks.
+		 */
+		if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) {
+			up(&dev->caw_sem);
+		}
 		complete_all(&cmd->t_transport_stop_comp);
 		return;
 	} else if (!success) {
-- 
GitLab


From 07932a023af3cd728390ffdaeffb78e357123181 Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Thu, 15 Jun 2017 15:05:31 +0800
Subject: [PATCH 0836/1958] tcmu: Fix module removal due to stuck unmap_thread
 thread again

Because the unmap code just after the schdule() returned may take
a long time and if the kthread_stop() is fired just when in this
routine, the module removal maybe stuck too.

Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index afc1fd6bacaf1..a60a66d61146d 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1707,7 +1707,7 @@ static int unmap_thread_fn(void *data)
 	struct page *page;
 	int i;
 
-	while (1) {
+	while (!kthread_should_stop()) {
 		DEFINE_WAIT(__wait);
 
 		prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE);
-- 
GitLab


From 5821783bcaa94f4a00d65effb3fb8937b08cb2ae Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 13 Jun 2017 14:29:09 +0100
Subject: [PATCH 0837/1958] tcmu: make array tcmu_attrib_attrs static const

The array tcmu_attrib_attrs does not need to be in global scope, so make
it static.

Cleans up sparse warning:
"symbol 'tcmu_attrib_attrs' was not declared. Should it be static?"

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index a60a66d61146d..6322269d9e85e 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1672,7 +1672,7 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 }
 CONFIGFS_ATTR(tcmu_, emulate_write_cache);
 
-struct configfs_attribute *tcmu_attrib_attrs[] = {
+static struct configfs_attribute *tcmu_attrib_attrs[] = {
 	&tcmu_attr_cmd_time_out,
 	&tcmu_attr_dev_path,
 	&tcmu_attr_dev_size,
-- 
GitLab


From fce50a2fa4e9c6e103915c351b6d4a98661341d6 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 29 Jun 2017 22:21:31 -0700
Subject: [PATCH 0838/1958] iser-target: Avoid isert_conn->cm_id dereference in
 isert_login_recv_done

This patch fixes a NULL pointer dereference in isert_login_recv_done()
of isert_conn->cm_id due to isert_cma_handler() -> isert_connect_error()
resetting isert_conn->cm_id = NULL during a failed login attempt.

As per Sagi, we will always see the completion of all recv wrs posted
on the qp (given that we assigned a ->done handler), this is a FLUSH
error completion, we just don't get to verify that because we deref
NULL before.

The issue here, was the assumption that dereferencing the connection
cm_id is always safe, which is not true since:

    commit 4a579da2586bd3b79b025947ea24ede2bbfede62
    Author: Sagi Grimberg <sagig@mellanox.com>
    Date:   Sun Mar 29 15:52:04 2015 +0300

         iser-target: Fix possible deadlock in RDMA_CM connection error

As I see it, we have a direct reference to the isert_device from
isert_conn which is the one-liner fix that we actually need like
we do in isert_rdma_read_done() and isert_rdma_write_done().

Reported-by: Andrea Righi <righi.andrea@gmail.com>
Tested-by: Andrea Righi <righi.andrea@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index fcbed35e95a82..0e662656ef428 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -1452,7 +1452,7 @@ static void
 isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct isert_conn *isert_conn = wc->qp->qp_context;
-	struct ib_device *ib_dev = isert_conn->cm_id->device;
+	struct ib_device *ib_dev = isert_conn->device->ib_device;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		isert_print_wc(wc, "login recv");
-- 
GitLab


From 5f572526a18418258bfa137e3353656c25439500 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 30 Jun 2017 00:08:13 -0700
Subject: [PATCH 0839/1958] qla2xxx: Fix incorrect tcm_qla2xxx_free_cmd use
 during TMR ABORT

This patch drops two incorrect usages of tcm_qla2xxx_free_cmd()
during TMR ABORT within tcm_qla2xxx_handle_data_work() and
tcm_qla2xxx_aborted_task(), which where attempting to dispatch
into workqueue context to do tcm_qla2xxx_complete_free() and
subsequently invoke transport_generic_free_cmd().

This is incorrect because during TMR ABORT target-core will
drop the outstanding se_cmd->cmd_kref references once it has
quiesced the se_cmd via transport_wait_for_tasks(), and in
the case of qla2xxx it should not attempt to do it's own
transport_generic_free_cmd() once the abort has occured.

As reported by Pascal, this was originally manifesting as a
BUG_ON(cmd->cmd_in_wq) in qlt_free_cmd() during TMR ABORT,
with a LIO backend that had sufficently high enough WRITE
latency to trigger a host side TMR ABORT_TASK.

In addition, for the case in tcm_qla2xxx_write_pending_status()
and tcm_qla2xxx_handle_data_work() that waits for outstanding
FCP WRITE data transfer to complete before preceeding with a
TMR ABORT, avoid se_cmd->t_transport_stop_comp that is already
used by transport_wait_for_tasks() and use a qla2xxx internal
struct completion instead.

Reported-by: Pascal de Bruijn <p.debruijn@unilogic.nl>
Tested-by: Pascal de Bruijn <p.debruijn@unilogic.nl>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Cc: Quinn Tran <quinn.tran@cavium.com>
Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c  |  4 ++++
 drivers/scsi/qla2xxx/qla_target.h  |  1 +
 drivers/scsi/qla2xxx/tcm_qla2xxx.c | 36 +++---------------------------
 3 files changed, 8 insertions(+), 33 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 401e245477d4c..8f8ece9008011 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -4079,6 +4079,8 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
 	cmd = &((struct qla_tgt_cmd *)se_sess->sess_cmd_map)[tag];
 	memset(cmd, 0, sizeof(struct qla_tgt_cmd));
 
+	init_completion(&cmd->write_pending_abort_comp);
+
 	memcpy(&cmd->atio, atio, sizeof(*atio));
 	cmd->state = QLA_TGT_STATE_NEW;
 	cmd->tgt = vha->vha_tgt.qla_tgt;
@@ -5083,6 +5085,8 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
 
 	qlt_incr_num_pend_cmds(vha);
 	INIT_LIST_HEAD(&cmd->cmd_list);
+	init_completion(&cmd->write_pending_abort_comp);
+
 	memcpy(&cmd->atio, atio, sizeof(*atio));
 
 	cmd->tgt = vha->vha_tgt.qla_tgt;
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index d64420251194e..939e93c5d3aef 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -868,6 +868,7 @@ struct qla_tgt_cmd {
 	unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER];
 
 	spinlock_t cmd_lock;
+	struct completion write_pending_abort_comp;
 	/* to save extra sess dereferences */
 	unsigned int conf_compl_supported:1;
 	unsigned int sg_mapped:1;
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 75aeb9fdfd06e..8c1bf9b14bb2f 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -415,6 +415,7 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd)
 
 static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd)
 {
+	struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd);
 	unsigned long flags;
 	/*
 	 * Check for WRITE_PENDING status to determine if we need to wait for
@@ -424,8 +425,7 @@ static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd)
 	if (se_cmd->t_state == TRANSPORT_WRITE_PENDING ||
 	    se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) {
 		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
-		wait_for_completion_timeout(&se_cmd->t_transport_stop_comp,
-						50);
+		wait_for_completion(&cmd->write_pending_abort_comp);
 		return 0;
 	}
 	spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
@@ -501,7 +501,6 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
 static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 {
 	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
-	unsigned long flags;
 
 	/*
 	 * Ensure that the complete FCP WRITE payload has been received.
@@ -509,17 +508,6 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 	 */
 	cmd->cmd_in_wq = 0;
 
-	spin_lock_irqsave(&cmd->cmd_lock, flags);
-	cmd->data_work = 1;
-	if (cmd->aborted) {
-		cmd->data_work_free = 1;
-		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
-		tcm_qla2xxx_free_cmd(cmd);
-		return;
-	}
-	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
 	cmd->vha->tgt_counters.qla_core_ret_ctio++;
 	if (!cmd->write_data_transferred) {
 		/*
@@ -527,7 +515,7 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 		 * waiting upon completion in tcm_qla2xxx_write_pending_status()
 		 */
 		if (cmd->se_cmd.transport_state & CMD_T_ABORTED) {
-			complete(&cmd->se_cmd.t_transport_stop_comp);
+			complete(&cmd->write_pending_abort_comp);
 			return;
 		}
 
@@ -753,31 +741,13 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd)
 	qlt_xmit_tm_rsp(mcmd);
 }
 
-#define DATA_WORK_NOT_FREE(_cmd) (_cmd->data_work && !_cmd->data_work_free)
 static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd)
 {
 	struct qla_tgt_cmd *cmd = container_of(se_cmd,
 				struct qla_tgt_cmd, se_cmd);
-	unsigned long flags;
 
 	if (qlt_abort_cmd(cmd))
 		return;
-
-	spin_lock_irqsave(&cmd->cmd_lock, flags);
-	if ((cmd->state == QLA_TGT_STATE_NEW)||
-	    ((cmd->state == QLA_TGT_STATE_DATA_IN) &&
-		DATA_WORK_NOT_FREE(cmd))) {
-		cmd->data_work_free = 1;
-		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-		/*
-		 * cmd has not reached fw, Use this trigger to free it.
-		 */
-		tcm_qla2xxx_free_cmd(cmd);
-		return;
-	}
-	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-	return;
-
 }
 
 static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *,
-- 
GitLab


From 2d76443e02f260d7a5bd0ede1851ae5534f0c68d Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Mon, 12 Jun 2017 01:34:28 -0500
Subject: [PATCH 0840/1958] tcmu: reconfigure netlink attr changes

1. TCMU_ATTR_TYPE is too generic when it describes only the
reconfiguration type, so rename to TCMU_ATTR_RECONFIG_TYPE.

2. Only return the reconfig type when it is a
TCMU_CMD_RECONFIG_DEVICE command.

3. CONFIG_* type is not needed. We can pass the value along with an
ATTR to userspace, so it does not need to read sysfs/configfs.

4. Fix leak in tcmu_dev_path_store and rename to dev_config to
reflect it is more than just a path that can be changed.

6. Don't update kernel struct value if netlink sending fails.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c     | 73 ++++++++++++++++-----------
 include/uapi/linux/target_core_user.h | 12 ++---
 2 files changed, 48 insertions(+), 37 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 6322269d9e85e..ca5b081295db8 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1177,7 +1177,8 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 }
 
 static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name,
-			      int minor, int type)
+			      int minor, int reconfig_attr,
+			      const void *reconfig_data)
 {
 	struct sk_buff *skb;
 	void *msg_header;
@@ -1199,9 +1200,27 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name,
 	if (ret < 0)
 		goto free_skb;
 
-	ret = nla_put_u32(skb, TCMU_ATTR_TYPE, type);
-	if (ret < 0)
-		goto free_skb;
+	if (cmd == TCMU_CMD_RECONFIG_DEVICE) {
+		switch (reconfig_attr) {
+		case TCMU_ATTR_DEV_CFG:
+			ret = nla_put_string(skb, reconfig_attr, reconfig_data);
+			break;
+		case TCMU_ATTR_DEV_SIZE:
+			ret = nla_put_u64_64bit(skb, reconfig_attr,
+						*((u64 *)reconfig_data),
+						TCMU_ATTR_PAD);
+			break;
+		case TCMU_ATTR_WRITECACHE:
+			ret = nla_put_u8(skb, reconfig_attr,
+					  *((u8 *)reconfig_data));
+			break;
+		default:
+			BUG();
+		}
+
+		if (ret < 0)
+			goto free_skb;
+	}
 
 	genlmsg_end(skb, msg_header);
 
@@ -1306,7 +1325,7 @@ static int tcmu_configure_device(struct se_device *dev)
 	kref_get(&udev->kref);
 
 	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
-				 udev->uio_info.uio_dev->minor, NO_RECONFIG);
+				 udev->uio_info.uio_dev->minor, 0, NULL);
 	if (ret)
 		goto err_netlink;
 
@@ -1388,7 +1407,7 @@ static void tcmu_free_device(struct se_device *dev)
 
 	if (tcmu_dev_configured(udev)) {
 		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
-				   udev->uio_info.uio_dev->minor, NO_RECONFIG);
+				   udev->uio_info.uio_dev->minor, 0, NULL);
 
 		uio_unregister_device(&udev->uio_info);
 	}
@@ -1553,7 +1572,7 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag
 }
 CONFIGFS_ATTR(tcmu_, cmd_time_out);
 
-static ssize_t tcmu_dev_path_show(struct config_item *item, char *page)
+static ssize_t tcmu_dev_config_show(struct config_item *item, char *page)
 {
 	struct se_dev_attrib *da = container_of(to_config_group(item),
 						struct se_dev_attrib, da_group);
@@ -1562,37 +1581,34 @@ static ssize_t tcmu_dev_path_show(struct config_item *item, char *page)
 	return snprintf(page, PAGE_SIZE, "%s\n", udev->dev_config);
 }
 
-static ssize_t tcmu_dev_path_store(struct config_item *item, const char *page,
-				   size_t count)
+static ssize_t tcmu_dev_config_store(struct config_item *item, const char *page,
+				     size_t count)
 {
 	struct se_dev_attrib *da = container_of(to_config_group(item),
 						struct se_dev_attrib, da_group);
 	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
-	char *copy = NULL;
-	int ret;
+	int ret, len;
 
-	copy = kstrdup(page, GFP_KERNEL);
-	if (!copy) {
-		kfree(copy);
+	len = strlen(page);
+	if (!len || len > TCMU_CONFIG_LEN - 1)
 		return -EINVAL;
-	}
-	strlcpy(udev->dev_config, copy, TCMU_CONFIG_LEN);
 
 	/* Check if device has been configured before */
 	if (tcmu_dev_configured(udev)) {
 		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
 					 udev->uio_info.name,
 					 udev->uio_info.uio_dev->minor,
-					 CONFIG_PATH);
+					 TCMU_ATTR_DEV_CFG, page);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
 			return ret;
 		}
 	}
+	strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);
 
 	return count;
 }
-CONFIGFS_ATTR(tcmu_, dev_path);
+CONFIGFS_ATTR(tcmu_, dev_config);
 
 static ssize_t tcmu_dev_size_show(struct config_item *item, char *page)
 {
@@ -1609,26 +1625,25 @@ static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page,
 	struct se_dev_attrib *da = container_of(to_config_group(item),
 						struct se_dev_attrib, da_group);
 	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
-	unsigned long val;
+	u64 val;
 	int ret;
 
-	ret = kstrtoul(page, 0, &val);
+	ret = kstrtou64(page, 0, &val);
 	if (ret < 0)
 		return ret;
-	udev->dev_size = val;
 
 	/* Check if device has been configured before */
 	if (tcmu_dev_configured(udev)) {
 		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
 					 udev->uio_info.name,
 					 udev->uio_info.uio_dev->minor,
-					 CONFIG_SIZE);
+					 TCMU_ATTR_DEV_SIZE, &val);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
 			return ret;
 		}
 	}
-
+	udev->dev_size = val;
 	return count;
 }
 CONFIGFS_ATTR(tcmu_, dev_size);
@@ -1648,33 +1663,33 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 	struct se_dev_attrib *da = container_of(to_config_group(item),
 					struct se_dev_attrib, da_group);
 	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
-	int val;
+	u8 val;
 	int ret;
 
-	ret = kstrtouint(page, 0, &val);
+	ret = kstrtou8(page, 0, &val);
 	if (ret < 0)
 		return ret;
 
-	da->emulate_write_cache = val;
-
 	/* Check if device has been configured before */
 	if (tcmu_dev_configured(udev)) {
 		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
 					 udev->uio_info.name,
 					 udev->uio_info.uio_dev->minor,
-					 CONFIG_WRITECACHE);
+					 TCMU_ATTR_WRITECACHE, &val);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
 			return ret;
 		}
 	}
+
+	da->emulate_write_cache = val;
 	return count;
 }
 CONFIGFS_ATTR(tcmu_, emulate_write_cache);
 
 static struct configfs_attribute *tcmu_attrib_attrs[] = {
 	&tcmu_attr_cmd_time_out,
-	&tcmu_attr_dev_path,
+	&tcmu_attr_dev_config,
 	&tcmu_attr_dev_size,
 	&tcmu_attr_emulate_write_cache,
 	NULL,
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 5b00e3500005b..4bfc9a1b635c6 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -139,16 +139,12 @@ enum tcmu_genl_attr {
 	TCMU_ATTR_UNSPEC,
 	TCMU_ATTR_DEVICE,
 	TCMU_ATTR_MINOR,
-	TCMU_ATTR_TYPE,
+	TCMU_ATTR_PAD,
+	TCMU_ATTR_DEV_CFG,
+	TCMU_ATTR_DEV_SIZE,
+	TCMU_ATTR_WRITECACHE,
 	__TCMU_ATTR_MAX,
 };
 #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
 
-enum tcmu_reconfig_types {
-	NO_RECONFIG,
-	CONFIG_PATH,
-	CONFIG_SIZE,
-	CONFIG_WRITECACHE,
-};
-
 #endif
-- 
GitLab


From 926347061ef1f4d3873829fd1960c6e4b965aa9f Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:12 -0500
Subject: [PATCH 0841/1958] target: break up free_device callback

With this patch free_device is now used to free what is allocated in the
alloc_device callback and destroy_device tears down the resources that are
setup in the configure_device callback.

This patch will be needed in the next patch where tcmu needs
to be able to look up the device in the destroy callback.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c  |  2 ++
 drivers/target/target_core_file.c    |  7 ++++++-
 drivers/target/target_core_iblock.c  |  8 ++++++--
 drivers/target/target_core_pscsi.c   |  7 ++++++-
 drivers/target/target_core_rd.c      |  7 ++++++-
 drivers/target/target_core_user.c    | 12 +++++++++---
 include/target/target_core_backend.h |  1 +
 7 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 11c80c47b9d3e..16a701fed66b2 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -974,6 +974,8 @@ void target_free_device(struct se_device *dev)
 	if (dev->dev_flags & DF_CONFIGURED) {
 		destroy_workqueue(dev->tmr_wq);
 
+		dev->transport->destroy_device(dev);
+
 		mutex_lock(&g_device_mutex);
 		list_del(&dev->g_dev_node);
 		mutex_unlock(&g_device_mutex);
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 73b8f93a5fefb..50f2355805461 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -236,6 +236,11 @@ static void fd_dev_call_rcu(struct rcu_head *p)
 }
 
 static void fd_free_device(struct se_device *dev)
+{
+	call_rcu(&dev->rcu_head, fd_dev_call_rcu);
+}
+
+static void fd_destroy_device(struct se_device *dev)
 {
 	struct fd_dev *fd_dev = FD_DEV(dev);
 
@@ -243,7 +248,6 @@ static void fd_free_device(struct se_device *dev)
 		filp_close(fd_dev->fd_file, NULL);
 		fd_dev->fd_file = NULL;
 	}
-	call_rcu(&dev->rcu_head, fd_dev_call_rcu);
 }
 
 static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
@@ -826,6 +830,7 @@ static const struct target_backend_ops fileio_ops = {
 	.detach_hba		= fd_detach_hba,
 	.alloc_device		= fd_alloc_device,
 	.configure_device	= fd_configure_device,
+	.destroy_device		= fd_destroy_device,
 	.free_device		= fd_free_device,
 	.parse_cdb		= fd_parse_cdb,
 	.set_configfs_dev_params = fd_set_configfs_dev_params,
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index b2044133d747c..a5e16f7153920 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -189,6 +189,11 @@ static void iblock_dev_call_rcu(struct rcu_head *p)
 }
 
 static void iblock_free_device(struct se_device *dev)
+{
+	call_rcu(&dev->rcu_head, iblock_dev_call_rcu);
+}
+
+static void iblock_destroy_device(struct se_device *dev)
 {
 	struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
 
@@ -196,8 +201,6 @@ static void iblock_free_device(struct se_device *dev)
 		blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
 	if (ib_dev->ibd_bio_set != NULL)
 		bioset_free(ib_dev->ibd_bio_set);
-
-	call_rcu(&dev->rcu_head, iblock_dev_call_rcu);
 }
 
 static unsigned long long iblock_emulate_read_cap_with_block_size(
@@ -858,6 +861,7 @@ static const struct target_backend_ops iblock_ops = {
 	.detach_hba		= iblock_detach_hba,
 	.alloc_device		= iblock_alloc_device,
 	.configure_device	= iblock_configure_device,
+	.destroy_device		= iblock_destroy_device,
 	.free_device		= iblock_free_device,
 	.parse_cdb		= iblock_parse_cdb,
 	.set_configfs_dev_params = iblock_set_configfs_dev_params,
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index e0be4aa383283..7d944b23aeeee 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -563,6 +563,11 @@ static void pscsi_dev_call_rcu(struct rcu_head *p)
 }
 
 static void pscsi_free_device(struct se_device *dev)
+{
+	call_rcu(&dev->rcu_head, pscsi_dev_call_rcu);
+}
+
+static void pscsi_destroy_device(struct se_device *dev)
 {
 	struct pscsi_dev_virt *pdv = PSCSI_DEV(dev);
 	struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
@@ -592,7 +597,6 @@ static void pscsi_free_device(struct se_device *dev)
 
 		pdv->pdv_sd = NULL;
 	}
-	call_rcu(&dev->rcu_head, pscsi_dev_call_rcu);
 }
 
 static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
@@ -1084,6 +1088,7 @@ static const struct target_backend_ops pscsi_ops = {
 	.pmode_enable_hba	= pscsi_pmode_enable_hba,
 	.alloc_device		= pscsi_alloc_device,
 	.configure_device	= pscsi_configure_device,
+	.destroy_device		= pscsi_destroy_device,
 	.free_device		= pscsi_free_device,
 	.transport_complete	= pscsi_transport_complete,
 	.parse_cdb		= pscsi_parse_cdb,
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index d12967690054d..a6e8106abd6ff 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -338,11 +338,15 @@ static void rd_dev_call_rcu(struct rcu_head *p)
 }
 
 static void rd_free_device(struct se_device *dev)
+{
+	call_rcu(&dev->rcu_head, rd_dev_call_rcu);
+}
+
+static void rd_destroy_device(struct se_device *dev)
 {
 	struct rd_dev *rd_dev = RD_DEV(dev);
 
 	rd_release_device_space(rd_dev);
-	call_rcu(&dev->rcu_head, rd_dev_call_rcu);
 }
 
 static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page)
@@ -651,6 +655,7 @@ static const struct target_backend_ops rd_mcp_ops = {
 	.detach_hba		= rd_detach_hba,
 	.alloc_device		= rd_alloc_device,
 	.configure_device	= rd_configure_device,
+	.destroy_device		= rd_destroy_device,
 	.free_device		= rd_free_device,
 	.parse_cdb		= rd_parse_cdb,
 	.set_configfs_dev_params = rd_set_configfs_dev_params,
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index ca5b081295db8..e58127b8db8af 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1379,6 +1379,14 @@ static void tcmu_blocks_release(struct tcmu_dev *udev)
 }
 
 static void tcmu_free_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+
+	/* release ref from init */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
+}
+
+static void tcmu_destroy_device(struct se_device *dev)
 {
 	struct tcmu_dev *udev = TCMU_DEV(dev);
 	struct tcmu_cmd *cmd;
@@ -1411,9 +1419,6 @@ static void tcmu_free_device(struct se_device *dev)
 
 		uio_unregister_device(&udev->uio_info);
 	}
-
-	/* release ref from init */
-	kref_put(&udev->kref, tcmu_dev_kref_release);
 }
 
 enum {
@@ -1705,6 +1710,7 @@ static struct target_backend_ops tcmu_ops = {
 	.detach_hba		= tcmu_detach_hba,
 	.alloc_device		= tcmu_alloc_device,
 	.configure_device	= tcmu_configure_device,
+	.destroy_device		= tcmu_destroy_device,
 	.free_device		= tcmu_free_device,
 	.parse_cdb		= tcmu_parse_cdb,
 	.set_configfs_dev_params = tcmu_set_configfs_dev_params,
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index b76071161cdc7..3dbcacd7e8d7a 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -30,6 +30,7 @@ struct target_backend_ops {
 
 	struct se_device *(*alloc_device)(struct se_hba *, const char *);
 	int (*configure_device)(struct se_device *);
+	void (*destroy_device)(struct se_device *);
 	void (*free_device)(struct se_device *device);
 
 	ssize_t (*set_configfs_dev_params)(struct se_device *,
-- 
GitLab


From 0a5eee647b78e53da05e081362f42a11b4b674eb Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:13 -0500
Subject: [PATCH 0842/1958] target: use idr for se_device dev index

In the next patches we will add tcmu netlink support that allows
userspace to send commands to target_core_user. To execute operations
on a se_device/tcmu_dev we need to be able to look up a dev by any old
id. This patch replaces the se_device->dev_index with a idr created
id.

The next patches will also remove the g_device_list and replace it with
the idr.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 30 +++++++++++++++++++++++++----
 include/target/target_core_base.h   |  1 -
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 16a701fed66b2..f10dfe7b1d54e 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -51,6 +51,7 @@
 
 DEFINE_MUTEX(g_device_mutex);
 LIST_HEAD(g_device_list);
+static DEFINE_IDR(devices_idr);
 
 static struct se_hba *lun0_hba;
 /* not static, needed by tpg.c */
@@ -882,7 +883,7 @@ EXPORT_SYMBOL(target_to_linux_sector);
 int target_configure_device(struct se_device *dev)
 {
 	struct se_hba *hba = dev->se_hba;
-	int ret;
+	int ret, id;
 
 	if (dev->dev_flags & DF_CONFIGURED) {
 		pr_err("se_dev->se_dev_ptr already set for storage"
@@ -890,9 +891,26 @@ int target_configure_device(struct se_device *dev)
 		return -EEXIST;
 	}
 
+	/*
+	 * Add early so modules like tcmu can use during its
+	 * configuration.
+	 */
+	mutex_lock(&g_device_mutex);
+	/*
+	 * Use cyclic to try and avoid collisions with devices
+	 * that were recently removed.
+	 */
+	id = idr_alloc_cyclic(&devices_idr, dev, 0, INT_MAX, GFP_KERNEL);
+	mutex_unlock(&g_device_mutex);
+	if (id < 0) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	dev->dev_index = id;
+
 	ret = dev->transport->configure_device(dev);
 	if (ret)
-		goto out;
+		goto out_free_index;
 	/*
 	 * XXX: there is not much point to have two different values here..
 	 */
@@ -907,12 +925,11 @@ int target_configure_device(struct se_device *dev)
 					 dev->dev_attrib.hw_block_size);
 	dev->dev_attrib.optimal_sectors = dev->dev_attrib.hw_max_sectors;
 
-	dev->dev_index = scsi_get_new_index(SCSI_DEVICE_INDEX);
 	dev->creation_time = get_jiffies_64();
 
 	ret = core_setup_alua(dev);
 	if (ret)
-		goto out;
+		goto out_free_index;
 
 	/*
 	 * Startup the struct se_device processing thread
@@ -960,6 +977,10 @@ int target_configure_device(struct se_device *dev)
 
 out_free_alua:
 	core_alua_free_lu_gp_mem(dev);
+out_free_index:
+	mutex_lock(&g_device_mutex);
+	idr_remove(&devices_idr, dev->dev_index);
+	mutex_unlock(&g_device_mutex);
 out:
 	se_release_vpd_for_dev(dev);
 	return ret;
@@ -977,6 +998,7 @@ void target_free_device(struct se_device *dev)
 		dev->transport->destroy_device(dev);
 
 		mutex_lock(&g_device_mutex);
+		idr_remove(&devices_idr, dev->dev_index);
 		list_del(&dev->g_dev_node);
 		mutex_unlock(&g_device_mutex);
 
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index a3af69fcf75e4..51a92f17352c2 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -219,7 +219,6 @@ enum tcm_tmrsp_table {
  */
 typedef enum {
 	SCSI_INST_INDEX,
-	SCSI_DEVICE_INDEX,
 	SCSI_AUTH_INTR_INDEX,
 	SCSI_INDEX_TYPE_MAX
 } scsi_index_t;
-- 
GitLab


From 85441e6b8c97964a6da72135dc21f708adbdc4d8 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:14 -0500
Subject: [PATCH 0843/1958] target: add helper to find se_device by dev_index

This adds a helper to find a se_device by dev_index. It will
be used in the next patches so tcmu's netlink interface can
execute commands on specific devices.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c  | 24 ++++++++++++++++++++++++
 include/target/target_core_backend.h |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index f10dfe7b1d54e..de1131612ddc8 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -880,6 +880,30 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
 }
 EXPORT_SYMBOL(target_to_linux_sector);
 
+/**
+ * target_find_device - find a se_device by its dev_index
+ * @id: dev_index
+ * @do_depend: true if caller needs target_depend_item to be done
+ *
+ * If do_depend is true, the caller must do a target_undepend_item
+ * when finished using the device.
+ *
+ * If do_depend is false, the caller must be called in a configfs
+ * callback or during removal.
+ */
+struct se_device *target_find_device(int id, bool do_depend)
+{
+	struct se_device *dev;
+
+	mutex_lock(&g_device_mutex);
+	dev = idr_find(&devices_idr, id);
+	if (dev && do_depend && target_depend_item(&dev->dev_group.cg_item))
+		dev = NULL;
+	mutex_unlock(&g_device_mutex);
+	return dev;
+}
+EXPORT_SYMBOL(target_find_device);
+
 int target_configure_device(struct se_device *dev)
 {
 	struct se_hba *hba = dev->se_hba;
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 3dbcacd7e8d7a..1f2b7007f2df3 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -106,6 +106,8 @@ bool	target_lun_is_rdonly(struct se_cmd *);
 sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
+struct	se_device *target_find_device(int id, bool do_depend);
+
 bool target_sense_desc_format(struct se_device *dev);
 sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-- 
GitLab


From b3af66e24393f03ef81db17a11387d9e6174bd01 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:15 -0500
Subject: [PATCH 0844/1958] tcmu: perfom device add, del and reconfig
 synchronously

This makes the device add, del reconfig operations sync. It fixes
the issue where for add and reconfig, we do not know if userspace
successfully completely the operation, so we leave invalid kernel
structs or report incorrect status for the config/reconfig operations.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c     | 213 +++++++++++++++++++++++---
 include/uapi/linux/target_core_user.h |   7 +
 2 files changed, 200 insertions(+), 20 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index e58127b8db8af..e080cd1a8fdef 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -87,6 +87,8 @@
 /* Default maximum of the global data blocks(512K * PAGE_SIZE) */
 #define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024)
 
+static u8 tcmu_kern_cmd_reply_supported;
+
 static struct device *tcmu_root_device;
 
 struct tcmu_hba {
@@ -95,6 +97,13 @@ struct tcmu_hba {
 
 #define TCMU_CONFIG_LEN 256
 
+struct tcmu_nl_cmd {
+	/* wake up thread waiting for reply */
+	struct completion complete;
+	int cmd;
+	int status;
+};
+
 struct tcmu_dev {
 	struct list_head node;
 	struct kref kref;
@@ -135,6 +144,11 @@ struct tcmu_dev {
 	struct timer_list timeout;
 	unsigned int cmd_time_out;
 
+	spinlock_t nl_cmd_lock;
+	struct tcmu_nl_cmd curr_nl_cmd;
+	/* wake up threads waiting on curr_nl_cmd */
+	wait_queue_head_t nl_cmd_wq;
+
 	char dev_config[TCMU_CONFIG_LEN];
 };
 
@@ -178,16 +192,128 @@ static const struct genl_multicast_group tcmu_mcgrps[] = {
 	[TCMU_MCGRP_CONFIG] = { .name = "config", },
 };
 
+static struct nla_policy tcmu_attr_policy[TCMU_ATTR_MAX+1] = {
+	[TCMU_ATTR_DEVICE]	= { .type = NLA_STRING },
+	[TCMU_ATTR_MINOR]	= { .type = NLA_U32 },
+	[TCMU_ATTR_CMD_STATUS]	= { .type = NLA_S32 },
+	[TCMU_ATTR_DEVICE_ID]	= { .type = NLA_U32 },
+	[TCMU_ATTR_SUPP_KERN_CMD_REPLY] = { .type = NLA_U8 },
+};
+
+static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd)
+{
+	struct se_device *dev;
+	struct tcmu_dev *udev;
+	struct tcmu_nl_cmd *nl_cmd;
+	int dev_id, rc, ret = 0;
+	bool is_removed = (completed_cmd == TCMU_CMD_REMOVED_DEVICE);
+
+	if (!info->attrs[TCMU_ATTR_CMD_STATUS] ||
+	    !info->attrs[TCMU_ATTR_DEVICE_ID]) {
+		printk(KERN_ERR "TCMU_ATTR_CMD_STATUS or TCMU_ATTR_DEVICE_ID not set, doing nothing\n");
+                return -EINVAL;
+        }
+
+	dev_id = nla_get_u32(info->attrs[TCMU_ATTR_DEVICE_ID]);
+	rc = nla_get_s32(info->attrs[TCMU_ATTR_CMD_STATUS]);
+
+	dev = target_find_device(dev_id, !is_removed);
+	if (!dev) {
+		printk(KERN_ERR "tcmu nl cmd %u/%u completion could not find device with dev id %u.\n",
+		       completed_cmd, rc, dev_id);
+		return -ENODEV;
+	}
+	udev = TCMU_DEV(dev);
+
+	spin_lock(&udev->nl_cmd_lock);
+	nl_cmd = &udev->curr_nl_cmd;
+
+	pr_debug("genl cmd done got id %d curr %d done %d rc %d\n", dev_id,
+		 nl_cmd->cmd, completed_cmd, rc);
+
+	if (nl_cmd->cmd != completed_cmd) {
+		printk(KERN_ERR "Mismatched commands (Expecting reply for %d. Current %d).\n",
+		       completed_cmd, nl_cmd->cmd);
+		ret = -EINVAL;
+	} else {
+		nl_cmd->status = rc;
+	}
+
+	spin_unlock(&udev->nl_cmd_lock);
+	if (!is_removed)
+		 target_undepend_item(&dev->dev_group.cg_item);
+	if (!ret)
+		complete(&nl_cmd->complete);
+	return ret;
+}
+
+static int tcmu_genl_rm_dev_done(struct sk_buff *skb, struct genl_info *info)
+{
+	return tcmu_genl_cmd_done(info, TCMU_CMD_REMOVED_DEVICE);
+}
+
+static int tcmu_genl_add_dev_done(struct sk_buff *skb, struct genl_info *info)
+{
+	return tcmu_genl_cmd_done(info, TCMU_CMD_ADDED_DEVICE);
+}
+
+static int tcmu_genl_reconfig_dev_done(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	return tcmu_genl_cmd_done(info, TCMU_CMD_RECONFIG_DEVICE);
+}
+
+static int tcmu_genl_set_features(struct sk_buff *skb, struct genl_info *info)
+{
+	if (info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]) {
+		tcmu_kern_cmd_reply_supported  =
+			nla_get_u8(info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]);
+		printk(KERN_INFO "tcmu daemon: command reply support %u.\n",
+		       tcmu_kern_cmd_reply_supported);
+	}
+
+	return 0;
+}
+
+static const struct genl_ops tcmu_genl_ops[] = {
+	{
+		.cmd	= TCMU_CMD_SET_FEATURES,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_set_features,
+	},
+	{
+		.cmd	= TCMU_CMD_ADDED_DEVICE_DONE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_add_dev_done,
+	},
+	{
+		.cmd	= TCMU_CMD_REMOVED_DEVICE_DONE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_rm_dev_done,
+	},
+	{
+		.cmd	= TCMU_CMD_RECONFIG_DEVICE_DONE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_reconfig_dev_done,
+	},
+};
+
 /* Our generic netlink family */
 static struct genl_family tcmu_genl_family __ro_after_init = {
 	.module = THIS_MODULE,
 	.hdrsize = 0,
 	.name = "TCM-USER",
-	.version = 1,
+	.version = 2,
 	.maxattr = TCMU_ATTR_MAX,
 	.mcgrps = tcmu_mcgrps,
 	.n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
 	.netnsok = true,
+	.ops = tcmu_genl_ops,
+	.n_ops = ARRAY_SIZE(tcmu_genl_ops),
 };
 
 #define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index))
@@ -989,6 +1115,9 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	setup_timer(&udev->timeout, tcmu_device_timedout,
 		(unsigned long)udev);
 
+	init_waitqueue_head(&udev->nl_cmd_wq);
+	spin_lock_init(&udev->nl_cmd_lock);
+
 	return &udev->se_dev;
 }
 
@@ -1176,9 +1305,54 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	return 0;
 }
 
-static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name,
-			      int minor, int reconfig_attr,
-			      const void *reconfig_data)
+static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd)
+{
+	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
+
+	if (!tcmu_kern_cmd_reply_supported)
+		return;
+relock:
+	spin_lock(&udev->nl_cmd_lock);
+
+	if (nl_cmd->cmd != TCMU_CMD_UNSPEC) {
+		spin_unlock(&udev->nl_cmd_lock);
+		pr_debug("sleeping for open nl cmd\n");
+		wait_event(udev->nl_cmd_wq, (nl_cmd->cmd == TCMU_CMD_UNSPEC));
+		goto relock;
+	}
+
+	memset(nl_cmd, 0, sizeof(*nl_cmd));
+	nl_cmd->cmd = cmd;
+	init_completion(&nl_cmd->complete);
+
+	spin_unlock(&udev->nl_cmd_lock);
+}
+
+static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev)
+{
+	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
+	int ret;
+	DEFINE_WAIT(__wait);
+
+	if (!tcmu_kern_cmd_reply_supported)
+		return 0;
+
+	pr_debug("sleeping for nl reply\n");
+	wait_for_completion(&nl_cmd->complete);
+
+	spin_lock(&udev->nl_cmd_lock);
+	nl_cmd->cmd = TCMU_CMD_UNSPEC;
+	ret = nl_cmd->status;
+	nl_cmd->status = 0;
+	spin_unlock(&udev->nl_cmd_lock);
+
+	wake_up_all(&udev->nl_cmd_wq);
+
+	return ret;;
+}
+
+static int tcmu_netlink_event(struct tcmu_dev *udev, enum tcmu_genl_cmd cmd,
+			      int reconfig_attr, const void *reconfig_data)
 {
 	struct sk_buff *skb;
 	void *msg_header;
@@ -1192,11 +1366,15 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name,
 	if (!msg_header)
 		goto free_skb;
 
-	ret = nla_put_string(skb, TCMU_ATTR_DEVICE, name);
+	ret = nla_put_string(skb, TCMU_ATTR_DEVICE, udev->uio_info.name);
 	if (ret < 0)
 		goto free_skb;
 
-	ret = nla_put_u32(skb, TCMU_ATTR_MINOR, minor);
+	ret = nla_put_u32(skb, TCMU_ATTR_MINOR, udev->uio_info.uio_dev->minor);
+	if (ret < 0)
+		goto free_skb;
+
+	ret = nla_put_u32(skb, TCMU_ATTR_DEVICE_ID, udev->se_dev.dev_index);
 	if (ret < 0)
 		goto free_skb;
 
@@ -1224,12 +1402,15 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name,
 
 	genlmsg_end(skb, msg_header);
 
+	tcmu_init_genl_cmd_reply(udev, cmd);
+
 	ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0,
 				TCMU_MCGRP_CONFIG, GFP_KERNEL);
-
 	/* We don't care if no one is listening */
 	if (ret == -ESRCH)
 		ret = 0;
+	if (!ret)
+		ret = tcmu_wait_genl_cmd_reply(udev);
 
 	return ret;
 free_skb:
@@ -1324,8 +1505,7 @@ static int tcmu_configure_device(struct se_device *dev)
 	 */
 	kref_get(&udev->kref);
 
-	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
-				 udev->uio_info.uio_dev->minor, 0, NULL);
+	ret = tcmu_netlink_event(udev, TCMU_CMD_ADDED_DEVICE, 0, NULL);
 	if (ret)
 		goto err_netlink;
 
@@ -1414,8 +1594,7 @@ static void tcmu_destroy_device(struct se_device *dev)
 	tcmu_blocks_release(udev);
 
 	if (tcmu_dev_configured(udev)) {
-		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
-				   udev->uio_info.uio_dev->minor, 0, NULL);
+		tcmu_netlink_event(udev, TCMU_CMD_REMOVED_DEVICE, 0, NULL);
 
 		uio_unregister_device(&udev->uio_info);
 	}
@@ -1600,9 +1779,7 @@ static ssize_t tcmu_dev_config_store(struct config_item *item, const char *page,
 
 	/* Check if device has been configured before */
 	if (tcmu_dev_configured(udev)) {
-		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
-					 udev->uio_info.name,
-					 udev->uio_info.uio_dev->minor,
+		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
 					 TCMU_ATTR_DEV_CFG, page);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
@@ -1639,9 +1816,7 @@ static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page,
 
 	/* Check if device has been configured before */
 	if (tcmu_dev_configured(udev)) {
-		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
-					 udev->uio_info.name,
-					 udev->uio_info.uio_dev->minor,
+		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
 					 TCMU_ATTR_DEV_SIZE, &val);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
@@ -1677,9 +1852,7 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 
 	/* Check if device has been configured before */
 	if (tcmu_dev_configured(udev)) {
-		ret = tcmu_netlink_event(TCMU_CMD_RECONFIG_DEVICE,
-					 udev->uio_info.name,
-					 udev->uio_info.uio_dev->minor,
+		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
 					 TCMU_ATTR_WRITECACHE, &val);
 		if (ret) {
 			pr_err("Unable to reconfigure device\n");
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 4bfc9a1b635c6..24a1c4ec2248c 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -131,6 +131,10 @@ enum tcmu_genl_cmd {
 	TCMU_CMD_ADDED_DEVICE,
 	TCMU_CMD_REMOVED_DEVICE,
 	TCMU_CMD_RECONFIG_DEVICE,
+	TCMU_CMD_ADDED_DEVICE_DONE,
+	TCMU_CMD_REMOVED_DEVICE_DONE,
+	TCMU_CMD_RECONFIG_DEVICE_DONE,
+	TCMU_CMD_SET_FEATURES,
 	__TCMU_CMD_MAX,
 };
 #define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
@@ -143,6 +147,9 @@ enum tcmu_genl_attr {
 	TCMU_ATTR_DEV_CFG,
 	TCMU_ATTR_DEV_SIZE,
 	TCMU_ATTR_WRITECACHE,
+	TCMU_ATTR_CMD_STATUS,
+	TCMU_ATTR_DEVICE_ID,
+	TCMU_ATTR_SUPP_KERN_CMD_REPLY,
 	__TCMU_ATTR_MAX,
 };
 #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
-- 
GitLab


From b1943fd454d1a2e2c8018a2f79a7023893619439 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:16 -0500
Subject: [PATCH 0845/1958] target: add helper to iterate over devices

This adds a wrapper around idr_for_each so the xcopy code can loop over
the devices in the next patch.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c   | 45 +++++++++++++++++++++++++++
 drivers/target/target_core_internal.h |  2 ++
 2 files changed, 47 insertions(+)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index de1131612ddc8..bd32a0c659613 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -904,6 +904,51 @@ struct se_device *target_find_device(int id, bool do_depend)
 }
 EXPORT_SYMBOL(target_find_device);
 
+struct devices_idr_iter {
+	int (*fn)(struct se_device *dev, void *data);
+	void *data;
+};
+
+static int target_devices_idr_iter(int id, void *p, void *data)
+{
+	struct devices_idr_iter *iter = data;
+	struct se_device *dev = p;
+
+	/*
+	 * We add the device early to the idr, so it can be used
+	 * by backend modules during configuration. We do not want
+	 * to allow other callers to access partially setup devices,
+	 * so we skip them here.
+	 */
+	if (!(dev->dev_flags & DF_CONFIGURED))
+		return 0;
+
+	return iter->fn(dev, iter->data);
+}
+
+/**
+ * target_for_each_device - iterate over configured devices
+ * @fn: iterator function
+ * @data: pointer to data that will be passed to fn
+ *
+ * fn must return 0 to continue looping over devices. non-zero will break
+ * from the loop and return that value to the caller.
+ */
+int target_for_each_device(int (*fn)(struct se_device *dev, void *data),
+			   void *data)
+{
+	struct devices_idr_iter iter;
+	int ret;
+
+	iter.fn = fn;
+	iter.data = data;
+
+	mutex_lock(&g_device_mutex);
+	ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter);
+	mutex_unlock(&g_device_mutex);
+	return ret;
+}
+
 int target_configure_device(struct se_device *dev)
 {
 	struct se_hba *hba = dev->se_hba;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 0912de7c0cf8f..1d3ac02385687 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -87,6 +87,8 @@ void	core_dev_release_virtual_lun0(void);
 struct se_device *target_alloc_device(struct se_hba *hba, const char *name);
 int	target_configure_device(struct se_device *dev);
 void	target_free_device(struct se_device *);
+int	target_for_each_device(int (*fn)(struct se_device *dev, void *data),
+			       void *data);
 
 /* target_core_configfs.c */
 void	target_setup_backend_cits(struct target_backend *);
-- 
GitLab


From 6906d008b4b06e42cad393ac25bec76fbf31fabd Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:17 -0500
Subject: [PATCH 0846/1958] xcopy: loop over devices using idr helper

This converts the xcopy code to use the idr helper. The next patch
will drop the g_device_list and make g_device_mutex local to the
target_core_device.c file.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_xcopy.c | 70 +++++++++++++++++-------------
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 56738a41e3461..9ee89e00cd776 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -55,48 +55,60 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 	return 0;
 }
 
-static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
-					struct se_device **found_dev)
+struct xcopy_dev_search_info {
+	const unsigned char *dev_wwn;
+	struct se_device *found_dev;
+};
+
+static int target_xcopy_locate_se_dev_e4_iter(struct se_device *se_dev,
+					      void *data)
 {
-	struct se_device *se_dev;
+	struct xcopy_dev_search_info *info = data;
 	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
 	int rc;
 
-	mutex_lock(&g_device_mutex);
-	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
+	if (!se_dev->dev_attrib.emulate_3pc)
+		return 0;
 
-		if (!se_dev->dev_attrib.emulate_3pc)
-			continue;
+	memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
+	target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
 
-		memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
-		target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
+	rc = memcmp(&tmp_dev_wwn[0], info->dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
+	if (rc != 0)
+		return 0;
 
-		rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
-		if (rc != 0)
-			continue;
+	info->found_dev = se_dev;
+	pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
 
-		*found_dev = se_dev;
-		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
+	rc = target_depend_item(&se_dev->dev_group.cg_item);
+	if (rc != 0) {
+		pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n",
+		       rc, se_dev);
+		return rc;
+	}
 
-		rc = target_depend_item(&se_dev->dev_group.cg_item);
-		if (rc != 0) {
-			pr_err("configfs_depend_item attempt failed:"
-				" %d for se_dev: %p\n", rc, se_dev);
-			mutex_unlock(&g_device_mutex);
-			return rc;
-		}
+	pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n",
+		 se_dev, &se_dev->dev_group);
+	return 1;
+}
+
+static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
+					struct se_device **found_dev)
+{
+	struct xcopy_dev_search_info info;
+	int ret;
 
-		pr_debug("Called configfs_depend_item for se_dev: %p"
-			" se_dev->se_dev_group: %p\n", se_dev,
-			&se_dev->dev_group);
+	memset(&info, 0, sizeof(info));
+	info.dev_wwn = dev_wwn;
 
-		mutex_unlock(&g_device_mutex);
+	ret = target_for_each_device(target_xcopy_locate_se_dev_e4_iter, &info);
+	if (ret == 1) {
+		*found_dev = info.found_dev;
 		return 0;
+	} else {
+		pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
+		return -EINVAL;
 	}
-	mutex_unlock(&g_device_mutex);
-
-	pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
-	return -EINVAL;
 }
 
 static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
-- 
GitLab


From be50f538e9a5081c61a78faf58c5591c94064633 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:18 -0500
Subject: [PATCH 0847/1958] target: remove g_device_list

g_device_list is no longer needed because we now use the idr code
for lookups and seaches.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c   | 30 +++++++++++----------------
 drivers/target/target_core_internal.h |  3 ---
 include/target/target_core_base.h     |  1 -
 3 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index bd32a0c659613..3ae8fbf01bdf4 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -49,8 +49,8 @@
 #include "target_core_pr.h"
 #include "target_core_ua.h"
 
-DEFINE_MUTEX(g_device_mutex);
-LIST_HEAD(g_device_list);
+DEFINE_MUTEX(device_mutex);
+LIST_HEAD(device_list);
 static DEFINE_IDR(devices_idr);
 
 static struct se_hba *lun0_hba;
@@ -773,7 +773,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 	INIT_LIST_HEAD(&dev->delayed_cmd_list);
 	INIT_LIST_HEAD(&dev->state_list);
 	INIT_LIST_HEAD(&dev->qf_cmd_list);
-	INIT_LIST_HEAD(&dev->g_dev_node);
 	spin_lock_init(&dev->execute_task_lock);
 	spin_lock_init(&dev->delayed_cmd_lock);
 	spin_lock_init(&dev->dev_reservation_lock);
@@ -895,11 +894,11 @@ struct se_device *target_find_device(int id, bool do_depend)
 {
 	struct se_device *dev;
 
-	mutex_lock(&g_device_mutex);
+	mutex_lock(&device_mutex);
 	dev = idr_find(&devices_idr, id);
 	if (dev && do_depend && target_depend_item(&dev->dev_group.cg_item))
 		dev = NULL;
-	mutex_unlock(&g_device_mutex);
+	mutex_unlock(&device_mutex);
 	return dev;
 }
 EXPORT_SYMBOL(target_find_device);
@@ -943,9 +942,9 @@ int target_for_each_device(int (*fn)(struct se_device *dev, void *data),
 	iter.fn = fn;
 	iter.data = data;
 
-	mutex_lock(&g_device_mutex);
+	mutex_lock(&device_mutex);
 	ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter);
-	mutex_unlock(&g_device_mutex);
+	mutex_unlock(&device_mutex);
 	return ret;
 }
 
@@ -964,13 +963,13 @@ int target_configure_device(struct se_device *dev)
 	 * Add early so modules like tcmu can use during its
 	 * configuration.
 	 */
-	mutex_lock(&g_device_mutex);
+	mutex_lock(&device_mutex);
 	/*
 	 * Use cyclic to try and avoid collisions with devices
 	 * that were recently removed.
 	 */
 	id = idr_alloc_cyclic(&devices_idr, dev, 0, INT_MAX, GFP_KERNEL);
-	mutex_unlock(&g_device_mutex);
+	mutex_unlock(&device_mutex);
 	if (id < 0) {
 		ret = -ENOMEM;
 		goto out;
@@ -1036,10 +1035,6 @@ int target_configure_device(struct se_device *dev)
 	hba->dev_count++;
 	spin_unlock(&hba->device_lock);
 
-	mutex_lock(&g_device_mutex);
-	list_add_tail(&dev->g_dev_node, &g_device_list);
-	mutex_unlock(&g_device_mutex);
-
 	dev->dev_flags |= DF_CONFIGURED;
 
 	return 0;
@@ -1047,9 +1042,9 @@ int target_configure_device(struct se_device *dev)
 out_free_alua:
 	core_alua_free_lu_gp_mem(dev);
 out_free_index:
-	mutex_lock(&g_device_mutex);
+	mutex_lock(&device_mutex);
 	idr_remove(&devices_idr, dev->dev_index);
-	mutex_unlock(&g_device_mutex);
+	mutex_unlock(&device_mutex);
 out:
 	se_release_vpd_for_dev(dev);
 	return ret;
@@ -1066,10 +1061,9 @@ void target_free_device(struct se_device *dev)
 
 		dev->transport->destroy_device(dev);
 
-		mutex_lock(&g_device_mutex);
+		mutex_lock(&device_mutex);
 		idr_remove(&devices_idr, dev->dev_index);
-		list_del(&dev->g_dev_node);
-		mutex_unlock(&g_device_mutex);
+		mutex_unlock(&device_mutex);
 
 		spin_lock(&hba->device_lock);
 		hba->dev_count--;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 1d3ac02385687..f30e8ac13386d 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -56,9 +56,6 @@ struct target_fabric_configfs {
 extern struct t10_alua_lu_gp *default_lu_gp;
 
 /* target_core_device.c */
-extern struct mutex g_device_mutex;
-extern struct list_head g_device_list;
-
 int	core_alloc_rtpi(struct se_lun *lun, struct se_device *dev);
 struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16);
 void	target_pr_kref_release(struct kref *);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 51a92f17352c2..516764febeb78 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -796,7 +796,6 @@ struct se_device {
 	struct list_head	delayed_cmd_list;
 	struct list_head	state_list;
 	struct list_head	qf_cmd_list;
-	struct list_head	g_dev_node;
 	/* Pointer to associated SE HBA */
 	struct se_hba		*se_hba;
 	/* T10 Inquiry and VPD WWN Information */
-- 
GitLab


From 531283ff7593f7059ced43c725d90cec3e5af549 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:19 -0500
Subject: [PATCH 0848/1958] tcmu: drop configured check in destroy

destroy_device is only called if we have successfully run
configure_device, so drop the duplicate tcmu_dev_configured check.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index e080cd1a8fdef..d10aa0ac0918e 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1593,11 +1593,9 @@ static void tcmu_destroy_device(struct se_device *dev)
 
 	tcmu_blocks_release(udev);
 
-	if (tcmu_dev_configured(udev)) {
-		tcmu_netlink_event(udev, TCMU_CMD_REMOVED_DEVICE, 0, NULL);
+	tcmu_netlink_event(udev, TCMU_CMD_REMOVED_DEVICE, 0, NULL);
 
-		uio_unregister_device(&udev->uio_info);
-	}
+	uio_unregister_device(&udev->uio_info);
 }
 
 enum {
-- 
GitLab


From 9260695d65590f4711d1166eadbfcb0acfa0625a Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 23 Jun 2017 01:18:20 -0500
Subject: [PATCH 0849/1958] tcmu: fix multiple uio open/close sequences

If the uio device is open and closed multiple times, the
kref count will be off due to tcmu_release getting called
multiple times for each close. This patch integrates
Wenji Tang's patch to add a kref_get on open that now
matches the kref_put done on tcmu_release and adds
a kref_put in tcmu_destroy_device to match the kref_get
done in succesful tcmu_configure_device calls.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Cc: Wenji Tang <tang.wenji@zte.com.cn>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index d10aa0ac0918e..1e69b1b32558d 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1269,6 +1269,7 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 		return -EBUSY;
 
 	udev->inode = inode;
+	kref_get(&udev->kref);
 
 	pr_debug("open\n");
 
@@ -1300,7 +1301,7 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
 	pr_debug("close\n");
-	/* release ref from configure */
+	/* release ref from open */
 	kref_put(&udev->kref, tcmu_dev_kref_release);
 	return 0;
 }
@@ -1596,6 +1597,9 @@ static void tcmu_destroy_device(struct se_device *dev)
 	tcmu_netlink_event(udev, TCMU_CMD_REMOVED_DEVICE, 0, NULL);
 
 	uio_unregister_device(&udev->uio_info);
+
+	/* release ref from configure */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
 }
 
 enum {
-- 
GitLab


From 9d62bc0e6d79b11e3298e831358155930fb8f5e3 Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Fri, 30 Jun 2017 16:14:16 +0800
Subject: [PATCH 0850/1958] tcmu: Fix flushing cmd entry dcache page

When feeding the tcmu's cmd ring, we need to flush the dcache page
for the cmd entry to make sure these kernel stores are visible to
user space mappings of that page.

For the none PAD cmd entry, this will be flushed at the end of the
tcmu_queue_cmd_ring().

Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 1e69b1b32558d..a8ff6b5fdec8f 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -825,21 +825,21 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		size_t pad_size = head_to_end(cmd_head, udev->cmdr_size);
 
 		entry = (void *) mb + CMDR_OFF + cmd_head;
-		tcmu_flush_dcache_range(entry, sizeof(*entry));
 		tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD);
 		tcmu_hdr_set_len(&entry->hdr.len_op, pad_size);
 		entry->hdr.cmd_id = 0; /* not used for PAD */
 		entry->hdr.kflags = 0;
 		entry->hdr.uflags = 0;
+		tcmu_flush_dcache_range(entry, sizeof(*entry));
 
 		UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
+		tcmu_flush_dcache_range(mb, sizeof(*mb));
 
 		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
 		WARN_ON(cmd_head != 0);
 	}
 
 	entry = (void *) mb + CMDR_OFF + cmd_head;
-	tcmu_flush_dcache_range(entry, sizeof(*entry));
 	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
 	entry->hdr.cmd_id = tcmu_cmd->cmd_id;
 	entry->hdr.kflags = 0;
-- 
GitLab


From c82ff239ecf27dd5eddc91540f50321547a2d311 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 4 Jul 2017 09:44:19 +0100
Subject: [PATCH 0851/1958] target: make device_mutex and device_list static

Variables device_mutex and device_list static are local to the source,
so make them static.

Cleans up sparse warnings:
"symbol 'device_list' was not declared. Should it be static?"
"symbol 'device_mutex' was not declared. Should it be static?"

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 3ae8fbf01bdf4..bbcef3bc66c8e 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -49,8 +49,8 @@
 #include "target_core_pr.h"
 #include "target_core_ua.h"
 
-DEFINE_MUTEX(device_mutex);
-LIST_HEAD(device_list);
+static DEFINE_MUTEX(device_mutex);
+static LIST_HEAD(device_list);
 static DEFINE_IDR(devices_idr);
 
 static struct se_hba *lun0_hba;
-- 
GitLab


From 9fe36984501035f2878aa10d83e79bfc07b7ad7e Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 31 May 2017 15:52:38 -0500
Subject: [PATCH 0852/1958] target: do not require a transport_complete for
 SCF_TRANSPORT_TASK_SENSE

tcmu needs to pass raw sense to target_complete_cmd, but a a
transport_complete callout is akward to implement for it.

This moves the check for SCF_TRANSPORT_TASK_SENSE so any backend
can pass sense.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 28de421e3220c..a8f943e325eb3 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -719,9 +719,9 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 		dev->transport->transport_complete(cmd,
 				cmd->t_data_sg,
 				transport_get_sense_buffer(cmd));
-		if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
-			success = 1;
 	}
+	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+		success = 1;
 
 	/*
 	 * Check for case where an explicit ABORT_TASK has been received
-- 
GitLab


From c6d66aba98a39cfed206c5c61f0a604ba09b26ce Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 31 May 2017 15:52:39 -0500
Subject: [PATCH 0853/1958] target: add helper to copy sense to se_cmd buffer

This adds a helper to copy sense from backend module buffer to
the se_cmd's sense buffer.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 18 ++++++++++++++++++
 include/target/target_core_backend.h   |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index a8f943e325eb3..341025a3cbb70 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -704,6 +704,24 @@ static unsigned char *transport_get_sense_buffer(struct se_cmd *cmd)
 	return cmd->sense_buffer;
 }
 
+void transport_copy_sense_to_cmd(struct se_cmd *cmd, unsigned char *sense)
+{
+	unsigned char *cmd_sense_buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	cmd_sense_buf = transport_get_sense_buffer(cmd);
+	if (!cmd_sense_buf) {
+		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+		return;
+	}
+
+	cmd->se_cmd_flags |= SCF_TRANSPORT_TASK_SENSE;
+	memcpy(cmd_sense_buf, sense, cmd->scsi_sense_length);
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+}
+EXPORT_SYMBOL(transport_copy_sense_to_cmd);
+
 void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 {
 	struct se_device *dev = cmd->se_dev;
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 1f2b7007f2df3..3757f5f54e03b 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -73,6 +73,8 @@ void	target_backend_unregister(const struct target_backend_ops *);
 void	target_complete_cmd(struct se_cmd *, u8);
 void	target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
+void	transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *);
+
 sense_reason_t	spc_parse_cdb(struct se_cmd *cmd, unsigned int *size);
 sense_reason_t	spc_emulate_report_luns(struct se_cmd *cmd);
 sense_reason_t	spc_emulate_inquiry_std(struct se_cmd *, unsigned char *);
-- 
GitLab


From 406f74c20dc258f8258b64d16d159c3fff06a506 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 31 May 2017 15:52:40 -0500
Subject: [PATCH 0854/1958] tcmu: fix sense handling during completion

We were just copying the sense to the cmd sense_buffer and
did not implement a transport_complete or set the
SCF_TRANSPORT_TASK_SENSE, so the sense was ignored.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index a8ff6b5fdec8f..cbbfba0c13527 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -956,8 +956,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 			cmd->se_cmd);
 		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
 	} else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
-		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
-			       se_cmd->scsi_sense_length);
+		transport_copy_sense_to_cmd(se_cmd, entry->rsp.sense_buffer);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
 		/* Get Data-In buffer before clean up */
 		gather_data_area(udev, cmd, true);
-- 
GitLab


From dce6ce8cfb842c333094c3eb2e3ea66b1edb02ad Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 31 May 2017 15:52:41 -0500
Subject: [PATCH 0855/1958] pscsi: finish cmd processing from pscsi_req_done

This patch performs the pscsi_transport_complete operations from
pscsi_req_done. It looks like the only difference the transport_complete
callout provides is that it is called under t_state_lock which seems to
only be needed for the SCF_TRANSPORT_TASK_SENSE bit handling. We can
now use transport_copy_sense_to_cmd to handle the se_cmd sense bits, and
we can then drop the code where we have to copy the request info to
the pscsi_plugin_task for transport_complete use.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_pscsi.c | 38 +++++++++++++-----------------
 drivers/target/target_core_pscsi.h |  4 ----
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7d944b23aeeee..fb40f64072f6b 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -599,12 +599,11 @@ static void pscsi_destroy_device(struct se_device *dev)
 	}
 }
 
-static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
-				     unsigned char *sense_buffer)
+static void pscsi_complete_cmd(struct se_cmd *cmd, u8 scsi_status,
+			       unsigned char *req_sense)
 {
 	struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev);
 	struct scsi_device *sd = pdv->pdv_sd;
-	int result;
 	struct pscsi_plugin_task *pt = cmd->priv;
 	unsigned char *cdb;
 	/*
@@ -615,7 +614,6 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 		return;
 
 	cdb = &pt->pscsi_cdb[0];
-	result = pt->pscsi_result;
 	/*
 	 * Hack to make sure that Write-Protect modepage is set if R/O mode is
 	 * forced.
@@ -624,7 +622,7 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 		goto after_mode_sense;
 
 	if (((cdb[0] == MODE_SENSE) || (cdb[0] == MODE_SENSE_10)) &&
-	     (status_byte(result) << 1) == SAM_STAT_GOOD) {
+	    scsi_status == SAM_STAT_GOOD) {
 		bool read_only = target_lun_is_rdonly(cmd);
 
 		if (read_only) {
@@ -659,12 +657,12 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 	 * storage engine.
 	 */
 	if (((cdb[0] == MODE_SELECT) || (cdb[0] == MODE_SELECT_10)) &&
-	      (status_byte(result) << 1) == SAM_STAT_GOOD) {
+	     scsi_status == SAM_STAT_GOOD) {
 		unsigned char *buf;
 		u16 bdl;
 		u32 blocksize;
 
-		buf = sg_virt(&sg[0]);
+		buf = sg_virt(&cmd->t_data_sg[0]);
 		if (!buf) {
 			pr_err("Unable to get buf for scatterlist\n");
 			goto after_mode_select;
@@ -687,10 +685,8 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 	}
 after_mode_select:
 
-	if (sense_buffer && (status_byte(result) & CHECK_CONDITION)) {
-		memcpy(sense_buffer, pt->pscsi_sense, TRANSPORT_SENSE_BUFFER);
-		cmd->se_cmd_flags |= SCF_TRANSPORT_TASK_SENSE;
-	}
+	if (scsi_status == SAM_STAT_CHECK_CONDITION)
+		transport_copy_sense_to_cmd(cmd, req_sense);
 }
 
 enum {
@@ -1049,30 +1045,29 @@ static void pscsi_req_done(struct request *req, int uptodate)
 {
 	struct se_cmd *cmd = req->end_io_data;
 	struct pscsi_plugin_task *pt = cmd->priv;
+	int result = scsi_req(req)->result;
+	u8 scsi_status = status_byte(result) << 1;
 
-	pt->pscsi_result = scsi_req(req)->result;
-	pt->pscsi_resid = scsi_req(req)->resid_len;
-
-	cmd->scsi_status = status_byte(pt->pscsi_result) << 1;
-	if (cmd->scsi_status) {
+	if (scsi_status) {
 		pr_debug("PSCSI Status Byte exception at cmd: %p CDB:"
 			" 0x%02x Result: 0x%08x\n", cmd, pt->pscsi_cdb[0],
-			pt->pscsi_result);
+			result);
 	}
 
-	switch (host_byte(pt->pscsi_result)) {
+	pscsi_complete_cmd(cmd, scsi_status, scsi_req(req)->sense);
+
+	switch (host_byte(result)) {
 	case DID_OK:
-		target_complete_cmd(cmd, cmd->scsi_status);
+		target_complete_cmd(cmd, scsi_status);
 		break;
 	default:
 		pr_debug("PSCSI Host Byte exception at cmd: %p CDB:"
 			" 0x%02x Result: 0x%08x\n", cmd, pt->pscsi_cdb[0],
-			pt->pscsi_result);
+			result);
 		target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
 		break;
 	}
 
-	memcpy(pt->pscsi_sense, scsi_req(req)->sense, TRANSPORT_SENSE_BUFFER);
 	__blk_put_request(req->q, req);
 	kfree(pt);
 }
@@ -1090,7 +1085,6 @@ static const struct target_backend_ops pscsi_ops = {
 	.configure_device	= pscsi_configure_device,
 	.destroy_device		= pscsi_destroy_device,
 	.free_device		= pscsi_free_device,
-	.transport_complete	= pscsi_transport_complete,
 	.parse_cdb		= pscsi_parse_cdb,
 	.set_configfs_dev_params = pscsi_set_configfs_dev_params,
 	.show_configfs_dev_params = pscsi_show_configfs_dev_params,
diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h
index 8a02fa47c7e8e..b86fb0e1b783d 100644
--- a/drivers/target/target_core_pscsi.h
+++ b/drivers/target/target_core_pscsi.h
@@ -23,10 +23,6 @@ struct scsi_device;
 struct Scsi_Host;
 
 struct pscsi_plugin_task {
-	unsigned char pscsi_sense[TRANSPORT_SENSE_BUFFER];
-	int	pscsi_direction;
-	int	pscsi_result;
-	u32	pscsi_resid;
 	unsigned char pscsi_cdb[0];
 } ____cacheline_aligned;
 
-- 
GitLab


From 1a444175486026c1a280507f8d82094909acddd2 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 31 May 2017 15:52:42 -0500
Subject: [PATCH 0856/1958] target: remove transport_complete

transport_complete is no longer used, so drop the code.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 6 ------
 include/target/target_core_backend.h   | 4 ----
 2 files changed, 10 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 341025a3cbb70..b9122a56784ed 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -732,12 +732,6 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-
-	if (dev && dev->transport->transport_complete) {
-		dev->transport->transport_complete(cmd,
-				cmd->t_data_sg,
-				transport_get_sense_buffer(cmd));
-	}
 	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
 		success = 1;
 
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 3757f5f54e03b..e150e391878be 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -37,10 +37,6 @@ struct target_backend_ops {
 					   const char *, ssize_t);
 	ssize_t (*show_configfs_dev_params)(struct se_device *, char *);
 
-	void (*transport_complete)(struct se_cmd *cmd,
-				   struct scatterlist *,
-				   unsigned char *);
-
 	sense_reason_t (*parse_cdb)(struct se_cmd *cmd);
 	u32 (*get_device_type)(struct se_device *);
 	sector_t (*get_blocks)(struct se_device *);
-- 
GitLab


From 402242c904432207515e3ccb4126ff0dcfba89ca Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 31 May 2017 15:52:43 -0500
Subject: [PATCH 0857/1958] target: fix SAM_STAT_BUSY/TASK_SET_FULL handling

If the scsi status was not SAM_STAT_GOOD or there was no transport
sense, we would ignore the scsi status and do a generic not ready
LUN communication failure check condition failure.

The problem is that LUN COMM failure is treated as a hard error
sometimes and will cause apps to get IO errors instead of the OS's SCSI
layer retrying. For example, the tcmu daemon will return SAM_STAT_QUEUE_FULL
when memory runs low and can still make progress but wants the initiator to
reduce the work load. Windows will fail this error directly the app
instead of retrying.

This patch is based on Nick's "target/iblock: Use -EAGAIN/-ENOMEM to
propigate SAM BUSY/TASK_SET_FULL" patch here:

http://comments.gmane.org/gmane.linux.scsi.target.devel/11301

but instead of only setting SAM_STAT_GOOD, SAM_STAT_TASK_SET_FULL
and SAM_STAT_BUSY as success, it sets all non check condition status
as success so they are passed back to the initiator, so passthrough
type backends can return all SCSI status codes. Since only passthrough
uses this, I was not sure if we wanted to add checks for non-passthrough
and specific codes.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index b9122a56784ed..8735243edf041 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -725,15 +725,23 @@ EXPORT_SYMBOL(transport_copy_sense_to_cmd);
 void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 {
 	struct se_device *dev = cmd->se_dev;
-	int success = scsi_status == GOOD;
+	int success;
 	unsigned long flags;
 
 	cmd->scsi_status = scsi_status;
 
-
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+	switch (cmd->scsi_status) {
+	case SAM_STAT_CHECK_CONDITION:
+		if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+			success = 1;
+		else
+			success = 0;
+		break;
+	default:
 		success = 1;
+		break;
+	}
 
 	/*
 	 * Check for case where an explicit ABORT_TASK has been received
-- 
GitLab


From e5dc9a7055c98bcd7b03f9735d5f2ec2b7f0d897 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 28 Jun 2017 14:58:56 +0900
Subject: [PATCH 0858/1958] target: Use macro for WRITE_VERIFY_32 operation
 codes

Add WRITE_VERIFY_32 definition to scsi prototypes and use this macro
definition isntead of the hard coded value.

(Drop WRITE_VERIFY_16 that's already part of another patch - nab)

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 2 +-
 include/scsi/scsi_proto.h           | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index bbcef3bc66c8e..1c7c57a3c52e1 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1226,7 +1226,7 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 		switch (get_unaligned_be16(&cdb[8])) {
 		case READ_32:
 		case WRITE_32:
-		case 0x0c: /* WRITE_VERIFY_32 */
+		case WRITE_VERIFY_32:
 		case XDWRITEREAD_32:
 			cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
 			break;
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index ce78ec8e367da..5e3fe037938e9 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -161,6 +161,7 @@
 #define READ_32		      0x09
 #define VERIFY_32	      0x0a
 #define WRITE_32	      0x0b
+#define WRITE_VERIFY_32	      0x0c
 #define WRITE_SAME_32	      0x0d
 
 /* Values for T10/04-262r7 */
-- 
GitLab


From 016a5fec19e191ed1d45f75d8bfbefdec3f2dada Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 28 Jun 2017 14:58:57 +0900
Subject: [PATCH 0859/1958] target: pscsi: Introduce TYPE_ZBC support

TYPE_ZBC host managed zoned block devices are also block devices
despite the non-standard device type (14h). Handle them similarly to
regular TYPE_DISK devices.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_pscsi.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index fb40f64072f6b..b71d22ebc9bfb 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -382,7 +382,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
 	spin_unlock_irq(sh->host_lock);
 	/*
 	 * Claim exclusive struct block_device access to struct scsi_device
-	 * for TYPE_DISK using supplied udev_path
+	 * for TYPE_DISK and TYPE_ZBC using supplied udev_path
 	 */
 	bd = blkdev_get_by_path(dev->udev_path,
 				FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv);
@@ -400,8 +400,9 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
 		return ret;
 	}
 
-	pr_debug("CORE_PSCSI[%d] - Added TYPE_DISK for %d:%d:%d:%llu\n",
-		phv->phv_host_id, sh->host_no, sd->channel, sd->id, sd->lun);
+	pr_debug("CORE_PSCSI[%d] - Added TYPE_%s for %d:%d:%d:%llu\n",
+		phv->phv_host_id, sd->type == TYPE_DISK ? "DISK" : "ZBC",
+		sh->host_no, sd->channel, sd->id, sd->lun);
 	return 0;
 }
 
@@ -520,6 +521,7 @@ static int pscsi_configure_device(struct se_device *dev)
 		 */
 		switch (sd->type) {
 		case TYPE_DISK:
+		case TYPE_ZBC:
 			ret = pscsi_create_type_disk(dev, sd);
 			break;
 		default:
@@ -576,9 +578,11 @@ static void pscsi_destroy_device(struct se_device *dev)
 	if (sd) {
 		/*
 		 * Release exclusive pSCSI internal struct block_device claim for
-		 * struct scsi_device with TYPE_DISK from pscsi_create_type_disk()
+		 * struct scsi_device with TYPE_DISK or TYPE_ZBC
+		 * from pscsi_create_type_disk()
 		 */
-		if ((sd->type == TYPE_DISK) && pdv->pdv_bd) {
+		if ((sd->type == TYPE_DISK || sd->type == TYPE_ZBC) &&
+		    pdv->pdv_bd) {
 			blkdev_put(pdv->pdv_bd,
 				   FMODE_WRITE|FMODE_READ|FMODE_EXCL);
 			pdv->pdv_bd = NULL;
@@ -1000,7 +1004,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 	req->end_io_data = cmd;
 	scsi_req(req)->cmd_len = scsi_command_size(pt->pscsi_cdb);
 	scsi_req(req)->cmd = &pt->pscsi_cdb[0];
-	if (pdv->pdv_sd->type == TYPE_DISK)
+	if (pdv->pdv_sd->type == TYPE_DISK ||
+	    pdv->pdv_sd->type == TYPE_ZBC)
 		req->timeout = PS_TIMEOUT_DISK;
 	else
 		req->timeout = PS_TIMEOUT_OTHER;
-- 
GitLab


From 22e19c8d062cc19832cde13225cbc6e5283cd969 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 6 Jul 2017 00:31:14 +0200
Subject: [PATCH 0860/1958] um: userspace - be more verbose in ptrace set regs
 error

When ptrace fails to set GP/FP regs for the target process,
log the error before crashing the UML kernel.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/os-Linux/skas/process.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 03b3c4cc7735a..31c9be2c2bddf 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -323,11 +323,17 @@ void userspace(struct uml_pt_regs *regs)
 		 * fail.  In this case, there is nothing to do but
 		 * just kill the process.
 		 */
-		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp))
+		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+			printk(UM_KERN_ERR "userspace - ptrace set regs "
+			       "failed, errno = %d\n", errno);
 			fatal_sigsegv();
+		}
 
-		if (put_fp_registers(pid, regs->fp))
+		if (put_fp_registers(pid, regs->fp)) {
+			printk(UM_KERN_ERR "userspace - ptrace set fp regs "
+			       "failed, errno = %d\n", errno);
 			fatal_sigsegv();
+		}
 
 		/* Now we set local_using_sysemu to be used for one loop */
 		local_using_sysemu = get_using_sysemu();
-- 
GitLab


From 171fa6928bcdf72372f98884b0abb93ad9b2e673 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 6 Jul 2017 00:32:33 +0200
Subject: [PATCH 0861/1958] um: stub-data.h: remove superfluous include

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/shared/skas/stub-data.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index a9deece956bf4..13f404e1262bd 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -8,8 +8,6 @@
 #ifndef __STUB_DATA_H
 #define __STUB_DATA_H
 
-#include <time.h>
-
 struct stub_data {
 	unsigned long offset;
 	int fd;
-- 
GitLab


From 88af23381ac3a04e35974d9ece422c2b6ebe7775 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 6 Jul 2017 00:34:04 +0200
Subject: [PATCH 0862/1958] um: Add kerneldoc for segv_handler

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/kernel/trap.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 59158871b9fcc..4e6fcb32620ff 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -183,6 +183,16 @@ void fatal_sigsegv(void)
 	os_dump_core();
 }
 
+/**
+ * segv_handler() - the SIGSEGV handler
+ * @sig:	the signal number
+ * @unused_si:	the signal info struct; unused in this handler
+ * @regs:	the ptrace register information
+ *
+ * The handler first extracts the faultinfo from the UML ptrace regs struct.
+ * If the userfault did not happen in an UML userspace process, bad_segv is called.
+ * Otherwise the signal did happen in a cloned userspace process, handle it.
+ */
 void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	struct faultinfo * fi = UPT_FAULTINFO(regs);
-- 
GitLab


From e90998302682c41df4813da1ae019c8db79775f5 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 6 Jul 2017 00:34:05 +0200
Subject: [PATCH 0863/1958] um: Add kerneldoc for userspace_tramp() and
 start_userspace()

Also use correct function name spelling (stub_segv_handler) for better grepping

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/os-Linux/skas/process.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 31c9be2c2bddf..819d68656673c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -108,7 +108,7 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi)
 	wait_stub_done(pid);
 
 	/*
-	 * faultinfo is prepared by the stub-segv-handler at start of
+	 * faultinfo is prepared by the stub_segv_handler at start of
 	 * the stub stack page. We just have to copy it.
 	 */
 	memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
@@ -175,6 +175,21 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
 
 extern char __syscall_stub_start[];
 
+/**
+ * userspace_tramp() - userspace trampoline
+ * @stack:	pointer to the new userspace stack page, can be NULL, if? FIXME:
+ *
+ * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed.
+ * This function will run on a temporary stack page.
+ * It ptrace()'es itself, then
+ * Two pages are mapped into the userspace address space:
+ * - STUB_CODE (with EXEC), which contains the skas stub code
+ * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel.
+ * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process.
+ * And last the process stops itself to give control to the UML kernel for this userspace process.
+ *
+ * Return: Always zero, otherwise the current userspace process is ended with non null exit() call
+ */
 static int userspace_tramp(void *stack)
 {
 	void *addr;
@@ -236,12 +251,24 @@ static int userspace_tramp(void *stack)
 
 int userspace_pid[NR_CPUS];
 
+/**
+ * start_userspace() - prepare a new userspace process
+ * @stub_stack:	pointer to the stub stack. Can be NULL, if? FIXME:
+ *
+ * Setups a new temporary stack page that is used while userspace_tramp() runs
+ * Clones the kernel process into a new userspace process, with FDs only.
+ *
+ * Return: When positive: the process id of the new userspace process,
+ *         when negative: an error number.
+ * FIXME: can PIDs become negative?!
+ */
 int start_userspace(unsigned long stub_stack)
 {
 	void *stack;
 	unsigned long sp;
 	int pid, status, n, flags, err;
 
+	/* setup a temporary stack page */
 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
 		     PROT_READ | PROT_WRITE | PROT_EXEC,
 		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -252,10 +279,12 @@ int start_userspace(unsigned long stub_stack)
 		return err;
 	}
 
+	/* set stack pointer to the end of the stack page, so it can grow downwards */
 	sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
 
 	flags = CLONE_FILES | SIGCHLD;
 
+	/* clone into new userspace process */
 	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
 	if (pid < 0) {
 		err = -errno;
-- 
GitLab


From 0e4524a5d341e719e8ee9ee7db5d58e2c5a4c10e Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Jul 2017 14:44:28 +0200
Subject: [PATCH 0864/1958] KVM: mark vcpu->pid pointer as rcu protected

We do use rcu to protect the pid pointer. Mark it as such and
adopt all code to use the proper access methods.

This was detected by sparse.
"virt/kvm/kvm_main.c:2248:15: error: incompatible types in comparison
expression (different address spaces)"

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  2 +-
 virt/kvm/kvm_main.c      | 15 +++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0b50e7b35ed41..bcd37b855c667 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -234,7 +234,7 @@ struct kvm_vcpu {
 
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	struct swait_queue_head wq;
-	struct pid *pid;
+	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
 	struct kvm_vcpu_stat stat;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 19f0ecb9b93e2..fc2d58312fd52 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -293,7 +293,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-	put_pid(vcpu->pid);
+	/*
+	 * no need for rcu_read_lock as VCPU_RUN is the only place that
+	 * will change the vcpu->pid pointer and on uninit all file
+	 * descriptors are already gone.
+	 */
+	put_pid(rcu_dereference_protected(vcpu->pid, 1));
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
 }
@@ -2551,13 +2556,14 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (r)
 		return r;
 	switch (ioctl) {
-	case KVM_RUN:
+	case KVM_RUN: {
+		struct pid *oldpid;
 		r = -EINVAL;
 		if (arg)
 			goto out;
-		if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+		oldpid = rcu_access_pointer(vcpu->pid);
+		if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
 			/* The thread running this VCPU changed. */
-			struct pid *oldpid = vcpu->pid;
 			struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 
 			rcu_assign_pointer(vcpu->pid, newpid);
@@ -2568,6 +2574,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
 		break;
+	}
 	case KVM_GET_REGS: {
 		struct kvm_regs *kvm_regs;
 
-- 
GitLab


From d1bec20facd6eae17cb2f39ebbf443c95c650490 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:41:58 +0200
Subject: [PATCH 0865/1958] rtc: class separate device allocation from
 registration

Create rtc_allocate_device to allocate memory for a struct rtc_device and
initialize it.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/class.c | 63 ++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 543c64cd3df02..fb39c1334d7b6 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -150,6 +150,41 @@ static SIMPLE_DEV_PM_OPS(rtc_class_dev_pm_ops, rtc_suspend, rtc_resume);
 #define RTC_CLASS_DEV_PM_OPS	NULL
 #endif
 
+static struct rtc_device *rtc_allocate_device(void)
+{
+	struct rtc_device *rtc;
+
+	rtc = kzalloc(sizeof(*rtc), GFP_KERNEL);
+	if (!rtc)
+		return NULL;
+
+	device_initialize(&rtc->dev);
+
+	rtc->irq_freq = 1;
+	rtc->max_user_freq = 64;
+	rtc->dev.class = rtc_class;
+	rtc->dev.groups = rtc_get_dev_attribute_groups();
+	rtc->dev.release = rtc_device_release;
+
+	mutex_init(&rtc->ops_lock);
+	spin_lock_init(&rtc->irq_lock);
+	spin_lock_init(&rtc->irq_task_lock);
+	init_waitqueue_head(&rtc->irq_queue);
+
+	/* Init timerqueue */
+	timerqueue_init_head(&rtc->timerqueue);
+	INIT_WORK(&rtc->irqwork, rtc_timer_do_work);
+	/* Init aie timer */
+	rtc_timer_init(&rtc->aie_timer, rtc_aie_update_irq, (void *)rtc);
+	/* Init uie timer */
+	rtc_timer_init(&rtc->uie_rtctimer, rtc_uie_update_irq, (void *)rtc);
+	/* Init pie timer */
+	hrtimer_init(&rtc->pie_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtc->pie_timer.function = rtc_pie_update_irq;
+	rtc->pie_enabled = 0;
+
+	return rtc;
+}
 
 /**
  * rtc_device_register - register w/ RTC class
@@ -189,40 +224,16 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 		}
 	}
 
-	rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL);
-	if (rtc == NULL) {
+	rtc = rtc_allocate_device();
+	if (!rtc) {
 		err = -ENOMEM;
 		goto exit_ida;
 	}
 
-	device_initialize(&rtc->dev);
-
 	rtc->id = id;
 	rtc->ops = ops;
 	rtc->owner = owner;
-	rtc->irq_freq = 1;
-	rtc->max_user_freq = 64;
 	rtc->dev.parent = dev;
-	rtc->dev.class = rtc_class;
-	rtc->dev.groups = rtc_get_dev_attribute_groups();
-	rtc->dev.release = rtc_device_release;
-
-	mutex_init(&rtc->ops_lock);
-	spin_lock_init(&rtc->irq_lock);
-	spin_lock_init(&rtc->irq_task_lock);
-	init_waitqueue_head(&rtc->irq_queue);
-
-	/* Init timerqueue */
-	timerqueue_init_head(&rtc->timerqueue);
-	INIT_WORK(&rtc->irqwork, rtc_timer_do_work);
-	/* Init aie timer */
-	rtc_timer_init(&rtc->aie_timer, rtc_aie_update_irq, (void *)rtc);
-	/* Init uie timer */
-	rtc_timer_init(&rtc->uie_rtctimer, rtc_uie_update_irq, (void *)rtc);
-	/* Init pie timer */
-	hrtimer_init(&rtc->pie_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	rtc->pie_timer.function = rtc_pie_update_irq;
-	rtc->pie_enabled = 0;
 
 	dev_set_name(&rtc->dev, "rtc%d", id);
 
-- 
GitLab


From b91336df8ac2f5d15a2132074ba596580526db1d Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:41:59 +0200
Subject: [PATCH 0866/1958] rtc: class separate id allocation from registration

Create rtc_device_get_id to allocate the id for an RTC.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/class.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index fb39c1334d7b6..93ce88e1b1cb6 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -186,6 +186,27 @@ static struct rtc_device *rtc_allocate_device(void)
 	return rtc;
 }
 
+static int rtc_device_get_id(struct device *dev)
+{
+	int of_id = -1, id = -1;
+
+	if (dev->of_node)
+		of_id = of_alias_get_id(dev->of_node, "rtc");
+	else if (dev->parent && dev->parent->of_node)
+		of_id = of_alias_get_id(dev->parent->of_node, "rtc");
+
+	if (of_id >= 0) {
+		id = ida_simple_get(&rtc_ida, of_id, of_id + 1, GFP_KERNEL);
+		if (id < 0)
+			dev_warn(dev, "/aliases ID %d not available\n", of_id);
+	}
+
+	if (id < 0)
+		id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
+
+	return id;
+}
+
 /**
  * rtc_device_register - register w/ RTC class
  * @dev: the device to register
@@ -201,27 +222,12 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 {
 	struct rtc_device *rtc;
 	struct rtc_wkalrm alrm;
-	int of_id = -1, id = -1, err;
-
-	if (dev->of_node)
-		of_id = of_alias_get_id(dev->of_node, "rtc");
-	else if (dev->parent && dev->parent->of_node)
-		of_id = of_alias_get_id(dev->parent->of_node, "rtc");
-
-	if (of_id >= 0) {
-		id = ida_simple_get(&rtc_ida, of_id, of_id + 1,
-				    GFP_KERNEL);
-		if (id < 0)
-			dev_warn(dev, "/aliases ID %d not available\n",
-				    of_id);
-	}
+	int id, err;
 
+	id = rtc_device_get_id(dev);
 	if (id < 0) {
-		id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
-		if (id < 0) {
-			err = id;
-			goto exit;
-		}
+		err = id;
+		goto exit;
 	}
 
 	rtc = rtc_allocate_device();
-- 
GitLab


From 3068a254d5519cd5116f61297462da6d1aa84c20 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:00 +0200
Subject: [PATCH 0867/1958] rtc: introduce new registration method

Introduce rtc_register_device() to register an already allocated and
initialized struct rtc_device. It automatically sets up the owner and the
two steps allocation/registration will allow to remove race conditions in
the IRQ handling of some driver. It also allows to properly extend the core
without adding more arguments to rtc_device_register().

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/class.c | 84 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rtc.h |  7 ++++
 2 files changed, 91 insertions(+)

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 93ce88e1b1cb6..58e2a05765bbc 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -150,6 +150,7 @@ static SIMPLE_DEV_PM_OPS(rtc_class_dev_pm_ops, rtc_suspend, rtc_resume);
 #define RTC_CLASS_DEV_PM_OPS	NULL
 #endif
 
+/* Ensure the caller will set the id before releasing the device */
 static struct rtc_device *rtc_allocate_device(void)
 {
 	struct rtc_device *rtc;
@@ -372,6 +373,89 @@ void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc)
 }
 EXPORT_SYMBOL_GPL(devm_rtc_device_unregister);
 
+static void devm_rtc_release_device(struct device *dev, void *res)
+{
+	struct rtc_device *rtc = *(struct rtc_device **)res;
+
+	if (rtc->registered)
+		rtc_device_unregister(rtc);
+	else
+		put_device(&rtc->dev);
+}
+
+struct rtc_device *devm_rtc_allocate_device(struct device *dev)
+{
+	struct rtc_device **ptr, *rtc;
+	int id, err;
+
+	id = rtc_device_get_id(dev);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	ptr = devres_alloc(devm_rtc_release_device, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr) {
+		err = -ENOMEM;
+		goto exit_ida;
+	}
+
+	rtc = rtc_allocate_device();
+	if (!rtc) {
+		err = -ENOMEM;
+		goto exit_devres;
+	}
+
+	*ptr = rtc;
+	devres_add(dev, ptr);
+
+	rtc->id = id;
+	rtc->dev.parent = dev;
+	dev_set_name(&rtc->dev, "rtc%d", id);
+
+	return rtc;
+
+exit_devres:
+	devres_free(ptr);
+exit_ida:
+	ida_simple_remove(&rtc_ida, id);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devm_rtc_allocate_device);
+
+int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
+{
+	struct rtc_wkalrm alrm;
+	int err;
+
+	if (!rtc->ops)
+		return -EINVAL;
+
+	rtc->owner = owner;
+
+	/* Check to see if there is an ALARM already set in hw */
+	err = __rtc_read_alarm(rtc, &alrm);
+	if (!err && !rtc_valid_tm(&alrm.time))
+		rtc_initialize_alarm(rtc, &alrm);
+
+	rtc_dev_prepare(rtc);
+
+	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
+	if (err)
+		dev_warn(rtc->dev.parent, "failed to add char device %d:%d\n",
+			 MAJOR(rtc->dev.devt), rtc->id);
+	else
+		dev_dbg(rtc->dev.parent, "char device (%d:%d)\n",
+			MAJOR(rtc->dev.devt), rtc->id);
+
+	rtc_proc_add_device(rtc);
+
+	rtc->registered = true;
+	dev_info(rtc->dev.parent, "registered as %s\n",
+		 dev_name(&rtc->dev));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtc_register_device);
+
 static int __init rtc_init(void)
 {
 	rtc_class = class_create(THIS_MODULE, "rtc");
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index d354f56e0cf54..8e4a5f44f59ef 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -142,6 +142,8 @@ struct rtc_device {
 	/* Some hardware can't support UIE mode */
 	int uie_unsupported;
 
+	bool registered;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
@@ -163,6 +165,8 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev,
 					const char *name,
 					const struct rtc_class_ops *ops,
 					struct module *owner);
+struct rtc_device *devm_rtc_allocate_device(struct device *dev);
+int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
 extern void rtc_device_unregister(struct rtc_device *rtc);
 extern void devm_rtc_device_unregister(struct device *dev,
 					struct rtc_device *rtc);
@@ -218,6 +222,9 @@ static inline bool is_leap_year(unsigned int year)
 	return (!(year % 4) && (year % 100)) || !(year % 400);
 }
 
+#define rtc_register_device(device) \
+	__rtc_register_device(THIS_MODULE, device)
+
 #ifdef CONFIG_RTC_HCTOSYS_DEVICE
 extern int rtc_hctosys_ret;
 #else
-- 
GitLab


From 735ae2056b3c7296bb188d079c020f606a679c4a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:01 +0200
Subject: [PATCH 0868/1958] rtc: at91rm9200: remove race condition

While highly unlikely, it is possible to get an interrupt as soon as it is
requested. In that case, at91_rtc_interrupt() will be called with rtc ==
NULL.

Solve that by using devm_rtc_allocate_device/rtc_register_device.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-at91rm9200.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index b60fd477778f7..e221b78b6f106 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -409,6 +409,11 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+	platform_set_drvdata(pdev, rtc);
+
 	sclk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(sclk))
 		return PTR_ERR(sclk);
@@ -441,13 +446,10 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
 	if (!device_can_wakeup(&pdev->dev))
 		device_init_wakeup(&pdev->dev, 1);
 
-	rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				&at91_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		ret = PTR_ERR(rtc);
+	rtc->ops = &at91_rtc_ops;
+	ret = rtc_register_device(rtc);
+	if (ret)
 		goto err_clk;
-	}
-	platform_set_drvdata(pdev, rtc);
 
 	/* enable SECEV interrupt in order to initialize at91_rtc_upd_rdy
 	 * completion.
-- 
GitLab


From 697e5a47aa12cdab6f2a8b284cc923cdf704eafc Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:02 +0200
Subject: [PATCH 0869/1958] rtc: add generic nvmem support

Many RTCs have an on board non volatile storage. It can be battery backed
RAM or an EEPROM. Use the nvmem subsystem to export it to both userspace
and in-kernel consumers.

This stays compatible with the previous (non documented) ABI that was using
/sys/class/rtc/rtcx/device/nvram to export that memory. But will warn about
the deprecation.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 Documentation/rtc.txt  |   2 +
 drivers/rtc/Kconfig    |   8 +++
 drivers/rtc/Makefile   |   1 +
 drivers/rtc/class.c    |   4 ++
 drivers/rtc/nvmem.c    | 113 +++++++++++++++++++++++++++++++++++++++++
 drivers/rtc/rtc-core.h |   8 +++
 include/linux/rtc.h    |   7 +++
 7 files changed, 143 insertions(+)
 create mode 100644 drivers/rtc/nvmem.c

diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 47feb4414b7e1..c0c977445fb9c 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -164,6 +164,8 @@ offset		 The amount which the rtc clock has been adjusted in firmware.
 		 which are added to or removed from the rtc's base clock per
 		 billion ticks. A positive value makes a day pass more slowly,
 		 longer, and a negative value makes a day pass more quickly.
+*/nvmem		 The non volatile storage exported as a raw file, as described
+		 in Documentation/nvmem/nvmem.txt
 ================ ==============================================================
 
 IOCTL interface
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index d0e4b4a1c2a1e..72419ac2c52a9 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -77,6 +77,14 @@ config RTC_DEBUG
 	  Say yes here to enable debugging support in the RTC framework
 	  and individual RTC drivers.
 
+config RTC_NVMEM
+	bool "RTC non volatile storage support"
+	select NVMEM
+	default RTC_CLASS
+	help
+	  Say yes here to add support for the non volatile (often battery
+	  backed) storage present on RTCs.
+
 comment "RTC interfaces"
 
 config RTC_INTF_SYSFS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 4050fc8b9271b..acd366b41c854 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -15,6 +15,7 @@ ifdef CONFIG_RTC_DRV_EFI
 rtc-core-y			+= rtc-efi-platform.o
 endif
 
+rtc-core-$(CONFIG_RTC_NVMEM)		+= nvmem.o
 rtc-core-$(CONFIG_RTC_INTF_DEV)		+= rtc-dev.o
 rtc-core-$(CONFIG_RTC_INTF_PROC)	+= rtc-proc.o
 rtc-core-$(CONFIG_RTC_INTF_SYSFS)	+= rtc-sysfs.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 58e2a05765bbc..2ed970d61da14 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -290,6 +290,8 @@ EXPORT_SYMBOL_GPL(rtc_device_register);
  */
 void rtc_device_unregister(struct rtc_device *rtc)
 {
+	rtc_nvmem_unregister(rtc);
+
 	mutex_lock(&rtc->ops_lock);
 	/*
 	 * Remove innards of this RTC, then disable it, before
@@ -448,6 +450,8 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 
 	rtc_proc_add_device(rtc);
 
+	rtc_nvmem_register(rtc);
+
 	rtc->registered = true;
 	dev_info(rtc->dev.parent, "registered as %s\n",
 		 dev_name(&rtc->dev));
diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c
new file mode 100644
index 0000000000000..8567b4ed9ac62
--- /dev/null
+++ b/drivers/rtc/nvmem.c
@@ -0,0 +1,113 @@
+/*
+ * RTC subsystem, nvmem interface
+ *
+ * Copyright (C) 2017 Alexandre Belloni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/rtc.h>
+#include <linux/sysfs.h>
+
+#include "rtc-core.h"
+
+/*
+ * Deprecated ABI compatibility, this should be removed at some point
+ */
+
+static const char nvram_warning[] = "Deprecated ABI, please use nvmem";
+
+static ssize_t
+rtc_nvram_read(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *attr,
+	       char *buf, loff_t off, size_t count)
+{
+	struct rtc_device *rtc = attr->private;
+
+	dev_warn_once(kobj_to_dev(kobj), nvram_warning);
+
+	return nvmem_device_read(rtc->nvmem, off, count, buf);
+}
+
+static ssize_t
+rtc_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct rtc_device *rtc = attr->private;
+
+	dev_warn_once(kobj_to_dev(kobj), nvram_warning);
+
+	return nvmem_device_write(rtc->nvmem, off, count, buf);
+}
+
+static int rtc_nvram_register(struct rtc_device *rtc)
+{
+	int err;
+
+	rtc->nvram = devm_kzalloc(rtc->dev.parent,
+				sizeof(struct bin_attribute),
+				GFP_KERNEL);
+	if (!rtc->nvram)
+		return -ENOMEM;
+
+	rtc->nvram->attr.name = "nvram";
+	rtc->nvram->attr.mode = 0644;
+	rtc->nvram->private = rtc;
+
+	sysfs_bin_attr_init(rtc->nvram);
+
+	rtc->nvram->read = rtc_nvram_read;
+	rtc->nvram->write = rtc_nvram_write;
+	rtc->nvram->size = rtc->nvmem_config->size;
+
+	err = sysfs_create_bin_file(&rtc->dev.parent->kobj,
+				    rtc->nvram);
+	if (err) {
+		devm_kfree(rtc->dev.parent, rtc->nvram);
+		rtc->nvram = NULL;
+	}
+
+	return err;
+}
+
+static void rtc_nvram_unregister(struct rtc_device *rtc)
+{
+	sysfs_remove_bin_file(&rtc->dev.parent->kobj, rtc->nvram);
+}
+
+/*
+ * New ABI, uses nvmem
+ */
+void rtc_nvmem_register(struct rtc_device *rtc)
+{
+	if (!rtc->nvmem_config)
+		return;
+
+	rtc->nvmem_config->dev = &rtc->dev;
+	rtc->nvmem_config->owner = rtc->owner;
+	rtc->nvmem = nvmem_register(rtc->nvmem_config);
+	if (IS_ERR_OR_NULL(rtc->nvmem))
+		return;
+
+	/* Register the old ABI */
+	if (rtc->nvram_old_abi)
+		rtc_nvram_register(rtc);
+}
+
+void rtc_nvmem_unregister(struct rtc_device *rtc)
+{
+	if (IS_ERR_OR_NULL(rtc->nvmem))
+		return;
+
+	/* unregister the old ABI */
+	if (rtc->nvram)
+		rtc_nvram_unregister(rtc);
+
+	nvmem_unregister(rtc->nvmem);
+}
diff --git a/drivers/rtc/rtc-core.h b/drivers/rtc/rtc-core.h
index 7a4ed2f7c7d7d..ecab76a3207c0 100644
--- a/drivers/rtc/rtc-core.h
+++ b/drivers/rtc/rtc-core.h
@@ -45,3 +45,11 @@ static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
 	return NULL;
 }
 #endif
+
+#ifdef CONFIG_RTC_NVMEM
+void rtc_nvmem_register(struct rtc_device *rtc);
+void rtc_nvmem_unregister(struct rtc_device *rtc);
+#else
+static inline void rtc_nvmem_register(struct rtc_device *rtc) {}
+static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
+#endif
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 8e4a5f44f59ef..d53ecdc060cfd 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/nvmem-provider.h>
 #include <uapi/linux/rtc.h>
 
 extern int rtc_month_days(unsigned int month, unsigned int year);
@@ -144,6 +145,12 @@ struct rtc_device {
 
 	bool registered;
 
+	struct nvmem_config *nvmem_config;
+	struct nvmem_device *nvmem;
+	/* Old ABI support */
+	bool nvram_old_abi;
+	struct bin_attribute *nvram;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
-- 
GitLab


From 7133eca19505defed19b1337f46b2cd5f7608814 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:03 +0200
Subject: [PATCH 0870/1958] rtc: rv8803: switch to rtc_register_device

This removes a possible race condition and allows for further improvement
of the driver.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rv8803.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 9ad97ab298664..209edbb039715 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -577,6 +577,11 @@ static int rv8803_probe(struct i2c_client *client,
 	if (flags & RV8803_FLAG_AF)
 		dev_warn(&client->dev, "An alarm maybe have been missed.\n");
 
+	rv8803->rtc = devm_rtc_allocate_device(&client->dev);
+	if (IS_ERR(rv8803->rtc)) {
+		return PTR_ERR(rv8803->rtc);
+	}
+
 	if (client->irq > 0) {
 		err = devm_request_threaded_irq(&client->dev, client->irq,
 						NULL, rv8803_handle_irq,
@@ -592,12 +597,10 @@ static int rv8803_probe(struct i2c_client *client,
 		}
 	}
 
-	rv8803->rtc = devm_rtc_device_register(&client->dev, client->name,
-					       &rv8803_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rv8803->rtc)) {
-		dev_err(&client->dev, "unable to register the class device\n");
-		return PTR_ERR(rv8803->rtc);
-	}
+	rv8803->rtc->ops = &rv8803_rtc_ops;
+	err = rtc_register_device(rv8803->rtc);
+	if (err)
+		return err;
 
 	err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA);
 	if (err)
-- 
GitLab


From 16d70a78b45a65c88a3c614ebadbf2589f6f8efe Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:04 +0200
Subject: [PATCH 0871/1958] rtc: rv8803: use generic nvmem support

Instead of adding a binary sysfs attribute from the driver (which suffers
from a race condition as the attribute appears after the device), use the
core to register an nvmem device.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rv8803.c | 51 ++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 31 deletions(-)

diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 209edbb039715..10474ab7887dd 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -68,6 +68,7 @@ struct rv8803_data {
 	struct mutex flags_lock;
 	u8 ctrl;
 	enum rv8803_type type;
+	struct nvmem_config nvmem_cfg;
 };
 
 static int rv8803_read_reg(const struct i2c_client *client, u8 reg)
@@ -460,48 +461,32 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 	}
 }
 
-static ssize_t rv8803_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *attr,
-				  char *buf, loff_t off, size_t count)
+static int rv8803_nvram_write(void *priv, unsigned int offset, void *val,
+			      size_t bytes)
 {
-	struct device *dev = kobj_to_dev(kobj);
-	struct i2c_client *client = to_i2c_client(dev);
 	int ret;
 
-	ret = rv8803_write_reg(client, RV8803_RAM, buf[0]);
+	ret = rv8803_write_reg(priv, RV8803_RAM, *(u8 *)val);
 	if (ret)
 		return ret;
 
-	return 1;
+	return 0;
 }
 
-static ssize_t rv8803_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *attr,
-				 char *buf, loff_t off, size_t count)
+static int rv8803_nvram_read(void *priv, unsigned int offset,
+			     void *val, size_t bytes)
 {
-	struct device *dev = kobj_to_dev(kobj);
-	struct i2c_client *client = to_i2c_client(dev);
 	int ret;
 
-	ret = rv8803_read_reg(client, RV8803_RAM);
+	ret = rv8803_read_reg(priv, RV8803_RAM);
 	if (ret < 0)
 		return ret;
 
-	buf[0] = ret;
+	*(u8 *)val = ret;
 
-	return 1;
+	return 0;
 }
 
-static struct bin_attribute rv8803_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = 1,
-	.read = rv8803_nvram_read,
-	.write = rv8803_nvram_write,
-};
-
 static struct rtc_class_ops rv8803_rtc_ops = {
 	.read_time = rv8803_get_time,
 	.set_time = rv8803_set_time,
@@ -597,7 +582,17 @@ static int rv8803_probe(struct i2c_client *client,
 		}
 	}
 
+	rv8803->nvmem_cfg.name = "rv8803_nvram",
+	rv8803->nvmem_cfg.word_size = 1,
+	rv8803->nvmem_cfg.stride = 1,
+	rv8803->nvmem_cfg.size = 1,
+	rv8803->nvmem_cfg.reg_read = rv8803_nvram_read,
+	rv8803->nvmem_cfg.reg_write = rv8803_nvram_write,
+	rv8803->nvmem_cfg.priv = client;
+
 	rv8803->rtc->ops = &rv8803_rtc_ops;
+	rv8803->rtc->nvmem_config = &rv8803->nvmem_cfg;
+	rv8803->rtc->nvram_old_abi = true;
 	err = rtc_register_device(rv8803->rtc);
 	if (err)
 		return err;
@@ -612,10 +607,6 @@ static int rv8803_probe(struct i2c_client *client,
 		return err;
 	}
 
-	err = device_create_bin_file(&client->dev, &rv8803_nvram_attr);
-	if (err)
-		return err;
-
 	rv8803->rtc->max_user_freq = 1;
 
 	return 0;
@@ -623,8 +614,6 @@ static int rv8803_probe(struct i2c_client *client,
 
 static int rv8803_remove(struct i2c_client *client)
 {
-	device_remove_bin_file(&client->dev, &rv8803_nvram_attr);
-
 	return 0;
 }
 
-- 
GitLab


From 2a52482ffb1a65249049eaec9e3560a1e79abf3d Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:05 +0200
Subject: [PATCH 0872/1958] rtc: rv8803: remove rv8803_remove

rv8803_remove() is now empty, remove it.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-rv8803.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 10474ab7887dd..aae2576741a61 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -612,11 +612,6 @@ static int rv8803_probe(struct i2c_client *client,
 	return 0;
 }
 
-static int rv8803_remove(struct i2c_client *client)
-{
-	return 0;
-}
-
 static const struct i2c_device_id rv8803_id[] = {
 	{ "rv8803", rv_8803 },
 	{ "rx8900", rx_8900 },
@@ -643,7 +638,6 @@ static struct i2c_driver rv8803_driver = {
 		.of_match_table = of_match_ptr(rv8803_of_match),
 	},
 	.probe		= rv8803_probe,
-	.remove		= rv8803_remove,
 	.id_table	= rv8803_id,
 };
 module_i2c_driver(rv8803_driver);
-- 
GitLab


From 69b119a642b7e143be08f9cbabca7bc910f5720d Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:06 +0200
Subject: [PATCH 0873/1958] rtc: ds1307: switch to rtc_register_device

This removes a possible race condition and crash and allows for further
improvement of the driver.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 57ea25400ecd4..6f94f20326fb9 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -1674,8 +1674,8 @@ static int ds1307_probe(struct i2c_client *client,
 		device_set_wakeup_capable(ds1307->dev, true);
 		set_bit(HAS_ALARM, &ds1307->flags);
 	}
-	ds1307->rtc = devm_rtc_device_register(ds1307->dev, ds1307->name,
-				rtc_ops, THIS_MODULE);
+
+	ds1307->rtc = devm_rtc_allocate_device(ds1307->dev);
 	if (IS_ERR(ds1307->rtc)) {
 		return PTR_ERR(ds1307->rtc);
 	}
@@ -1737,6 +1737,11 @@ static int ds1307_probe(struct i2c_client *client,
 		}
 	}
 
+	ds1307->rtc->ops = rtc_ops;
+	err = rtc_register_device(ds1307->rtc);
+	if (err)
+		return err;
+
 	ds1307_hwmon_register(ds1307);
 	ds1307_clks_register(ds1307);
 
-- 
GitLab


From abc925f72c666042bf49fbf590bed01e5dbc2c97 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:07 +0200
Subject: [PATCH 0874/1958] rtc: ds1307: use generic nvmem

Instead of adding a binary sysfs attribute from the driver (which suffers
from a race condition as the attribute appears after the device), use the
core to register an nvmem device.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 88 ++++++++++------------------------------
 1 file changed, 22 insertions(+), 66 deletions(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 6f94f20326fb9..76652566cc31f 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -118,7 +118,7 @@ struct ds1307 {
 	u8			offset; /* register's offset */
 	u8			regs[11];
 	u16			nvram_offset;
-	struct bin_attribute	*nvram;
+	struct nvmem_config	nvmem_cfg;
 	enum ds_type		type;
 	unsigned long		flags;
 #define HAS_NVRAM	0		/* bit 0 == sysfs file active */
@@ -895,43 +895,24 @@ static const struct rtc_class_ops mcp794xx_rtc_ops = {
 
 /*----------------------------------------------------------------------*/
 
-static ssize_t
-ds1307_nvram_read(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int ds1307_nvram_read(void *priv, unsigned int offset, void *val,
+			     size_t bytes)
 {
-	struct ds1307		*ds1307;
-	int			result;
-
-	ds1307 = dev_get_drvdata(kobj_to_dev(kobj));
+	struct ds1307 *ds1307 = priv;
 
-	result = regmap_bulk_read(ds1307->regmap, ds1307->nvram_offset + off,
-				  buf, count);
-	if (result)
-		dev_err(ds1307->dev, "%s error %d\n", "nvram read", result);
-	return result;
+	return regmap_bulk_read(ds1307->regmap, ds1307->nvram_offset + offset,
+				val, bytes);
 }
 
-static ssize_t
-ds1307_nvram_write(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int ds1307_nvram_write(void *priv, unsigned int offset, void *val,
+			      size_t bytes)
 {
-	struct ds1307		*ds1307;
-	int			result;
+	struct ds1307 *ds1307 = priv;
 
-	ds1307 = dev_get_drvdata(kobj_to_dev(kobj));
-
-	result = regmap_bulk_write(ds1307->regmap, ds1307->nvram_offset + off,
-				   buf, count);
-	if (result) {
-		dev_err(ds1307->dev, "%s error %d\n", "nvram write", result);
-		return result;
-	}
-	return count;
+	return regmap_bulk_write(ds1307->regmap, ds1307->nvram_offset + offset,
+				 val, bytes);
 }
 
-
 /*----------------------------------------------------------------------*/
 
 static u8 do_trickle_setup_ds1339(struct ds1307 *ds1307,
@@ -1704,37 +1685,17 @@ static int ds1307_probe(struct i2c_client *client,
 	}
 
 	if (chip->nvram_size) {
-
-		ds1307->nvram = devm_kzalloc(ds1307->dev,
-					sizeof(struct bin_attribute),
-					GFP_KERNEL);
-		if (!ds1307->nvram) {
-			dev_err(ds1307->dev,
-				"cannot allocate memory for nvram sysfs\n");
-		} else {
-
-			ds1307->nvram->attr.name = "nvram";
-			ds1307->nvram->attr.mode = S_IRUGO | S_IWUSR;
-
-			sysfs_bin_attr_init(ds1307->nvram);
-
-			ds1307->nvram->read = ds1307_nvram_read;
-			ds1307->nvram->write = ds1307_nvram_write;
-			ds1307->nvram->size = chip->nvram_size;
-			ds1307->nvram_offset = chip->nvram_offset;
-
-			err = sysfs_create_bin_file(&ds1307->dev->kobj,
-						    ds1307->nvram);
-			if (err) {
-				dev_err(ds1307->dev,
-					"unable to create sysfs file: %s\n",
-					ds1307->nvram->attr.name);
-			} else {
-				set_bit(HAS_NVRAM, &ds1307->flags);
-				dev_info(ds1307->dev, "%zu bytes nvram\n",
-					 ds1307->nvram->size);
-			}
-		}
+		ds1307->nvmem_cfg.name = "ds1307_nvram";
+		ds1307->nvmem_cfg.word_size = 1;
+		ds1307->nvmem_cfg.stride = 1;
+		ds1307->nvmem_cfg.size = chip->nvram_size;
+		ds1307->nvmem_cfg.reg_read = ds1307_nvram_read;
+		ds1307->nvmem_cfg.reg_write = ds1307_nvram_write;
+		ds1307->nvmem_cfg.priv = ds1307;
+		ds1307->nvram_offset = chip->nvram_offset;
+
+		ds1307->rtc->nvmem_config = &ds1307->nvmem_cfg;
+		ds1307->rtc->nvram_old_abi = true;
 	}
 
 	ds1307->rtc->ops = rtc_ops;
@@ -1753,11 +1714,6 @@ static int ds1307_probe(struct i2c_client *client,
 
 static int ds1307_remove(struct i2c_client *client)
 {
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
-
-	if (test_and_clear_bit(HAS_NVRAM, &ds1307->flags))
-		sysfs_remove_bin_file(&ds1307->dev->kobj, ds1307->nvram);
-
 	return 0;
 }
 
-- 
GitLab


From bed8e280476fc8652f4105fe430e10fcf4032392 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 6 Jul 2017 11:42:08 +0200
Subject: [PATCH 0875/1958] rtc: ds1307: remove ds1307_remove

ds1307_remove() is now empty, remove it

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-ds1307.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 76652566cc31f..4fac49e55d473 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -1712,11 +1712,6 @@ static int ds1307_probe(struct i2c_client *client,
 	return err;
 }
 
-static int ds1307_remove(struct i2c_client *client)
-{
-	return 0;
-}
-
 static struct i2c_driver ds1307_driver = {
 	.driver = {
 		.name	= "rtc-ds1307",
@@ -1724,7 +1719,6 @@ static struct i2c_driver ds1307_driver = {
 		.acpi_match_table = ACPI_PTR(ds1307_acpi_ids),
 	},
 	.probe		= ds1307_probe,
-	.remove		= ds1307_remove,
 	.id_table	= ds1307_id,
 };
 
-- 
GitLab


From eaf260ac04d9b4cf9f458d5c97555bfff2da526e Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 6 Jul 2017 16:00:21 -0700
Subject: [PATCH 0876/1958] tracing: Treat recording comm for idle task as a
 success

Currently we stop recording comm for non-idle tasks when switching from/to idle
task since we treat that as a record failure. Fix that by treat recording of
comm for idle task as a success.

Link: http://lkml.kernel.org/r/20170706230023.17942-1-joelaf@google.com

Cc: kernel-team@android.com
Cc: Ingo Molnar <mingo@redhat.com>
Reported-by: Michael Sartain <mikesart@gmail.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f079a8ca11175..6722d86f2af51 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1916,7 +1916,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
 {
 	unsigned pid, idx;
 
-	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+	/* treat recording of idle task as a success */
+	if (!tsk->pid)
+		return 1;
+
+	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
 		return 0;
 
 	/*
-- 
GitLab


From bd45d34d25720a820021c8ea45de5cd607eace64 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 6 Jul 2017 16:00:22 -0700
Subject: [PATCH 0877/1958] tracing: Treat recording tgid for idle task as a
 success

Currently we stop recording tgid for non-idle tasks when switching from/to idle
task since we treat that as a record failure. Fix that by treat recording of
tgid for idle task as a success.

Link: http://lkml.kernel.org/r/20170706230023.17942-2-joelaf@google.com

Cc: kernel-team@android.com
Cc: Ingo Molnar <mingo@redhat.com>
Reported-by: Michael Sartain <mikesart@gmail.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6722d86f2af51..aee11e3a394f3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2006,7 +2006,11 @@ int trace_find_tgid(int pid)
 
 static int trace_save_tgid(struct task_struct *tsk)
 {
-	if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
+	/* treat recording of idle task as a success */
+	if (!tsk->pid)
+		return 1;
+
+	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
 		return 0;
 
 	tgid_map[tsk->pid] = tsk->tgid;
-- 
GitLab


From 29b1a8ad7df4528b862a79e3d5fb0936f4d199c7 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 6 Jul 2017 16:00:23 -0700
Subject: [PATCH 0878/1958] tracing: Attempt to record other information even
 if some fail

In recent patches where we record comm and tgid at the same time, we skip
continuing to record if any fail. Fix that by trying to record as many things
as we can even if some couldn't be recorded. If any information isn't recorded,
then we don't set trace_taskinfo_save as before.

Link: http://lkml.kernel.org/r/20170706230023.17942-3-joelaf@google.com

Cc: kernel-team@android.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index aee11e3a394f3..92af8fd1429b5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2037,11 +2037,20 @@ static bool tracing_record_taskinfo_skip(int flags)
  */
 void tracing_record_taskinfo(struct task_struct *task, int flags)
 {
+	bool done;
+
 	if (tracing_record_taskinfo_skip(flags))
 		return;
-	if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
-		return;
-	if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
+
+	/*
+	 * Record as much task information as possible. If some fail, continue
+	 * to try to record the others.
+	 */
+	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
+	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
+
+	/* If recording any information failed, retry again soon. */
+	if (!done)
 		return;
 
 	__this_cpu_write(trace_taskinfo_save, false);
@@ -2058,15 +2067,22 @@ void tracing_record_taskinfo(struct task_struct *task, int flags)
 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
 					  struct task_struct *next, int flags)
 {
+	bool done;
+
 	if (tracing_record_taskinfo_skip(flags))
 		return;
 
-	if ((flags & TRACE_RECORD_CMDLINE) &&
-	    (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
-		return;
+	/*
+	 * Record as much task information as possible. If some fail, continue
+	 * to try to record the others.
+	 */
+	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
+	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
+	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
+	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
 
-	if ((flags & TRACE_RECORD_TGID) &&
-	    (!trace_save_tgid(prev) || !trace_save_tgid(next)))
+	/* If recording any information failed, retry again soon. */
+	if (!done)
 		return;
 
 	__this_cpu_write(trace_taskinfo_save, false);
-- 
GitLab


From 5535f800b0e1533e5f3a1428f6ef25eb29eccc0f Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Jul 2017 20:31:11 +0200
Subject: [PATCH 0879/1958] KVM: use rcu access function for irq routing

irq routing is rcu protected. Use the proper access functions.
Found by sparse

virt/kvm/irqchip.c:233:13: warning: incorrect type in assignment (different address spaces)
virt/kvm/irqchip.c:233:13:    expected struct kvm_irq_routing_table *old
virt/kvm/irqchip.c:233:13:    got struct kvm_irq_routing_table [noderef] <asn:4>*irq_routing

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/irqchip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 31e40c9e81df4..b1286c4e07122 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	}
 
 	mutex_lock(&kvm->irq_lock);
-	old = kvm->irq_routing;
+	old = rcu_dereference_protected(kvm->irq_routing, 1);
 	rcu_assign_pointer(kvm->irq_routing, new);
 	kvm_irq_routing_update(kvm);
 	kvm_arch_irq_routing_update(kvm);
-- 
GitLab


From 4a12f95177280a660bda99e81838919b1cc6a91a Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 7 Jul 2017 10:51:38 +0200
Subject: [PATCH 0880/1958] KVM: mark kvm->busses as rcu protected

mark kvm->busses as rcu protected and use the correct access
function everywhere.

found by sparse
virt/kvm/kvm_main.c:3490:15: error: incompatible types in comparison expression (different address spaces)
virt/kvm/kvm_main.c:3509:15: error: incompatible types in comparison expression (different address spaces)
virt/kvm/kvm_main.c:3561:15: error: incompatible types in comparison expression (different address spaces)
virt/kvm/kvm_main.c:3644:15: error: incompatible types in comparison expression (different address spaces)

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/kvm_host.h |  8 +++++++-
 virt/kvm/eventfd.c       |  8 +++++---
 virt/kvm/kvm_main.c      | 17 ++++++++++-------
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bcd37b855c667..6a164f9eb02c1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -404,7 +404,7 @@ struct kvm {
 	int last_boosted_vcpu;
 	struct list_head vm_list;
 	struct mutex lock;
-	struct kvm_io_bus *buses[KVM_NR_BUSES];
+	struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 	struct {
 		spinlock_t        lock;
@@ -473,6 +473,12 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)					\
 	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
+{
+	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
+				      lockdep_is_held(&kvm->slots_lock));
+}
+
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 {
 	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a8d540398bbd0..d016aadd5fbb6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
 	if (ret < 0)
 		goto unlock_fail;
 
-	kvm->buses[bus_idx]->ioeventfd_count++;
+	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
 	mutex_unlock(&kvm->slots_lock);
@@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 {
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
+	struct kvm_io_bus	 *bus;
 	int                       ret = -ENOENT;
 
 	eventfd = eventfd_ctx_fdget(args->fd);
@@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 			continue;
 
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-		if (kvm->buses[bus_idx])
-			kvm->buses[bus_idx]->ioeventfd_count--;
+		bus = kvm_get_bus(kvm, bus_idx);
+		if (bus)
+			bus->ioeventfd_count--;
 		ioeventfd_release(p);
 		ret = 0;
 		break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc2d58312fd52..d76e822f89291 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -679,8 +679,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (init_srcu_struct(&kvm->irq_srcu))
 		goto out_err_no_irq_srcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
-		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
-					GFP_KERNEL);
+		rcu_assign_pointer(kvm->buses[i],
+			kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
 		if (!kvm->buses[i])
 			goto out_err;
 	}
@@ -705,7 +705,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	hardware_disable_all();
 out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
-		kfree(kvm->buses[i]);
+		kfree(rcu_access_pointer(kvm->buses[i]));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
 		kvm_free_memslots(kvm, kvm->memslots[i]);
 	kvm_arch_free_vm(kvm);
@@ -740,8 +740,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++) {
-		if (kvm->buses[i])
-			kvm_io_bus_destroy(kvm->buses[i]);
+		struct kvm_io_bus *bus;
+
+		bus = rcu_dereference_protected(kvm->buses[i], 1);
+		if (bus)
+			kvm_io_bus_destroy(bus);
 		kvm->buses[i] = NULL;
 	}
 	kvm_coalesced_mmio_free(kvm);
@@ -3570,7 +3573,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 {
 	struct kvm_io_bus *new_bus, *bus;
 
-	bus = kvm->buses[bus_idx];
+	bus = kvm_get_bus(kvm, bus_idx);
 	if (!bus)
 		return -ENOMEM;
 
@@ -3599,7 +3602,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	int i;
 	struct kvm_io_bus *new_bus, *bus;
 
-	bus = kvm->buses[bus_idx];
+	bus = kvm_get_bus(kvm, bus_idx);
 	if (!bus)
 		return;
 
-- 
GitLab


From a80cf7b5f4149753d5f19c872a47e66195b167d4 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Jul 2017 16:17:14 +0200
Subject: [PATCH 0881/1958] KVM: mark memory slots as rcu

we access the memslots array via srcu. Mark it as such and
use the right access functions also for the freeing of
memory slots.

Found by sparse:
./include/linux/kvm_host.h:565:16: error: incompatible types in
comparison expression (different address spaces)

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 virt/kvm/kvm_main.c      | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6a164f9eb02c1..b3ca77a96b2d7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -390,7 +390,7 @@ struct kvm {
 	spinlock_t mmu_lock;
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
-	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
+	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
 	/*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d76e822f89291..6e6d4edf0e923 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -707,7 +707,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(rcu_access_pointer(kvm->buses[i]));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm, kvm->memslots[i]);
+		kvm_free_memslots(kvm,
+			rcu_dereference_protected(kvm->memslots[i], 1));
 	kvm_arch_free_vm(kvm);
 	mmdrop(current->mm);
 	return ERR_PTR(r);
@@ -756,7 +757,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm, kvm->memslots[i]);
+		kvm_free_memslots(kvm,
+			rcu_dereference_protected(kvm->memslots[i], 1));
 	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
-- 
GitLab


From 20e2b791796bd68816fa115f12be5320de2b8021 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 6 Jul 2017 12:34:40 +0200
Subject: [PATCH 0882/1958] ALSA: msnd: Optimize / harden DSP and MIDI loops

The ISA msnd drivers have loops fetching the ring-buffer head, tail
and size values inside the loops.  Such codes are inefficient and
fragile.

This patch optimizes it, and also adds the sanity check to avoid the
endless loops.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196131
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196133
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/isa/msnd/msnd_midi.c     | 30 +++++++++++++++---------------
 sound/isa/msnd/msnd_pinnacle.c | 23 ++++++++++++-----------
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/sound/isa/msnd/msnd_midi.c b/sound/isa/msnd/msnd_midi.c
index 912b5a9ccbab5..013d8d1170fe0 100644
--- a/sound/isa/msnd/msnd_midi.c
+++ b/sound/isa/msnd/msnd_midi.c
@@ -120,24 +120,24 @@ void snd_msndmidi_input_read(void *mpuv)
 	unsigned long flags;
 	struct snd_msndmidi *mpu = mpuv;
 	void *pwMIDQData = mpu->dev->mappedbase + MIDQ_DATA_BUFF;
+	u16 head, tail, size;
 
 	spin_lock_irqsave(&mpu->input_lock, flags);
-	while (readw(mpu->dev->MIDQ + JQS_wTail) !=
-	       readw(mpu->dev->MIDQ + JQS_wHead)) {
-		u16 wTmp, val;
-		val = readw(pwMIDQData + 2 * readw(mpu->dev->MIDQ + JQS_wHead));
-
-			if (test_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER,
-				     &mpu->mode))
-				snd_rawmidi_receive(mpu->substream_input,
-						    (unsigned char *)&val, 1);
-
-		wTmp = readw(mpu->dev->MIDQ + JQS_wHead) + 1;
-		if (wTmp > readw(mpu->dev->MIDQ + JQS_wSize))
-			writew(0,  mpu->dev->MIDQ + JQS_wHead);
-		else
-			writew(wTmp,  mpu->dev->MIDQ + JQS_wHead);
+	head = readw(mpu->dev->MIDQ + JQS_wHead);
+	tail = readw(mpu->dev->MIDQ + JQS_wTail);
+	size = readw(mpu->dev->MIDQ + JQS_wSize);
+	if (head > size || tail > size)
+		goto out;
+	while (head != tail) {
+		unsigned char val = readw(pwMIDQData + 2 * head);
+
+		if (test_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER, &mpu->mode))
+			snd_rawmidi_receive(mpu->substream_input, &val, 1);
+		if (++head > size)
+			head = 0;
+		writew(head, mpu->dev->MIDQ + JQS_wHead);
 	}
+ out:
 	spin_unlock_irqrestore(&mpu->input_lock, flags);
 }
 EXPORT_SYMBOL(snd_msndmidi_input_read);
diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c
index ad4897337df57..fc4fb1904aef8 100644
--- a/sound/isa/msnd/msnd_pinnacle.c
+++ b/sound/isa/msnd/msnd_pinnacle.c
@@ -170,23 +170,24 @@ static irqreturn_t snd_msnd_interrupt(int irq, void *dev_id)
 {
 	struct snd_msnd *chip = dev_id;
 	void *pwDSPQData = chip->mappedbase + DSPQ_DATA_BUFF;
+	u16 head, tail, size;
 
 	/* Send ack to DSP */
 	/* inb(chip->io + HP_RXL); */
 
 	/* Evaluate queued DSP messages */
-	while (readw(chip->DSPQ + JQS_wTail) != readw(chip->DSPQ + JQS_wHead)) {
-		u16 wTmp;
-
-		snd_msnd_eval_dsp_msg(chip,
-			readw(pwDSPQData + 2 * readw(chip->DSPQ + JQS_wHead)));
-
-		wTmp = readw(chip->DSPQ + JQS_wHead) + 1;
-		if (wTmp > readw(chip->DSPQ + JQS_wSize))
-			writew(0, chip->DSPQ + JQS_wHead);
-		else
-			writew(wTmp, chip->DSPQ + JQS_wHead);
+	head = readw(chip->DSPQ + JQS_wHead);
+	tail = readw(chip->DSPQ + JQS_wTail);
+	size = readw(chip->DSPQ + JQS_wSize);
+	if (head > size || tail > size)
+		goto out;
+	while (head != tail) {
+		snd_msnd_eval_dsp_msg(chip, readw(pwDSPQData + 2 * head));
+		if (++head > size)
+			head = 0;
+		writew(head, chip->DSPQ + JQS_wHead);
 	}
+ out:
 	/* Send ack to DSP */
 	inb(chip->io + HP_RXL);
 	return IRQ_HANDLED;
-- 
GitLab


From 564d8a2cf3abf16575af48bdc3e86e92ee8a617d Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Fri, 7 Jul 2017 04:57:04 +0200
Subject: [PATCH 0883/1958] drm/radeon: Fix eDP for single-display iMac10,1
 (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The late 2009, 27 inch Apple iMac10,1 has an
internal eDP display and an external Mini-
Displayport output, driven by a DCE-3.2, RV730
Radeon Mobility HD-4670.

The machine worked fine in a dual-display setup
with eDP panel + externally connected HDMI
or DVI-D digital display sink, connected via
MiniDP to DVI or HDMI adapter.

However, booting the machine single-display with
only eDP panel results in a completely black
display - even backlight powering off, as soon as
the radeon modesetting driver loads.

This patch fixes the single dispay eDP case by
assigning encoders based on dig->linkb, similar
to DCE-4+. While this should not be generally
necessary (Alex: "...atom on normal boards
should be able to handle any mapping."), Apple
seems to use some special routing here.

One remaining problem not solved by this patch
is that an external Minidisplayport->DP sink
does still not work on iMac10,1, whereas external
DVI and HDMI sinks continue to work.

The problem affects at least all tested kernels
since Linux 3.13 - didn't test earlier kernels, so
backporting to stable probably makes sense.

v2: With the original patch from 2016, Alex was worried it
    will break other DCE3.2 systems. Use dmi_match() to
    apply this special encoder assignment only for the
    Apple iMac 10,1 from late 2009.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Michel Dänzer <michel.daenzer@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/atombios_encoders.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index fa4f8f008e4dc..e67ed383e11b1 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -31,6 +31,7 @@
 #include "radeon_asic.h"
 #include "atom.h"
 #include <linux/backlight.h>
+#include <linux/dmi.h>
 
 extern int atom_debug;
 
@@ -2184,9 +2185,17 @@ int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder, int fe_idx)
 		goto assigned;
 	}
 
-	/* on DCE32 and encoder can driver any block so just crtc id */
+	/*
+	 * On DCE32 any encoder can drive any block so usually just use crtc id,
+	 * but Apple thinks different at least on iMac10,1, so there use linkb,
+	 * otherwise the internal eDP panel will stay dark.
+	 */
 	if (ASIC_IS_DCE32(rdev)) {
-		enc_idx = radeon_crtc->crtc_id;
+		if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1"))
+			enc_idx = (dig->linkb) ? 1 : 0;
+		else
+			enc_idx = radeon_crtc->crtc_id;
+
 		goto assigned;
 	}
 
-- 
GitLab


From 84eea8c79090c44564877cd47c73455e32ec4846 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 16 May 2017 08:55:34 +0800
Subject: [PATCH 0884/1958] ceph: re-request max size after importing caps

The 'wanted max size' could be sent to inode's old auth mds, re-send
it to inode's new auth mds if necessary. Otherwise write syscall may
hang.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/caps.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a3ebb632294e7..6752223dc81cf 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3027,8 +3027,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
 					le32_to_cpu(grant->truncate_seq),
 					le64_to_cpu(grant->truncate_size),
 					size);
-		/* max size increase? */
-		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+	}
+
+	if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) {
+		if (max_size != ci->i_max_size) {
 			dout("max_size %lld -> %llu\n",
 			     ci->i_max_size, max_size);
 			ci->i_max_size = max_size;
@@ -3037,6 +3039,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
 				ci->i_requested_max_size = 0;
 			}
 			wake = true;
+		} else if (ci->i_wanted_max_size > ci->i_max_size &&
+			   ci->i_wanted_max_size > ci->i_requested_max_size) {
+			/* CEPH_CAP_OP_IMPORT */
+			wake = true;
 		}
 	}
 
@@ -3554,7 +3560,6 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 	}
 
 	/* make sure we re-request max_size, if necessary */
-	ci->i_wanted_max_size = 0;
 	ci->i_requested_max_size = 0;
 
 	*old_issued = issued;
-- 
GitLab


From efb0ca765ac6f4985b57ef215e8d55e746b083f4 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 22 May 2017 12:03:32 +0800
Subject: [PATCH 0885/1958] ceph: update the 'approaching max_size' code

The old 'approaching max_size' code expects MDS set max_size to
'2 * reported_size'. This is no longer true. The new code reports
file size when half of previous max_size increment has been used.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c  |  2 +-
 fs/ceph/caps.c  | 18 ++++++++++++++++--
 fs/ceph/file.c  |  2 +-
 fs/ceph/inode.c |  9 +++------
 fs/ceph/super.h |  3 ++-
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e71e6ca5ddfb..82220e9aaf7de 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1318,7 +1318,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
 			  struct page *page, void *fsdata)
 {
 	struct inode *inode = file_inode(file);
-	int check_cap = 0;
+	bool check_cap = false;
 
 	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
 	     inode, page, (int)pos, (int)copied, (int)len);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6752223dc81cf..f5552455223fa 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1653,6 +1653,21 @@ static int try_nonblocking_invalidate(struct inode *inode)
 	return -1;
 }
 
+bool __ceph_should_report_size(struct ceph_inode_info *ci)
+{
+	loff_t size = ci->vfs_inode.i_size;
+	/* mds will adjust max size according to the reported size */
+	if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
+		return false;
+	if (size >= ci->i_max_size)
+		return true;
+	/* half of previous max_size increment has been used */
+	if (ci->i_max_size > ci->i_reported_size &&
+	    (size << 1) >= ci->i_max_size + ci->i_reported_size)
+		return true;
+	return false;
+}
+
 /*
  * Swiss army knife function to examine currently used and wanted
  * versus held caps.  Release, flush, ack revoked caps to mds as
@@ -1806,8 +1821,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 			}
 
 			/* approaching file_max? */
-			if ((inode->i_size << 1) >= ci->i_max_size &&
-			    (ci->i_reported_size << 1) < ci->i_max_size) {
+			if (__ceph_should_report_size(ci)) {
 				dout("i_size approaching max_size\n");
 				goto ack;
 			}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 29308a80d66ff..3d48c415f3cb1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1040,8 +1040,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 	int num_pages;
 	int written = 0;
 	int flags;
-	int check_caps = 0;
 	int ret;
+	bool check_caps = false;
 	struct timespec mtime = current_time(inode);
 	size_t count = iov_iter_count(from);
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4de6cdddf0592..53f23c920266d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1653,20 +1653,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	return err;
 }
 
-int ceph_inode_set_size(struct inode *inode, loff_t size)
+bool ceph_inode_set_size(struct inode *inode, loff_t size)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	int ret = 0;
+	bool ret;
 
 	spin_lock(&ci->i_ceph_lock);
 	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
 	i_size_write(inode, size);
 	inode->i_blocks = calc_inode_blocks(size);
 
-	/* tell the MDS if we are approaching max_size */
-	if ((size << 1) >= ci->i_max_size &&
-	    (ci->i_reported_size << 1) < ci->i_max_size)
-		ret = 1;
+	ret = __ceph_should_report_size(ci);
 
 	spin_unlock(&ci->i_ceph_lock);
 	return ret;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a973acd8beaff..f8a0aba0d938e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -793,7 +793,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 
 extern int ceph_inode_holds_cap(struct inode *inode, int mask);
 
-extern int ceph_inode_set_size(struct inode *inode, loff_t size);
+extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
 extern void __ceph_do_pending_vmtruncate(struct inode *inode);
 extern void ceph_queue_vmtruncate(struct inode *inode);
 
@@ -918,6 +918,7 @@ extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
 			     struct ceph_mds_session **psession);
+extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 			    struct ceph_mds_session *session);
 extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
-- 
GitLab


From f2b0c45f09796f87723a1225c919035457f72b7a Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 23 May 2017 17:03:12 +0800
Subject: [PATCH 0886/1958] ceph: remove useless page->mapping check in
 writepage_nounlock()

Callers of writepage_nounlock() have already ensured non-null
page->mapping.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 82220e9aaf7de..96f83a417944c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -534,10 +534,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 
 	dout("writepage %p idx %lu\n", page, page->index);
 
-	if (!page->mapping || !page->mapping->host) {
-		dout("writepage %p - no mapping\n", page);
-		return -EFAULT;
-	}
 	inode = page->mapping->host;
 	ci = ceph_inode(inode);
 	fsc = ceph_inode_to_client(inode);
-- 
GitLab


From fa71fefb308532eb5b2e4b38d914d19fc836f73e Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 23 May 2017 17:18:53 +0800
Subject: [PATCH 0887/1958] ceph: redirty page when writepage_nounlock() skips
 unwritable page

Ceph needs to flush dirty page in the order in which in which snap
context they belong to. Dirty pages belong to older snap context
should be flushed earlier. if writepage_nounlock() can not flush a
page, it should redirty the page.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 96f83a417944c..8fde3b59e3a55 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -551,8 +551,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		dout("writepage %p page %p snapc %p not writeable - noop\n",
 		     inode, page, snapc);
 		/* we should only noop if called by kswapd */
-		WARN_ON((current->flags & PF_MEMALLOC) == 0);
+		WARN_ON(!(current->flags & PF_MEMALLOC));
 		ceph_put_snap_context(oldest);
+		redirty_page_for_writepage(wbc, page);
 		goto out;
 	}
 	ceph_put_snap_context(oldest);
-- 
GitLab


From 439868812aac01ec5d1b133a51e768280f3fc8d5 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 23 May 2017 17:48:28 +0800
Subject: [PATCH 0888/1958] ceph: cleanup writepage_nounlock()

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8fde3b59e3a55..50836280a6f8a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -530,7 +530,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	long writeback_stat;
 	u64 truncate_size;
 	u32 truncate_seq;
-	int err = 0, len = PAGE_SIZE;
+	int err, len = PAGE_SIZE;
 
 	dout("writepage %p idx %lu\n", page, page->index);
 
@@ -543,7 +543,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	snapc = page_snap_context(page);
 	if (snapc == NULL) {
 		dout("writepage %p page %p not dirty?\n", inode, page);
-		goto out;
+		return 0;
 	}
 	oldest = get_oldest_context(inode, &snap_size,
 				    &truncate_size, &truncate_seq);
@@ -554,7 +554,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		WARN_ON(!(current->flags & PF_MEMALLOC));
 		ceph_put_snap_context(oldest);
 		redirty_page_for_writepage(wbc, page);
-		goto out;
+		return 0;
 	}
 	ceph_put_snap_context(oldest);
 
@@ -564,8 +564,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	/* is this a partial page at end of file? */
 	if (page_off >= snap_size) {
 		dout("%p page eof %llu\n", page, snap_size);
-		goto out;
+		return 0;
 	}
+
 	if (snap_size < page_off + len)
 		len = snap_size - page_off;
 
@@ -592,7 +593,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 			dout("writepage interrupted page %p\n", page);
 			redirty_page_for_writepage(wbc, page);
 			end_page_writeback(page);
-			goto out;
+			return err;
 		}
 		dout("writepage setting page/mapping error %d %p\n",
 		     err, page);
@@ -608,7 +609,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	end_page_writeback(page);
 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
 	ceph_put_snap_context(snapc);  /* page's reference */
-out:
 	return err;
 }
 
-- 
GitLab


From 92e57e6287cc7402487edf3eb09c8a7b36dad63f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 5 Jun 2017 11:07:28 +0800
Subject: [PATCH 0889/1958] ceph: don't re-send interrupted flock request

Don't re-send interrupted flock request in cases of mds failover
and receiving request forward. Because corresponding 'lock intr'
request may have been finished, it won't get re-sent.

Link: http://tracker.ceph.com/issues/20170
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/locks.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 6806dbeaee19e..64ae744720469 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -127,6 +127,29 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
 	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
 	     req->r_tid);
 
+	mutex_lock(&mdsc->mutex);
+	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
+		err = 0;
+	} else {
+		/*
+		 * ensure we aren't running concurrently with
+		 * ceph_fill_trace or ceph_readdir_prepopulate, which
+		 * rely on locks (dir mutex) held by our caller.
+		 */
+		mutex_lock(&req->r_fill_mutex);
+		req->r_err = err;
+		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+		mutex_unlock(&req->r_fill_mutex);
+
+		if (!req->r_session) {
+			// haven't sent the request
+			err = 0;
+		}
+	}
+	mutex_unlock(&mdsc->mutex);
+	if (!err)
+		return 0;
+
 	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
 					    USE_AUTH_MDS);
 	if (IS_ERR(intr_req))
@@ -146,7 +169,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
 	if (err && err != -ERESTARTSYS)
 		return err;
 
-	wait_for_completion(&req->r_completion);
+	wait_for_completion_killable(&req->r_safe_completion);
 	return 0;
 }
 
-- 
GitLab


From 1684dd03e9f59212775cafa50ea77b9ef5b263db Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Wed, 14 Jun 2017 15:54:56 +0800
Subject: [PATCH 0890/1958] ceph: getattr before read on ceph.* xattrs

Previously we were returning values for quota, layout
xattrs without any kind of update -- the user just got
whatever happened to be in our cache.

Clearly this extra round trip has a cost, but reads of
these xattrs are fairly rare, happening on admin
intervention rather than in normal operation.

Link: http://tracker.ceph.com/issues/17939
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/xattr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 75267cdd5dfd8..11263f102e4c9 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -756,6 +756,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 	/* let's see if a virtual xattr was requested */
 	vxattr = ceph_match_vxattr(inode, name);
 	if (vxattr) {
+		err = ceph_do_getattr(inode, 0, true);
+		if (err)
+			return err;
 		err = -ENODATA;
 		if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
 			err = vxattr->getxattr_cb(ci, value, size);
-- 
GitLab


From 62a65f36d016fff32179acdbfcb8b2d8d9e54757 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Thu, 22 Jun 2017 16:26:34 +0800
Subject: [PATCH 0891/1958] ceph: avoid invalid memory dereference in the
 middle of umount

extra_mon_dispatch() and debugfs' foo_show functions dereference
fsc->mdsc. we should clean up fsc->client->extra_mon_dispatch
and debugfs before destroying fsc->mds.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 4 ++--
 fs/ceph/super.c      | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0c05df44cc6c8..666a9f2748321 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3769,13 +3769,13 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
 {
 	struct ceph_mds_client *mdsc = fsc->mdsc;
-
 	dout("mdsc_destroy %p\n", mdsc);
-	ceph_mdsc_stop(mdsc);
 
 	/* flush out any connection work with references to us */
 	ceph_msgr_flush();
 
+	ceph_mdsc_stop(mdsc);
+
 	fsc->mdsc = NULL;
 	kfree(mdsc);
 	dout("mdsc_destroy %p done\n", mdsc);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8d7918ce694a9..14e78dd52ef9c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -636,8 +636,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 
 	destroy_mount_options(fsc->mount_options);
 
-	ceph_fs_debugfs_cleanup(fsc);
-
 	ceph_destroy_client(fsc->client);
 
 	kfree(fsc);
@@ -1040,6 +1038,10 @@ static void ceph_kill_sb(struct super_block *s)
 
 	ceph_mdsc_pre_umount(fsc->mdsc);
 	generic_shutdown_super(s);
+
+	fsc->client->extra_mon_dispatch = NULL;
+	ceph_fs_debugfs_cleanup(fsc);
+
 	ceph_mdsc_destroy(fsc);
 
 	destroy_fs_client(fsc);
-- 
GitLab


From 4b9f2042fd2a9da7e6c7b4dd49eff19dc3754e4f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 27 Jun 2017 17:17:24 +0800
Subject: [PATCH 0892/1958] ceph: avoid accessing freeing inode in
 ceph_check_delayed_caps()

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/caps.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f5552455223fa..7007ae2a5ad2e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3809,6 +3809,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
  */
 void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 {
+	struct inode *inode;
 	struct ceph_inode_info *ci;
 	int flags = CHECK_CAPS_NODELAY;
 
@@ -3824,9 +3825,15 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 		    time_before(jiffies, ci->i_hold_caps_max))
 			break;
 		list_del_init(&ci->i_cap_delay_list);
+
+		inode = igrab(&ci->vfs_inode);
 		spin_unlock(&mdsc->cap_delay_lock);
-		dout("check_delayed_caps on %p\n", &ci->vfs_inode);
-		ceph_check_caps(ci, flags, NULL);
+
+		if (inode) {
+			dout("check_delayed_caps on %p\n", inode);
+			ceph_check_caps(ci, flags, NULL);
+			iput(inode);
+		}
 	}
 	spin_unlock(&mdsc->cap_delay_lock);
 }
-- 
GitLab


From 1d8f83604c4244d93c5a49f5107624769df6248f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 27 Jun 2017 11:57:56 +0800
Subject: [PATCH 0893/1958] ceph: new mount option that specifies fscache
 uniquifier

Current ceph uses FSID as primary index key of fscache data. This
allows ceph to retain cached data across remount. But this causes
problem (kernel opps, fscache does not support sharing data) when
a filesystem get mounted several times (with fscache enabled, with
different mount options).

The fix is adding a new mount option, which specifies uniquifier
for fscache.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/cache.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-----
 fs/ceph/super.c | 41 +++++++++++++++-------
 fs/ceph/super.h |  1 +
 3 files changed, 113 insertions(+), 21 deletions(-)

diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 4e7421caf3804..fd1172823f862 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -35,18 +35,34 @@ struct fscache_netfs ceph_cache_netfs = {
 	.version	= 0,
 };
 
+static DEFINE_MUTEX(ceph_fscache_lock);
+static LIST_HEAD(ceph_fscache_list);
+
+struct ceph_fscache_entry {
+	struct list_head list;
+	struct fscache_cookie *fscache;
+	struct ceph_fsid fsid;
+	size_t uniq_len;
+	char uniquifier[0];
+};
+
 static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
 					     void *buffer, uint16_t maxbuf)
 {
 	const struct ceph_fs_client* fsc = cookie_netfs_data;
-	uint16_t klen;
+	const char *fscache_uniq = fsc->mount_options->fscache_uniq;
+	uint16_t fsid_len, uniq_len;
 
-	klen = sizeof(fsc->client->fsid);
-	if (klen > maxbuf)
+	fsid_len = sizeof(fsc->client->fsid);
+	uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
+	if (fsid_len + uniq_len > maxbuf)
 		return 0;
 
-	memcpy(buffer, &fsc->client->fsid, klen);
-	return klen;
+	memcpy(buffer, &fsc->client->fsid, fsid_len);
+	if (uniq_len)
+		memcpy(buffer + fsid_len, fscache_uniq, uniq_len);
+
+	return fsid_len + uniq_len;
 }
 
 static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
@@ -67,13 +83,54 @@ void ceph_fscache_unregister(void)
 
 int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
 {
+	const struct ceph_fsid *fsid = &fsc->client->fsid;
+	const char *fscache_uniq = fsc->mount_options->fscache_uniq;
+	size_t uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
+	struct ceph_fscache_entry *ent;
+	int err = 0;
+
+	mutex_lock(&ceph_fscache_lock);
+	list_for_each_entry(ent, &ceph_fscache_list, list) {
+		if (memcmp(&ent->fsid, fsid, sizeof(*fsid)))
+			continue;
+		if (ent->uniq_len != uniq_len)
+			continue;
+		if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
+			continue;
+
+		pr_err("fscache cookie already registered for fsid %pU\n", fsid);
+		pr_err("  use fsc=%%s mount option to specify a uniquifier\n");
+		err = -EBUSY;
+		goto out_unlock;
+	}
+
+	ent = kzalloc(sizeof(*ent) + uniq_len, GFP_KERNEL);
+	if (!ent) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
 	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
 					      &ceph_fscache_fsid_object_def,
 					      fsc, true);
-	if (!fsc->fscache)
-		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
 
-	return 0;
+	if (fsc->fscache) {
+		memcpy(&ent->fsid, fsid, sizeof(*fsid));
+		if (uniq_len > 0) {
+			memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
+			ent->uniq_len = uniq_len;
+		}
+		ent->fscache = fsc->fscache;
+		list_add_tail(&ent->list, &ceph_fscache_list);
+	} else {
+		kfree(ent);
+		pr_err("unable to register fscache cookie for fsid %pU\n",
+		       fsid);
+		/* all other fs ignore this error */
+	}
+out_unlock:
+	mutex_unlock(&ceph_fscache_lock);
+	return err;
 }
 
 static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
@@ -349,7 +406,24 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
 
 void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
 {
-	fscache_relinquish_cookie(fsc->fscache, 0);
+	if (fscache_cookie_valid(fsc->fscache)) {
+		struct ceph_fscache_entry *ent;
+		bool found = false;
+
+		mutex_lock(&ceph_fscache_lock);
+		list_for_each_entry(ent, &ceph_fscache_list, list) {
+			if (ent->fscache == fsc->fscache) {
+				list_del(&ent->list);
+				kfree(ent);
+				found = true;
+				break;
+			}
+		}
+		WARN_ON_ONCE(!found);
+		mutex_unlock(&ceph_fscache_lock);
+
+		__fscache_relinquish_cookie(fsc->fscache, 0);
+	}
 	fsc->fscache = NULL;
 }
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 14e78dd52ef9c..aa06a8c24792c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -121,6 +121,7 @@ enum {
 	/* int args above */
 	Opt_snapdirname,
 	Opt_mds_namespace,
+	Opt_fscache_uniq,
 	Opt_last_string,
 	/* string args above */
 	Opt_dirstat,
@@ -158,6 +159,7 @@ static match_table_t fsopt_tokens = {
 	/* int args above */
 	{Opt_snapdirname, "snapdirname=%s"},
 	{Opt_mds_namespace, "mds_namespace=%s"},
+	{Opt_fscache_uniq, "fsc=%s"},
 	/* string args above */
 	{Opt_dirstat, "dirstat"},
 	{Opt_nodirstat, "nodirstat"},
@@ -223,6 +225,14 @@ static int parse_fsopt_token(char *c, void *private)
 		if (!fsopt->mds_namespace)
 			return -ENOMEM;
 		break;
+	case Opt_fscache_uniq:
+		fsopt->fscache_uniq = kstrndup(argstr[0].from,
+					       argstr[0].to-argstr[0].from,
+					       GFP_KERNEL);
+		if (!fsopt->fscache_uniq)
+			return -ENOMEM;
+		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+		break;
 		/* misc */
 	case Opt_wsize:
 		fsopt->wsize = intval;
@@ -317,6 +327,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
 	kfree(args->snapdir_name);
 	kfree(args->mds_namespace);
 	kfree(args->server_path);
+	kfree(args->fscache_uniq);
 	kfree(args);
 }
 
@@ -350,8 +361,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 	ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
 	if (ret)
 		return ret;
-
 	ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
+	if (ret)
+		return ret;
+	ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
 	if (ret)
 		return ret;
 
@@ -475,8 +488,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_puts(m, ",noasyncreaddir");
 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
 		seq_puts(m, ",nodcache");
-	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
-		seq_puts(m, ",fsc");
+	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
+		if (fsopt->fscache_uniq)
+			seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
+		else
+			seq_puts(m, ",fsc");
+	}
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
 		seq_puts(m, ",nopoolperm");
 
@@ -597,18 +614,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	if (!fsc->wb_pagevec_pool)
 		goto fail_trunc_wq;
 
-	/* setup fscache */
-	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
-	    (ceph_fscache_register_fs(fsc) != 0))
-		goto fail_fscache;
-
 	/* caps */
 	fsc->min_caps = fsopt->max_readdir;
 
 	return fsc;
 
-fail_fscache:
-	ceph_fscache_unregister_fs(fsc);
 fail_trunc_wq:
 	destroy_workqueue(fsc->trunc_wq);
 fail_pg_inv_wq:
@@ -626,8 +636,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
 	dout("destroy_fs_client %p\n", fsc);
 
-	ceph_fscache_unregister_fs(fsc);
-
 	destroy_workqueue(fsc->wb_wq);
 	destroy_workqueue(fsc->pg_inv_wq);
 	destroy_workqueue(fsc->trunc_wq);
@@ -820,6 +828,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
 		if (err < 0)
 			goto out;
 
+		/* setup fscache */
+		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
+			err = ceph_fscache_register_fs(fsc);
+			if (err < 0)
+				goto out;
+		}
+
 		if (!fsc->mount_options->server_path) {
 			path = "";
 			dout("mount opening path \\t\n");
@@ -1042,6 +1057,8 @@ static void ceph_kill_sb(struct super_block *s)
 	fsc->client->extra_mon_dispatch = NULL;
 	ceph_fs_debugfs_cleanup(fsc);
 
+	ceph_fscache_unregister_fs(fsc);
+
 	ceph_mdsc_destroy(fsc);
 
 	destroy_fs_client(fsc);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f8a0aba0d938e..f02a2225fe422 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -73,6 +73,7 @@ struct ceph_mount_options {
 	char *snapdir_name;   /* default ".snap" */
 	char *mds_namespace;  /* default NULL */
 	char *server_path;    /* default  "/" */
+	char *fscache_uniq;   /* default NULL */
 };
 
 struct ceph_fs_client {
-- 
GitLab


From 481f001ffa5d8e9d72f8206e92f73ded076eeb30 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 3 Jul 2017 09:09:10 +0800
Subject: [PATCH 0894/1958] ceph: update ceph_dentry_info::lease_session when
 necessary

Current code does not update ceph_dentry_info::lease_session once
it is set. If auth mds of corresponding dentry changes, dentry lease
keeps in an invalid state.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/inode.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 53f23c920266d..220dfd87cbfaa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1016,6 +1016,7 @@ static void update_dentry_lease(struct dentry *dentry,
 	long unsigned ttl = from_time + (duration * HZ) / 1000;
 	long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
 	struct inode *dir;
+	struct ceph_mds_session *old_lease_session = NULL;
 
 	/*
 	 * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
@@ -1051,8 +1052,10 @@ static void update_dentry_lease(struct dentry *dentry,
 	    time_before(ttl, di->time))
 		goto out_unlock;  /* we already have a newer lease. */
 
-	if (di->lease_session && di->lease_session != session)
-		goto out_unlock;
+	if (di->lease_session && di->lease_session != session) {
+		old_lease_session = di->lease_session;
+		di->lease_session = NULL;
+	}
 
 	ceph_dentry_lru_touch(dentry);
 
@@ -1065,6 +1068,8 @@ static void update_dentry_lease(struct dentry *dentry,
 	di->time = ttl;
 out_unlock:
 	spin_unlock(&dentry->d_lock);
+	if (old_lease_session)
+		ceph_put_mds_session(old_lease_session);
 	return;
 }
 
-- 
GitLab


From dcbbd97ccb9c6f4dad39875c1404d2643eaf110b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:44:59 +0200
Subject: [PATCH 0895/1958] libceph: remove ceph_sanitize_features() workaround

Reflects ceph.git commit ff1959282826ae6acd7134e1b1ede74ffd1cc04a.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 21 ---------------------
 net/ceph/messenger.c               |  3 +--
 2 files changed, 1 insertion(+), 23 deletions(-)

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index fd8b2953c78f8..4962708841b50 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -77,29 +77,8 @@
 #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 #define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
 
-/*
- * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
- * vector to evaluate to 64 bit ~0.  To cope, we designate 1ULL << 63
- * to mean 33 bit ~0, and introduce a helper below to do the
- * translation.
- *
- * This was introduced by ceph.git commit
- *   9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
- * and fixed by ceph.git commit
- *   4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
- */
 #define CEPH_FEATURE_RESERVED (1ULL<<63)
 
-static inline u64 ceph_sanitize_features(u64 features)
-{
-	if (features & CEPH_FEATURE_RESERVED) {
-		/* everything through OSD_SNAPMAPPER */
-		return 0x1ffffffffull;
-	} else {
-		return features;
-	}
-}
-
 /*
  * Features supported.
  */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 588a919300514..9daed25406390 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2033,8 +2033,7 @@ static int process_connect(struct ceph_connection *con)
 {
 	u64 sup_feat = from_msgr(con->msgr)->supported_features;
 	u64 req_feat = from_msgr(con->msgr)->required_features;
-	u64 server_feat = ceph_sanitize_features(
-				le64_to_cpu(con->in_reply.features));
+	u64 server_feat = le64_to_cpu(con->in_reply.features);
 	int ret;
 
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
-- 
GitLab


From f179d3ba8cb9073c2d96315b79ff7bc658a1feee Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:44:59 +0200
Subject: [PATCH 0896/1958] libceph: new features macros

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 242 ++++++++++++++++++++---------
 1 file changed, 167 insertions(+), 75 deletions(-)

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 4962708841b50..7fce9ed44af03 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -2,82 +2,174 @@
 #define __CEPH_FEATURES
 
 /*
- * feature bits
+ * Each time we reclaim bits for reuse we need to specify another bit
+ * that, if present, indicates we have the new incarnation of that
+ * feature.  Base case is 1 (first use).
  */
-#define CEPH_FEATURE_UID            (1ULL<<0)
-#define CEPH_FEATURE_NOSRCADDR      (1ULL<<1)
-#define CEPH_FEATURE_MONCLOCKCHECK  (1ULL<<2)
-#define CEPH_FEATURE_FLOCK          (1ULL<<3)
-#define CEPH_FEATURE_SUBSCRIBE2     (1ULL<<4)
-#define CEPH_FEATURE_MONNAMES       (1ULL<<5)
-#define CEPH_FEATURE_RECONNECT_SEQ  (1ULL<<6)
-#define CEPH_FEATURE_DIRLAYOUTHASH  (1ULL<<7)
-#define CEPH_FEATURE_OBJECTLOCATOR  (1ULL<<8)
-#define CEPH_FEATURE_PGID64         (1ULL<<9)
-#define CEPH_FEATURE_INCSUBOSDMAP   (1ULL<<10)
-#define CEPH_FEATURE_PGPOOL3        (1ULL<<11)
-#define CEPH_FEATURE_OSDREPLYMUX    (1ULL<<12)
-#define CEPH_FEATURE_OSDENC         (1ULL<<13)
-#define CEPH_FEATURE_OMAP           (1ULL<<14)
-#define CEPH_FEATURE_MONENC         (1ULL<<15)
-#define CEPH_FEATURE_QUERY_T        (1ULL<<16)
-#define CEPH_FEATURE_INDEP_PG_MAP   (1ULL<<17)
-#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
-#define CEPH_FEATURE_CHUNKY_SCRUB   (1ULL<<19)
-#define CEPH_FEATURE_MON_NULLROUTE  (1ULL<<20)
-#define CEPH_FEATURE_MON_GV         (1ULL<<21)
-#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
-#define CEPH_FEATURE_MSG_AUTH	    (1ULL<<23)
-#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
-#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
-#define CEPH_FEATURE_CREATEPOOLID   (1ULL<<26)
-#define CEPH_FEATURE_REPLY_CREATE_INODE   (1ULL<<27)
-#define CEPH_FEATURE_OSD_HBMSGS     (1ULL<<28)
-#define CEPH_FEATURE_MDSENC         (1ULL<<29)
-#define CEPH_FEATURE_OSDHASHPSPOOL  (1ULL<<30)
-#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
-#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
-#define CEPH_FEATURE_MON_SCRUB      (1ULL<<33)
-#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
-#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
-#define CEPH_FEATURE_CRUSH_V2      (1ULL<<36)  /* new indep; SET_* steps */
-#define CEPH_FEATURE_EXPORT_PEER   (1ULL<<37)
-#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
-#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38)   /* overlap with EC */
-/* The process supports new-style OSDMap encoding. Monitors also use
-   this bit to determine if peers support NAK messages. */
-#define CEPH_FEATURE_OSDMAP_ENC    (1ULL<<39)
-#define CEPH_FEATURE_MDS_INLINE_DATA     (1ULL<<40)
-#define CEPH_FEATURE_CRUSH_TUNABLES3     (1ULL<<41)
-#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41)  /* overlap w/ tunables3 */
-#define CEPH_FEATURE_MSGR_KEEPALIVE2   (1ULL<<42)
-#define CEPH_FEATURE_OSD_POOLRESEND    (1ULL<<43)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
-#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
-#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
-#define CEPH_FEATURE_OSD_REPOP         (1ULL<<46)   /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_OBJECT_DIGEST  (1ULL<<46)  /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
-#define CEPH_FEATURE_MDS_QUOTA      (1ULL<<47)
-#define CEPH_FEATURE_CRUSH_V4      (1ULL<<48)  /* straw2 buckets */
-#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
-// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
-#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
-#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
-#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
-#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
-#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
-#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
-#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
-#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
-#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
-#define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
-// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
-#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
-#define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
-
-#define CEPH_FEATURE_RESERVED (1ULL<<63)
+#define CEPH_FEATURE_INCARNATION_1 (0ull)
+#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
+
+#define DEFINE_CEPH_FEATURE(bit, incarnation, name)			\
+	const static uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
+	const static uint64_t CEPH_FEATUREMASK_##name =			\
+		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
+
+/* this bit is ignored but still advertised by release *when* */
+#define DEFINE_CEPH_FEATURE_DEPRECATED(bit, incarnation, name, when) \
+	const static uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
+	const static uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
+		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
+
+/*
+ * this bit is ignored by release *unused* and not advertised by
+ * release *unadvertised*
+ */
+#define DEFINE_CEPH_FEATURE_RETIRED(bit, inc, name, unused, unadvertised)
+
+
+/*
+ * test for a feature.  this test is safer than a typical mask against
+ * the bit because it ensures that we have the bit AND the marker for the
+ * bit's incarnation.  this must be used in any case where the features
+ * bits may include an old meaning of the bit.
+ */
+#define CEPH_HAVE_FEATURE(x, name)			\
+	(((x) & (CEPH_FEATUREMASK_##name)) == (CEPH_FEATUREMASK_##name))
+
+
+/*
+ * Notes on deprecation:
+ *
+ * A *major* release is a release through which all upgrades must pass
+ * (e.g., jewel).  For example, no pre-jewel server will ever talk to
+ * a post-jewel server (mon, osd, etc).
+ *
+ * For feature bits used *only* on the server-side:
+ *
+ *  - In the first phase we indicate that a feature is DEPRECATED as of
+ *    a particular release.  This is the first major release X (say,
+ *    jewel) that does not depend on its peers advertising the feature.
+ *    That is, it safely assumes its peers all have the feature.  We
+ *    indicate this with the DEPRECATED macro.  For example,
+ *
+ *      DEFINE_CEPH_FEATURE_DEPRECATED( 2, 1, MONCLOCKCHECK, JEWEL)
+ *
+ *    because 10.2.z (jewel) did not care if its peers advertised this
+ *    feature bit.
+ *
+ *  - In the second phase we stop advertising the the bit and call it
+ *    RETIRED.  This can normally be done in the *next* major release
+ *    following the one in which we marked the feature DEPRECATED.  In
+ *    the above example, for 12.0.z (luminous) we can say:
+ *
+ *      DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
+ *
+ *  - The bit can be reused in the first post-luminous release, 13.0.z
+ *    (m).
+ *
+ * This ensures that no two versions who have different meanings for
+ * the bit ever speak to each other.
+ */
+
+DEFINE_CEPH_FEATURE( 0, 1, UID)
+DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
+DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
+DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
+DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
+DEFINE_CEPH_FEATURE( 6, 1, RECONNECT_SEQ)
+DEFINE_CEPH_FEATURE( 7, 1, DIRLAYOUTHASH)
+DEFINE_CEPH_FEATURE( 8, 1, OBJECTLOCATOR)
+DEFINE_CEPH_FEATURE( 9, 1, PGID64)
+DEFINE_CEPH_FEATURE(10, 1, INCSUBOSDMAP)
+DEFINE_CEPH_FEATURE(11, 1, PGPOOL3)
+DEFINE_CEPH_FEATURE(12, 1, OSDREPLYMUX)
+DEFINE_CEPH_FEATURE(13, 1, OSDENC)
+DEFINE_CEPH_FEATURE_RETIRED(14, 1, OMAP, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(14, 2, SERVER_KRAKEN)
+DEFINE_CEPH_FEATURE(15, 1, MONENC)
+DEFINE_CEPH_FEATURE_RETIRED(16, 1, QUERY_T, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(17, 1, INDEP_PG_MAP, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(18, 1, CRUSH_TUNABLES)
+DEFINE_CEPH_FEATURE_RETIRED(19, 1, CHUNKY_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(20, 1, MON_NULLROUTE, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(21, 1, MON_GV, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(21, 2, SERVER_LUMINOUS)
+DEFINE_CEPH_FEATURE(21, 2, RESEND_ON_SPLIT)  // overlap
+DEFINE_CEPH_FEATURE(21, 2, RADOS_BACKOFF)    // overlap
+DEFINE_CEPH_FEATURE(21, 2, OSDMAP_PG_UPMAP)  // overlap
+DEFINE_CEPH_FEATURE(21, 2, CRUSH_CHOOSE_ARGS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(22, 1, BACKFILL_RESERVATION, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(23, 1, MSG_AUTH)
+DEFINE_CEPH_FEATURE_RETIRED(24, 1, RECOVERY_RESERVATION, JEWEL, LUNINOUS)
+
+DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
+DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
+DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
+DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
+DEFINE_CEPH_FEATURE(29, 1, MDSENC)
+DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
+DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS)  // deprecate me
+DEFINE_CEPH_FEATURE_RETIRED(32, 1, OSD_SNAPMAPPER, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(33, 1, MON_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(34, 1, OSD_PACKED_RECOVERY, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(35, 1, OSD_CACHEPOOL)
+DEFINE_CEPH_FEATURE(36, 1, CRUSH_V2)
+DEFINE_CEPH_FEATURE(37, 1, EXPORT_PEER)
+DEFINE_CEPH_FEATURE(38, 1, OSD_ERASURE_CODES)
+DEFINE_CEPH_FEATURE(38, 1, OSD_OSD_TMAP2OMAP) // overlap
+DEFINE_CEPH_FEATURE(39, 1, OSDMAP_ENC)
+DEFINE_CEPH_FEATURE(40, 1, MDS_INLINE_DATA)
+DEFINE_CEPH_FEATURE(41, 1, CRUSH_TUNABLES3)
+DEFINE_CEPH_FEATURE(41, 1, OSD_PRIMARY_AFFINITY) // overlap
+DEFINE_CEPH_FEATURE(42, 1, MSGR_KEEPALIVE2)
+DEFINE_CEPH_FEATURE(43, 1, OSD_POOLRESEND)
+DEFINE_CEPH_FEATURE(44, 1, ERASURE_CODE_PLUGINS_V2)
+DEFINE_CEPH_FEATURE_RETIRED(45, 1, OSD_SET_ALLOC_HINT, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(46, 1, OSD_FADVISE_FLAGS)
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_REPOP, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_OBJECT_DIGEST, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_TRANSACTION_MAY_LAYOUT, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(47, 1, MDS_QUOTA)
+DEFINE_CEPH_FEATURE(48, 1, CRUSH_V4)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_MIN_SIZE_RECOVERY, JEWEL, LUMINOUS)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_PROXY_FEATURES, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(50, 1, MON_METADATA)
+DEFINE_CEPH_FEATURE(51, 1, OSD_BITWISE_HOBJ_SORT)
+DEFINE_CEPH_FEATURE(52, 1, OSD_PROXY_WRITE_FEATURES)
+DEFINE_CEPH_FEATURE(53, 1, ERASURE_CODE_PLUGINS_V3)
+DEFINE_CEPH_FEATURE(54, 1, OSD_HITSET_GMT)
+DEFINE_CEPH_FEATURE(55, 1, HAMMER_0_94_4)
+DEFINE_CEPH_FEATURE(56, 1, NEW_OSDOP_ENCODING)
+DEFINE_CEPH_FEATURE(57, 1, MON_STATEFUL_SUB)
+DEFINE_CEPH_FEATURE(57, 1, MON_ROUTE_OSDMAP) // overlap
+DEFINE_CEPH_FEATURE(57, 1, OSDSUBOP_NO_SNAPCONTEXT) // overlap
+DEFINE_CEPH_FEATURE(57, 1, SERVER_JEWEL) // overlap
+DEFINE_CEPH_FEATURE(58, 1, CRUSH_TUNABLES5)
+DEFINE_CEPH_FEATURE(58, 1, NEW_OSDOPREPLY_ENCODING) // overlap
+DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
+DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
+DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
+DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
+DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING)  // *do not share this bit*
+
+DEFINE_CEPH_FEATURE(61, 1, RESERVED2)          // unused, but slow down!
+DEFINE_CEPH_FEATURE(62, 1, RESERVED)           // do not use; used as a sentinal
+DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
+
 
 /*
  * Features supported.
-- 
GitLab


From ca35ffea898d2cae67820202bd9a494742726b41 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:44:59 +0200
Subject: [PATCH 0897/1958] libceph: handle non-empty dest in
 ceph_{oloc,oid}_copy()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 55e3a477f92d4..bf2fcc837e366 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1547,12 +1547,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 void ceph_oloc_copy(struct ceph_object_locator *dest,
 		    const struct ceph_object_locator *src)
 {
-	WARN_ON(!ceph_oloc_empty(dest));
-	WARN_ON(dest->pool_ns); /* empty() only covers ->pool */
+	ceph_oloc_destroy(dest);
 
 	dest->pool = src->pool;
 	if (src->pool_ns)
 		dest->pool_ns = ceph_get_string(src->pool_ns);
+	else
+		dest->pool_ns = NULL;
 }
 EXPORT_SYMBOL(ceph_oloc_copy);
 
@@ -1565,14 +1566,15 @@ EXPORT_SYMBOL(ceph_oloc_destroy);
 void ceph_oid_copy(struct ceph_object_id *dest,
 		   const struct ceph_object_id *src)
 {
-	WARN_ON(!ceph_oid_empty(dest));
+	ceph_oid_destroy(dest);
 
 	if (src->name != src->inline_name) {
 		/* very rare, see ceph_object_id definition */
 		dest->name = kmalloc(src->name_len + 1,
 				     GFP_NOIO | __GFP_NOFAIL);
+	} else {
+		dest->name = dest->inline_name;
 	}
-
 	memcpy(dest->name, src->name, src->name_len + 1);
 	dest->name_len = src->name_len;
 }
-- 
GitLab


From 2d7522e0bda17eb3c7af0ffe74f03dd6c3cca443 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:45:00 +0200
Subject: [PATCH 0898/1958] libceph: advertise support for OSD_POOLRESEND

The code has been in place since commit 63244fa123a7 ("libceph:
introduce ceph_osd_request_target, calc_target()"), and, with the
ceph_{oloc,oid}_copy() issue fixed in the previous commit, is now
in working order.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 7fce9ed44af03..89c68af485399 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -197,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
+	 CEPH_FEATURE_OSD_POOLRESEND |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
-- 
GitLab


From 220abf5aa7ba5f544f1b589bde33761c60bbf9a0 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:45:00 +0200
Subject: [PATCH 0899/1958] libceph: support SERVER_JEWEL feature bits

Only MON_STATEFUL_SUB, really.  MON_ROUTE_OSDMAP and
OSDSUBOP_NO_SNAPCONTEXT are irrelevant.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 2 ++
 net/ceph/mon_client.c              | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 89c68af485399..78a58770e6e94 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -199,6 +199,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_OSD_POOLRESEND |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_SERVER_JEWEL |		\
+	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 250f11f786092..8756757655313 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -6,6 +6,7 @@
 #include <linux/random.h>
 #include <linux/sched.h>
 
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/debugfs.h>
@@ -297,6 +298,10 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
 
 	mutex_lock(&monc->mutex);
 	if (monc->sub_renew_sent) {
+		/*
+		 * This is only needed for legacy (infernalis or older)
+		 * MONs -- see delayed_work().
+		 */
 		monc->sub_renew_after = monc->sub_renew_sent +
 					    (seconds >> 1) * HZ - 1;
 		dout("%s sent %lu duration %d renew after %lu\n", __func__,
@@ -955,7 +960,8 @@ static void delayed_work(struct work_struct *work)
 			__validate_auth(monc);
 		}
 
-		if (is_auth) {
+		if (is_auth &&
+		    !(monc->con.peer_features & CEPH_FEATURE_MON_STATEFUL_SUB)) {
 			unsigned long now = jiffies;
 
 			dout("%s renew subs? now %lu renew after %lu\n",
-- 
GitLab


From dc93e0e2831de2f80817b89aae4864b88332fcce Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:45:00 +0200
Subject: [PATCH 0900/1958] libceph: fold [l]req->last_force_resend into
 ceph_osd_request_target

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  4 ++--
 net/ceph/osd_client.c           | 21 ++++++++++-----------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 85650b415e73f..ef630ebd1169c 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -148,6 +148,8 @@ struct ceph_osd_request_target {
 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
 	bool paused;
 
+	u32 last_force_resend;
+
 	int osd;
 };
 
@@ -193,7 +195,6 @@ struct ceph_osd_request {
 	unsigned long r_stamp;                /* jiffies, send or check time */
 	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
-	u32 r_last_force_resend;
 	u32 r_map_dne_bound;
 
 	struct ceph_osd_req_op r_ops[];
@@ -221,7 +222,6 @@ struct ceph_osd_linger_request {
 	struct list_head pending_lworks;
 
 	struct ceph_osd_request_target t;
-	u32 last_force_resend;
 	u32 map_dne_bound;
 
 	struct timespec mtime;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 924f07c36ddbc..aca6319b99b6e 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -384,6 +384,8 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	dest->flags = src->flags;
 	dest->paused = src->paused;
 
+	dest->last_force_resend = src->last_force_resend;
+
 	dest->osd = src->osd;
 }
 
@@ -1311,7 +1313,6 @@ enum calc_target_result {
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 					   struct ceph_osd_request_target *t,
-					   u32 *last_force_resend,
 					   bool any_change)
 {
 	struct ceph_pg_pool_info *pi;
@@ -1332,11 +1333,10 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	}
 
 	if (osdc->osdmap->epoch == pi->last_force_request_resend) {
-		if (last_force_resend &&
-		    *last_force_resend < pi->last_force_request_resend) {
-			*last_force_resend = pi->last_force_request_resend;
+		if (t->last_force_resend < pi->last_force_request_resend) {
+			t->last_force_resend = pi->last_force_request_resend;
 			force_resend = true;
-		} else if (!last_force_resend) {
+		} else if (t->last_force_resend == 0) {
 			force_resend = true;
 		}
 	}
@@ -1645,7 +1645,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-	ct_res = calc_target(osdc, &req->r_t, &req->r_last_force_resend, false);
+	ct_res = calc_target(osdc, &req->r_t, false);
 	if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
 		goto promote;
 
@@ -2441,7 +2441,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	struct ceph_osd *osd;
 
-	calc_target(osdc, &lreq->t, &lreq->last_force_resend, false);
+	calc_target(osdc, &lreq->t, false);
 	osd = lookup_create_osd(osdc, lreq->t.osd, true);
 	link_linger(osd, lreq);
 
@@ -3059,7 +3059,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	enum calc_target_result ct_res;
 
-	ct_res = calc_target(osdc, &lreq->t, &lreq->last_force_resend, true);
+	ct_res = calc_target(osdc, &lreq->t, true);
 	if (ct_res == CALC_TARGET_NEED_RESEND) {
 		struct ceph_osd *osd;
 
@@ -3130,8 +3130,7 @@ static void scan_requests(struct ceph_osd *osd,
 		n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
 		dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-		ct_res = calc_target(osdc, &req->r_t,
-				     &req->r_last_force_resend, false);
+		ct_res = calc_target(osdc, &req->r_t, false);
 		switch (ct_res) {
 		case CALC_TARGET_NO_ACTION:
 			force_resend_writes = cleared_full ||
@@ -3240,7 +3239,7 @@ static void kick_requests(struct ceph_osd_client *osdc,
 		erase_request(need_resend, req); /* before link_request() */
 
 		WARN_ON(req->r_osd);
-		calc_target(osdc, &req->r_t, NULL, false);
+		calc_target(osdc, &req->r_t, false);
 		osd = lookup_create_osd(osdc, req->r_t.osd, true);
 		link_request(osd, req);
 		if (!req->r_linger) {
-- 
GitLab


From 8e48cf00c48fdefb01f70db81f31438cd0c29dcc Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 5 Jun 2017 14:45:00 +0200
Subject: [PATCH 0901/1958] libceph: new pi->last_force_request_resend

The old (v15) pi->last_force_request_resend has been repurposed to
make pre-RESEND_ON_SPLIT clients that don't check for PG splits but do
obey pi->last_force_request_resend resend on splits.  See ceph.git
commit 189ca7ec6420 ("mon/OSDMonitor: make pre-luminous clients resend
ops on split").

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index bf2fcc837e366..710ee3dc01b9b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -682,11 +682,48 @@ static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
 		*p += len;
 	}
 
+	/*
+	 * last_force_op_resend_preluminous, will be overridden if the
+	 * map was encoded with RESEND_ON_SPLIT
+	 */
 	if (ev >= 15)
 		pi->last_force_request_resend = ceph_decode_32(p);
 	else
 		pi->last_force_request_resend = 0;
 
+	if (ev >= 16)
+		*p += 4; /* skip min_read_recency_for_promote */
+
+	if (ev >= 17)
+		*p += 8; /* skip expected_num_objects */
+
+	if (ev >= 19)
+		*p += 4; /* skip cache_target_dirty_high_ratio_micro */
+
+	if (ev >= 20)
+		*p += 4; /* skip min_write_recency_for_promote */
+
+	if (ev >= 21)
+		*p += 1; /* skip use_gmt_hitset */
+
+	if (ev >= 22)
+		*p += 1; /* skip fast_read */
+
+	if (ev >= 23) {
+		*p += 4; /* skip hit_set_grade_decay_rate */
+		*p += 4; /* skip hit_set_search_last_n */
+	}
+
+	if (ev >= 24) {
+		/* skip opts */
+		*p += 1 + 1; /* versions */
+		len = ceph_decode_32(p);
+		*p += len;
+	}
+
+	if (ev >= 25)
+		pi->last_force_request_resend = ceph_decode_32(p);
+
 	/* ignore the rest */
 
 	*p = pool_end;
-- 
GitLab


From dc98ff7230e5ccf11c621dff0d590e83574a7184 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:53 +0200
Subject: [PATCH 0902/1958] libceph: introduce ceph_spg,
 ceph_pg_to_primary_shard()

Store both raw pgid and actual spgid in ceph_osd_request_target.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  3 ++-
 include/linux/ceph/osdmap.h     | 10 ++++++++++
 net/ceph/debugfs.c              | 11 ++++++++++-
 net/ceph/osd_client.c           |  7 +++++--
 net/ceph/osdmap.c               | 33 +++++++++++++++++++++++++++++++++
 5 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index ef630ebd1169c..6114f7b021353 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -136,7 +136,8 @@ struct ceph_osd_request_target {
 	struct ceph_object_id target_oid;
 	struct ceph_object_locator target_oloc;
 
-	struct ceph_pg pgid;
+	struct ceph_pg pgid;               /* last raw pg we mapped to */
+	struct ceph_spg spgid;             /* last actual spg we mapped to */
 	u32 pg_num;
 	u32 pg_num_mask;
 	struct ceph_osds acting;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 938656f708078..7ae5b416b4b66 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -24,6 +24,13 @@ struct ceph_pg {
 	uint32_t seed;
 };
 
+#define CEPH_SPG_NOSHARD	-1
+
+struct ceph_spg {
+	struct ceph_pg pgid;
+	s8 shard;
+};
+
 int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
 
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
@@ -271,6 +278,9 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting);
+bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      const struct ceph_pg *raw_pgid,
+			      struct ceph_spg *spgid);
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
 			      const struct ceph_pg *raw_pgid);
 
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 71ba13927b3d1..50ab1bdb16e22 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -147,11 +147,20 @@ static int monc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+static void dump_spgid(struct seq_file *s, const struct ceph_spg *spgid)
+{
+	seq_printf(s, "%llu.%x", spgid->pgid.pool, spgid->pgid.seed);
+	if (spgid->shard != CEPH_SPG_NOSHARD)
+		seq_printf(s, "s%d", spgid->shard);
+}
+
 static void dump_target(struct seq_file *s, struct ceph_osd_request_target *t)
 {
 	int i;
 
-	seq_printf(s, "osd%d\t%llu.%x\t[", t->osd, t->pgid.pool, t->pgid.seed);
+	seq_printf(s, "osd%d\t%llu.%x\t", t->osd, t->pgid.pool, t->pgid.seed);
+	dump_spgid(s, &t->spgid);
+	seq_puts(s, "\t[");
 	for (i = 0; i < t->up.size; i++)
 		seq_printf(s, "%s%d", (!i ? "" : ","), t->up.osds[i]);
 	seq_printf(s, "]/%d\t[", t->up.primary);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index aca6319b99b6e..66509414f4c84 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -373,6 +373,7 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	ceph_oloc_copy(&dest->target_oloc, &src->target_oloc);
 
 	dest->pgid = src->pgid; /* struct */
+	dest->spgid = src->spgid; /* struct */
 	dest->pg_num = src->pg_num;
 	dest->pg_num_mask = src->pg_num_mask;
 	ceph_osds_copy(&dest->acting, &src->acting);
@@ -1394,6 +1395,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	    ceph_osds_changed(&t->acting, &acting, any_change) ||
 	    force_resend) {
 		t->pgid = pgid; /* struct */
+		ceph_pg_to_primary_shard(osdc->osdmap, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
 		ceph_osds_copy(&t->up, &up);
 		t->size = pi->size;
@@ -1595,9 +1597,10 @@ static void send_request(struct ceph_osd_request *req)
 
 	encode_request(req, req->r_request);
 
-	dout("%s req %p tid %llu to pg %llu.%x osd%d flags 0x%x attempt %d\n",
+	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d flags 0x%x attempt %d\n",
 	     __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
-	     req->r_t.osd, req->r_flags, req->r_attempts);
+	     req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed,
+	     req->r_t.spgid.shard, osd->o_osd, req->r_flags, req->r_attempts);
 
 	req->r_t.paused = false;
 	req->r_stamp = jiffies;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 710ee3dc01b9b..a4155620eace7 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2265,6 +2265,39 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
 	WARN_ON(!osds_valid(up) || !osds_valid(acting));
 }
 
+bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      const struct ceph_pg *raw_pgid,
+			      struct ceph_spg *spgid)
+{
+	struct ceph_pg_pool_info *pi;
+	struct ceph_pg pgid;
+	struct ceph_osds up, acting;
+	int i;
+
+	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
+	if (!pi)
+		return false;
+
+	raw_pg_to_pg(pi, raw_pgid, &pgid);
+
+	if (ceph_can_shift_osds(pi)) {
+		spgid->pgid = pgid; /* struct */
+		spgid->shard = CEPH_SPG_NOSHARD;
+		return true;
+	}
+
+	ceph_pg_to_up_acting_osds(osdmap, &pgid, &up, &acting);
+	for (i = 0; i < acting.size; i++) {
+		if (acting.osds[i] == acting.primary) {
+			spgid->pgid = pgid; /* struct */
+			spgid->shard = i;
+			return true;
+		}
+	}
+
+	return false;
+}
+
 /*
  * Return acting primary for given PG, or -1 if none.
  */
-- 
GitLab


From 2e59ffd1df4aba5289f04d362efc8432fac14949 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:53 +0200
Subject: [PATCH 0903/1958] libceph: encode_{pgid,oloc}() helpers

Factor out encode_{pgid,oloc}() and use ceph_encode_string() for oid.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 50 +++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 66509414f4c84..b0298c3681b6b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -540,7 +540,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 }
 EXPORT_SYMBOL(ceph_osdc_alloc_request);
 
-static int ceph_oloc_encoding_size(struct ceph_object_locator *oloc)
+static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc)
 {
 	return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0);
 }
@@ -1485,6 +1485,28 @@ static void setup_request_data(struct ceph_osd_request *req,
 	WARN_ON(data_len != msg->data_length);
 }
 
+static void encode_pgid(void **p, const struct ceph_pg *pgid)
+{
+	ceph_encode_8(p, 1);
+	ceph_encode_64(p, pgid->pool);
+	ceph_encode_32(p, pgid->seed);
+	ceph_encode_32(p, -1); /* preferred */
+}
+
+static void encode_oloc(void **p, void *end,
+			const struct ceph_object_locator *oloc)
+{
+	ceph_start_encoding(p, 5, 4, ceph_oloc_encoding_size(oloc));
+	ceph_encode_64(p, oloc->pool);
+	ceph_encode_32(p, -1); /* preferred */
+	ceph_encode_32(p, 0);  /* key len */
+	if (oloc->pool_ns)
+		ceph_encode_string(p, end, oloc->pool_ns->str,
+				   oloc->pool_ns->len);
+	else
+		ceph_encode_32(p, 0);
+}
+
 static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 {
 	void *p = msg->front.iov_base;
@@ -1512,28 +1534,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	memset(p, 0, sizeof(struct ceph_eversion));
 	p += sizeof(struct ceph_eversion);
 
-	/* oloc */
-	ceph_start_encoding(&p, 5, 4,
-			    ceph_oloc_encoding_size(&req->r_t.target_oloc));
-	ceph_encode_64(&p, req->r_t.target_oloc.pool);
-	ceph_encode_32(&p, -1); /* preferred */
-	ceph_encode_32(&p, 0); /* key len */
-	if (req->r_t.target_oloc.pool_ns)
-		ceph_encode_string(&p, end, req->r_t.target_oloc.pool_ns->str,
-				   req->r_t.target_oloc.pool_ns->len);
-	else
-		ceph_encode_32(&p, 0);
-
-	/* pgid */
-	ceph_encode_8(&p, 1);
-	ceph_encode_64(&p, req->r_t.pgid.pool);
-	ceph_encode_32(&p, req->r_t.pgid.seed);
-	ceph_encode_32(&p, -1); /* preferred */
-
-	/* oid */
-	ceph_encode_32(&p, req->r_t.target_oid.name_len);
-	memcpy(p, req->r_t.target_oid.name, req->r_t.target_oid.name_len);
-	p += req->r_t.target_oid.name_len;
+	encode_oloc(&p, end, &req->r_t.target_oloc);
+	encode_pgid(&p, &req->r_t.pgid);
+	ceph_encode_string(&p, end, req->r_t.target_oid.name,
+			   req->r_t.target_oid.name_len);
 
 	/* ops, can imply data */
 	ceph_encode_16(&p, req->r_num_ops);
-- 
GitLab


From 98ad5ebd1505eb903ae8bc27e94c1ab0d1c3e651 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:54 +0200
Subject: [PATCH 0904/1958] libceph:
 ceph_connection_operations::reencode_message() method

Give upper layers a chance to reencode the message after the connection
is negotiated and ->peer_features is set.  OSD client will use this to
support both luminous and pre-luminous OSDs (in a single cluster): the
former need MOSDOp v8; the latter will continue to be sent MOSDOp v4.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/messenger.h | 2 ++
 net/ceph/messenger.c           | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c5c4c713e00f5..fbd94d9fa5dd4 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -44,6 +44,8 @@ struct ceph_connection_operations {
 					struct ceph_msg_header *hdr,
 					int *skip);
 
+	void (*reencode_message) (struct ceph_msg *msg);
+
 	int (*sign_message) (struct ceph_msg *msg);
 	int (*check_message_signature) (struct ceph_msg *msg);
 };
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9daed25406390..0c31035bbfee8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1288,13 +1288,16 @@ static void prepare_write_message(struct ceph_connection *con)
 		m->hdr.seq = cpu_to_le64(++con->out_seq);
 		m->needs_out_seq = false;
 	}
-	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
+
+	if (con->ops->reencode_message)
+		con->ops->reencode_message(m);
 
 	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
 	     le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
 	     m->data_length);
-	BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
+	WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len));
+	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
 
 	/* tag + hdr + front + middle */
 	con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
-- 
GitLab


From 8cb441c0545dfd4dafeedc1e2d7157e1072413ac Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:54 +0200
Subject: [PATCH 0905/1958] libceph: MOSDOp v8 encoding (actual spgid + full
 hash)

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  17 ++++
 include/linux/ceph/osdmap.h     |   4 +-
 net/ceph/osd_client.c           | 153 ++++++++++++++++++++++++++++----
 3 files changed, 154 insertions(+), 20 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 6114f7b021353..bca2718ac2537 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -205,6 +205,23 @@ struct ceph_request_redirect {
 	struct ceph_object_locator oloc;
 };
 
+/*
+ * osd request identifier
+ *
+ * caller name + incarnation# + tid to unique identify this request
+ */
+struct ceph_osd_reqid {
+	struct ceph_entity_name name;
+	__le64 tid;
+	__le32 inc;
+} __packed;
+
+struct ceph_blkin_trace_info {
+	__le64 trace_id;
+	__le64 span_id;
+	__le64 parent_span_id;
+} __packed;
+
 typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
 				 u64 notifier_id, void *data, size_t data_len);
 typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 7ae5b416b4b66..66447fc7f3341 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -205,11 +205,13 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
 	return &map->osd_addr[osd];
 }
 
+#define CEPH_PGID_ENCODING_LEN		(1 + 8 + 4 + 4)
+
 static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
 {
 	__u8 version;
 
-	if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
+	if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) {
 		pr_warn("incomplete pg encoding\n");
 		return -EINVAL;
 	}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b0298c3681b6b..eaaf17e7c37b2 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -12,6 +12,7 @@
 #include <linux/bio.h>
 #endif
 
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/messenger.h>
@@ -555,17 +556,21 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
 	WARN_ON(ceph_oloc_empty(&req->r_base_oloc));
 
 	/* create request message */
-	msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */
-	msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */
+	msg_size = CEPH_ENCODING_START_BLK_LEN +
+			CEPH_PGID_ENCODING_LEN + 1; /* spgid */
+	msg_size += 4 + 4 + 4; /* hash, osdmap_epoch, flags */
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			sizeof(struct ceph_osd_reqid); /* reqid */
+	msg_size += sizeof(struct ceph_blkin_trace_info); /* trace */
+	msg_size += 4 + sizeof(struct ceph_timespec); /* client_inc, mtime */
 	msg_size += CEPH_ENCODING_START_BLK_LEN +
 			ceph_oloc_encoding_size(&req->r_base_oloc); /* oloc */
-	msg_size += 1 + 8 + 4 + 4; /* pgid */
 	msg_size += 4 + req->r_base_oid.name_len; /* oid */
 	msg_size += 2 + req->r_num_ops * sizeof(struct ceph_osd_op);
 	msg_size += 8; /* snapid */
 	msg_size += 8; /* snap_seq */
 	msg_size += 4 + 8 * (req->r_snapc ? req->r_snapc->num_snaps : 0);
-	msg_size += 4; /* retry_attempt */
+	msg_size += 4 + 8; /* retry_attempt, features */
 
 	if (req->r_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -1493,6 +1498,13 @@ static void encode_pgid(void **p, const struct ceph_pg *pgid)
 	ceph_encode_32(p, -1); /* preferred */
 }
 
+static void encode_spgid(void **p, const struct ceph_spg *spgid)
+{
+	ceph_start_encoding(p, 1, 1, CEPH_PGID_ENCODING_LEN + 1);
+	encode_pgid(p, &spgid->pgid);
+	ceph_encode_8(p, spgid->shard);
+}
+
 static void encode_oloc(void **p, void *end,
 			const struct ceph_object_locator *oloc)
 {
@@ -1507,7 +1519,8 @@ static void encode_oloc(void **p, void *end,
 		ceph_encode_32(p, 0);
 }
 
-static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
+static void encode_request_partial(struct ceph_osd_request *req,
+				   struct ceph_msg *msg)
 {
 	void *p = msg->front.iov_base;
 	void *const end = p + msg->front_alloc_len;
@@ -1524,18 +1537,25 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 
 	setup_request_data(req, msg);
 
-	ceph_encode_32(&p, 1); /* client_inc, always 1 */
+	encode_spgid(&p, &req->r_t.spgid); /* actual spg */
+	ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */
 	ceph_encode_32(&p, req->r_osdc->osdmap->epoch);
 	ceph_encode_32(&p, req->r_flags);
+
+	/* reqid */
+	ceph_start_encoding(&p, 2, 2, sizeof(struct ceph_osd_reqid));
+	memset(p, 0, sizeof(struct ceph_osd_reqid));
+	p += sizeof(struct ceph_osd_reqid);
+
+	/* trace */
+	memset(p, 0, sizeof(struct ceph_blkin_trace_info));
+	p += sizeof(struct ceph_blkin_trace_info);
+
+	ceph_encode_32(&p, 0); /* client_inc, always 0 */
 	ceph_encode_timespec(p, &req->r_mtime);
 	p += sizeof(struct ceph_timespec);
 
-	/* reassert_version */
-	memset(p, 0, sizeof(struct ceph_eversion));
-	p += sizeof(struct ceph_eversion);
-
 	encode_oloc(&p, end, &req->r_t.target_oloc);
-	encode_pgid(&p, &req->r_t.pgid);
 	ceph_encode_string(&p, end, req->r_t.target_oid.name,
 			   req->r_t.target_oid.name_len);
 
@@ -1558,11 +1578,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	}
 
 	ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
+	BUG_ON(p != end - 8); /* space for features */
 
-	BUG_ON(p > end);
-	msg->front.iov_len = p - msg->front.iov_base;
-	msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */
-	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
+	/* front_len is finalized in encode_request_finish() */
 	msg->hdr.data_len = cpu_to_le32(data_len);
 	/*
 	 * The header "data_off" is a hint to the receiver allowing it
@@ -1571,9 +1590,99 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	 */
 	msg->hdr.data_off = cpu_to_le16(req->r_data_offset);
 
-	dout("%s req %p oid %s oid_len %d front %zu data %u\n", __func__,
-	     req, req->r_t.target_oid.name, req->r_t.target_oid.name_len,
-	     msg->front.iov_len, data_len);
+	dout("%s req %p msg %p oid %s oid_len %d\n", __func__, req, msg,
+	     req->r_t.target_oid.name, req->r_t.target_oid.name_len);
+}
+
+static void encode_request_finish(struct ceph_msg *msg)
+{
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front_alloc_len;
+
+	if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
+		/* luminous OSD -- encode features and be done */
+		p = end - 8;
+		ceph_encode_64(&p, msg->con->peer_features);
+	} else {
+		struct {
+			char spgid[CEPH_ENCODING_START_BLK_LEN +
+				   CEPH_PGID_ENCODING_LEN + 1];
+			__le32 hash;
+			__le32 epoch;
+			__le32 flags;
+			char reqid[CEPH_ENCODING_START_BLK_LEN +
+				   sizeof(struct ceph_osd_reqid)];
+			char trace[sizeof(struct ceph_blkin_trace_info)];
+			__le32 client_inc;
+			struct ceph_timespec mtime;
+		} __packed head;
+		struct ceph_pg pgid;
+		void *oloc, *oid, *tail;
+		int oloc_len, oid_len, tail_len;
+		int len;
+
+		/*
+		 * Pre-luminous OSD -- reencode v8 into v4 using @head
+		 * as a temporary buffer.  Encode the raw PG; the rest
+		 * is just a matter of moving oloc, oid and tail blobs
+		 * around.
+		 */
+		memcpy(&head, p, sizeof(head));
+		p += sizeof(head);
+
+		oloc = p;
+		p += CEPH_ENCODING_START_BLK_LEN;
+		pgid.pool = ceph_decode_64(&p);
+		p += 4 + 4; /* preferred, key len */
+		len = ceph_decode_32(&p);
+		p += len;   /* nspace */
+		oloc_len = p - oloc;
+
+		oid = p;
+		len = ceph_decode_32(&p);
+		p += len;
+		oid_len = p - oid;
+
+		tail = p;
+		tail_len = (end - p) - 8;
+
+		p = msg->front.iov_base;
+		ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
+		ceph_encode_copy(&p, &head.epoch, sizeof(head.epoch));
+		ceph_encode_copy(&p, &head.flags, sizeof(head.flags));
+		ceph_encode_copy(&p, &head.mtime, sizeof(head.mtime));
+
+		/* reassert_version */
+		memset(p, 0, sizeof(struct ceph_eversion));
+		p += sizeof(struct ceph_eversion);
+
+		BUG_ON(p >= oloc);
+		memmove(p, oloc, oloc_len);
+		p += oloc_len;
+
+		pgid.seed = le32_to_cpu(head.hash);
+		encode_pgid(&p, &pgid); /* raw pg */
+
+		BUG_ON(p >= oid);
+		memmove(p, oid, oid_len);
+		p += oid_len;
+
+		/* tail -- ops, snapid, snapc, retry_attempt */
+		BUG_ON(p >= tail);
+		memmove(p, tail, tail_len);
+		p += tail_len;
+
+		msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */
+	}
+
+	BUG_ON(p > end);
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	dout("%s msg %p tid %llu %u+%u+%u v%d\n", __func__, msg,
+	     le64_to_cpu(msg->hdr.tid), le32_to_cpu(msg->hdr.front_len),
+	     le32_to_cpu(msg->hdr.middle_len), le32_to_cpu(msg->hdr.data_len),
+	     le16_to_cpu(msg->hdr.version));
 }
 
 /*
@@ -1599,7 +1708,7 @@ static void send_request(struct ceph_osd_request *req)
 	else
 		WARN_ON(req->r_flags & CEPH_OSD_FLAG_RETRY);
 
-	encode_request(req, req->r_request);
+	encode_request_partial(req, req->r_request);
 
 	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d flags 0x%x attempt %d\n",
 	     __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
@@ -4577,6 +4686,11 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
+static void osd_reencode_message(struct ceph_msg *msg)
+{
+	encode_request_finish(msg);
+}
+
 static int osd_sign_message(struct ceph_msg *msg)
 {
 	struct ceph_osd *o = msg->con->private;
@@ -4601,6 +4715,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
+	.reencode_message = osd_reencode_message,
 	.sign_message = osd_sign_message,
 	.check_message_signature = osd_check_message_signature,
 	.fault = osd_fault,
-- 
GitLab


From 84ed45df4604ae9b1065b5fe2f250f57f7c69baf Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:54 +0200
Subject: [PATCH 0906/1958] libceph: drop need_resend from calc_target()

Replace it with more fine-grained bools to separate updating
ceph_osd_request_target fields and the decision to resend.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index eaaf17e7c37b2..4143f73590f35 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1325,8 +1325,9 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	struct ceph_pg pgid, last_pgid;
 	struct ceph_osds up, acting;
 	bool force_resend = false;
+	bool unpaused = false;
+	bool legacy_change;
 	bool need_check_tiering = false;
-	bool need_resend = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
 	enum calc_target_result ct_res;
 	int ret;
@@ -1393,12 +1394,12 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 
 	if (t->paused && !target_should_be_paused(osdc, t, pi)) {
 		t->paused = false;
-		need_resend = true;
+		unpaused = true;
 	}
+	legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
+			ceph_osds_changed(&t->acting, &acting, any_change);
 
-	if (ceph_pg_compare(&t->pgid, &pgid) ||
-	    ceph_osds_changed(&t->acting, &acting, any_change) ||
-	    force_resend) {
+	if (legacy_change || force_resend) {
 		t->pgid = pgid; /* struct */
 		ceph_pg_to_primary_shard(osdc->osdmap, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
@@ -1410,10 +1411,13 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		t->sort_bitwise = sort_bitwise;
 
 		t->osd = acting.primary;
-		need_resend = true;
 	}
 
-	ct_res = need_resend ? CALC_TARGET_NEED_RESEND : CALC_TARGET_NO_ACTION;
+	if (unpaused || legacy_change || force_resend)
+		ct_res = CALC_TARGET_NEED_RESEND;
+	else
+		ct_res = CALC_TARGET_NO_ACTION;
+
 out:
 	dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd);
 	return ct_res;
-- 
GitLab


From 7de030d6b10a56e991312a978ace6be3c090097c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:54 +0200
Subject: [PATCH 0907/1958] libceph: resend on PG splits if OSD has
 RESEND_ON_SPLIT

Note that ceph_osd_request_target fields are updated regardless of
RESEND_ON_SPLIT.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h |  2 ++
 net/ceph/osd_client.c       | 21 ++++++++++++++-------
 net/ceph/osdmap.c           |  7 +++----
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 66447fc7f3341..63fb073a3355c 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -249,6 +249,8 @@ static inline void ceph_osds_init(struct ceph_osds *set)
 
 void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src);
 
+bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
+		      u32 new_pg_num);
 bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 			  const struct ceph_osds *new_acting,
 			  const struct ceph_osds *old_up,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4143f73590f35..518dbac599d04 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1319,6 +1319,7 @@ enum calc_target_result {
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 					   struct ceph_osd_request_target *t,
+					   struct ceph_connection *con,
 					   bool any_change)
 {
 	struct ceph_pg_pool_info *pi;
@@ -1327,6 +1328,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	bool force_resend = false;
 	bool unpaused = false;
 	bool legacy_change;
+	bool split = false;
 	bool need_check_tiering = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
 	enum calc_target_result ct_res;
@@ -1398,8 +1400,10 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	}
 	legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
 			ceph_osds_changed(&t->acting, &acting, any_change);
+	if (t->pg_num)
+		split = ceph_pg_is_split(&last_pgid, t->pg_num, pi->pg_num);
 
-	if (legacy_change || force_resend) {
+	if (legacy_change || force_resend || split) {
 		t->pgid = pgid; /* struct */
 		ceph_pg_to_primary_shard(osdc->osdmap, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
@@ -1413,7 +1417,9 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		t->osd = acting.primary;
 	}
 
-	if (unpaused || legacy_change || force_resend)
+	if (unpaused || legacy_change || force_resend ||
+	    (split && con && CEPH_HAVE_FEATURE(con->peer_features,
+					       RESEND_ON_SPLIT)))
 		ct_res = CALC_TARGET_NEED_RESEND;
 	else
 		ct_res = CALC_TARGET_NO_ACTION;
@@ -1765,7 +1771,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-	ct_res = calc_target(osdc, &req->r_t, false);
+	ct_res = calc_target(osdc, &req->r_t, NULL, false);
 	if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
 		goto promote;
 
@@ -2561,7 +2567,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	struct ceph_osd *osd;
 
-	calc_target(osdc, &lreq->t, false);
+	calc_target(osdc, &lreq->t, NULL, false);
 	osd = lookup_create_osd(osdc, lreq->t.osd, true);
 	link_linger(osd, lreq);
 
@@ -3179,7 +3185,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	enum calc_target_result ct_res;
 
-	ct_res = calc_target(osdc, &lreq->t, true);
+	ct_res = calc_target(osdc, &lreq->t, NULL, true);
 	if (ct_res == CALC_TARGET_NEED_RESEND) {
 		struct ceph_osd *osd;
 
@@ -3250,7 +3256,8 @@ static void scan_requests(struct ceph_osd *osd,
 		n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
 		dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-		ct_res = calc_target(osdc, &req->r_t, false);
+		ct_res = calc_target(osdc, &req->r_t, &req->r_osd->o_con,
+				     false);
 		switch (ct_res) {
 		case CALC_TARGET_NO_ACTION:
 			force_resend_writes = cleared_full ||
@@ -3359,7 +3366,7 @@ static void kick_requests(struct ceph_osd_client *osdc,
 		erase_request(need_resend, req); /* before link_request() */
 
 		WARN_ON(req->r_osd);
-		calc_target(osdc, &req->r_t, false);
+		calc_target(osdc, &req->r_t, NULL, false);
 		osd = lookup_create_osd(osdc, req->r_t.osd, true);
 		link_request(osd, req);
 		if (!req->r_linger) {
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index a4155620eace7..367879afed58b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1753,9 +1753,8 @@ void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
 	dest->primary = src->primary;
 }
 
-static bool is_split(const struct ceph_pg *pgid,
-		     u32 old_pg_num,
-		     u32 new_pg_num)
+bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
+		      u32 new_pg_num)
 {
 	int old_bits = calc_bits_of(old_pg_num);
 	int old_mask = (1 << old_bits) - 1;
@@ -1800,7 +1799,7 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 	       !osds_equal(old_up, new_up) ||
 	       old_size != new_size ||
 	       old_min_size != new_min_size ||
-	       is_split(pgid, old_pg_num, new_pg_num) ||
+	       ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
 	       old_sort_bitwise != new_sort_bitwise;
 }
 
-- 
GitLab


From a10bcb19ae02cea7d5e6650fbc2de3ced46b4e5d Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:55 +0200
Subject: [PATCH 0908/1958] libceph: delete from need_resend_linger before
 check_linger_pool_dne()

When processing a map update consisting of multiple incrementals, we
may end up running check_linger_pool_dne() on a lingering request that
was previously added to need_resend_linger list.  If it is concluded
that the target pool doesn't exist, the request is killed off while
still on need_resend_linger list, which leads to a crash on a NULL
lreq->osd in kick_requests():

    libceph: linger_id 18446462598732840961 pool does not exist
    BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
    IP: ceph_osdc_handle_map+0x4ae/0x870

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 518dbac599d04..576101b635ef7 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3243,6 +3243,7 @@ static void scan_requests(struct ceph_osd *osd,
 				list_add_tail(&lreq->scan_item, need_resend_linger);
 			break;
 		case CALC_TARGET_POOL_DNE:
+			list_del_init(&lreq->scan_item);
 			check_linger_pool_dne(lreq);
 			break;
 		}
-- 
GitLab


From 04c7d789e269c2b82bbd08106049a5a979cdb3fd Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:55 +0200
Subject: [PATCH 0909/1958] libceph: make sure need_resend targets reflect
 latest map

Otherwise we may miss events like PG splits, pool deletions, etc when
we get multiple incremental maps at once.  Because check_pool_dne() can
now be fed an unlinked request, finish_request() needed to be taught to
handle unlinked requests.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  1 +
 net/ceph/debugfs.c              |  2 +-
 net/ceph/osd_client.c           | 33 +++++++++++++++++++++++++--------
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index bca2718ac2537..62c672bcbb31a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -149,6 +149,7 @@ struct ceph_osd_request_target {
 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
 	bool paused;
 
+	u32 epoch;
 	u32 last_force_resend;
 
 	int osd;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 50ab1bdb16e22..c0089f8ccaeb9 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -166,7 +166,7 @@ static void dump_target(struct seq_file *s, struct ceph_osd_request_target *t)
 	seq_printf(s, "]/%d\t[", t->up.primary);
 	for (i = 0; i < t->acting.size; i++)
 		seq_printf(s, "%s%d", (!i ? "" : ","), t->acting.osds[i]);
-	seq_printf(s, "]/%d\t", t->acting.primary);
+	seq_printf(s, "]/%d\te%u\t", t->acting.primary, t->epoch);
 	if (t->target_oloc.pool_ns) {
 		seq_printf(s, "%*pE/%*pE\t0x%x",
 			(int)t->target_oloc.pool_ns->len,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 576101b635ef7..173ab9c68eb6c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -386,6 +386,7 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	dest->flags = src->flags;
 	dest->paused = src->paused;
 
+	dest->epoch = src->epoch;
 	dest->last_force_resend = src->last_force_resend;
 
 	dest->osd = src->osd;
@@ -1334,6 +1335,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	enum calc_target_result ct_res;
 	int ret;
 
+	t->epoch = osdc->osdmap->epoch;
 	pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
 	if (!pi) {
 		t->osd = CEPH_HOMELESS_OSD;
@@ -1720,10 +1722,11 @@ static void send_request(struct ceph_osd_request *req)
 
 	encode_request_partial(req, req->r_request);
 
-	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d flags 0x%x attempt %d\n",
+	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d e%u flags 0x%x attempt %d\n",
 	     __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
 	     req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed,
-	     req->r_t.spgid.shard, osd->o_osd, req->r_flags, req->r_attempts);
+	     req->r_t.spgid.shard, osd->o_osd, req->r_t.epoch, req->r_flags,
+	     req->r_attempts);
 
 	req->r_t.paused = false;
 	req->r_stamp = jiffies;
@@ -1863,13 +1866,12 @@ static void submit_request(struct ceph_osd_request *req, bool wrlocked)
 static void finish_request(struct ceph_osd_request *req)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
-	struct ceph_osd *osd = req->r_osd;
 
-	verify_osd_locked(osd);
+	WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
 	dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
 
-	WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
-	unlink_request(osd, req);
+	if (req->r_osd)
+		unlink_request(req->r_osd, req);
 	atomic_dec(&osdc->num_requests);
 
 	/*
@@ -3356,8 +3358,25 @@ static void kick_requests(struct ceph_osd_client *osdc,
 			  struct list_head *need_resend_linger)
 {
 	struct ceph_osd_linger_request *lreq, *nlreq;
+	enum calc_target_result ct_res;
 	struct rb_node *n;
 
+	/* make sure need_resend targets reflect latest map */
+	for (n = rb_first(need_resend); n; ) {
+		struct ceph_osd_request *req =
+		    rb_entry(n, struct ceph_osd_request, r_node);
+
+		n = rb_next(n);
+
+		if (req->r_t.epoch < osdc->osdmap->epoch) {
+			ct_res = calc_target(osdc, &req->r_t, NULL, false);
+			if (ct_res == CALC_TARGET_POOL_DNE) {
+				erase_request(need_resend, req);
+				check_pool_dne(req);
+			}
+		}
+	}
+
 	for (n = rb_first(need_resend); n; ) {
 		struct ceph_osd_request *req =
 		    rb_entry(n, struct ceph_osd_request, r_node);
@@ -3366,8 +3385,6 @@ static void kick_requests(struct ceph_osd_client *osdc,
 		n = rb_next(n);
 		erase_request(need_resend, req); /* before link_request() */
 
-		WARN_ON(req->r_osd);
-		calc_target(osdc, &req->r_t, NULL, false);
 		osd = lookup_create_osd(osdc, req->r_t.osd, true);
 		link_request(osd, req);
 		if (!req->r_linger) {
-- 
GitLab


From db098ec4e41a6ad8f5b248b1287bb583754e944e Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:55 +0200
Subject: [PATCH 0910/1958] libceph: always populate t->target_{oid,oloc} in
 calc_target()

need_check_tiering logic doesn't make a whole lot of sense.  Drop it
and apply tiering unconditionally on every calc_target() call instead.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 173ab9c68eb6c..c050e28d3c97f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1330,7 +1330,6 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	bool unpaused = false;
 	bool legacy_change;
 	bool split = false;
-	bool need_check_tiering = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
 	enum calc_target_result ct_res;
 	int ret;
@@ -1351,17 +1350,11 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 			force_resend = true;
 		}
 	}
-	if (ceph_oid_empty(&t->target_oid) || force_resend) {
-		ceph_oid_copy(&t->target_oid, &t->base_oid);
-		need_check_tiering = true;
-	}
-	if (ceph_oloc_empty(&t->target_oloc) || force_resend) {
-		ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
-		need_check_tiering = true;
-	}
 
-	if (need_check_tiering &&
-	    (t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
+	/* apply tiering */
+	ceph_oid_copy(&t->target_oid, &t->base_oid);
+	ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
+	if ((t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
 		if (t->flags & CEPH_OSD_FLAG_READ && pi->read_tier >= 0)
 			t->target_oloc.pool = pi->read_tier;
 		if (t->flags & CEPH_OSD_FLAG_WRITE && pi->write_tier >= 0)
-- 
GitLab


From 6d637a540e6f87b9c926d45a507ff143640517cf Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:55 +0200
Subject: [PATCH 0911/1958] libceph: use target pi for calc_target()
 calculations

For luminous and beyond we are encoding the actual spgid, which
requires operating with the correct pg_num, i.e. that of the target
pool.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index c050e28d3c97f..ce0055dc75448 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1306,7 +1306,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
 		       ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
 		       __pool_full(pi);
 
-	WARN_ON(pi->id != t->base_oloc.pool);
+	WARN_ON(pi->id != t->target_oloc.pool);
 	return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
 	       ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) ||
 	       (osdc->osdmap->epoch < osdc->epoch_barrier);
@@ -1359,6 +1359,13 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 			t->target_oloc.pool = pi->read_tier;
 		if (t->flags & CEPH_OSD_FLAG_WRITE && pi->write_tier >= 0)
 			t->target_oloc.pool = pi->write_tier;
+
+		pi = ceph_pg_pool_by_id(osdc->osdmap, t->target_oloc.pool);
+		if (!pi) {
+			t->osd = CEPH_HOMELESS_OSD;
+			ct_res = CALC_TARGET_POOL_DNE;
+			goto out;
+		}
 	}
 
 	ret = ceph_object_locator_to_pg(osdc->osdmap, &t->target_oid,
-- 
GitLab


From df28152d53b449a72258000f592472215fc9371e Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 15 Jun 2017 16:30:56 +0200
Subject: [PATCH 0912/1958] libceph: avoid unnecessary pi lookups in
 calc_target()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h | 10 +++++--
 net/ceph/osd_client.c       |  8 +++---
 net/ceph/osdmap.c           | 54 ++++++++++++++++++++-----------------
 3 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 63fb073a3355c..060d059acbf89 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -273,16 +273,22 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 					 u64 off, u64 len,
 					 u64 *bno, u64 *oxoff, u64 *oxlen);
 
+int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+				const struct ceph_object_id *oid,
+				const struct ceph_object_locator *oloc,
+				struct ceph_pg *raw_pgid);
 int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
-			      struct ceph_object_id *oid,
-			      struct ceph_object_locator *oloc,
+			      const struct ceph_object_id *oid,
+			      const struct ceph_object_locator *oloc,
 			      struct ceph_pg *raw_pgid);
 
 void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
+			       struct ceph_pg_pool_info *pi,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting);
 bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      struct ceph_pg_pool_info *pi,
 			      const struct ceph_pg *raw_pgid,
 			      struct ceph_spg *spgid);
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ce0055dc75448..620aa43a6a0a9 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1368,8 +1368,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		}
 	}
 
-	ret = ceph_object_locator_to_pg(osdc->osdmap, &t->target_oid,
-					&t->target_oloc, &pgid);
+	ret = __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc,
+					  &pgid);
 	if (ret) {
 		WARN_ON(ret != -ENOENT);
 		t->osd = CEPH_HOMELESS_OSD;
@@ -1379,7 +1379,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	last_pgid.pool = pgid.pool;
 	last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);
 
-	ceph_pg_to_up_acting_osds(osdc->osdmap, &pgid, &up, &acting);
+	ceph_pg_to_up_acting_osds(osdc->osdmap, pi, &pgid, &up, &acting);
 	if (any_change &&
 	    ceph_is_new_interval(&t->acting,
 				 &acting,
@@ -1407,7 +1407,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 
 	if (legacy_change || force_resend || split) {
 		t->pgid = pgid; /* struct */
-		ceph_pg_to_primary_shard(osdc->osdmap, &pgid, &t->spgid);
+		ceph_pg_to_primary_shard(osdc->osdmap, pi, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
 		ceph_osds_copy(&t->up, &up);
 		t->size = pi->size;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 367879afed58b..1e2e190a4c2ab 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1923,16 +1923,12 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
  * Should only be called with target_oid and target_oloc (as opposed to
  * base_oid and base_oloc), since tiering isn't taken into account.
  */
-int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
-			      struct ceph_object_id *oid,
-			      struct ceph_object_locator *oloc,
-			      struct ceph_pg *raw_pgid)
+int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+				const struct ceph_object_id *oid,
+				const struct ceph_object_locator *oloc,
+				struct ceph_pg *raw_pgid)
 {
-	struct ceph_pg_pool_info *pi;
-
-	pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
-	if (!pi)
-		return -ENOENT;
+	WARN_ON(pi->id != oloc->pool);
 
 	if (!oloc->pool_ns) {
 		raw_pgid->pool = oloc->pool;
@@ -1964,6 +1960,20 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
 	}
 	return 0;
 }
+
+int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
+			      const struct ceph_object_id *oid,
+			      const struct ceph_object_locator *oloc,
+			      struct ceph_pg *raw_pgid)
+{
+	struct ceph_pg_pool_info *pi;
+
+	pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
+	if (!pi)
+		return -ENOENT;
+
+	return __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
+}
 EXPORT_SYMBOL(ceph_object_locator_to_pg);
 
 /*
@@ -2236,19 +2246,14 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
  * resend a request.
  */
 void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
+			       struct ceph_pg_pool_info *pi,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting)
 {
-	struct ceph_pg_pool_info *pi;
 	u32 pps;
 
-	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
-	if (!pi) {
-		ceph_osds_init(up);
-		ceph_osds_init(acting);
-		goto out;
-	}
+	WARN_ON(pi->id != raw_pgid->pool);
 
 	pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
 	raw_to_up_osds(osdmap, pi, up);
@@ -2260,23 +2265,19 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
 		if (acting->primary == -1)
 			acting->primary = up->primary;
 	}
-out:
 	WARN_ON(!osds_valid(up) || !osds_valid(acting));
 }
 
 bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      struct ceph_pg_pool_info *pi,
 			      const struct ceph_pg *raw_pgid,
 			      struct ceph_spg *spgid)
 {
-	struct ceph_pg_pool_info *pi;
 	struct ceph_pg pgid;
 	struct ceph_osds up, acting;
 	int i;
 
-	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
-	if (!pi)
-		return false;
-
+	WARN_ON(pi->id != raw_pgid->pool);
 	raw_pg_to_pg(pi, raw_pgid, &pgid);
 
 	if (ceph_can_shift_osds(pi)) {
@@ -2285,7 +2286,7 @@ bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
 		return true;
 	}
 
-	ceph_pg_to_up_acting_osds(osdmap, &pgid, &up, &acting);
+	ceph_pg_to_up_acting_osds(osdmap, pi, &pgid, &up, &acting);
 	for (i = 0; i < acting.size; i++) {
 		if (acting.osds[i] == acting.primary) {
 			spgid->pgid = pgid; /* struct */
@@ -2303,9 +2304,14 @@ bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
 			      const struct ceph_pg *raw_pgid)
 {
+	struct ceph_pg_pool_info *pi;
 	struct ceph_osds up, acting;
 
-	ceph_pg_to_up_acting_osds(osdmap, raw_pgid, &up, &acting);
+	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
+	if (!pi)
+		return -1;
+
+	ceph_pg_to_up_acting_osds(osdmap, pi, raw_pgid, &up, &acting);
 	return acting.primary;
 }
 EXPORT_SYMBOL(ceph_pg_to_acting_primary);
-- 
GitLab


From 76f827a7b1faaaebc53f89d184e95ea3a0b8dd71 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 19 Jun 2017 12:18:05 +0200
Subject: [PATCH 0913/1958] libceph: make DEFINE_RB_* helpers more general

Initially for ceph_pg_mapping, ceph_spg_mapping and ceph_hobject_id,
compared with ceph_pg_compare(), ceph_spg_compare() and hoid_compare()
respectively.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h | 49 +++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 3229ae6c78469..8a79587e1317e 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -184,10 +184,11 @@ static inline int calc_pages_for(u64 off, u64 len)
 		(off >> PAGE_SHIFT);
 }
 
-/*
- * These are not meant to be generic - an integer key is assumed.
- */
-#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
+#define RB_BYVAL(a)      (a)
+#define RB_BYPTR(a)      (&(a))
+#define RB_CMP3WAY(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
 static void insert_##name(struct rb_root *root, type *t)		\
 {									\
 	struct rb_node **n = &root->rb_node;				\
@@ -197,11 +198,13 @@ static void insert_##name(struct rb_root *root, type *t)		\
 									\
 	while (*n) {							\
 		type *cur = rb_entry(*n, type, nodefld);		\
+		int cmp;						\
 									\
 		parent = *n;						\
-		if (t->keyfld < cur->keyfld)				\
+		cmp = cmpexp(keyexp(t->keyfld), keyexp(cur->keyfld));	\
+		if (cmp < 0)						\
 			n = &(*n)->rb_left;				\
-		else if (t->keyfld > cur->keyfld)			\
+		else if (cmp > 0)					\
 			n = &(*n)->rb_right;				\
 		else							\
 			BUG();						\
@@ -217,19 +220,24 @@ static void erase_##name(struct rb_root *root, type *t)			\
 	RB_CLEAR_NODE(&t->nodefld);					\
 }
 
-#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
-extern type __lookup_##name##_key;					\
-static type *lookup_##name(struct rb_root *root,			\
-			   typeof(__lookup_##name##_key.keyfld) key)	\
+/*
+ * @lookup_param_type is a parameter and not constructed from (@type,
+ * @keyfld) with typeof() because adding const is too unwieldy.
+ */
+#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,	\
+			       lookup_param_type, nodefld)		\
+static type *lookup_##name(struct rb_root *root, lookup_param_type key)	\
 {									\
 	struct rb_node *n = root->rb_node;				\
 									\
 	while (n) {							\
 		type *cur = rb_entry(n, type, nodefld);			\
+		int cmp;						\
 									\
-		if (key < cur->keyfld)					\
+		cmp = cmpexp(key, keyexp(cur->keyfld));			\
+		if (cmp < 0)						\
 			n = n->rb_left;					\
-		else if (key > cur->keyfld)				\
+		else if (cmp > 0)					\
 			n = n->rb_right;				\
 		else							\
 			return cur;					\
@@ -238,6 +246,23 @@ static type *lookup_##name(struct rb_root *root,			\
 	return NULL;							\
 }
 
+#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp,		\
+			 lookup_param_type, nodefld)			\
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld)	\
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,		\
+		       lookup_param_type, nodefld)
+
+/*
+ * Shorthands for integer keys.
+ */
+#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld)
+
+#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
+extern type __lookup_##name##_key;					\
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL,	\
+		       typeof(__lookup_##name##_key.keyfld), nodefld)
+
 #define DEFINE_RB_FUNCS(name, type, keyfld, nodefld)			\
 DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)			\
 DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)
-- 
GitLab


From a02a946dfe9633d7e0202359836f6b5217a62824 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 19 Jun 2017 12:18:05 +0200
Subject: [PATCH 0914/1958] libceph: respect RADOS_BACKOFF backoffs

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_fs.h    |   1 +
 include/linux/ceph/osd_client.h |  45 +++
 include/linux/ceph/osdmap.h     |   1 +
 include/linux/ceph/rados.h      |   6 +
 net/ceph/ceph_common.c          |   1 +
 net/ceph/debugfs.c              |  74 ++++
 net/ceph/osd_client.c           | 593 ++++++++++++++++++++++++++++++++
 net/ceph/osdmap.c               |  16 +
 8 files changed, 737 insertions(+)

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index ad078ebe25d6b..edf5b04b918a2 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -147,6 +147,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_OSD_OP                 42
 #define CEPH_MSG_OSD_OPREPLY            43
 #define CEPH_MSG_WATCH_NOTIFY           44
+#define CEPH_MSG_OSD_BACKOFF            61
 
 
 /* watch-notify operations */
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 62c672bcbb31a..c6d96a5f46fd6 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -1,6 +1,7 @@
 #ifndef _FS_CEPH_OSD_CLIENT_H
 #define _FS_CEPH_OSD_CLIENT_H
 
+#include <linux/bitrev.h>
 #include <linux/completion.h>
 #include <linux/kref.h>
 #include <linux/mempool.h>
@@ -36,6 +37,8 @@ struct ceph_osd {
 	struct ceph_connection o_con;
 	struct rb_root o_requests;
 	struct rb_root o_linger_requests;
+	struct rb_root o_backoff_mappings;
+	struct rb_root o_backoffs_by_id;
 	struct list_head o_osd_lru;
 	struct ceph_auth_handshake o_auth;
 	unsigned long lru_ttl;
@@ -275,6 +278,48 @@ struct ceph_watch_item {
 	struct ceph_entity_addr addr;
 };
 
+struct ceph_spg_mapping {
+	struct rb_node node;
+	struct ceph_spg spgid;
+
+	struct rb_root backoffs;
+};
+
+struct ceph_hobject_id {
+	void *key;
+	size_t key_len;
+	void *oid;
+	size_t oid_len;
+	u64 snapid;
+	u32 hash;
+	u8 is_max;
+	void *nspace;
+	size_t nspace_len;
+	s64 pool;
+
+	/* cache */
+	u32 hash_reverse_bits;
+};
+
+static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
+{
+	hoid->hash_reverse_bits = bitrev32(hoid->hash);
+}
+
+/*
+ * PG-wide backoff: [begin, end)
+ * per-object backoff: begin == end
+ */
+struct ceph_osd_backoff {
+	struct rb_node spg_node;
+	struct rb_node id_node;
+
+	struct ceph_spg spgid;
+	u64 id;
+	struct ceph_hobject_id *begin;
+	struct ceph_hobject_id *end;
+};
+
 #define CEPH_LINGER_ID_START	0xffff000000000000ULL
 
 struct ceph_osd_client {
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 060d059acbf89..fe6d189bdd307 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -32,6 +32,7 @@ struct ceph_spg {
 };
 
 int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
+int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
 
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
 						       together */
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 5d0018782d504..385db08bb8b2d 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -439,6 +439,12 @@ enum {
 
 const char *ceph_osd_watch_op_name(int o);
 
+enum {
+	CEPH_OSD_BACKOFF_OP_BLOCK = 1,
+	CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2,
+	CEPH_OSD_BACKOFF_OP_UNBLOCK = 3,
+};
+
 /*
  * an individual object operation.  each may be accompanied by some data
  * payload
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 47e94b560ba0d..3d265c5cb6d0b 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -85,6 +85,7 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_OSD_OP: return "osd_op";
 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
 	case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
+	case CEPH_MSG_OSD_BACKOFF: return "osd_backoff";
 	default: return "unknown";
 	}
 }
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index c0089f8ccaeb9..017f15c575f80 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -243,6 +243,73 @@ static void dump_linger_requests(struct seq_file *s, struct ceph_osd *osd)
 	mutex_unlock(&osd->lock);
 }
 
+static void dump_snapid(struct seq_file *s, u64 snapid)
+{
+	if (snapid == CEPH_NOSNAP)
+		seq_puts(s, "head");
+	else if (snapid == CEPH_SNAPDIR)
+		seq_puts(s, "snapdir");
+	else
+		seq_printf(s, "%llx", snapid);
+}
+
+static void dump_name_escaped(struct seq_file *s, unsigned char *name,
+			      size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (name[i] == '%' || name[i] == ':' || name[i] == '/' ||
+		    name[i] < 32 || name[i] >= 127) {
+			seq_printf(s, "%%%02x", name[i]);
+		} else {
+			seq_putc(s, name[i]);
+		}
+	}
+}
+
+static void dump_hoid(struct seq_file *s, const struct ceph_hobject_id *hoid)
+{
+	if (hoid->snapid == 0 && hoid->hash == 0 && !hoid->is_max &&
+	    hoid->pool == S64_MIN) {
+		seq_puts(s, "MIN");
+		return;
+	}
+	if (hoid->is_max) {
+		seq_puts(s, "MAX");
+		return;
+	}
+	seq_printf(s, "%lld:%08x:", hoid->pool, hoid->hash_reverse_bits);
+	dump_name_escaped(s, hoid->nspace, hoid->nspace_len);
+	seq_putc(s, ':');
+	dump_name_escaped(s, hoid->key, hoid->key_len);
+	seq_putc(s, ':');
+	dump_name_escaped(s, hoid->oid, hoid->oid_len);
+	seq_putc(s, ':');
+	dump_snapid(s, hoid->snapid);
+}
+
+static void dump_backoffs(struct seq_file *s, struct ceph_osd *osd)
+{
+	struct rb_node *n;
+
+	mutex_lock(&osd->lock);
+	for (n = rb_first(&osd->o_backoffs_by_id); n; n = rb_next(n)) {
+		struct ceph_osd_backoff *backoff =
+		    rb_entry(n, struct ceph_osd_backoff, id_node);
+
+		seq_printf(s, "osd%d\t", osd->o_osd);
+		dump_spgid(s, &backoff->spgid);
+		seq_printf(s, "\t%llu\t", backoff->id);
+		dump_hoid(s, backoff->begin);
+		seq_putc(s, '\t');
+		dump_hoid(s, backoff->end);
+		seq_putc(s, '\n');
+	}
+
+	mutex_unlock(&osd->lock);
+}
+
 static int osdc_show(struct seq_file *s, void *pp)
 {
 	struct ceph_client *client = s->private;
@@ -268,6 +335,13 @@ static int osdc_show(struct seq_file *s, void *pp)
 	}
 	dump_linger_requests(s, &osdc->homeless_osd);
 
+	seq_puts(s, "BACKOFFS\n");
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+		dump_backoffs(s, osd);
+	}
+
 	up_read(&osdc->lock);
 	return 0;
 }
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 620aa43a6a0a9..86a9737d8e3ff 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -50,6 +50,7 @@ static void link_linger(struct ceph_osd *osd,
 			struct ceph_osd_linger_request *lreq);
 static void unlink_linger(struct ceph_osd *osd,
 			  struct ceph_osd_linger_request *lreq);
+static void clear_backoffs(struct ceph_osd *osd);
 
 #if 1
 static inline bool rwsem_is_wrlocked(struct rw_semaphore *sem)
@@ -1019,6 +1020,8 @@ static void osd_init(struct ceph_osd *osd)
 	RB_CLEAR_NODE(&osd->o_node);
 	osd->o_requests = RB_ROOT;
 	osd->o_linger_requests = RB_ROOT;
+	osd->o_backoff_mappings = RB_ROOT;
+	osd->o_backoffs_by_id = RB_ROOT;
 	INIT_LIST_HEAD(&osd->o_osd_lru);
 	INIT_LIST_HEAD(&osd->o_keepalive_item);
 	osd->o_incarnation = 1;
@@ -1030,6 +1033,8 @@ static void osd_cleanup(struct ceph_osd *osd)
 	WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
 	WARN_ON(!RB_EMPTY_ROOT(&osd->o_requests));
 	WARN_ON(!RB_EMPTY_ROOT(&osd->o_linger_requests));
+	WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoff_mappings));
+	WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoffs_by_id));
 	WARN_ON(!list_empty(&osd->o_osd_lru));
 	WARN_ON(!list_empty(&osd->o_keepalive_item));
 
@@ -1150,6 +1155,7 @@ static void close_osd(struct ceph_osd *osd)
 		unlink_linger(osd, lreq);
 		link_linger(&osdc->homeless_osd, lreq);
 	}
+	clear_backoffs(osd);
 
 	__remove_osd_from_lru(osd);
 	erase_osd(&osdc->osds, osd);
@@ -1431,6 +1437,328 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	return ct_res;
 }
 
+static struct ceph_spg_mapping *alloc_spg_mapping(void)
+{
+	struct ceph_spg_mapping *spg;
+
+	spg = kmalloc(sizeof(*spg), GFP_NOIO);
+	if (!spg)
+		return NULL;
+
+	RB_CLEAR_NODE(&spg->node);
+	spg->backoffs = RB_ROOT;
+	return spg;
+}
+
+static void free_spg_mapping(struct ceph_spg_mapping *spg)
+{
+	WARN_ON(!RB_EMPTY_NODE(&spg->node));
+	WARN_ON(!RB_EMPTY_ROOT(&spg->backoffs));
+
+	kfree(spg);
+}
+
+/*
+ * rbtree of ceph_spg_mapping for handling map<spg_t, ...>, similar to
+ * ceph_pg_mapping.  Used to track OSD backoffs -- a backoff [range] is
+ * defined only within a specific spgid; it does not pass anything to
+ * children on split, or to another primary.
+ */
+DEFINE_RB_FUNCS2(spg_mapping, struct ceph_spg_mapping, spgid, ceph_spg_compare,
+		 RB_BYPTR, const struct ceph_spg *, node)
+
+static u64 hoid_get_bitwise_key(const struct ceph_hobject_id *hoid)
+{
+	return hoid->is_max ? 0x100000000ull : hoid->hash_reverse_bits;
+}
+
+static void hoid_get_effective_key(const struct ceph_hobject_id *hoid,
+				   void **pkey, size_t *pkey_len)
+{
+	if (hoid->key_len) {
+		*pkey = hoid->key;
+		*pkey_len = hoid->key_len;
+	} else {
+		*pkey = hoid->oid;
+		*pkey_len = hoid->oid_len;
+	}
+}
+
+static int compare_names(const void *name1, size_t name1_len,
+			 const void *name2, size_t name2_len)
+{
+	int ret;
+
+	ret = memcmp(name1, name2, min(name1_len, name2_len));
+	if (!ret) {
+		if (name1_len < name2_len)
+			ret = -1;
+		else if (name1_len > name2_len)
+			ret = 1;
+	}
+	return ret;
+}
+
+static int hoid_compare(const struct ceph_hobject_id *lhs,
+			const struct ceph_hobject_id *rhs)
+{
+	void *effective_key1, *effective_key2;
+	size_t effective_key1_len, effective_key2_len;
+	int ret;
+
+	if (lhs->is_max < rhs->is_max)
+		return -1;
+	if (lhs->is_max > rhs->is_max)
+		return 1;
+
+	if (lhs->pool < rhs->pool)
+		return -1;
+	if (lhs->pool > rhs->pool)
+		return 1;
+
+	if (hoid_get_bitwise_key(lhs) < hoid_get_bitwise_key(rhs))
+		return -1;
+	if (hoid_get_bitwise_key(lhs) > hoid_get_bitwise_key(rhs))
+		return 1;
+
+	ret = compare_names(lhs->nspace, lhs->nspace_len,
+			    rhs->nspace, rhs->nspace_len);
+	if (ret)
+		return ret;
+
+	hoid_get_effective_key(lhs, &effective_key1, &effective_key1_len);
+	hoid_get_effective_key(rhs, &effective_key2, &effective_key2_len);
+	ret = compare_names(effective_key1, effective_key1_len,
+			    effective_key2, effective_key2_len);
+	if (ret)
+		return ret;
+
+	ret = compare_names(lhs->oid, lhs->oid_len, rhs->oid, rhs->oid_len);
+	if (ret)
+		return ret;
+
+	if (lhs->snapid < rhs->snapid)
+		return -1;
+	if (lhs->snapid > rhs->snapid)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * For decoding ->begin and ->end of MOSDBackoff only -- no MIN/MAX
+ * compat stuff here.
+ *
+ * Assumes @hoid is zero-initialized.
+ */
+static int decode_hoid(void **p, void *end, struct ceph_hobject_id *hoid)
+{
+	u8 struct_v;
+	u32 struct_len;
+	int ret;
+
+	ret = ceph_start_decoding(p, end, 4, "hobject_t", &struct_v,
+				  &struct_len);
+	if (ret)
+		return ret;
+
+	if (struct_v < 4) {
+		pr_err("got struct_v %d < 4 of hobject_t\n", struct_v);
+		goto e_inval;
+	}
+
+	hoid->key = ceph_extract_encoded_string(p, end, &hoid->key_len,
+						GFP_NOIO);
+	if (IS_ERR(hoid->key)) {
+		ret = PTR_ERR(hoid->key);
+		hoid->key = NULL;
+		return ret;
+	}
+
+	hoid->oid = ceph_extract_encoded_string(p, end, &hoid->oid_len,
+						GFP_NOIO);
+	if (IS_ERR(hoid->oid)) {
+		ret = PTR_ERR(hoid->oid);
+		hoid->oid = NULL;
+		return ret;
+	}
+
+	ceph_decode_64_safe(p, end, hoid->snapid, e_inval);
+	ceph_decode_32_safe(p, end, hoid->hash, e_inval);
+	ceph_decode_8_safe(p, end, hoid->is_max, e_inval);
+
+	hoid->nspace = ceph_extract_encoded_string(p, end, &hoid->nspace_len,
+						   GFP_NOIO);
+	if (IS_ERR(hoid->nspace)) {
+		ret = PTR_ERR(hoid->nspace);
+		hoid->nspace = NULL;
+		return ret;
+	}
+
+	ceph_decode_64_safe(p, end, hoid->pool, e_inval);
+
+	ceph_hoid_build_hash_cache(hoid);
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int hoid_encoding_size(const struct ceph_hobject_id *hoid)
+{
+	return 8 + 4 + 1 + 8 + /* snapid, hash, is_max, pool */
+	       4 + hoid->key_len + 4 + hoid->oid_len + 4 + hoid->nspace_len;
+}
+
+static void encode_hoid(void **p, void *end, const struct ceph_hobject_id *hoid)
+{
+	ceph_start_encoding(p, 4, 3, hoid_encoding_size(hoid));
+	ceph_encode_string(p, end, hoid->key, hoid->key_len);
+	ceph_encode_string(p, end, hoid->oid, hoid->oid_len);
+	ceph_encode_64(p, hoid->snapid);
+	ceph_encode_32(p, hoid->hash);
+	ceph_encode_8(p, hoid->is_max);
+	ceph_encode_string(p, end, hoid->nspace, hoid->nspace_len);
+	ceph_encode_64(p, hoid->pool);
+}
+
+static void free_hoid(struct ceph_hobject_id *hoid)
+{
+	if (hoid) {
+		kfree(hoid->key);
+		kfree(hoid->oid);
+		kfree(hoid->nspace);
+		kfree(hoid);
+	}
+}
+
+static struct ceph_osd_backoff *alloc_backoff(void)
+{
+	struct ceph_osd_backoff *backoff;
+
+	backoff = kzalloc(sizeof(*backoff), GFP_NOIO);
+	if (!backoff)
+		return NULL;
+
+	RB_CLEAR_NODE(&backoff->spg_node);
+	RB_CLEAR_NODE(&backoff->id_node);
+	return backoff;
+}
+
+static void free_backoff(struct ceph_osd_backoff *backoff)
+{
+	WARN_ON(!RB_EMPTY_NODE(&backoff->spg_node));
+	WARN_ON(!RB_EMPTY_NODE(&backoff->id_node));
+
+	free_hoid(backoff->begin);
+	free_hoid(backoff->end);
+	kfree(backoff);
+}
+
+/*
+ * Within a specific spgid, backoffs are managed by ->begin hoid.
+ */
+DEFINE_RB_INSDEL_FUNCS2(backoff, struct ceph_osd_backoff, begin, hoid_compare,
+			RB_BYVAL, spg_node);
+
+static struct ceph_osd_backoff *lookup_containing_backoff(struct rb_root *root,
+					    const struct ceph_hobject_id *hoid)
+{
+	struct rb_node *n = root->rb_node;
+
+	while (n) {
+		struct ceph_osd_backoff *cur =
+		    rb_entry(n, struct ceph_osd_backoff, spg_node);
+		int cmp;
+
+		cmp = hoid_compare(hoid, cur->begin);
+		if (cmp < 0) {
+			n = n->rb_left;
+		} else if (cmp > 0) {
+			if (hoid_compare(hoid, cur->end) < 0)
+				return cur;
+
+			n = n->rb_right;
+		} else {
+			return cur;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Each backoff has a unique id within its OSD session.
+ */
+DEFINE_RB_FUNCS(backoff_by_id, struct ceph_osd_backoff, id, id_node)
+
+static void clear_backoffs(struct ceph_osd *osd)
+{
+	while (!RB_EMPTY_ROOT(&osd->o_backoff_mappings)) {
+		struct ceph_spg_mapping *spg =
+		    rb_entry(rb_first(&osd->o_backoff_mappings),
+			     struct ceph_spg_mapping, node);
+
+		while (!RB_EMPTY_ROOT(&spg->backoffs)) {
+			struct ceph_osd_backoff *backoff =
+			    rb_entry(rb_first(&spg->backoffs),
+				     struct ceph_osd_backoff, spg_node);
+
+			erase_backoff(&spg->backoffs, backoff);
+			erase_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+			free_backoff(backoff);
+		}
+		erase_spg_mapping(&osd->o_backoff_mappings, spg);
+		free_spg_mapping(spg);
+	}
+}
+
+/*
+ * Set up a temporary, non-owning view into @t.
+ */
+static void hoid_fill_from_target(struct ceph_hobject_id *hoid,
+				  const struct ceph_osd_request_target *t)
+{
+	hoid->key = NULL;
+	hoid->key_len = 0;
+	hoid->oid = t->target_oid.name;
+	hoid->oid_len = t->target_oid.name_len;
+	hoid->snapid = CEPH_NOSNAP;
+	hoid->hash = t->pgid.seed;
+	hoid->is_max = false;
+	if (t->target_oloc.pool_ns) {
+		hoid->nspace = t->target_oloc.pool_ns->str;
+		hoid->nspace_len = t->target_oloc.pool_ns->len;
+	} else {
+		hoid->nspace = NULL;
+		hoid->nspace_len = 0;
+	}
+	hoid->pool = t->target_oloc.pool;
+	ceph_hoid_build_hash_cache(hoid);
+}
+
+static bool should_plug_request(struct ceph_osd_request *req)
+{
+	struct ceph_osd *osd = req->r_osd;
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct ceph_hobject_id hoid;
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &req->r_t.spgid);
+	if (!spg)
+		return false;
+
+	hoid_fill_from_target(&hoid, &req->r_t);
+	backoff = lookup_containing_backoff(&spg->backoffs, &hoid);
+	if (!backoff)
+		return false;
+
+	dout("%s req %p tid %llu backoff osd%d spgid %llu.%xs%d id %llu\n",
+	     __func__, req, req->r_tid, osd->o_osd, backoff->spgid.pgid.pool,
+	     backoff->spgid.pgid.seed, backoff->spgid.shard, backoff->id);
+	return true;
+}
+
 static void setup_request_data(struct ceph_osd_request *req,
 			       struct ceph_msg *msg)
 {
@@ -1707,6 +2035,10 @@ static void send_request(struct ceph_osd_request *req)
 	verify_osd_locked(osd);
 	WARN_ON(osd->o_osd != req->r_t.osd);
 
+	/* backoff? */
+	if (should_plug_request(req))
+		return;
+
 	/*
 	 * We may have a previously queued request message hanging
 	 * around.  Cancel it to avoid corrupting the msgr.
@@ -3527,6 +3859,8 @@ static void kick_osd_requests(struct ceph_osd *osd)
 {
 	struct rb_node *n;
 
+	clear_backoffs(osd);
+
 	for (n = rb_first(&osd->o_requests); n; ) {
 		struct ceph_osd_request *req =
 		    rb_entry(n, struct ceph_osd_request, r_node);
@@ -3572,6 +3906,261 @@ static void osd_fault(struct ceph_connection *con)
 	up_write(&osdc->lock);
 }
 
+struct MOSDBackoff {
+	struct ceph_spg spgid;
+	u32 map_epoch;
+	u8 op;
+	u64 id;
+	struct ceph_hobject_id *begin;
+	struct ceph_hobject_id *end;
+};
+
+static int decode_MOSDBackoff(const struct ceph_msg *msg, struct MOSDBackoff *m)
+{
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front.iov_len;
+	u8 struct_v;
+	u32 struct_len;
+	int ret;
+
+	ret = ceph_start_decoding(&p, end, 1, "spg_t", &struct_v, &struct_len);
+	if (ret)
+		return ret;
+
+	ret = ceph_decode_pgid(&p, end, &m->spgid.pgid);
+	if (ret)
+		return ret;
+
+	ceph_decode_8_safe(&p, end, m->spgid.shard, e_inval);
+	ceph_decode_32_safe(&p, end, m->map_epoch, e_inval);
+	ceph_decode_8_safe(&p, end, m->op, e_inval);
+	ceph_decode_64_safe(&p, end, m->id, e_inval);
+
+	m->begin = kzalloc(sizeof(*m->begin), GFP_NOIO);
+	if (!m->begin)
+		return -ENOMEM;
+
+	ret = decode_hoid(&p, end, m->begin);
+	if (ret) {
+		free_hoid(m->begin);
+		return ret;
+	}
+
+	m->end = kzalloc(sizeof(*m->end), GFP_NOIO);
+	if (!m->end) {
+		free_hoid(m->begin);
+		return -ENOMEM;
+	}
+
+	ret = decode_hoid(&p, end, m->end);
+	if (ret) {
+		free_hoid(m->begin);
+		free_hoid(m->end);
+		return ret;
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static struct ceph_msg *create_backoff_message(
+				const struct ceph_osd_backoff *backoff,
+				u32 map_epoch)
+{
+	struct ceph_msg *msg;
+	void *p, *end;
+	int msg_size;
+
+	msg_size = CEPH_ENCODING_START_BLK_LEN +
+			CEPH_PGID_ENCODING_LEN + 1; /* spgid */
+	msg_size += 4 + 1 + 8; /* map_epoch, op, id */
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			hoid_encoding_size(backoff->begin);
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			hoid_encoding_size(backoff->end);
+
+	msg = ceph_msg_new(CEPH_MSG_OSD_BACKOFF, msg_size, GFP_NOIO, true);
+	if (!msg)
+		return NULL;
+
+	p = msg->front.iov_base;
+	end = p + msg->front_alloc_len;
+
+	encode_spgid(&p, &backoff->spgid);
+	ceph_encode_32(&p, map_epoch);
+	ceph_encode_8(&p, CEPH_OSD_BACKOFF_OP_ACK_BLOCK);
+	ceph_encode_64(&p, backoff->id);
+	encode_hoid(&p, end, backoff->begin);
+	encode_hoid(&p, end, backoff->end);
+	BUG_ON(p != end);
+
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.version = cpu_to_le16(1); /* MOSDBackoff v1 */
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	return msg;
+}
+
+static void handle_backoff_block(struct ceph_osd *osd, struct MOSDBackoff *m)
+{
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct ceph_msg *msg;
+
+	dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd,
+	     m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id);
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &m->spgid);
+	if (!spg) {
+		spg = alloc_spg_mapping();
+		if (!spg) {
+			pr_err("%s failed to allocate spg\n", __func__);
+			return;
+		}
+		spg->spgid = m->spgid; /* struct */
+		insert_spg_mapping(&osd->o_backoff_mappings, spg);
+	}
+
+	backoff = alloc_backoff();
+	if (!backoff) {
+		pr_err("%s failed to allocate backoff\n", __func__);
+		return;
+	}
+	backoff->spgid = m->spgid; /* struct */
+	backoff->id = m->id;
+	backoff->begin = m->begin;
+	m->begin = NULL; /* backoff now owns this */
+	backoff->end = m->end;
+	m->end = NULL;   /* ditto */
+
+	insert_backoff(&spg->backoffs, backoff);
+	insert_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+
+	/*
+	 * Ack with original backoff's epoch so that the OSD can
+	 * discard this if there was a PG split.
+	 */
+	msg = create_backoff_message(backoff, m->map_epoch);
+	if (!msg) {
+		pr_err("%s failed to allocate msg\n", __func__);
+		return;
+	}
+	ceph_con_send(&osd->o_con, msg);
+}
+
+static bool target_contained_by(const struct ceph_osd_request_target *t,
+				const struct ceph_hobject_id *begin,
+				const struct ceph_hobject_id *end)
+{
+	struct ceph_hobject_id hoid;
+	int cmp;
+
+	hoid_fill_from_target(&hoid, t);
+	cmp = hoid_compare(&hoid, begin);
+	return !cmp || (cmp > 0 && hoid_compare(&hoid, end) < 0);
+}
+
+static void handle_backoff_unblock(struct ceph_osd *osd,
+				   const struct MOSDBackoff *m)
+{
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct rb_node *n;
+
+	dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd,
+	     m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id);
+
+	backoff = lookup_backoff_by_id(&osd->o_backoffs_by_id, m->id);
+	if (!backoff) {
+		pr_err("%s osd%d spgid %llu.%xs%d id %llu backoff dne\n",
+		       __func__, osd->o_osd, m->spgid.pgid.pool,
+		       m->spgid.pgid.seed, m->spgid.shard, m->id);
+		return;
+	}
+
+	if (hoid_compare(backoff->begin, m->begin) &&
+	    hoid_compare(backoff->end, m->end)) {
+		pr_err("%s osd%d spgid %llu.%xs%d id %llu bad range?\n",
+		       __func__, osd->o_osd, m->spgid.pgid.pool,
+		       m->spgid.pgid.seed, m->spgid.shard, m->id);
+		/* unblock it anyway... */
+	}
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &backoff->spgid);
+	BUG_ON(!spg);
+
+	erase_backoff(&spg->backoffs, backoff);
+	erase_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+	free_backoff(backoff);
+
+	if (RB_EMPTY_ROOT(&spg->backoffs)) {
+		erase_spg_mapping(&osd->o_backoff_mappings, spg);
+		free_spg_mapping(spg);
+	}
+
+	for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
+		struct ceph_osd_request *req =
+		    rb_entry(n, struct ceph_osd_request, r_node);
+
+		if (!ceph_spg_compare(&req->r_t.spgid, &m->spgid)) {
+			/*
+			 * Match against @m, not @backoff -- the PG may
+			 * have split on the OSD.
+			 */
+			if (target_contained_by(&req->r_t, m->begin, m->end)) {
+				/*
+				 * If no other installed backoff applies,
+				 * resend.
+				 */
+				send_request(req);
+			}
+		}
+	}
+}
+
+static void handle_backoff(struct ceph_osd *osd, struct ceph_msg *msg)
+{
+	struct ceph_osd_client *osdc = osd->o_osdc;
+	struct MOSDBackoff m;
+	int ret;
+
+	down_read(&osdc->lock);
+	if (!osd_registered(osd)) {
+		dout("%s osd%d unknown\n", __func__, osd->o_osd);
+		up_read(&osdc->lock);
+		return;
+	}
+	WARN_ON(osd->o_osd != le64_to_cpu(msg->hdr.src.num));
+
+	mutex_lock(&osd->lock);
+	ret = decode_MOSDBackoff(msg, &m);
+	if (ret) {
+		pr_err("failed to decode MOSDBackoff: %d\n", ret);
+		ceph_msg_dump(msg);
+		goto out_unlock;
+	}
+
+	switch (m.op) {
+	case CEPH_OSD_BACKOFF_OP_BLOCK:
+		handle_backoff_block(osd, &m);
+		break;
+	case CEPH_OSD_BACKOFF_OP_UNBLOCK:
+		handle_backoff_unblock(osd, &m);
+		break;
+	default:
+		pr_err("%s osd%d unknown op %d\n", __func__, osd->o_osd, m.op);
+	}
+
+	free_hoid(m.begin);
+	free_hoid(m.end);
+
+out_unlock:
+	mutex_unlock(&osd->lock);
+	up_read(&osdc->lock);
+}
+
 /*
  * Process osd watch notifications
  */
@@ -4509,6 +5098,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 	case CEPH_MSG_OSD_OPREPLY:
 		handle_reply(osd, msg);
 		break;
+	case CEPH_MSG_OSD_BACKOFF:
+		handle_backoff(osd, msg);
+		break;
 	case CEPH_MSG_WATCH_NOTIFY:
 		handle_watch_notify(osdc, msg);
 		break;
@@ -4631,6 +5223,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
 	*skip = 0;
 	switch (type) {
 	case CEPH_MSG_OSD_MAP:
+	case CEPH_MSG_OSD_BACKOFF:
 	case CEPH_MSG_WATCH_NOTIFY:
 		return alloc_msg_with_page_vector(hdr);
 	case CEPH_MSG_OSD_OPREPLY:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 1e2e190a4c2ab..1d87a736221b4 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -418,6 +418,22 @@ int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
 	return 0;
 }
 
+int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
+{
+	int ret;
+
+	ret = ceph_pg_compare(&lhs->pgid, &rhs->pgid);
+	if (ret)
+		return ret;
+
+	if (lhs->shard < rhs->shard)
+		return -1;
+	if (lhs->shard > rhs->shard)
+		return 1;
+
+	return 0;
+}
+
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds) and primary_temp (explicit primary setting)
-- 
GitLab


From 33333d107112be4d822234deb8820ec6eeb0831d Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:17 +0200
Subject: [PATCH 0915/1958] libceph: don't pass pgid by value

Make __{lookup,remove}_pg_mapping() look like their ceph_spg_mapping
counterparts: take const struct ceph_pg *.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 1d87a736221b4..06baf6bd40e44 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -465,7 +465,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
 }
 
 static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
-						   struct ceph_pg pgid)
+						   const struct ceph_pg *pgid)
 {
 	struct rb_node *n = root->rb_node;
 	struct ceph_pg_mapping *pg;
@@ -473,32 +473,32 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
 
 	while (n) {
 		pg = rb_entry(n, struct ceph_pg_mapping, node);
-		c = ceph_pg_compare(&pgid, &pg->pgid);
+		c = ceph_pg_compare(pgid, &pg->pgid);
 		if (c < 0) {
 			n = n->rb_left;
 		} else if (c > 0) {
 			n = n->rb_right;
 		} else {
 			dout("__lookup_pg_mapping %lld.%x got %p\n",
-			     pgid.pool, pgid.seed, pg);
+			     pgid->pool, pgid->seed, pg);
 			return pg;
 		}
 	}
 	return NULL;
 }
 
-static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
+static int __remove_pg_mapping(struct rb_root *root, const struct ceph_pg *pgid)
 {
 	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
 
 	if (pg) {
-		dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
+		dout("__remove_pg_mapping %lld.%x %p\n", pgid->pool, pgid->seed,
 		     pg);
 		rb_erase(&pg->node, root);
 		kfree(pg);
 		return 0;
 	}
-	dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
+	dout("__remove_pg_mapping %lld.%x dne\n", pgid->pool, pgid->seed);
 	return -ENOENT;
 }
 
@@ -1034,7 +1034,7 @@ static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
 
 		ceph_decode_32_safe(p, end, len, e_inval);
 
-		ret = __remove_pg_mapping(&map->pg_temp, pgid);
+		ret = __remove_pg_mapping(&map->pg_temp, &pgid);
 		BUG_ON(!incremental && ret != -ENOENT);
 
 		if (!incremental || len > 0) {
@@ -1095,7 +1095,7 @@ static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map,
 
 		ceph_decode_32_safe(p, end, osd, e_inval);
 
-		ret = __remove_pg_mapping(&map->primary_temp, pgid);
+		ret = __remove_pg_mapping(&map->primary_temp, &pgid);
 		BUG_ON(!incremental && ret != -ENOENT);
 
 		if (!incremental || osd != (u32)-1) {
@@ -2226,7 +2226,7 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
 	ceph_osds_init(temp);
 
 	/* pg_temp? */
-	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
+	pg = __lookup_pg_mapping(&osdmap->pg_temp, &pgid);
 	if (pg) {
 		for (i = 0; i < pg->pg_temp.len; i++) {
 			if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
@@ -2249,7 +2249,7 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
 	}
 
 	/* primary_temp? */
-	pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid);
+	pg = __lookup_pg_mapping(&osdmap->primary_temp, &pgid);
 	if (pg)
 		temp->primary = pg->primary_temp.osd;
 }
-- 
GitLab


From a303bb0e58345fe9f7ab2f82b90266f2b5036058 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:17 +0200
Subject: [PATCH 0916/1958] libceph: introduce and switch to
 decode_pg_mapping()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 150 +++++++++++++++++++++++++---------------------
 1 file changed, 83 insertions(+), 67 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 06baf6bd40e44..41b380ac68acd 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -434,6 +434,25 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
 	return 0;
 }
 
+static struct ceph_pg_mapping *alloc_pg_mapping(size_t payload_len)
+{
+	struct ceph_pg_mapping *pg;
+
+	pg = kmalloc(sizeof(*pg) + payload_len, GFP_NOIO);
+	if (!pg)
+		return NULL;
+
+	RB_CLEAR_NODE(&pg->node);
+	return pg;
+}
+
+static void free_pg_mapping(struct ceph_pg_mapping *pg)
+{
+	WARN_ON(!RB_EMPTY_NODE(&pg->node));
+
+	kfree(pg);
+}
+
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds) and primary_temp (explicit primary setting)
@@ -1017,47 +1036,36 @@ static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
 	return __decode_pools(p, end, map, true);
 }
 
-static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
-			    bool incremental)
+typedef struct ceph_pg_mapping *(*decode_mapping_fn_t)(void **, void *, bool);
+
+static int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
+			     decode_mapping_fn_t fn, bool incremental)
 {
 	u32 n;
 
+	WARN_ON(!incremental && !fn);
+
 	ceph_decode_32_safe(p, end, n, e_inval);
 	while (n--) {
+		struct ceph_pg_mapping *pg;
 		struct ceph_pg pgid;
-		u32 len, i;
 		int ret;
 
 		ret = ceph_decode_pgid(p, end, &pgid);
 		if (ret)
 			return ret;
 
-		ceph_decode_32_safe(p, end, len, e_inval);
-
-		ret = __remove_pg_mapping(&map->pg_temp, &pgid);
-		BUG_ON(!incremental && ret != -ENOENT);
-
-		if (!incremental || len > 0) {
-			struct ceph_pg_mapping *pg;
+		ret = __remove_pg_mapping(mapping_root, &pgid);
+		WARN_ON(!incremental && ret != -ENOENT);
 
-			ceph_decode_need(p, end, len*sizeof(u32), e_inval);
+		if (fn) {
+			pg = fn(p, end, incremental);
+			if (IS_ERR(pg))
+				return PTR_ERR(pg);
 
-			if (len > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
-				return -EINVAL;
-
-			pg = kzalloc(sizeof(*pg) + len*sizeof(u32), GFP_NOFS);
-			if (!pg)
-				return -ENOMEM;
-
-			pg->pgid = pgid;
-			pg->pg_temp.len = len;
-			for (i = 0; i < len; i++)
-				pg->pg_temp.osds[i] = ceph_decode_32(p);
-
-			ret = __insert_pg_mapping(pg, &map->pg_temp);
-			if (ret) {
-				kfree(pg);
-				return ret;
+			if (pg) {
+				pg->pgid = pgid; /* struct */
+				__insert_pg_mapping(pg, mapping_root);
 			}
 		}
 	}
@@ -1068,69 +1076,77 @@ static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
 	return -EINVAL;
 }
 
+static struct ceph_pg_mapping *__decode_pg_temp(void **p, void *end,
+						bool incremental)
+{
+	struct ceph_pg_mapping *pg;
+	u32 len, i;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	if (len == 0 && incremental)
+		return NULL;	/* new_pg_temp: [] to remove */
+	if (len > (SIZE_MAX - sizeof(*pg)) / sizeof(u32))
+		return ERR_PTR(-EINVAL);
+
+	ceph_decode_need(p, end, len * sizeof(u32), e_inval);
+	pg = alloc_pg_mapping(len * sizeof(u32));
+	if (!pg)
+		return ERR_PTR(-ENOMEM);
+
+	pg->pg_temp.len = len;
+	for (i = 0; i < len; i++)
+		pg->pg_temp.osds[i] = ceph_decode_32(p);
+
+	return pg;
+
+e_inval:
+	return ERR_PTR(-EINVAL);
+}
+
 static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
 {
-	return __decode_pg_temp(p, end, map, false);
+	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
+				 false);
 }
 
 static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
 {
-	return __decode_pg_temp(p, end, map, true);
+	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
+				 true);
 }
 
-static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map,
-				 bool incremental)
+static struct ceph_pg_mapping *__decode_primary_temp(void **p, void *end,
+						     bool incremental)
 {
-	u32 n;
-
-	ceph_decode_32_safe(p, end, n, e_inval);
-	while (n--) {
-		struct ceph_pg pgid;
-		u32 osd;
-		int ret;
-
-		ret = ceph_decode_pgid(p, end, &pgid);
-		if (ret)
-			return ret;
-
-		ceph_decode_32_safe(p, end, osd, e_inval);
-
-		ret = __remove_pg_mapping(&map->primary_temp, &pgid);
-		BUG_ON(!incremental && ret != -ENOENT);
-
-		if (!incremental || osd != (u32)-1) {
-			struct ceph_pg_mapping *pg;
-
-			pg = kzalloc(sizeof(*pg), GFP_NOFS);
-			if (!pg)
-				return -ENOMEM;
+	struct ceph_pg_mapping *pg;
+	u32 osd;
 
-			pg->pgid = pgid;
-			pg->primary_temp.osd = osd;
+	ceph_decode_32_safe(p, end, osd, e_inval);
+	if (osd == (u32)-1 && incremental)
+		return NULL;	/* new_primary_temp: -1 to remove */
 
-			ret = __insert_pg_mapping(pg, &map->primary_temp);
-			if (ret) {
-				kfree(pg);
-				return ret;
-			}
-		}
-	}
+	pg = alloc_pg_mapping(0);
+	if (!pg)
+		return ERR_PTR(-ENOMEM);
 
-	return 0;
+	pg->primary_temp.osd = osd;
+	return pg;
 
 e_inval:
-	return -EINVAL;
+	return ERR_PTR(-EINVAL);
 }
 
 static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
 {
-	return __decode_primary_temp(p, end, map, false);
+	return decode_pg_mapping(p, end, &map->primary_temp,
+				 __decode_primary_temp, false);
 }
 
 static int decode_new_primary_temp(void **p, void *end,
 				   struct ceph_osdmap *map)
 {
-	return __decode_primary_temp(p, end, map, true);
+	return decode_pg_mapping(p, end, &map->primary_temp,
+				 __decode_primary_temp, true);
 }
 
 u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
-- 
GitLab


From ab75144be08cfc1d80f49e9c37970fcadb1215a2 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:17 +0200
Subject: [PATCH 0917/1958] libceph: kill __{insert,lookup,remove}_pg_mapping()

Switch to DEFINE_RB_FUNCS2-generated {insert,lookup,erase}_pg_mapping().

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 87 ++++++++---------------------------------------
 1 file changed, 15 insertions(+), 72 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 41b380ac68acd..4237477140170 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -457,69 +457,8 @@ static void free_pg_mapping(struct ceph_pg_mapping *pg)
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds) and primary_temp (explicit primary setting)
  */
-static int __insert_pg_mapping(struct ceph_pg_mapping *new,
-			       struct rb_root *root)
-{
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct ceph_pg_mapping *pg = NULL;
-	int c;
-
-	dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
-	while (*p) {
-		parent = *p;
-		pg = rb_entry(parent, struct ceph_pg_mapping, node);
-		c = ceph_pg_compare(&new->pgid, &pg->pgid);
-		if (c < 0)
-			p = &(*p)->rb_left;
-		else if (c > 0)
-			p = &(*p)->rb_right;
-		else
-			return -EEXIST;
-	}
-
-	rb_link_node(&new->node, parent, p);
-	rb_insert_color(&new->node, root);
-	return 0;
-}
-
-static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
-						   const struct ceph_pg *pgid)
-{
-	struct rb_node *n = root->rb_node;
-	struct ceph_pg_mapping *pg;
-	int c;
-
-	while (n) {
-		pg = rb_entry(n, struct ceph_pg_mapping, node);
-		c = ceph_pg_compare(pgid, &pg->pgid);
-		if (c < 0) {
-			n = n->rb_left;
-		} else if (c > 0) {
-			n = n->rb_right;
-		} else {
-			dout("__lookup_pg_mapping %lld.%x got %p\n",
-			     pgid->pool, pgid->seed, pg);
-			return pg;
-		}
-	}
-	return NULL;
-}
-
-static int __remove_pg_mapping(struct rb_root *root, const struct ceph_pg *pgid)
-{
-	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
-
-	if (pg) {
-		dout("__remove_pg_mapping %lld.%x %p\n", pgid->pool, pgid->seed,
-		     pg);
-		rb_erase(&pg->node, root);
-		kfree(pg);
-		return 0;
-	}
-	dout("__remove_pg_mapping %lld.%x dne\n", pgid->pool, pgid->seed);
-	return -ENOENT;
-}
+DEFINE_RB_FUNCS2(pg_mapping, struct ceph_pg_mapping, pgid, ceph_pg_compare,
+		 RB_BYPTR, const struct ceph_pg *, node)
 
 /*
  * rbtree of pg pool info
@@ -829,15 +768,15 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
 		struct ceph_pg_mapping *pg =
 			rb_entry(rb_first(&map->pg_temp),
 				 struct ceph_pg_mapping, node);
-		rb_erase(&pg->node, &map->pg_temp);
-		kfree(pg);
+		erase_pg_mapping(&map->pg_temp, pg);
+		free_pg_mapping(pg);
 	}
 	while (!RB_EMPTY_ROOT(&map->primary_temp)) {
 		struct ceph_pg_mapping *pg =
 			rb_entry(rb_first(&map->primary_temp),
 				 struct ceph_pg_mapping, node);
-		rb_erase(&pg->node, &map->primary_temp);
-		kfree(pg);
+		erase_pg_mapping(&map->primary_temp, pg);
+		free_pg_mapping(pg);
 	}
 	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
 		struct ceph_pg_pool_info *pi =
@@ -1055,8 +994,12 @@ static int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
 		if (ret)
 			return ret;
 
-		ret = __remove_pg_mapping(mapping_root, &pgid);
-		WARN_ON(!incremental && ret != -ENOENT);
+		pg = lookup_pg_mapping(mapping_root, &pgid);
+		if (pg) {
+			WARN_ON(!incremental);
+			erase_pg_mapping(mapping_root, pg);
+			free_pg_mapping(pg);
+		}
 
 		if (fn) {
 			pg = fn(p, end, incremental);
@@ -1065,7 +1008,7 @@ static int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
 
 			if (pg) {
 				pg->pgid = pgid; /* struct */
-				__insert_pg_mapping(pg, mapping_root);
+				insert_pg_mapping(mapping_root, pg);
 			}
 		}
 	}
@@ -2242,7 +2185,7 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
 	ceph_osds_init(temp);
 
 	/* pg_temp? */
-	pg = __lookup_pg_mapping(&osdmap->pg_temp, &pgid);
+	pg = lookup_pg_mapping(&osdmap->pg_temp, &pgid);
 	if (pg) {
 		for (i = 0; i < pg->pg_temp.len; i++) {
 			if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
@@ -2265,7 +2208,7 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
 	}
 
 	/* primary_temp? */
-	pg = __lookup_pg_mapping(&osdmap->primary_temp, &pgid);
+	pg = lookup_pg_mapping(&osdmap->primary_temp, &pgid);
 	if (pg)
 		temp->primary = pg->primary_temp.osd;
 }
-- 
GitLab


From 278b1d709c6acc6f7d138fed775c76695b068e43 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:17 +0200
Subject: [PATCH 0918/1958] libceph: ceph_decode_skip_* helpers

Some of these won't be as efficient as they could be (e.g.
ceph_decode_skip_set(... 32 ...) could advance by len * sizeof(u32)
once instead of advancing by sizeof(u32) len times), but that's fine
and not worth a bunch of extra macro code.

Replace skip_name_map() with ceph_decode_skip_map as an example.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h | 60 +++++++++++++++++++++++++++++++++++++
 net/ceph/osdmap.c           | 25 ++--------------
 2 files changed, 63 insertions(+), 22 deletions(-)

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index f990f2cc907a4..14af9b70d3017 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -132,6 +132,66 @@ static inline char *ceph_extract_encoded_string(void **p, void *end,
 	return ERR_PTR(-ERANGE);
 }
 
+/*
+ * skip helpers
+ */
+#define ceph_decode_skip_n(p, end, n, bad)			\
+	do {							\
+		ceph_decode_need(p, end, n, bad);		\
+                *p += n;					\
+	} while (0)
+
+#define ceph_decode_skip_64(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u64), bad)
+
+#define ceph_decode_skip_32(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u32), bad)
+
+#define ceph_decode_skip_16(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u16), bad)
+
+#define ceph_decode_skip_8(p, end, bad)				\
+ceph_decode_skip_n(p, end, sizeof(u8), bad)
+
+#define ceph_decode_skip_string(p, end, bad)			\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		ceph_decode_skip_n(p, end, len, bad);		\
+	} while (0)
+
+#define ceph_decode_skip_set(p, end, type, bad)			\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--)					\
+			ceph_decode_skip_##type(p, end, bad);	\
+	} while (0)
+
+#define ceph_decode_skip_map(p, end, ktype, vtype, bad)		\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--) {					\
+			ceph_decode_skip_##ktype(p, end, bad);	\
+			ceph_decode_skip_##vtype(p, end, bad);	\
+		}						\
+	} while (0)
+
+#define ceph_decode_skip_map_of_map(p, end, ktype1, ktype2, vtype2, bad) \
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--) {					\
+			ceph_decode_skip_##ktype1(p, end, bad);	\
+			ceph_decode_skip_map(p, end, ktype2, vtype2, bad); \
+		}						\
+	} while (0)
+
 /*
  * struct ceph_timespec <-> struct timespec
  */
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4237477140170..f6d561edd5116 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -138,21 +138,6 @@ static int crush_decode_straw2_bucket(void **p, void *end,
 	return -EINVAL;
 }
 
-static int skip_name_map(void **p, void *end)
-{
-        int len;
-        ceph_decode_32_safe(p, end, len ,bad);
-        while (len--) {
-                int strlen;
-                *p += sizeof(u32);
-                ceph_decode_32_safe(p, end, strlen, bad);
-                *p += strlen;
-}
-        return 0;
-bad:
-        return -EINVAL;
-}
-
 static void crush_finalize(struct crush_map *c)
 {
 	__s32 b;
@@ -187,7 +172,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	void **p = &pbyval;
 	void *start = pbyval;
 	u32 magic;
-	u32 num_name_maps;
 
 	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
 
@@ -353,12 +337,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		}
 	}
 
-	/* ignore trailing name maps. */
-        for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
-                err = skip_name_map(p, end);
-                if (err < 0)
-                        goto done;
-        }
+	ceph_decode_skip_map(p, end, 32, string, bad); /* type_map */
+	ceph_decode_skip_map(p, end, 32, string, bad); /* name_map */
+	ceph_decode_skip_map(p, end, 32, string, bad); /* rule_name_map */
 
         /* tunables */
         ceph_decode_need(p, end, 3*sizeof(u32), done);
-- 
GitLab


From 6f428df47dae2c8ea31fd4c0c74a12a8a5ac2d1d Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:18 +0200
Subject: [PATCH 0919/1958] libceph: pg_upmap[_items] infrastructure

pg_temp and pg_upmap encodings are the same (PG -> array of osds),
except for the incremental remove: it's an empty mapping in new_pg_temp
for pg_temp and a separate old_pg_upmap set for pg_upmap.  (This isn't
to allow for empty pg_upmap mappings -- apparently, pg_temp just wasn't
looked at as an example for pg_upmap encoding.)

Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap.
__decode_pg_temp() stores into pg_temp union member, but since pg_upmap
union member is identical, reading through pg_upmap later is OK.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h |  10 ++-
 net/ceph/debugfs.c          |  23 ++++++
 net/ceph/osdmap.c           | 135 +++++++++++++++++++++++++++++++++++-
 3 files changed, 164 insertions(+), 4 deletions(-)

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index fe6d189bdd307..c612cff81f5cb 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -143,10 +143,14 @@ struct ceph_pg_mapping {
 		struct {
 			int len;
 			int osds[];
-		} pg_temp;
+		} pg_temp, pg_upmap;
 		struct {
 			int osd;
 		} primary_temp;
+		struct {
+			int len;
+			int from_to[][2];
+		} pg_upmap_items;
 	};
 };
 
@@ -165,6 +169,10 @@ struct ceph_osdmap {
 	struct rb_root pg_temp;
 	struct rb_root primary_temp;
 
+	/* remap (post-CRUSH, pre-up) */
+	struct rb_root pg_upmap;	/* PG := raw set */
+	struct rb_root pg_upmap_items;	/* from -> to within raw set */
+
 	u32 *osd_primary_affinity;
 
 	struct rb_root pg_pools;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 017f15c575f80..4f57d5bcaba2b 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -104,6 +104,29 @@ static int osdmap_show(struct seq_file *s, void *p)
 		seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
 			   pg->pgid.seed, pg->primary_temp.osd);
 	}
+	for (n = rb_first(&map->pg_upmap); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "pg_upmap %llu.%x [", pg->pgid.pool,
+			   pg->pgid.seed);
+		for (i = 0; i < pg->pg_upmap.len; i++)
+			seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+				   pg->pg_upmap.osds[i]);
+		seq_printf(s, "]\n");
+	}
+	for (n = rb_first(&map->pg_upmap_items); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "pg_upmap_items %llu.%x [", pg->pgid.pool,
+			   pg->pgid.seed);
+		for (i = 0; i < pg->pg_upmap_items.len; i++)
+			seq_printf(s, "%s%d->%d", (i == 0 ? "" : ","),
+				   pg->pg_upmap_items.from_to[i][0],
+				   pg->pg_upmap_items.from_to[i][1]);
+		seq_printf(s, "]\n");
+	}
 
 	up_read(&osdc->lock);
 	return 0;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f6d561edd5116..a3f60d0bfd13e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -735,6 +735,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
 	map->pool_max = -1;
 	map->pg_temp = RB_ROOT;
 	map->primary_temp = RB_ROOT;
+	map->pg_upmap = RB_ROOT;
+	map->pg_upmap_items = RB_ROOT;
 	mutex_init(&map->crush_workspace_mutex);
 
 	return map;
@@ -759,6 +761,20 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
 		erase_pg_mapping(&map->primary_temp, pg);
 		free_pg_mapping(pg);
 	}
+	while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_upmap),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_upmap);
+		kfree(pg);
+	}
+	while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_upmap_items),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_upmap_items);
+		kfree(pg);
+	}
 	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
 		struct ceph_pg_pool_info *pi =
 			rb_entry(rb_first(&map->pg_pools),
@@ -1161,6 +1177,75 @@ static int decode_new_primary_affinity(void **p, void *end,
 	return -EINVAL;
 }
 
+static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
+						 bool __unused)
+{
+	return __decode_pg_temp(p, end, false);
+}
+
+static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
+				 false);
+}
+
+static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
+				 true);
+}
+
+static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
+}
+
+static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
+						       bool __unused)
+{
+	struct ceph_pg_mapping *pg;
+	u32 len, i;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
+		return ERR_PTR(-EINVAL);
+
+	ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
+	pg = kzalloc(sizeof(*pg) + 2 * len * sizeof(u32), GFP_NOIO);
+	if (!pg)
+		return ERR_PTR(-ENOMEM);
+
+	pg->pg_upmap_items.len = len;
+	for (i = 0; i < len; i++) {
+		pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
+		pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
+	}
+
+	return pg;
+
+e_inval:
+	return ERR_PTR(-EINVAL);
+}
+
+static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap_items,
+				 __decode_pg_upmap_items, false);
+}
+
+static int decode_new_pg_upmap_items(void **p, void *end,
+				     struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap_items,
+				 __decode_pg_upmap_items, true);
+}
+
+static int decode_old_pg_upmap_items(void **p, void *end,
+				     struct ceph_osdmap *map)
+{
+	return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
+}
+
 /*
  * decode a full map.
  */
@@ -1250,9 +1335,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 		if (err)
 			goto bad;
 	} else {
-		/* XXX can this happen? */
-		kfree(map->osd_primary_affinity);
-		map->osd_primary_affinity = NULL;
+		WARN_ON(map->osd_primary_affinity);
 	}
 
 	/* crush */
@@ -1261,6 +1344,26 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 	if (err)
 		goto bad;
 
+	*p += len;
+	if (struct_v >= 3) {
+		/* erasure_code_profiles */
+		ceph_decode_skip_map_of_map(p, end, string, string, string,
+					    bad);
+	}
+
+	if (struct_v >= 4) {
+		err = decode_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+	} else {
+		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
+		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
+	}
+
 	/* ignore the rest */
 	*p = end;
 
@@ -1520,6 +1623,32 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 			goto bad;
 	}
 
+	if (struct_v >= 3) {
+		/* new_erasure_code_profiles */
+		ceph_decode_skip_map_of_map(p, end, string, string, string,
+					    bad);
+		/* old_erasure_code_profiles */
+		ceph_decode_skip_set(p, end, string, bad);
+	}
+
+	if (struct_v >= 4) {
+		err = decode_new_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_old_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_new_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_old_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+	}
+
 	/* ignore the rest */
 	*p = end;
 
-- 
GitLab


From 463bb8da5042c165bf50ae2688d251c5af26f3cf Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:18 +0200
Subject: [PATCH 0920/1958] libceph: compute actual pgid in
 ceph_pg_to_up_acting_osds()

Move raw_pg_to_pg() call out of get_temp_osds() and into
ceph_pg_to_up_acting_osds(), for upcoming apply_upmap().

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index a3f60d0bfd13e..245c8025ab446 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2284,18 +2284,16 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap,
  */
 static void get_temp_osds(struct ceph_osdmap *osdmap,
 			  struct ceph_pg_pool_info *pi,
-			  const struct ceph_pg *raw_pgid,
+			  const struct ceph_pg *pgid,
 			  struct ceph_osds *temp)
 {
-	struct ceph_pg pgid;
 	struct ceph_pg_mapping *pg;
 	int i;
 
-	raw_pg_to_pg(pi, raw_pgid, &pgid);
 	ceph_osds_init(temp);
 
 	/* pg_temp? */
-	pg = lookup_pg_mapping(&osdmap->pg_temp, &pgid);
+	pg = lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
 		for (i = 0; i < pg->pg_temp.len; i++) {
 			if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
@@ -2318,7 +2316,7 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
 	}
 
 	/* primary_temp? */
-	pg = lookup_pg_mapping(&osdmap->primary_temp, &pgid);
+	pg = lookup_pg_mapping(&osdmap->primary_temp, pgid);
 	if (pg)
 		temp->primary = pg->primary_temp.osd;
 }
@@ -2336,14 +2334,16 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting)
 {
+	struct ceph_pg pgid;
 	u32 pps;
 
 	WARN_ON(pi->id != raw_pgid->pool);
+	raw_pg_to_pg(pi, raw_pgid, &pgid);
 
 	pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
 	raw_to_up_osds(osdmap, pi, up);
 	apply_primary_affinity(osdmap, pi, pps, up);
-	get_temp_osds(osdmap, pi, raw_pgid, acting);
+	get_temp_osds(osdmap, pi, &pgid, acting);
 	if (!acting->size) {
 		memcpy(acting->osds, up->osds, up->size * sizeof(up->osds[0]));
 		acting->size = up->size;
-- 
GitLab


From 1c2e7b451b889bead46cef410a737d1767cd6f0b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 21 Jun 2017 17:27:18 +0200
Subject: [PATCH 0921/1958] libceph: apply_upmap()

Previously, pg_to_raw_osds() didn't filter for existent OSDs because
raw_to_up_osds() would filter for "up" ("up" is predicated on "exists")
and raw_to_up_osds() was called directly after pg_to_raw_osds().  Now,
with apply_upmap() call in there, nonexistent OSDs in pg_to_raw_osds()
output can affect apply_upmap().  Introduce remove_nonexistent_osds()
to deal with that.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 95 insertions(+), 2 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 245c8025ab446..93baa69407c5f 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2117,9 +2117,37 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 	return r;
 }
 
+static void remove_nonexistent_osds(struct ceph_osdmap *osdmap,
+				    struct ceph_pg_pool_info *pi,
+				    struct ceph_osds *set)
+{
+	int i;
+
+	if (ceph_can_shift_osds(pi)) {
+		int removed = 0;
+
+		/* shift left */
+		for (i = 0; i < set->size; i++) {
+			if (!ceph_osd_exists(osdmap, set->osds[i])) {
+				removed++;
+				continue;
+			}
+			if (removed)
+				set->osds[i - removed] = set->osds[i];
+		}
+		set->size -= removed;
+	} else {
+		/* set dne devices to NONE */
+		for (i = 0; i < set->size; i++) {
+			if (!ceph_osd_exists(osdmap, set->osds[i]))
+				set->osds[i] = CRUSH_ITEM_NONE;
+		}
+	}
+}
+
 /*
- * Calculate raw set (CRUSH output) for given PG.  The result may
- * contain nonexistent OSDs.  ->primary is undefined for a raw set.
+ * Calculate raw set (CRUSH output) for given PG and filter out
+ * nonexistent OSDs.  ->primary is undefined for a raw set.
  *
  * Placement seed (CRUSH input) is returned through @ppps.
  */
@@ -2162,6 +2190,70 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
 	}
 
 	raw->size = len;
+	remove_nonexistent_osds(osdmap, pi, raw);
+}
+
+/* apply pg_upmap[_items] mappings */
+static void apply_upmap(struct ceph_osdmap *osdmap,
+			const struct ceph_pg *pgid,
+			struct ceph_osds *raw)
+{
+	struct ceph_pg_mapping *pg;
+	int i, j;
+
+	pg = lookup_pg_mapping(&osdmap->pg_upmap, pgid);
+	if (pg) {
+		/* make sure targets aren't marked out */
+		for (i = 0; i < pg->pg_upmap.len; i++) {
+			int osd = pg->pg_upmap.osds[i];
+
+			if (osd != CRUSH_ITEM_NONE &&
+			    osd < osdmap->max_osd &&
+			    osdmap->osd_weight[osd] == 0) {
+				/* reject/ignore explicit mapping */
+				return;
+			}
+		}
+		for (i = 0; i < pg->pg_upmap.len; i++)
+			raw->osds[i] = pg->pg_upmap.osds[i];
+		raw->size = pg->pg_upmap.len;
+		return;
+	}
+
+	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
+	if (pg) {
+		/*
+		 * Note: this approach does not allow a bidirectional swap,
+		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
+		 */
+		for (i = 0; i < pg->pg_upmap_items.len; i++) {
+			int from = pg->pg_upmap_items.from_to[i][0];
+			int to = pg->pg_upmap_items.from_to[i][1];
+			int pos = -1;
+			bool exists = false;
+
+			/* make sure replacement doesn't already appear */
+			for (j = 0; j < raw->size; j++) {
+				int osd = raw->osds[j];
+
+				if (osd == to) {
+					exists = true;
+					break;
+				}
+				/* ignore mapping if target is marked out */
+				if (osd == from && pos < 0 &&
+				    !(to != CRUSH_ITEM_NONE &&
+				      to < osdmap->max_osd &&
+				      osdmap->osd_weight[to] == 0)) {
+					pos = j;
+				}
+			}
+			if (!exists && pos >= 0) {
+				raw->osds[pos] = to;
+				return;
+			}
+		}
+	}
 }
 
 /*
@@ -2341,6 +2433,7 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
 	raw_pg_to_pg(pi, raw_pgid, &pgid);
 
 	pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
+	apply_upmap(osdmap, &pgid, up);
 	raw_to_up_osds(osdmap, pi, up);
 	apply_primary_affinity(osdmap, pi, pps, up);
 	get_temp_osds(osdmap, pi, &pgid, acting);
-- 
GitLab


From 069f3222ca96acfe8c59937e98c401bda5475b48 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:05 +0200
Subject: [PATCH 0922/1958] crush: implement weight and id overrides for straw2

bucket_straw2_choose needs to use weights that may be different from
weight_items. For instance to compensate for an uneven distribution
caused by a low number of values. Or to fix the probability biais
introduced by conditional probabilities (see
http://tracker.ceph.com/issues/15653 for more information).

We introduce a weight_set for each straw2 bucket to set the desired
weight for a given item at a given position. The weight of a given item
when picking the first replica (first position) may be different from
the weight the second replica (second position). For instance the weight
matrix for a given bucket containing items 3, 7 and 13 could be as
follows:

          position 0   position 1

item 3     0x10000      0x100000
item 7     0x40000       0x10000
item 13    0x40000       0x10000

When crush_do_rule picks the first of two replicas (position 0), item 7,
3 are four times more likely to be choosen by bucket_straw2_choose than
item 13. When choosing the second replica (position 1), item 3 is ten
times more likely to be choosen than item 7, 13.

By default the weight_set of each bucket exactly matches the content of
item_weights for each position to ensure backward compatibility.

bucket_straw2_choose compares items by using their id. The same ids are
also used to index buckets and they must be unique. For each item in a
bucket an array of ids can be provided for placement purposes and they
are used instead of the ids. If no replacement ids are provided, the
legacy behavior is preserved.

Reflects ceph.git commit 19537a450fd5c5a0bb8b7830947507a76db2ceca.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/crush/crush.h  | 58 ++++++++++++++++++++++++++++
 include/linux/crush/mapper.h |  9 ++---
 net/ceph/crush/mapper.c      | 74 +++++++++++++++++++++++++++---------
 net/ceph/osdmap.c            |  2 +-
 4 files changed, 119 insertions(+), 24 deletions(-)

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index fbecbd089d75f..d8676e56fa235 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -137,6 +137,64 @@ struct crush_bucket {
 
 };
 
+/** @ingroup API
+ *
+ * Replacement weights for each item in a bucket. The size of the
+ * array must be exactly the size of the straw2 bucket, just as the
+ * item_weights array.
+ *
+ */
+struct crush_weight_set {
+	__u32 *weights; /*!< 16.16 fixed point weights
+                             in the same order as items */
+	__u32 size;     /*!< size of the __weights__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for a given straw2 bucket, for
+ * placement purposes.
+ *
+ * When crush_do_rule() chooses the Nth item from a straw2 bucket, the
+ * replacement weights found at __weight_set[N]__ are used instead of
+ * the weights from __item_weights__. If __N__ is greater than
+ * __weight_set_size__, the weights found at __weight_set_size-1__ are
+ * used instead. For instance if __weight_set__ is:
+ *
+ *    [ [ 0x10000, 0x20000 ],   // position 0
+ *      [ 0x20000, 0x40000 ] ]  // position 1
+ *
+ * choosing the 0th item will use position 0 weights [ 0x10000, 0x20000 ]
+ * choosing the 1th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * choosing the 2th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * etc.
+ *
+ */
+struct crush_choose_arg {
+	__s32 *ids;            /*!< values to use instead of items */
+	__u32 ids_size;        /*!< size of the __ids__ array */
+	struct crush_weight_set *weight_set; /*!< weight replacements for
+                                                  a given position */
+	__u32 weight_set_size; /*!< size of the __weight_set__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for each bucket in the crushmap. The
+ * __size__ of the __args__ array must be exactly the same as the
+ * __map->max_buckets__.
+ *
+ * The __crush_choose_arg__ at index N will be used when choosing
+ * an item from the bucket __map->buckets[N]__ bucket, provided it
+ * is a straw2 bucket.
+ *
+ */
+struct crush_choose_arg_map {
+	struct crush_choose_arg *args; /*!< replacement for each bucket
+                                            in the crushmap */
+	__u32 size;                    /*!< size of the __args__ array */
+};
+
 struct crush_bucket_uniform {
 	struct crush_bucket h;
 	__u32 item_weight;  /* 16-bit fixed point; all items equally weighted */
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c95e19e1ff11c..141edabb947e3 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -11,11 +11,10 @@
 #include "crush.h"
 
 extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
-extern int crush_do_rule(const struct crush_map *map,
-			 int ruleno,
-			 int x, int *result, int result_max,
-			 const __u32 *weights, int weight_max,
-			 void *cwin);
+int crush_do_rule(const struct crush_map *map,
+		  int ruleno, int x, int *result, int result_max,
+		  const __u32 *weight, int weight_max,
+		  void *cwin, const struct crush_choose_arg *choose_args);
 
 /*
  * Returns the exact amount of workspace that will need to be used
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b5cd8c21bfdfb..0b2646a9cc509 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -302,19 +302,42 @@ static __u64 crush_ln(unsigned int xin)
  *
  */
 
+static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
+				     const struct crush_choose_arg *arg,
+				     int position)
+{
+	if (!arg || !arg->weight_set || arg->weight_set_size == 0)
+		return bucket->item_weights;
+
+	if (position >= arg->weight_set_size)
+		position = arg->weight_set_size - 1;
+	return arg->weight_set[position].weights;
+}
+
+static __s32 *get_choose_arg_ids(const struct crush_bucket_straw2 *bucket,
+				 const struct crush_choose_arg *arg)
+{
+	if (!arg || !arg->ids)
+		return bucket->h.items;
+
+	return arg->ids;
+}
+
 static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
-				int x, int r)
+				int x, int r,
+				const struct crush_choose_arg *arg,
+				int position)
 {
 	unsigned int i, high = 0;
 	unsigned int u;
-	unsigned int w;
 	__s64 ln, draw, high_draw = 0;
+	__u32 *weights = get_choose_arg_weights(bucket, arg, position);
+	__s32 *ids = get_choose_arg_ids(bucket, arg);
 
 	for (i = 0; i < bucket->h.size; i++) {
-		w = bucket->item_weights[i];
-		if (w) {
-			u = crush_hash32_3(bucket->h.hash, x,
-					   bucket->h.items[i], r);
+		dprintk("weight 0x%x item %d\n", weights[i], ids[i]);
+		if (weights[i]) {
+			u = crush_hash32_3(bucket->h.hash, x, ids[i], r);
 			u &= 0xffff;
 
 			/*
@@ -335,7 +358,7 @@ static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
 			 * weight means a larger (less negative) value
 			 * for draw.
 			 */
-			draw = div64_s64(ln, w);
+			draw = div64_s64(ln, weights[i]);
 		} else {
 			draw = S64_MIN;
 		}
@@ -352,7 +375,9 @@ static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
 
 static int crush_bucket_choose(const struct crush_bucket *in,
 			       struct crush_work_bucket *work,
-			       int x, int r)
+			       int x, int r,
+			       const struct crush_choose_arg *arg,
+			       int position)
 {
 	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
 	BUG_ON(in->size == 0);
@@ -374,7 +399,7 @@ static int crush_bucket_choose(const struct crush_bucket *in,
 	case CRUSH_BUCKET_STRAW2:
 		return bucket_straw2_choose(
 			(const struct crush_bucket_straw2 *)in,
-			x, r);
+			x, r, arg, position);
 	default:
 		dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
 		return in->items[0];
@@ -436,7 +461,8 @@ static int crush_choose_firstn(const struct crush_map *map,
 			       unsigned int vary_r,
 			       unsigned int stable,
 			       int *out2,
-			       int parent_r)
+			       int parent_r,
+			       const struct crush_choose_arg *choose_args)
 {
 	int rep;
 	unsigned int ftotal, flocal;
@@ -486,7 +512,10 @@ static int crush_choose_firstn(const struct crush_map *map,
 				else
 					item = crush_bucket_choose(
 						in, work->work[-1-in->id],
-						x, r);
+						x, r,
+						(choose_args ?
+						 &choose_args[-1-in->id] : 0),
+						outpos);
 				if (item >= map->max_devices) {
 					dprintk("   bad item %d\n", item);
 					skip_rep = 1;
@@ -543,7 +572,8 @@ static int crush_choose_firstn(const struct crush_map *map,
 							    vary_r,
 							    stable,
 							    NULL,
-							    sub_r) <= outpos)
+							    sub_r,
+							    choose_args) <= outpos)
 							/* didn't get leaf */
 							reject = 1;
 					} else {
@@ -620,7 +650,8 @@ static void crush_choose_indep(const struct crush_map *map,
 			       unsigned int recurse_tries,
 			       int recurse_to_leaf,
 			       int *out2,
-			       int parent_r)
+			       int parent_r,
+			       const struct crush_choose_arg *choose_args)
 {
 	const struct crush_bucket *in = bucket;
 	int endpos = outpos + left;
@@ -692,7 +723,10 @@ static void crush_choose_indep(const struct crush_map *map,
 
 				item = crush_bucket_choose(
 					in, work->work[-1-in->id],
-					x, r);
+					x, r,
+					(choose_args ?
+					 &choose_args[-1-in->id] : 0),
+					outpos);
 				if (item >= map->max_devices) {
 					dprintk("   bad item %d\n", item);
 					out[rep] = CRUSH_ITEM_NONE;
@@ -746,7 +780,8 @@ static void crush_choose_indep(const struct crush_map *map,
 							x, 1, numrep, 0,
 							out2, rep,
 							recurse_tries, 0,
-							0, NULL, r);
+							0, NULL, r,
+							choose_args);
 						if (out2[rep] == CRUSH_ITEM_NONE) {
 							/* placed nothing; no leaf */
 							break;
@@ -854,11 +889,12 @@ void crush_init_workspace(const struct crush_map *map, void *v)
  * @weight: weight vector (for map leaves)
  * @weight_max: size of weight vector
  * @cwin: pointer to at least crush_work_size() bytes of memory
+ * @choose_args: weights and ids for each known bucket
  */
 int crush_do_rule(const struct crush_map *map,
 		  int ruleno, int x, int *result, int result_max,
 		  const __u32 *weight, int weight_max,
-		  void *cwin)
+		  void *cwin, const struct crush_choose_arg *choose_args)
 {
 	int result_len;
 	struct crush_work *cw = cwin;
@@ -1013,7 +1049,8 @@ int crush_do_rule(const struct crush_map *map,
 						vary_r,
 						stable,
 						c+osize,
-						0);
+						0,
+						choose_args);
 				} else {
 					out_size = ((numrep < (result_max-osize)) ?
 						    numrep : (result_max-osize));
@@ -1030,7 +1067,8 @@ int crush_do_rule(const struct crush_map *map,
 						   choose_leaf_tries : 1,
 						recurse_to_leaf,
 						c+osize,
-						0);
+						0,
+						choose_args);
 					osize += out_size;
 				}
 			}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 93baa69407c5f..9da0ee61aca55 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2111,7 +2111,7 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 
 	mutex_lock(&map->crush_workspace_mutex);
 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
-			  weight, weight_max, map->crush_workspace);
+			  weight, weight_max, map->crush_workspace, NULL);
 	mutex_unlock(&map->crush_workspace_mutex);
 
 	return r;
-- 
GitLab


From 5cf9c4a9959b6273675310d14a834ef14fbca37c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:05 +0200
Subject: [PATCH 0923/1958] libceph, crush: per-pool crush_choose_arg_map for
 crush_do_rule()

If there is no crush_choose_arg_map for a given pool, a NULL pointer is
passed to preserve existing crush_do_rule() behavior.

Reflects ceph.git commits 55fb91d64071552ea1bc65ab4ea84d3c8b73ab4b,
                          dbe36e08be00c6519a8c89718dd47b0219c20516.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/crush/crush.h |   8 ++
 net/ceph/crush/crush.c      |   3 +
 net/ceph/osdmap.c           | 200 +++++++++++++++++++++++++++++++++++-
 3 files changed, 208 insertions(+), 3 deletions(-)

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index d8676e56fa235..92e165d417a61 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -2,6 +2,7 @@
 #define CEPH_CRUSH_CRUSH_H
 
 #ifdef __KERNEL__
+# include <linux/rbtree.h>
 # include <linux/types.h>
 #else
 # include "crush_compat.h"
@@ -190,6 +191,10 @@ struct crush_choose_arg {
  *
  */
 struct crush_choose_arg_map {
+#ifdef __KERNEL__
+	struct rb_node node;
+	u64 choose_args_index;
+#endif
 	struct crush_choose_arg *args; /*!< replacement for each bucket
                                             in the crushmap */
 	__u32 size;                    /*!< size of the __args__ array */
@@ -294,6 +299,9 @@ struct crush_map {
 	__u32 allowed_bucket_algs;
 
 	__u32 *choose_tries;
+#else
+	/* CrushWrapper::choose_args */
+	struct rb_root choose_args;
 #endif
 };
 
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 5bf94c04f6454..4b428f46a8ca4 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,6 +1,7 @@
 #ifdef __KERNEL__
 # include <linux/slab.h>
 # include <linux/crush/crush.h>
+void clear_choose_args(struct crush_map *c);
 #else
 # include "crush_compat.h"
 # include "crush.h"
@@ -127,6 +128,8 @@ void crush_destroy(struct crush_map *map)
 
 #ifndef __KERNEL__
 	kfree(map->choose_tries);
+#else
+	clear_choose_args(map);
 #endif
 	kfree(map);
 }
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9da0ee61aca55..f630d10722997 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -138,6 +138,177 @@ static int crush_decode_straw2_bucket(void **p, void *end,
 	return -EINVAL;
 }
 
+static struct crush_choose_arg_map *alloc_choose_arg_map(void)
+{
+	struct crush_choose_arg_map *arg_map;
+
+	arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO);
+	if (!arg_map)
+		return NULL;
+
+	RB_CLEAR_NODE(&arg_map->node);
+	return arg_map;
+}
+
+static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
+{
+	if (arg_map) {
+		int i, j;
+
+		WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
+
+		for (i = 0; i < arg_map->size; i++) {
+			struct crush_choose_arg *arg = &arg_map->args[i];
+
+			for (j = 0; j < arg->weight_set_size; j++)
+				kfree(arg->weight_set[j].weights);
+			kfree(arg->weight_set);
+			kfree(arg->ids);
+		}
+		kfree(arg_map->args);
+		kfree(arg_map);
+	}
+}
+
+DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
+		node);
+
+void clear_choose_args(struct crush_map *c)
+{
+	while (!RB_EMPTY_ROOT(&c->choose_args)) {
+		struct crush_choose_arg_map *arg_map =
+		    rb_entry(rb_first(&c->choose_args),
+			     struct crush_choose_arg_map, node);
+
+		erase_choose_arg_map(&c->choose_args, arg_map);
+		free_choose_arg_map(arg_map);
+	}
+}
+
+static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
+{
+	u32 *a = NULL;
+	u32 len;
+	int ret;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	if (len) {
+		u32 i;
+
+		a = kmalloc_array(len, sizeof(u32), GFP_NOIO);
+		if (!a) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_need(p, end, len * sizeof(u32), e_inval);
+		for (i = 0; i < len; i++)
+			a[i] = ceph_decode_32(p);
+	}
+
+	*plen = len;
+	return a;
+
+e_inval:
+	ret = -EINVAL;
+fail:
+	kfree(a);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Assumes @arg is zero-initialized.
+ */
+static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
+{
+	int ret;
+
+	ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval);
+	if (arg->weight_set_size) {
+		u32 i;
+
+		arg->weight_set = kmalloc_array(arg->weight_set_size,
+						sizeof(*arg->weight_set),
+						GFP_NOIO);
+		if (!arg->weight_set)
+			return -ENOMEM;
+
+		for (i = 0; i < arg->weight_set_size; i++) {
+			struct crush_weight_set *w = &arg->weight_set[i];
+
+			w->weights = decode_array_32_alloc(p, end, &w->size);
+			if (IS_ERR(w->weights)) {
+				ret = PTR_ERR(w->weights);
+				w->weights = NULL;
+				return ret;
+			}
+		}
+	}
+
+	arg->ids = decode_array_32_alloc(p, end, &arg->ids_size);
+	if (IS_ERR(arg->ids)) {
+		ret = PTR_ERR(arg->ids);
+		arg->ids = NULL;
+		return ret;
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int decode_choose_args(void **p, void *end, struct crush_map *c)
+{
+	struct crush_choose_arg_map *arg_map = NULL;
+	u32 num_choose_arg_maps, num_buckets;
+	int ret;
+
+	ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval);
+	while (num_choose_arg_maps--) {
+		arg_map = alloc_choose_arg_map();
+		if (!arg_map) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_64_safe(p, end, arg_map->choose_args_index,
+				    e_inval);
+		arg_map->size = c->max_buckets;
+		arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args),
+					GFP_NOIO);
+		if (!arg_map->args) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_32_safe(p, end, num_buckets, e_inval);
+		while (num_buckets--) {
+			struct crush_choose_arg *arg;
+			u32 bucket_index;
+
+			ceph_decode_32_safe(p, end, bucket_index, e_inval);
+			if (bucket_index >= arg_map->size)
+				goto e_inval;
+
+			arg = &arg_map->args[bucket_index];
+			ret = decode_choose_arg(p, end, arg);
+			if (ret)
+				goto fail;
+		}
+
+		insert_choose_arg_map(&c->choose_args, arg_map);
+	}
+
+	return 0;
+
+e_inval:
+	ret = -EINVAL;
+fail:
+	free_choose_arg_map(arg_map);
+	return ret;
+}
+
 static void crush_finalize(struct crush_map *c)
 {
 	__s32 b;
@@ -179,6 +350,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	if (c == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	c->choose_args = RB_ROOT;
+
         /* set tunables to default values */
         c->choose_local_tries = 2;
         c->choose_local_fallback_tries = 5;
@@ -372,6 +545,21 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	dout("crush decode tunable chooseleaf_stable = %d\n",
 	     c->chooseleaf_stable);
 
+	if (*p != end) {
+		/* class_map */
+		ceph_decode_skip_map(p, end, 32, 32, bad);
+		/* class_name */
+		ceph_decode_skip_map(p, end, 32, string, bad);
+		/* class_bucket */
+		ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad);
+	}
+
+	if (*p != end) {
+		err = decode_choose_args(p, end, c);
+		if (err)
+			goto bad;
+	}
+
 done:
 	crush_finalize(c);
 	dout("crush_decode success\n");
@@ -2103,15 +2291,21 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
 
 static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 		    int *result, int result_max,
-		    const __u32 *weight, int weight_max)
+		    const __u32 *weight, int weight_max,
+		    u64 choose_args_index)
 {
+	struct crush_choose_arg_map *arg_map;
 	int r;
 
 	BUG_ON(result_max > CEPH_PG_MAX_SIZE);
 
+	arg_map = lookup_choose_arg_map(&map->crush->choose_args,
+					choose_args_index);
+
 	mutex_lock(&map->crush_workspace_mutex);
 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
-			  weight, weight_max, map->crush_workspace, NULL);
+			  weight, weight_max, map->crush_workspace,
+			  arg_map ? arg_map->args : NULL);
 	mutex_unlock(&map->crush_workspace_mutex);
 
 	return r;
@@ -2181,7 +2375,7 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
 	}
 
 	len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
-		       osdmap->osd_weight, osdmap->max_osd);
+		       osdmap->osd_weight, osdmap->max_osd, pi->id);
 	if (len < 0) {
 		pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
 		       len, ruleno, pi->id, pi->crush_ruleset, pi->type,
-- 
GitLab


From b88ed8d84fbd1e652cc7a1f6e03550d2b4edf653 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:05 +0200
Subject: [PATCH 0924/1958] crush: crush_init_workspace starts with struct
 crush_work

It is not just a pointer to crush_work, it is the whole structure.
That is not a problem since it only contains a pointer. But it will
be a problem if new data members are added to crush_work.

Reflects ceph.git commit ee957dd431bfbeb6dadaf77764db8e0757417328.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/crush/mapper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 0b2646a9cc509..9887fce04219a 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -858,7 +858,7 @@ void crush_init_workspace(const struct crush_map *map, void *v)
 	 * set the pointer first and then reserve the space for it to
 	 * point to by incrementing the point.
 	 */
-	v += sizeof(struct crush_work *);
+	v += sizeof(struct crush_work);
 	w->work = v;
 	v += map->max_buckets * sizeof(struct crush_work_bucket *);
 	for (b = 0; b < map->max_buckets; ++b) {
-- 
GitLab


From 9eebe45c091e2dff22d4bd87360a624303148ed1 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:05 +0200
Subject: [PATCH 0925/1958] crush: remove an obsolete comment

Reflects ceph.git commit dca1ae1e0a6b02029c3a7f9dec4114972be26d50.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/crush/mapper.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 9887fce04219a..746b145bfd113 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -1004,11 +1004,6 @@ int crush_do_rule(const struct crush_map *map,
 
 			for (i = 0; i < wsize; i++) {
 				int bno;
-				/*
-				 * see CRUSH_N, CRUSH_N_MINUS macros.
-				 * basically, numrep <= 0 means relative to
-				 * the provided result_max
-				 */
 				numrep = curstep->arg1;
 				if (numrep <= 0) {
 					numrep += result_max;
-- 
GitLab


From 0bb05da2ec57163b7a25efef001ed8f52b18b070 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 22 Jun 2017 19:44:06 +0200
Subject: [PATCH 0926/1958] libceph: osd_state is 32 bits wide in luminous

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h |  4 ++--
 net/ceph/debugfs.c          |  2 +-
 net/ceph/osdmap.c           | 29 ++++++++++++++++++++---------
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index c612cff81f5cb..a0996cb9faedd 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -162,7 +162,7 @@ struct ceph_osdmap {
 	u32 flags;         /* CEPH_OSDMAP_* */
 
 	u32 max_osd;       /* size of osd_state, _offload, _addr arrays */
-	u8 *osd_state;     /* CEPH_OSD_* */
+	u32 *osd_state;    /* CEPH_OSD_* */
 	u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */
 	struct ceph_entity_addr *osd_addr;
 
@@ -203,7 +203,7 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd)
 	return !ceph_osd_is_up(map, osd);
 }
 
-extern char *ceph_osdmap_state_str(char *str, int len, int state);
+char *ceph_osdmap_state_str(char *str, int len, u32 state);
 extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
 
 static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 4f57d5bcaba2b..fa5233e0d01cc 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -77,7 +77,7 @@ static int osdmap_show(struct seq_file *s, void *p)
 	}
 	for (i = 0; i < map->max_osd; i++) {
 		struct ceph_entity_addr *addr = &map->osd_addr[i];
-		int state = map->osd_state[i];
+		u32 state = map->osd_state[i];
 		char sb[64];
 
 		seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f630d10722997..864789c5974e0 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -11,7 +11,7 @@
 #include <linux/crush/hash.h>
 #include <linux/crush/mapper.h>
 
-char *ceph_osdmap_state_str(char *str, int len, int state)
+char *ceph_osdmap_state_str(char *str, int len, u32 state)
 {
 	if (!len)
 		return str;
@@ -984,7 +984,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
  */
 static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
 {
-	u8 *state;
+	u32 *state;
 	u32 *weight;
 	struct ceph_entity_addr *addr;
 	int i;
@@ -1484,13 +1484,21 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 
 	/* osd_state, osd_weight, osd_addrs->client_addr */
 	ceph_decode_need(p, end, 3*sizeof(u32) +
-			 map->max_osd*(1 + sizeof(*map->osd_weight) +
+			 map->max_osd*((struct_v >= 5 ? sizeof(u32) :
+							sizeof(u8)) +
+				       sizeof(*map->osd_weight) +
 				       sizeof(*map->osd_addr)), e_inval);
 
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
 
-	ceph_decode_copy(p, map->osd_state, map->max_osd);
+	if (struct_v >= 5) {
+		for (i = 0; i < map->max_osd; i++)
+			map->osd_state[i] = ceph_decode_32(p);
+	} else {
+		for (i = 0; i < map->max_osd; i++)
+			map->osd_state[i] = ceph_decode_8(p);
+	}
 
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
@@ -1598,7 +1606,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
  *     new_up_client: { osd=6, addr=... } # set osd_state and addr
  *     new_state: { osd=6, xorstate=EXISTS } # clear osd_state
  */
-static int decode_new_up_state_weight(void **p, void *end,
+static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
 				      struct ceph_osdmap *map)
 {
 	void *new_up_client;
@@ -1614,7 +1622,7 @@ static int decode_new_up_state_weight(void **p, void *end,
 
 	new_state = *p;
 	ceph_decode_32_safe(p, end, len, e_inval);
-	len *= sizeof(u32) + sizeof(u8);
+	len *= sizeof(u32) + (struct_v >= 5 ? sizeof(u32) : sizeof(u8));
 	ceph_decode_need(p, end, len, e_inval);
 	*p += len;
 
@@ -1650,11 +1658,14 @@ static int decode_new_up_state_weight(void **p, void *end,
 	len = ceph_decode_32(p);
 	while (len--) {
 		s32 osd;
-		u8 xorstate;
+		u32 xorstate;
 		int ret;
 
 		osd = ceph_decode_32(p);
-		xorstate = ceph_decode_8(p);
+		if (struct_v >= 5)
+			xorstate = ceph_decode_32(p);
+		else
+			xorstate = ceph_decode_8(p);
 		if (xorstate == 0)
 			xorstate = CEPH_OSD_UP;
 		BUG_ON(osd >= map->max_osd);
@@ -1788,7 +1799,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	}
 
 	/* new_up_client, new_state, new_weight */
-	err = decode_new_up_state_weight(p, end, map);
+	err = decode_new_up_state_weight(p, end, struct_v, map);
 	if (err)
 		goto bad;
 
-- 
GitLab


From 33e9c8dbfbcef8e4cda8e43a445e692ab7e0d8c0 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 26 Jun 2017 12:05:55 +0200
Subject: [PATCH 0927/1958] libceph: advertise support for NEW_OSDOP_ENCODING
 and SERVER_LUMINOUS

All four SERVER_LUMINOUS feature bits are implemented, switch it on!

NEW_OSDOP_ENCODING doesn't mean much for the client (it signifies
support for MOSDOp v6) but needs to be enabled in order to get the
latest (currently v25) pg_pool_t.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Acked-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 78a58770e6e94..f0f6c537b64cb 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -184,6 +184,11 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_PGPOOL3 |			\
 	 CEPH_FEATURE_OSDENC |			\
 	 CEPH_FEATURE_CRUSH_TUNABLES |		\
+	 CEPH_FEATURE_SERVER_LUMINOUS |		\
+	 CEPH_FEATURE_RESEND_ON_SPLIT |		\
+	 CEPH_FEATURE_RADOS_BACKOFF |		\
+	 CEPH_FEATURE_OSDMAP_PG_UPMAP |		\
+	 CEPH_FEATURE_CRUSH_CHOOSE_ARGS |	\
 	 CEPH_FEATURE_MSG_AUTH |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
 	 CEPH_FEATURE_REPLY_CREATE_INODE |	\
@@ -199,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_OSD_POOLRESEND |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_NEW_OSDOP_ENCODING |	\
 	 CEPH_FEATURE_SERVER_JEWEL |		\
 	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
-- 
GitLab


From 6eb0b8df9f74f33d1a69100117630a7a87a9cc96 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 7 Jul 2017 08:37:26 -0700
Subject: [PATCH 0928/1958] xfs: rename MAXPATHLEN to XFS_SYMLINK_MAXLEN

XFS has a maximum symlink target length of 1024 bytes; this is a
holdover from the Irix days.  Unfortunately, the constant establishing
this is 'MAXPATHLEN' and is /not/ the same as the Linux MAXPATHLEN,
which is 4096.

The kernel enforces its 1024 byte MAXPATHLEN on symlink targets, but
xfsprogs picks up the (Linux) system 4096 byte MAXPATHLEN, which means
that xfs_repair doesn't complain about oversized symlinks.

Since this is an on-disk format constraint, put the define in the XFS
namespace and move everything over to use the new name.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_format.h         | 1 +
 fs/xfs/libxfs/xfs_symlink_remote.c | 2 +-
 fs/xfs/libxfs/xfs_trans_resv.c     | 4 ++--
 fs/xfs/xfs_iops.c                  | 2 +-
 fs/xfs/xfs_linux.h                 | 1 -
 fs/xfs/xfs_symlink.c               | 6 +++---
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e204a942e5bf3..23229f0c5b15d 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1211,6 +1211,7 @@ struct xfs_dsymlink_hdr {
 
 #define XFS_SYMLINK_CRC_OFF	offsetof(struct xfs_dsymlink_hdr, sl_crc)
 
+#define XFS_SYMLINK_MAXLEN	1024
 /*
  * The maximum pathlen is 1024 bytes. Since the minimum file system
  * blocksize is 512 bytes, we can get a max of 3 extents back from
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 2e2c6716b6239..c484877129a0d 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -114,7 +114,7 @@ xfs_symlink_verify(
 	if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
 		return false;
 	if (be32_to_cpu(dsl->sl_offset) +
-				be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
+				be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
 		return false;
 	if (dsl->sl_owner == 0)
 		return false;
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index b456cca1bfb23..6bd916bd35e24 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -477,14 +477,14 @@ xfs_calc_mkdir_reservation(
 /*
  * Making a new symplink is the same as creating a new file, but
  * with the added blocks for remote symlink data which can be up to 1kB in
- * length (MAXPATHLEN).
+ * length (XFS_SYMLINK_MAXLEN).
  */
 STATIC uint
 xfs_calc_symlink_reservation(
 	struct xfs_mount	*mp)
 {
 	return xfs_calc_create_reservation(mp) +
-	       xfs_calc_buf_res(1, MAXPATHLEN);
+	       xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
 }
 
 /*
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 077e2b2ac7734..469c9fa4c178d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -460,7 +460,7 @@ xfs_vn_get_link(
 	if (!dentry)
 		return ERR_PTR(-ECHILD);
 
-	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+	link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
 	if (!link)
 		goto out_err;
 
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ecdae42267d31..44abaecd1481e 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -143,7 +143,6 @@ typedef __u32			xfs_nlink_t;
 #define __return_address __builtin_return_address(0)
 
 #define XFS_PROJID_DEFAULT	0
-#define MAXPATHLEN	1024
 
 #define MIN(a,b)	(min(a,b))
 #define MAX(a,b)	(max(a,b))
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 493804857d67a..12cd9cf7de416 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -143,7 +143,7 @@ xfs_readlink(
 	if (!pathlen)
 		goto out;
 
-	if (pathlen < 0 || pathlen > MAXPATHLEN) {
+	if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
 		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
 			 __func__, (unsigned long long) ip->i_ino,
 			 (long long) pathlen);
@@ -202,7 +202,7 @@ xfs_symlink(
 	 * Check component lengths of the target path name.
 	 */
 	pathlen = strlen(target_path);
-	if (pathlen >= MAXPATHLEN)      /* total string too long */
+	if (pathlen >= XFS_SYMLINK_MAXLEN)      /* total string too long */
 		return -ENAMETOOLONG;
 
 	udqp = gdqp = NULL;
@@ -559,7 +559,7 @@ xfs_inactive_symlink(
 		return 0;
 	}
 
-	if (pathlen < 0 || pathlen > MAXPATHLEN) {
+	if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
 		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
 			 __func__, (unsigned long long)ip->i_ino, pathlen);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-- 
GitLab


From b77a5372076984d33731a8da0fd52fc718f62a23 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 6 Jul 2017 17:19:02 -0500
Subject: [PATCH 0929/1958] platform/x86: fujitsu-laptop: add NULL check on
 devm_kzalloc() return value

Check return value from call to devm_kzalloc()
in order to prevent a NULL pointer dereference.

This issue was detected using Coccinelle and the following semantic patch:

@@
expression x;
identifier fld;
@@

* x = devm_kzalloc(...);
  ... when != x == NULL
  x->fld

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/fujitsu-laptop.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index c1a852847d026..9a1034bec0fcb 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -695,6 +695,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 	if (call_fext_func(device,
 			   FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::logolamp";
 		led->brightness_set_blocking = logolamp_set;
 		led->brightness_get = logolamp_get;
@@ -707,6 +710,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 			    FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
 	    (call_fext_func(device, FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::kblamps";
 		led->brightness_set_blocking = kblamps_set;
 		led->brightness_get = kblamps_get;
@@ -723,6 +729,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 	 */
 	if (call_fext_func(device, FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::radio_led";
 		led->brightness_set_blocking = radio_led_set;
 		led->brightness_get = radio_led_get;
@@ -741,6 +750,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 	    (call_fext_func(device,
 			    FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::eco_led";
 		led->brightness_set_blocking = eco_led_set;
 		led->brightness_get = eco_led_get;
-- 
GitLab


From 0cc091d0c8c34092c471fb5ae7335d075d08c324 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 21 Jun 2017 20:55:55 -0700
Subject: [PATCH 0930/1958] f2fs: report # of free inodes more precisely

If the partition is small, we don't need to report total # of inodes including
hidden free nodes.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8e39b850bfc09..3da6fb276f8b6 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -680,6 +680,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 	block_t total_count, user_block_count, start_count, ovp_count;
+	u64 avail_node_count;
 
 	total_count = le64_to_cpu(sbi->raw_super->block_count);
 	user_block_count = sbi->user_block_count;
@@ -692,9 +693,16 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
 	buf->f_bavail = user_block_count - valid_user_blocks(sbi);
 
-	buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
-	buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
-							buf->f_bavail);
+	avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+
+	if (avail_node_count > user_block_count) {
+		buf->f_files = user_block_count;
+		buf->f_ffree = buf->f_bavail;
+	} else {
+		buf->f_files = avail_node_count;
+		buf->f_ffree = min(avail_node_count - valid_node_count(sbi),
+					buf->f_bavail);
+	}
 
 	buf->f_namelen = F2FS_NAME_LEN;
 	buf->f_fsid.val[0] = (u32)id;
-- 
GitLab


From d871cd046f1a5ae816c836cf114d57288bcb00b2 Mon Sep 17 00:00:00 2001
From: Yunlong Song <yunlong.song@huawei.com>
Date: Sat, 24 Jun 2017 15:57:19 +0800
Subject: [PATCH 0931/1958] f2fs: avoid redundant f2fs_flush after remount

create_flush_cmd_control will create redundant issue_flush_thread after each
remount with flush_merge option.

Signed-off-by: Yunlong Song <yunlong.song@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 86a0c1095939c..7637033ef87b8 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -555,6 +555,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 
 	if (SM_I(sbi)->fcc_info) {
 		fcc = SM_I(sbi)->fcc_info;
+		if (fcc->f2fs_issue_flush)
+			return err;
 		goto init_thread;
 	}
 
-- 
GitLab


From daeb433e42de97c79622f58681972200eec1d8da Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 26 Jun 2017 16:24:41 +0800
Subject: [PATCH 0932/1958] f2fs: introduce reserved_blocks in sysfs

In this patch, we add a new sysfs interface, with it, we can control
number of reserved blocks in system which could not be used by user,
it enable f2fs to let user to configure for adjusting over-provision
ratio dynamically instead of changing it by mkfs.

So we can expect it will help to reserve more free space for relieving
GC in both filesystem and flash device.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  6 ++++++
 fs/f2fs/f2fs.h                          | 13 +++++++++----
 fs/f2fs/super.c                         |  4 +++-
 fs/f2fs/sysfs.c                         | 16 +++++++++++++++-
 4 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index b09108811ff1f..84c606fb3ca4d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -124,3 +124,9 @@ Date:		May 2016
 Contact:	"Sheng Yong" <shengyong1@huawei.com>
 Description:
 		 Controls the injection type.
+
+What:		/sys/fs/f2fs/<disk>/reserved_blocks
+Date:		June 2017
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Description:
+		 Controls current reserved blocks in system.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c27a6264d9bf2..e15d55ccc4bc2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -966,6 +966,8 @@ struct f2fs_sb_info {
 	block_t total_valid_block_count;	/* # of valid blocks */
 	block_t discard_blks;			/* discard command candidats */
 	block_t last_valid_block_count;		/* for recovery */
+	block_t reserved_blocks;		/* configurable reserved blocks */
+
 	u32 s_next_generation;			/* for NFS support */
 
 	/* # of pages, see count_type */
@@ -1376,6 +1378,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
 				 struct inode *inode, blkcnt_t *count)
 {
 	blkcnt_t diff;
+	block_t avail_user_block_count;
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (time_to_inject(sbi, FAULT_BLOCK)) {
@@ -1391,10 +1394,11 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	spin_lock(&sbi->stat_lock);
 	sbi->total_valid_block_count += (block_t)(*count);
-	if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) {
-		diff = sbi->total_valid_block_count - sbi->user_block_count;
+	avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
+	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+		diff = sbi->total_valid_block_count - avail_user_block_count;
 		*count -= diff;
-		sbi->total_valid_block_count = sbi->user_block_count;
+		sbi->total_valid_block_count = avail_user_block_count;
 		if (!*count) {
 			spin_unlock(&sbi->stat_lock);
 			percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
@@ -1556,7 +1560,8 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
 	spin_lock(&sbi->stat_lock);
 
 	valid_block_count = sbi->total_valid_block_count + 1;
-	if (unlikely(valid_block_count > sbi->user_block_count)) {
+	if (unlikely(valid_block_count + sbi->reserved_blocks >
+						sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
 		return false;
 	}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3da6fb276f8b6..c45bac6f17958 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -691,7 +691,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
-	buf->f_bavail = user_block_count - valid_user_blocks(sbi);
+	buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
+						sbi->reserved_blocks;
 
 	avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
 
@@ -1768,6 +1769,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->total_valid_block_count =
 				le64_to_cpu(sbi->ckpt->valid_block_count);
 	sbi->last_valid_block_count = sbi->total_valid_block_count;
+	sbi->reserved_blocks = 0;
 
 	for (i = 0; i < NR_INODE_TYPE; i++) {
 		INIT_LIST_HEAD(&sbi->inode_list[i]);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 714a3e47bbe8d..9adc202fcd6f7 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -30,6 +30,7 @@ enum {
 	FAULT_INFO_RATE,	/* struct f2fs_fault_info */
 	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
 #endif
+	RESERVED_BLOCKS,
 };
 
 struct f2fs_attr {
@@ -51,7 +52,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
 		return (unsigned char *)SM_I(sbi)->dcc_info;
 	else if (struct_type == NM_INFO)
 		return (unsigned char *)NM_I(sbi);
-	else if (struct_type == F2FS_SBI)
+	else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS)
 		return (unsigned char *)sbi;
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	else if (struct_type == FAULT_INFO_RATE ||
@@ -111,6 +112,17 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
 	if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
 		return -EINVAL;
 #endif
+	if (a->struct_type == RESERVED_BLOCKS) {
+		spin_lock(&sbi->stat_lock);
+		if ((unsigned long)sbi->total_valid_block_count + t >
+				(unsigned long)sbi->user_block_count) {
+			spin_unlock(&sbi->stat_lock);
+			return -EINVAL;
+		}
+		*ui = t;
+		spin_unlock(&sbi->stat_lock);
+		return count;
+	}
 	*ui = t;
 	return count;
 }
@@ -165,6 +177,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
+F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
@@ -208,6 +221,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(inject_type),
 #endif
 	ATTR_LIST(lifetime_write_kbytes),
+	ATTR_LIST(reserved_blocks),
 	NULL,
 };
 
-- 
GitLab


From cce1325247b9faafc520c5789fe60feef1fd7092 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Thu, 29 Jun 2017 23:17:45 +0800
Subject: [PATCH 0933/1958] f2fs: stop gc/discard thread in prior during umount

This patch resolves kernel panic for xfstests/081, caused by recent f2fs_bug_on

 f2fs: add f2fs_bug_on in __remove_discard_cmd

For fixing, we will stop gc/discard thread in prior in ->kill_sb in order to
avoid referring and releasing race among them.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h    |  1 +
 fs/f2fs/segment.c | 19 +++++++++++++------
 fs/f2fs/super.c   |  7 ++++---
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e15d55ccc4bc2..abf9eea679669 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2293,6 +2293,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
+void stop_discard_thread(struct f2fs_sb_info *sbi);
 void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 void release_discard_addrs(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7637033ef87b8..6eaa98ea8ec69 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1065,6 +1065,18 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 		__wait_one_discard_bio(sbi, dc);
 }
 
+void stop_discard_thread(struct f2fs_sb_info *sbi)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+	if (dcc && dcc->f2fs_issue_discard) {
+		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
+
+		dcc->f2fs_issue_discard = NULL;
+		kthread_stop(discard_thread);
+	}
+}
+
 /* This comes from f2fs_put_super */
 void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
 {
@@ -1422,12 +1434,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
 	if (!dcc)
 		return;
 
-	if (dcc->f2fs_issue_discard) {
-		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
-
-		dcc->f2fs_issue_discard = NULL;
-		kthread_stop(discard_thread);
-	}
+	stop_discard_thread(sbi);
 
 	kfree(dcc);
 	SM_I(sbi)->dcc_info = NULL;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c45bac6f17958..f27c141cd8aac 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -566,8 +566,6 @@ static void f2fs_put_super(struct super_block *sb)
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	int i;
 
-	stop_gc_thread(sbi);
-
 	/* prevent remaining shrinker jobs */
 	mutex_lock(&sbi->umount_mutex);
 
@@ -1976,8 +1974,11 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
 
 static void kill_f2fs_super(struct super_block *sb)
 {
-	if (sb->s_root)
+	if (sb->s_root) {
 		set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
+		stop_gc_thread(F2FS_SB(sb));
+		stop_discard_thread(F2FS_SB(sb));
+	}
 	kill_block_super(sb);
 }
 
-- 
GitLab


From 6915ea9d8dd8690570fe4a9864b898447a4f3da0 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 30 Jun 2017 17:19:02 +0800
Subject: [PATCH 0934/1958] f2fs: introduce __check_sit_bitmap

After we introduce discard thread, discard command can be issued
concurrently with data allocating, this patch adds new function to
heck sit bitmap to ensure that userdata was invalid in which on-going
discard command covered.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6eaa98ea8ec69..4c246e3511031 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -763,6 +763,30 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 	bio_put(bio);
 }
 
+void __check_sit_bitmap(struct f2fs_sb_info *sbi,
+				block_t start, block_t end)
+{
+#ifdef CONFIG_F2FS_CHECK_FS
+	struct seg_entry *sentry;
+	unsigned int segno;
+	block_t blk = start;
+	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
+	unsigned long *map;
+
+	while (blk < end) {
+		segno = GET_SEGNO(sbi, blk);
+		sentry = get_seg_entry(sbi, segno);
+		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
+
+		size = min((unsigned long)(end - blk), max_blocks);
+		map = (unsigned long *)(sentry->cur_valid_map);
+		offset = __find_rev_next_bit(map, size, offset);
+		f2fs_bug_on(sbi, offset != size);
+		blk += size;
+	}
+#endif
+}
+
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
 				struct discard_cmd *dc)
@@ -790,6 +814,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
 			bio->bi_opf |= REQ_SYNC;
 			submit_bio(bio);
 			list_move_tail(&dc->list, &dcc->wait_list);
+			__check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
 		}
 	} else {
 		__remove_discard_cmd(sbi, dc);
-- 
GitLab


From 0771fcc71c0c28bf31ac5c2c863b9f0de0fdf00d Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Thu, 29 Jun 2017 23:20:45 +0800
Subject: [PATCH 0935/1958] f2fs: skip ->writepages for {mete,node}_inode
 during recovery

Skip ->writepages in prior to ->writepage for {meta,node}_inode during
recovery, hence unneeded loop in ->writepages can be avoided.

Moreover, check SBI_POR_DOING earlier while writebacking pages.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  3 +++
 fs/f2fs/data.c       | 13 +++++++------
 fs/f2fs/node.c       |  3 +++
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 12559a4b6c244..954917d582f82 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -269,6 +269,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
 	long diff, written;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto skip_write;
+
 	/* collect a number of dirty meta pages and write together */
 	if (wbc->for_kupdate ||
 		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7d3af48d34a98..cffcfa8d25711 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1492,6 +1492,9 @@ static int __write_data_page(struct page *page, bool *submitted,
 
 	trace_f2fs_writepage(page, DATA);
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto redirty_out;
+
 	if (page->index < end_index)
 		goto write;
 
@@ -1505,8 +1508,6 @@ static int __write_data_page(struct page *page, bool *submitted,
 
 	zero_user_segment(page, offset, PAGE_SIZE);
 write:
-	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
-		goto redirty_out;
 	if (f2fs_is_drop_cache(inode))
 		goto out;
 	/* we should not write 0'th page having journal header */
@@ -1754,6 +1755,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
 		return 0;
 
+	/* during POR, we don't need to trigger writepage at all. */
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto skip_write;
+
 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
 			available_free_memory(sbi, DIRTY_DENTS))
@@ -1763,10 +1768,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
 		goto skip_write;
 
-	/* during POR, we don't need to trigger writepage at all. */
-	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
-		goto skip_write;
-
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
 
 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f6f46be139f46..fd57ffd885081 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1687,6 +1687,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
 	struct blk_plug plug;
 	long diff;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto skip_write;
+
 	/* balancing f2fs's metadata in background */
 	f2fs_balance_fs_bg(sbi);
 
-- 
GitLab


From d58dfb75056c5f732a0b83c54d22c99b4edc947a Mon Sep 17 00:00:00 2001
From: Sheng Yong <shengyong1@huawei.com>
Date: Mon, 26 Jun 2017 10:41:35 +0800
Subject: [PATCH 0936/1958] f2fs: do not set LOST_PINO for newly created dir

Since directories will be written back with checkpoint and fsync a
directory will always write CP, there is no need to set LOST_PINO
after creating a directory.

Signed-off-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 94756f55a97e7..37f9c7f556058 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -415,7 +415,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
 	 * We lost i_pino from now on.
 	 */
 	if (is_inode_flag_set(inode, FI_INC_LINK)) {
-		file_lost_pino(inode);
+		if (!S_ISDIR(inode->i_mode))
+			file_lost_pino(inode);
 		/*
 		 * If link the tmpfile to alias through linkat path,
 		 * we should remove this inode from orphan list.
-- 
GitLab


From b855bf0e1640aa4cf2d1eef056eebcd43e0d1f5e Mon Sep 17 00:00:00 2001
From: Sheng Yong <shengyong1@huawei.com>
Date: Mon, 26 Jun 2017 10:41:36 +0800
Subject: [PATCH 0937/1958] f2fs: do not set LOST_PINO for renamed dir

After renaming a directory, fsck could detect unmatched pino. The scenario
can be reproduced as the following:

	$ mkdir /bar/subbar /foo
	$ rename /bar/subbar /foo

Then fsck will report:
[ASSERT] (__chk_dots_dentries:1182)  --> Bad inode number[0x3] for '..', parent parent ino is [0x4]

Rename sets LOST_PINO for old_inode. However, the flag cannot be cleared,
since dir is written back with CP. So, let's get rid of LOST_PINO for a
renamed dir and fix the pino directly at the end of rename.

Signed-off-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index c31b40e5f9cf6..b75dc2f4ad57f 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -772,7 +772,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 
 	down_write(&F2FS_I(old_inode)->i_sem);
-	file_lost_pino(old_inode);
+	if (!old_dir_entry || whiteout)
+		file_lost_pino(old_inode);
+	else
+		F2FS_I(old_inode)->i_pino = new_dir->i_ino;
 	up_write(&F2FS_I(old_inode)->i_sem);
 
 	old_inode->i_ctime = current_time(old_inode);
-- 
GitLab


From 6ac851ba895dce4b85b7adfa8ddb1fd25637e70a Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 5 Jul 2017 12:17:24 +0800
Subject: [PATCH 0938/1958] Revert "f2fs: fix to clean previous mount option
 when remount_fs"

Don't clear old mount option before parse new option during ->remount_fs
like other generic filesystems.

This reverts commit 26666c8a4366debae30ae37d0688b2bec92d196a.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f27c141cd8aac..af472f7968d09 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -846,7 +846,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 			clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
 	}
 
-	sbi->mount_opt.opt = 0;
 	default_options(sbi);
 
 	/* parse mount options */
-- 
GitLab


From 000519f27866afdfde020d097b76cf2c4038595e Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Thu, 6 Jul 2017 01:11:31 +0800
Subject: [PATCH 0939/1958] f2fs: don't count inode block in in-memory
 inode.i_blocks

Previously, we count all inode consumed blocks including inode block,
xattr block, index block, data block into i_blocks, for other generic
filesystems, they won't count inode block into i_blocks, so for
userspace applications or quota system, they may detect incorrect block
count according to i_blocks value in inode.

This patch changes to count all blocks into inode.i_blocks excluding
inode block, for on-disk i_blocks, we keep counting inode block for
backward compatibility.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h  | 22 ++++++++++++----------
 fs/f2fs/inode.c |  4 ++--
 fs/f2fs/node.c  | 16 ++++++----------
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index abf9eea679669..7bd0a45dd0814 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1355,8 +1355,6 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
 	return 0;
 }
 
-#define F2FS_DEFAULT_ALLOCATED_BLOCKS	1
-
 /*
  * Check whether the inode has blocks or not
  */
@@ -1364,8 +1362,7 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode)
 {
 	block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0;
 
-	return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) >
-			(F2FS_DEFAULT_ALLOCATED_BLOCKS + xattr_block);
+	return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > xattr_block;
 }
 
 static inline bool f2fs_has_xattr_block(unsigned int ofs)
@@ -1552,7 +1549,7 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
 }
 
 static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
-						struct inode *inode)
+					struct inode *inode, bool is_inode)
 {
 	block_t	valid_block_count;
 	unsigned int valid_node_count;
@@ -1572,8 +1569,12 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
 		return false;
 	}
 
-	if (inode)
-		f2fs_i_blocks_write(inode, 1, true);
+	if (inode) {
+		if (is_inode)
+			f2fs_mark_inode_dirty_sync(inode, true);
+		else
+			f2fs_i_blocks_write(inode, 1, true);
+	}
 
 	sbi->total_valid_node_count++;
 	sbi->total_valid_block_count++;
@@ -1584,15 +1585,16 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
 }
 
 static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
-						struct inode *inode)
+					struct inode *inode, bool is_inode)
 {
 	spin_lock(&sbi->stat_lock);
 
 	f2fs_bug_on(sbi, !sbi->total_valid_block_count);
 	f2fs_bug_on(sbi, !sbi->total_valid_node_count);
-	f2fs_bug_on(sbi, !inode->i_blocks);
+	f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
 
-	f2fs_i_blocks_write(inode, 1, false);
+	if (!is_inode)
+		f2fs_i_blocks_write(inode, 1, false);
 	sbi->total_valid_node_count--;
 	sbi->total_valid_block_count--;
 
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 1ff5bd418d87b..e42a7a8805dc5 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -130,7 +130,7 @@ static int do_read_inode(struct inode *inode)
 	i_gid_write(inode, le32_to_cpu(ri->i_gid));
 	set_nlink(inode, le32_to_cpu(ri->i_links));
 	inode->i_size = le64_to_cpu(ri->i_size);
-	inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks));
+	inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
 
 	inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
 	inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
@@ -268,7 +268,7 @@ int update_inode(struct inode *inode, struct page *node_page)
 	ri->i_gid = cpu_to_le32(i_gid_read(inode));
 	ri->i_links = cpu_to_le32(inode->i_nlink);
 	ri->i_size = cpu_to_le64(i_size_read(inode));
-	ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks));
+	ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
 
 	if (et) {
 		read_lock(&et->lock);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index fd57ffd885081..b9f14ba6441f5 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -678,15 +678,11 @@ static void truncate_node(struct dnode_of_data *dn)
 	struct node_info ni;
 
 	get_node_info(sbi, dn->nid, &ni);
-	if (dn->inode->i_blocks == 0) {
-		f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
-		goto invalidate;
-	}
 	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
 
 	/* Deallocate node address */
 	invalidate_blocks(sbi, ni.blk_addr);
-	dec_valid_node_count(sbi, dn->inode);
+	dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
 	set_node_addr(sbi, &ni, NULL_ADDR, false);
 
 	if (dn->nid == dn->inode->i_ino) {
@@ -694,7 +690,7 @@ static void truncate_node(struct dnode_of_data *dn)
 		dec_valid_inode_count(sbi);
 		f2fs_inode_synced(dn->inode);
 	}
-invalidate:
+
 	clear_node_page_dirty(dn->node_page);
 	set_sbi_flag(sbi, SBI_IS_DIRTY);
 
@@ -1044,7 +1040,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
+	if (unlikely(!inc_valid_node_count(sbi, dn->inode, !ofs))) {
 		err = -ENOSPC;
 		goto fail;
 	}
@@ -2207,14 +2203,14 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
 	get_node_info(sbi, prev_xnid, &ni);
 	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
 	invalidate_blocks(sbi, ni.blk_addr);
-	dec_valid_node_count(sbi, inode);
+	dec_valid_node_count(sbi, inode, false);
 	set_node_addr(sbi, &ni, NULL_ADDR, false);
 
 recover_xnid:
 	/* 2: update xattr nid in inode */
 	remove_free_nid(sbi, new_xnid);
 	f2fs_i_xnid_write(inode, new_xnid);
-	if (unlikely(!inc_valid_node_count(sbi, inode)))
+	if (unlikely(!inc_valid_node_count(sbi, inode, false)))
 		f2fs_bug_on(sbi, 1);
 	update_inode_page(inode);
 
@@ -2272,7 +2268,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
 	new_ni = old_ni;
 	new_ni.ino = ino;
 
-	if (unlikely(!inc_valid_node_count(sbi, NULL)))
+	if (unlikely(!inc_valid_node_count(sbi, NULL, true)))
 		WARN_ON(1);
 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 	inc_valid_inode_count(sbi);
-- 
GitLab


From ff1048e7dffe0582a50e2eaf90e13fc76ea8493d Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 6 Jul 2017 14:46:01 -0700
Subject: [PATCH 0940/1958] f2fs: relax migratepage for atomic written page

In order to avoid lock contention for atomic written pages, we'd better give
EBUSY in f2fs_migrate_page when mode is asynchronous. We expect it will be
released soon as transaction commits.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cffcfa8d25711..72fc866cad195 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2203,8 +2203,12 @@ int f2fs_migrate_page(struct address_space *mapping,
 	BUG_ON(PageWriteback(page));
 
 	/* migrating an atomic written page is safe with the inmem_lock hold */
-	if (atomic_written && !mutex_trylock(&fi->inmem_lock))
-		return -EAGAIN;
+	if (atomic_written) {
+		if (mode != MIGRATE_SYNC)
+			return -EBUSY;
+		if (!mutex_trylock(&fi->inmem_lock))
+			return -EAGAIN;
+	}
 
 	/*
 	 * A reference is expected if PagePrivate set when move mapping,
-- 
GitLab


From d1aa245354ae4605d1183f542ed8d45811c439f6 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 7 Jul 2017 14:10:15 +0800
Subject: [PATCH 0941/1958] f2fs: use spin_{,un}lock_irq{save,restore}

generic/361 reports below warning, this is because: once, there is
someone entering into critical region of sbi.cp_lock, if write_end_io.
f2fs_stop_checkpoint is invoked from an triggered IRQ, we will encounter
deadlock.

So this patch changes to use spin_{,un}lock_irq{save,restore} to create
critical region without IRQ enabled to avoid potential deadlock.

 irq event stamp: 83391573
 loop: Write error at byte offset 438729728, length 1024.
 hardirqs last  enabled at (83391573): [<c1809752>] restore_all+0xf/0x65
 hardirqs last disabled at (83391572): [<c1809eac>] reschedule_interrupt+0x30/0x3c
 loop: Write error at byte offset 438860288, length 1536.
 softirqs last  enabled at (83389244): [<c180cc4e>] __do_softirq+0x1ae/0x476
 softirqs last disabled at (83389237): [<c101ca7c>] do_softirq_own_stack+0x2c/0x40
 loop: Write error at byte offset 438990848, length 2048.
 ================================
 WARNING: inconsistent lock state
 4.12.0-rc2+ #30 Tainted: G           O
 --------------------------------
 inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
 xfs_io/7959 [HC1[1]:SC0[0]:HE0:SE1] takes:
  (&(&sbi->cp_lock)->rlock){?.+...}, at: [<f96f96cc>] f2fs_stop_checkpoint+0x1c/0x50 [f2fs]
 {HARDIRQ-ON-W} state was registered at:
   __lock_acquire+0x527/0x7b0
   lock_acquire+0xae/0x220
   _raw_spin_lock+0x42/0x50
   do_checkpoint+0x165/0x9e0 [f2fs]
   write_checkpoint+0x33f/0x740 [f2fs]
   __f2fs_sync_fs+0x92/0x1f0 [f2fs]
   f2fs_sync_fs+0x12/0x20 [f2fs]
   sync_filesystem+0x67/0x80
   generic_shutdown_super+0x27/0x100
   kill_block_super+0x22/0x50
   kill_f2fs_super+0x3a/0x40 [f2fs]
   deactivate_locked_super+0x3d/0x70
   deactivate_super+0x40/0x60
   cleanup_mnt+0x39/0x70
   __cleanup_mnt+0x10/0x20
   task_work_run+0x69/0x80
   exit_to_usermode_loop+0x57/0x85
   do_fast_syscall_32+0x18c/0x1b0
   entry_SYSENTER_32+0x4c/0x7b
 irq event stamp: 1957420
 hardirqs last  enabled at (1957419): [<c1808f37>] _raw_spin_unlock_irq+0x27/0x50
 hardirqs last disabled at (1957420): [<c1809f9c>] call_function_single_interrupt+0x30/0x3c
 softirqs last  enabled at (1953784): [<c180cc4e>] __do_softirq+0x1ae/0x476
 softirqs last disabled at (1953773): [<c101ca7c>] do_softirq_own_stack+0x2c/0x40

 other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock(&(&sbi->cp_lock)->rlock);
   <Interrupt>
     lock(&(&sbi->cp_lock)->rlock);

  *** DEADLOCK ***

 2 locks held by xfs_io/7959:
  #0:  (sb_writers#13){.+.+.+}, at: [<c11fd7ca>] vfs_write+0x16a/0x190
  #1:  (&sb->s_type->i_mutex_key#16){+.+.+.}, at: [<f96e33f5>] f2fs_file_write_iter+0x25/0x140 [f2fs]

 stack backtrace:
 CPU: 2 PID: 7959 Comm: xfs_io Tainted: G           O    4.12.0-rc2+ #30
 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
 Call Trace:
  dump_stack+0x5f/0x92
  print_usage_bug+0x1d3/0x1dd
  ? check_usage_backwards+0xe0/0xe0
  mark_lock+0x23d/0x280
  __lock_acquire+0x699/0x7b0
  ? __this_cpu_preempt_check+0xf/0x20
  ? trace_hardirqs_off_caller+0x91/0xe0
  lock_acquire+0xae/0x220
  ? f2fs_stop_checkpoint+0x1c/0x50 [f2fs]
  _raw_spin_lock+0x42/0x50
  ? f2fs_stop_checkpoint+0x1c/0x50 [f2fs]
  f2fs_stop_checkpoint+0x1c/0x50 [f2fs]
  f2fs_write_end_io+0x147/0x150 [f2fs]
  bio_endio+0x7a/0x1e0
  blk_update_request+0xad/0x410
  blk_mq_end_request+0x16/0x60
  lo_complete_rq+0x3c/0x70
  __blk_mq_complete_request_remote+0x11/0x20
  flush_smp_call_function_queue+0x6d/0x120
  ? debug_smp_processor_id+0x12/0x20
  generic_smp_call_function_single_interrupt+0x12/0x30
  smp_call_function_single_interrupt+0x25/0x40
  call_function_single_interrupt+0x37/0x3c
 EIP: _raw_spin_unlock_irq+0x2d/0x50
 EFLAGS: 00000296 CPU: 2
 EAX: 00000001 EBX: d2ccc51c ECX: 00000001 EDX: c1aacebd
 ESI: 00000000 EDI: 00000000 EBP: c96c9d1c ESP: c96c9d18
  DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
  ? inherit_task_group.isra.98.part.99+0x6b/0xb0
  __add_to_page_cache_locked+0x1d4/0x290
  add_to_page_cache_lru+0x38/0xb0
  pagecache_get_page+0x8e/0x200
  f2fs_write_begin+0x96/0xf00 [f2fs]
  ? trace_hardirqs_on_caller+0xdd/0x1c0
  ? current_time+0x17/0x50
  ? trace_hardirqs_on+0xb/0x10
  generic_perform_write+0xa9/0x170
  __generic_file_write_iter+0x1a2/0x1f0
  ? f2fs_preallocate_blocks+0x137/0x160 [f2fs]
  f2fs_file_write_iter+0x6e/0x140 [f2fs]
  ? __lock_acquire+0x429/0x7b0
  __vfs_write+0xc1/0x140
  vfs_write+0x9b/0x190
  SyS_pwrite64+0x63/0xa0
  do_fast_syscall_32+0xa1/0x1b0
  entry_SYSENTER_32+0x4c/0x7b
 EIP: 0xb7786c61
 EFLAGS: 00000293 CPU: 2
 EAX: ffffffda EBX: 00000003 ECX: 08416000 EDX: 00001000
 ESI: 18b24000 EDI: 00000000 EBP: 00000003 ESP: bf9b36b0
  DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b

Fixes: aaec2b1d1879 ("f2fs: introduce cp_lock to protect updating of ckpt_flags")
Cc: stable@vger.kernel.org
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 11 ++++++-----
 fs/f2fs/f2fs.h       | 18 ++++++++++++------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 954917d582f82..56bbf592e4875 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1053,8 +1053,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	unsigned long flags;
 
-	spin_lock(&sbi->cp_lock);
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 
 	if ((cpc->reason & CP_UMOUNT) &&
 			le32_to_cpu(ckpt->cp_pack_total_block_count) >
@@ -1085,14 +1086,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
-	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
+	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
 	block_t start_blk;
 	unsigned int data_sum_blocks, orphan_blocks;
 	__u32 crc32 = 0;
@@ -1134,12 +1135,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* 2 cp  + n data seg summary + orphan inode blocks */
 	data_sum_blocks = npages_for_summary_flush(sbi, false);
-	spin_lock(&sbi->cp_lock);
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
 		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 	else
 		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 
 	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
 	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7bd0a45dd0814..ced78035a4167 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1254,9 +1254,11 @@ static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
 
 static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 {
-	spin_lock(&sbi->cp_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 	__set_ckpt_flags(F2FS_CKPT(sbi), f);
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
@@ -1270,22 +1272,26 @@ static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f
 
 static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 {
-	spin_lock(&sbi->cp_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 	__clear_ckpt_flags(F2FS_CKPT(sbi), f);
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
 {
+	unsigned long flags;
+
 	set_sbi_flag(sbi, SBI_NEED_FSCK);
 
 	if (lock)
-		spin_lock(&sbi->cp_lock);
+		spin_lock_irqsave(&sbi->cp_lock, flags);
 	__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
 	kfree(NM_I(sbi)->nat_bits);
 	NM_I(sbi)->nat_bits = NULL;
 	if (lock)
-		spin_unlock(&sbi->cp_lock);
+		spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
-- 
GitLab


From d29460e5cfc9bc2241886f9f60d0650ad745cf10 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 21 Jun 2017 17:52:39 -0700
Subject: [PATCH 0942/1958] f2fs: avoid deadlock caused by lock order of page
 and lock_op

- punch_hole
 - fill_zero
  - f2fs_lock_op
  - get_new_data_page
   - lock_page

- f2fs_write_data_pages
 - lock_page
 - do_write_data_page
  - f2fs_lock_op

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 72fc866cad195..7dd5fb647d435 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1404,8 +1404,9 @@ int do_write_data_page(struct f2fs_io_info *fio)
 		}
 	}
 
-	if (fio->need_lock == LOCK_REQ)
-		f2fs_lock_op(fio->sbi);
+	/* Deadlock due to between page->lock and f2fs_lock_op */
+	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
+		return -EAGAIN;
 
 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
 	if (err)
@@ -1667,7 +1668,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 			}
 
 			done_index = page->index;
-
+retry_write:
 			lock_page(page);
 
 			if (unlikely(page->mapping != mapping)) {
@@ -1703,6 +1704,15 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 					unlock_page(page);
 					ret = 0;
 					continue;
+				} else if (ret == -EAGAIN) {
+					ret = 0;
+					if (wbc->sync_mode == WB_SYNC_ALL) {
+						cond_resched();
+						congestion_wait(BLK_RW_ASYNC,
+									HZ/50);
+						goto retry_write;
+					}
+					continue;
 				}
 				done_index = page->index + 1;
 				done = 1;
-- 
GitLab


From 3eaa11e2780dc38350c133bd998cac1df488d040 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= <jg@lightnvm.io>
Date: Fri, 7 Jul 2017 21:08:52 +0200
Subject: [PATCH 0943/1958] lightnvm: pblk: control I/O flow also on tear down
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When removing a pblk instance, control the write I/O flow to the
controller as we do in the fast path.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/pblk-core.c     | 61 +++++++++++++++++++++++++-------
 drivers/lightnvm/pblk-recovery.c | 31 ++++++++++------
 drivers/lightnvm/pblk-write.c    | 19 ++++------
 drivers/lightnvm/pblk.h          |  2 ++
 4 files changed, 78 insertions(+), 35 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 11fe0c5b2a9cf..81501644fb158 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -1670,13 +1670,10 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 	queue_work(wq, &line_ws->ws);
 }
 
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
-		  unsigned long *lun_bitmap)
+static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
+			     int nr_ppas, int pos)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_lun *rlun;
-	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+	struct pblk_lun *rlun = &pblk->luns[pos];
 	int ret;
 
 	/*
@@ -1690,14 +1687,8 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
 				ppa_list[0].g.ch != ppa_list[i].g.ch);
 #endif
-	/* If the LUN has been locked for this same request, do no attempt to
-	 * lock it again
-	 */
-	if (test_and_set_bit(pos, lun_bitmap))
-		return;
 
-	rlun = &pblk->luns[pos];
-	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
 	if (ret) {
 		switch (ret) {
 		case -ETIME:
@@ -1710,6 +1701,50 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 	}
 }
 
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+	__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		  unsigned long *lun_bitmap)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+	/* If the LUN has been locked for this same request, do no attempt to
+	 * lock it again
+	 */
+	if (test_and_set_bit(pos, lun_bitmap))
+		return;
+
+	__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+#ifdef CONFIG_NVM_DEBUG
+	int i;
+
+	for (i = 1; i < nr_ppas; i++)
+		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+				ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+
+	rlun = &pblk->luns[pos];
+	up(&rlun->wr_sem);
+}
+
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap)
 {
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 0e48d3e4e1437..cb556e06673e7 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -340,9 +340,14 @@ static void pblk_end_io_recov(struct nvm_rq *rqd)
 	struct pblk *pblk = pad_rq->pblk;
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-	kref_put(&pad_rq->ref, pblk_recov_complete);
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+	bio_put(rqd->bio);
 	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 	pblk_free_rqd(pblk, rqd, WRITE);
+
+	atomic_dec(&pblk->inflight_io);
+	kref_put(&pad_rq->ref, pblk_recov_complete);
 }
 
 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
@@ -385,7 +390,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (rq_ppas < pblk->min_write_pgs) {
 		pr_err("pblk: corrupted pad line %d\n", line->id);
-		goto free_rq;
+		goto fail_free_pad;
 	}
 
 	rq_len = rq_ppas * geo->sec_size;
@@ -393,7 +398,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
 	if (!meta_list) {
 		ret = -ENOMEM;
-		goto free_data;
+		goto fail_free_pad;
 	}
 
 	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
@@ -404,9 +409,9 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 		ret = PTR_ERR(rqd);
 		goto fail_free_meta;
 	}
-	memset(rqd, 0, pblk_w_rq_size);
 
-	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+						PBLK_VMALLOC_META, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
 		goto fail_free_rqd;
@@ -453,15 +458,15 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 	}
 
 	kref_get(&pad_rq->ref);
+	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: I/O submission failed: %d\n", ret);
-		goto free_data;
+		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+		goto fail_free_bio;
 	}
 
-	atomic_dec(&pblk->inflight_io);
-
 	left_line_ppas -= rq_ppas;
 	left_ppas -= rq_ppas;
 	if (left_ppas && left_line_ppas)
@@ -475,17 +480,23 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 		ret = -ETIME;
 	}
 
+	if (!pblk_line_is_full(line))
+		pr_err("pblk: corrupted padded line: %d\n", line->id);
+
+	vfree(data);
 free_rq:
 	kfree(pad_rq);
-free_data:
-	vfree(data);
 	return ret;
 
+fail_free_bio:
+	bio_put(bio);
 fail_free_rqd:
 	pblk_free_rqd(pblk, rqd, WRITE);
 fail_free_meta:
 	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+fail_free_pad:
 	kfree(pad_rq);
+	vfree(data);
 	return ret;
 }
 
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index d62a8f4faaf43..cc2b9414d17ca 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -178,15 +178,12 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_line *line = m_ctx->private;
 	struct pblk_emeta *emeta = line->emeta;
-	int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
-	struct pblk_lun *rlun = &pblk->luns[pos];
 	int sync;
 
-	up(&rlun->wr_sem);
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
 	if (rqd->error) {
 		pblk_log_write_err(pblk, rqd);
@@ -203,6 +200,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 								pblk->close_wq);
 
 	bio_put(rqd->bio);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 	pblk_free_rqd(pblk, rqd, READ);
 
 	atomic_dec(&pblk->inflight_io);
@@ -367,7 +365,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_emeta *emeta = meta_line->emeta;
 	struct pblk_g_ctx *m_ctx;
-	struct pblk_lun *rlun;
 	struct bio *bio;
 	struct nvm_rq *rqd;
 	void *data;
@@ -411,13 +408,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 			rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
 	}
 
-	rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
-	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
-	if (ret) {
-		pr_err("pblk: lun semaphore timed out (%d)\n", ret);
-		goto fail_free_bio;
-	}
-
 	emeta->mem += rq_len;
 	if (emeta->mem >= lm->emeta_len[0]) {
 		spin_lock(&l_mg->close_lock);
@@ -427,6 +417,8 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 		spin_unlock(&l_mg->close_lock);
 	}
 
+	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
@@ -436,10 +428,13 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	return NVM_IO_OK;
 
 fail_rollback:
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 	spin_lock(&l_mg->close_lock);
 	pblk_dealloc_page(pblk, meta_line, rq_ppas);
 	list_add(&meta_line->list, &meta_line->list);
 	spin_unlock(&l_mg->close_lock);
+
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 fail_free_bio:
 	if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
 		bio_put(bio);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 15931381348c7..0c5692cc2f605 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -739,8 +739,10 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush);
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		  unsigned long *lun_bitmap);
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap);
 void pblk_end_bio_sync(struct bio *bio);
-- 
GitLab


From 56c76417ad310a060252a13f88001c35b73d241d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= <jg@lightnvm.io>
Date: Fri, 7 Jul 2017 21:08:53 +0200
Subject: [PATCH 0944/1958] lightnvm: pblk: remove unnecessary checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove unnecessary checks when freeing dma memory in the completion
path.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/pblk-write.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index cc2b9414d17ca..3ad9e56d24734 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -39,9 +39,7 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
 	ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
 
-	if (rqd->meta_list)
-		nvm_dev_dma_free(dev->parent, rqd->meta_list,
-							rqd->dma_meta_list);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 
 	bio_put(rqd->bio);
 	pblk_free_rqd(pblk, rqd, WRITE);
@@ -224,9 +222,6 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	if (!rqd->meta_list)
 		return -ENOMEM;
 
-	if (unlikely(nr_secs == 1))
-		return 0;
-
 	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
 	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
-- 
GitLab


From 7f56c30bd0a232822aca38d288da475613bdff9b Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 7 Jul 2017 15:37:38 -0600
Subject: [PATCH 0945/1958] vfio: Remove unnecessary uses of
 vfio_container.group_lock

The original intent of vfio_container.group_lock is to protect
vfio_container.group_list, however over time it's become a crutch to
prevent changes in container composition any time we call into the
iommu driver backend.  This introduces problems when we start to have
more complex interactions, for example when a user's DMA unmap request
triggers a notification to an mdev vendor driver, who responds by
attempting to unpin mappings within that request, re-entering the
iommu backend.  We incorrectly assume that the use of read-locks here
allow for this nested locking behavior, but a poorly timed write-lock
could in fact trigger a deadlock.

The current use of group_lock seems to fall into the trap of locking
code, not data.  Correct that by removing uses of group_lock that are
not directly related to group_list.  Note that the vfio type1 iommu
backend has its own mutex, vfio_iommu.lock, which it uses to protect
itself for each of these interfaces anyway.  The group_lock appears to
be a redundancy for these interfaces and type1 even goes so far as to
release its mutex to allow for exactly the re-entrant code path above.

Reported-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: stable@vger.kernel.org # v4.10+
---
 drivers/vfio/vfio.c | 38 --------------------------------------
 1 file changed, 38 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 7597a377eb4e4..330d50582f400 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1175,15 +1175,11 @@ static long vfio_fops_unl_ioctl(struct file *filep,
 		ret = vfio_ioctl_set_iommu(container, arg);
 		break;
 	default:
-		down_read(&container->group_lock);
-
 		driver = container->iommu_driver;
 		data = container->iommu_data;
 
 		if (driver) /* passthrough all unrecognized ioctls */
 			ret = driver->ops->ioctl(data, cmd, arg);
-
-		up_read(&container->group_lock);
 	}
 
 	return ret;
@@ -1237,15 +1233,11 @@ static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
 	struct vfio_iommu_driver *driver;
 	ssize_t ret = -EINVAL;
 
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->read))
 		ret = driver->ops->read(container->iommu_data,
 					buf, count, ppos);
 
-	up_read(&container->group_lock);
-
 	return ret;
 }
 
@@ -1256,15 +1248,11 @@ static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
 	struct vfio_iommu_driver *driver;
 	ssize_t ret = -EINVAL;
 
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->write))
 		ret = driver->ops->write(container->iommu_data,
 					 buf, count, ppos);
 
-	up_read(&container->group_lock);
-
 	return ret;
 }
 
@@ -1274,14 +1262,10 @@ static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
 	struct vfio_iommu_driver *driver;
 	int ret = -EINVAL;
 
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->mmap))
 		ret = driver->ops->mmap(container->iommu_data, vma);
 
-	up_read(&container->group_lock);
-
 	return ret;
 }
 
@@ -1993,8 +1977,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
 		goto err_pin_pages;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->pin_pages))
 		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
@@ -2002,7 +1984,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 err_pin_pages:
@@ -2042,8 +2023,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
 		goto err_unpin_pages;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unpin_pages))
 		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
@@ -2051,7 +2030,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 err_unpin_pages:
@@ -2073,8 +2051,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
 		return -EINVAL;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->register_notifier))
 		ret = driver->ops->register_notifier(container->iommu_data,
@@ -2082,7 +2058,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;
@@ -2100,8 +2075,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 		return -EINVAL;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unregister_notifier))
 		ret = driver->ops->unregister_notifier(container->iommu_data,
@@ -2109,7 +2082,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;
@@ -2127,7 +2099,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 					unsigned long *events,
 					struct notifier_block *nb)
 {
-	struct vfio_container *container;
 	int ret;
 	bool set_kvm = false;
 
@@ -2145,9 +2116,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 	if (ret)
 		return -EINVAL;
 
-	container = group->container;
-	down_read(&container->group_lock);
-
 	ret = blocking_notifier_chain_register(&group->notifier, nb);
 
 	/*
@@ -2158,7 +2126,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 		blocking_notifier_call_chain(&group->notifier,
 					VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;
@@ -2167,19 +2134,14 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 static int vfio_unregister_group_notifier(struct vfio_group *group,
 					 struct notifier_block *nb)
 {
-	struct vfio_container *container;
 	int ret;
 
 	ret = vfio_group_add_container_user(group);
 	if (ret)
 		return -EINVAL;
 
-	container = group->container;
-	down_read(&container->group_lock);
-
 	ret = blocking_notifier_chain_unregister(&group->notifier, nb);
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;
-- 
GitLab


From b9504247a6eb2bd68e3f620ca1f3fbe7309123ea Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sun, 25 Jun 2017 20:22:57 +0200
Subject: [PATCH 0946/1958] mtd: Fix check in mtd_unpoint()

The code checks that ->_point is not NULL, but we should actually check
->_unpoint value which is dereferenced a few lines after the check.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtdcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 1517da3ddd7d0..956382cea2568 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -991,7 +991,7 @@ EXPORT_SYMBOL_GPL(mtd_point);
 /* We probably shouldn't allow XIP if the unpoint isn't a NULL */
 int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 {
-	if (!mtd->_point)
+	if (!mtd->_unpoint)
 		return -EOPNOTSUPP;
 	if (from < 0 || from >= mtd->size || len > mtd->size - from)
 		return -EINVAL;
-- 
GitLab


From cd87d867920155911d0d2e6485b769d853547750 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 7 Jul 2017 18:55:17 -0700
Subject: [PATCH 0947/1958] xfs: don't crash on unexpected holes in dir/attr
 btrees

In quite a few places we call xfs_da_read_buf with a mappedbno that we
don't control, then assume that the function passes back either an error
code or a buffer pointer.  Unfortunately, if mappedbno == -2 and bno
maps to a hole, we get a return code of zero and a NULL buffer, which
means that we crash if we actually try to use that buffer pointer.  This
happens immediately when we set the buffer type for transaction context.

Therefore, check that we have no error code and a non-NULL bp before
trying to use bp.  This patch is a follow-up to an incomplete fix in
96a3aefb8ffde231 ("xfs: don't crash if reading a directory results in an
unexpected hole").

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c  | 2 +-
 fs/xfs/libxfs/xfs_da_btree.c   | 2 +-
 fs/xfs/libxfs/xfs_dir2_block.c | 2 +-
 fs/xfs/libxfs/xfs_dir2_leaf.c  | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 2852521fc8ecf..c6c15e5717e42 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -351,7 +351,7 @@ xfs_attr3_leaf_read(
 
 	err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
 				XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
 	return err;
 }
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 356f21d5cd357..6d4335815c3f8 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -263,7 +263,7 @@ xfs_da3_node_read(
 
 	err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
 					which_fork, &xfs_da3_node_buf_ops);
-	if (!err && tp) {
+	if (!err && tp && *bpp) {
 		struct xfs_da_blkinfo	*info = (*bpp)->b_addr;
 		int			type;
 
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index aa17cb788946b..43c902f7a68d4 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -139,7 +139,7 @@ xfs_dir3_block_read(
 
 	err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
 				XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
 	return err;
 }
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 7002024a5d0d4..27297a689d9c2 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -268,7 +268,7 @@ xfs_dir3_leaf_read(
 
 	err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
 				XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
 	return err;
 }
@@ -285,7 +285,7 @@ xfs_dir3_leafn_read(
 
 	err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
 				XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
 	return err;
 }
-- 
GitLab


From 8f1a357d41a22009150cf404b5aa5876efdb59b1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 6 Jul 2017 20:26:17 +0300
Subject: [PATCH 0948/1958] i2c: Provide a stub for i2c_detect_slave_mode()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drivers would like to call i2c_detect_slave_mode() even if !I2C_SLAVE.
Give them what they want to,

Otherwise kernel will not compile:
drivers/i2c/busses/i2c-designware-platdrv.c: In function ‘dw_i2c_plat_probe’:
drivers/i2c/busses/i2c-designware-platdrv.c:331:6: error: implicit declaration of function ‘i2c_detect_slave_mode’ [-Werror=implicit-function-declaration]
  if (i2c_detect_slave_mode(&pdev->dev))
      ^~~~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors

Fixes: 6e38cf3b4421 ("i2c: designware: Let slave adapter support be optional")
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 72d0ece70ed30..00ca5b86a753f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -295,6 +295,8 @@ static inline int i2c_slave_event(struct i2c_client *client,
 {
 	return client->slave_cb(client, event, val);
 }
+#else
+static inline bool i2c_detect_slave_mode(struct device *dev) { return false; }
 #endif
 
 /**
-- 
GitLab


From dac953401c9722cca04a4a884e82e32fe82c0dad Mon Sep 17 00:00:00 2001
From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Date: Wed, 28 Jun 2017 22:37:00 -0500
Subject: [PATCH 0949/1958] cifs: prototype declaration and definition to set
 acl for smb 2 - 3 and cifsacl mount options

Modified current set info function to accommodate multiple info types and
additional information.

Added cifs acl specific function to invoke set info functionality.

Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2pdu.c   | 39 +++++++++++++++++++++++++++------------
 fs/cifs/smb2proto.h |  3 +++
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 4938e8b6d32fa..c3c8eaff56dfd 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3000,8 +3000,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
 
 static int
 send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
-	       u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class,
-	       unsigned int num, void **data, unsigned int *size)
+	       u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
+	       u8 info_type, u32 additional_info, unsigned int num,
+		void **data, unsigned int *size)
 {
 	struct smb2_set_info_req *req;
 	struct smb2_set_info_rsp *rsp = NULL;
@@ -3037,10 +3038,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 	req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
 
-	req->InfoType = SMB2_O_INFO_FILE;
+	req->InfoType = info_type;
 	req->FileInfoClass = info_class;
 	req->PersistentFileId = persistent_fid;
 	req->VolatileFileId = volatile_fid;
+	req->AdditionalInformation = cpu_to_le32(additional_info);
 
 	/* 4 for RFC1001 length and 1 for Buffer */
 	req->BufferOffset =
@@ -3100,8 +3102,8 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
 	size[1] = len + 2 /* null */;
 
 	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			   current->tgid, FILE_RENAME_INFORMATION, 2, data,
-			   size);
+		current->tgid, FILE_RENAME_INFORMATION, SMB2_O_INFO_FILE,
+		0, 2, data, size);
 	kfree(data);
 	return rc;
 }
@@ -3118,8 +3120,8 @@ SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
 	size = 1; /* sizeof __u8 */
 
 	return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			current->tgid, FILE_DISPOSITION_INFORMATION, 1, &data,
-			&size);
+		current->tgid, FILE_DISPOSITION_INFORMATION, SMB2_O_INFO_FILE,
+		0, 1, &data, &size);
 }
 
 int
@@ -3148,7 +3150,8 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
 	size[1] = len + 2 /* null */;
 
 	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			   current->tgid, FILE_LINK_INFORMATION, 2, data, size);
+			current->tgid, FILE_LINK_INFORMATION, SMB2_O_INFO_FILE,
+			0, 2, data, size);
 	kfree(data);
 	return rc;
 }
@@ -3168,10 +3171,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 
 	if (is_falloc)
 		return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			pid, FILE_ALLOCATION_INFORMATION, 1, &data, &size);
+			pid, FILE_ALLOCATION_INFORMATION, SMB2_O_INFO_FILE,
+			0, 1, &data, &size);
 	else
 		return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
+			pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE,
+			0, 1, &data, &size);
 }
 
 int
@@ -3181,8 +3186,18 @@ SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 	unsigned int size;
 	size = sizeof(FILE_BASIC_INFO);
 	return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			     current->tgid, FILE_BASIC_INFORMATION, 1,
-			     (void **)&buf, &size);
+		current->tgid, FILE_BASIC_INFORMATION, SMB2_O_INFO_FILE,
+		0, 1, (void **)&buf, &size);
+}
+
+int
+SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
+		u64 persistent_fid, u64 volatile_fid,
+		struct cifs_ntsd *pnntsd, int pacllen, int aclflag)
+{
+	return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+			current->tgid, 0, SMB2_O_INFO_SECURITY, aclflag,
+			1, (void **)&pnntsd, &pacllen);
 }
 
 int
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 3595cd7551477..1cadaf9f3c588 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -166,6 +166,9 @@ extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_fid, u64 volatile_fid,
 			 FILE_BASIC_INFO *buf);
+extern int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
+			u64 persistent_fid, u64 volatile_fid,
+			struct cifs_ntsd *pnntsd, int pacllen, int aclflag);
 extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 				u64 persistent_fid, u64 volatile_fid);
 extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
-- 
GitLab


From 366ed846df607a79b4d9b52b097c01f9d53b9d2a Mon Sep 17 00:00:00 2001
From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Date: Wed, 28 Jun 2017 22:37:32 -0500
Subject: [PATCH 0950/1958] cifs: Use smb 2 - 3 and cifsacl mount options
 setacl function

Added set acl function. Very similar to set cifs acl function for smb1.

Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2ops.c | 63 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ccbb397debbc2..ed98daa0891cf 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1370,6 +1370,63 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
 	return pntsd;
 }
 
+#ifdef CONFIG_CIFS_ACL
+static int
+set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
+		struct inode *inode, const char *path, int aclflag)
+{
+	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+	unsigned int xid;
+	int rc, access_flags = 0;
+	struct cifs_tcon *tcon;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
+	struct cifs_fid fid;
+	struct cifs_open_parms oparms;
+	__le16 *utf16_path;
+
+	cifs_dbg(FYI, "set smb3 acl for path %s\n", path);
+	if (IS_ERR(tlink))
+		return PTR_ERR(tlink);
+
+	tcon = tlink_tcon(tlink);
+	xid = get_xid();
+
+	if (backup_cred(cifs_sb))
+		oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+	else
+		oparms.create_options = 0;
+
+	if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP)
+		access_flags = WRITE_OWNER;
+	else
+		access_flags = WRITE_DAC;
+
+	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
+	if (!utf16_path)
+		return -ENOMEM;
+
+	oparms.tcon = tcon;
+	oparms.desired_access = access_flags;
+	oparms.disposition = FILE_OPEN;
+	oparms.path = path;
+	oparms.fid = &fid;
+	oparms.reconnect = false;
+
+	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+	kfree(utf16_path);
+	if (!rc) {
+		rc = SMB2_set_acl(xid, tlink_tcon(tlink), fid.persistent_fid,
+			    fid.volatile_fid, pnntsd, acllen, aclflag);
+		SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	}
+
+	cifs_put_tlink(tlink);
+	free_xid(xid);
+	return rc;
+}
+#endif /* CIFS_ACL */
+
 /* Retrieve an ACL from the server */
 static struct cifs_ntsd *
 get_smb2_acl(struct cifs_sb_info *cifs_sb,
@@ -2498,7 +2555,7 @@ struct smb_version_operations smb20_operations = {
 #ifdef CONFIG_CIFS_ACL
 	.get_acl = get_smb2_acl,
 	.get_acl_by_fid = get_smb2_acl_by_fid,
-/*	.set_acl = set_smb3_acl, */
+	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
 };
 
@@ -2587,7 +2644,7 @@ struct smb_version_operations smb21_operations = {
 #ifdef CONFIG_CIFS_ACL
 	.get_acl = get_smb2_acl,
 	.get_acl_by_fid = get_smb2_acl_by_fid,
-/*	.set_acl = set_smb3_acl, */
+	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
 };
 
@@ -2686,7 +2743,7 @@ struct smb_version_operations smb30_operations = {
 #ifdef CONFIG_CIFS_ACL
 	.get_acl = get_smb2_acl,
 	.get_acl_by_fid = get_smb2_acl_by_fid,
-/*	.set_acl = set_smb3_acl, */
+	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
 };
 
-- 
GitLab


From 4395d484b98154b28f895b722681710bdd40376c Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilov@microsoft.com>
Date: Sat, 8 Jul 2017 14:17:37 -0700
Subject: [PATCH 0951/1958] CIFS: Display SMB2 error codes in the hex format

Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2maperror.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 3030a9dfb0ddc..7ca9808a0daa0 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -2475,8 +2475,8 @@ map_smb2_to_linux_error(char *buf, bool log_err)
 
 	/* on error mapping not found  - return EIO */
 
-	cifs_dbg(FYI, "Mapping SMB2 status code %d to POSIX err %d\n",
-		 smb2err, rc);
+	cifs_dbg(FYI, "Mapping SMB2 status code 0x%08x to POSIX err %d\n",
+		 __le32_to_cpu(smb2err), rc);
 
 	return rc;
 }
-- 
GitLab


From 511c54a2f69195b28afb9dd119f03787b1625bb4 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilov@microsoft.com>
Date: Sat, 8 Jul 2017 14:32:00 -0700
Subject: [PATCH 0952/1958] CIFS: Reconnect expired SMB sessions

According to the MS-SMB2 spec (3.2.5.1.6) once the client receives
STATUS_NETWORK_SESSION_EXPIRED error code from a server it should
reconnect the current SMB session. Currently the client doesn't do
that. This can result in subsequent client requests failing by
the server. The patch adds an additional logic to the demultiplex
thread to identify expired sessions and reconnect them.

Cc: <stable@vger.kernel.org>
Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  2 ++
 fs/cifs/cifssmb.c  |  7 +++++++
 fs/cifs/connect.c  |  7 +++++++
 fs/cifs/smb2ops.c  | 23 +++++++++++++++++++++++
 4 files changed, 39 insertions(+)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bcc7d9acad64b..fb482515a64b5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -367,6 +367,8 @@ struct smb_version_operations {
 	unsigned int (*calc_smb_size)(void *);
 	/* check for STATUS_PENDING and process it in a positive case */
 	bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
+	/* check for STATUS_NETWORK_SESSION_EXPIRED */
+	bool (*is_session_expired)(char *);
 	/* send oplock break response */
 	int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
 			       struct cifsInodeInfo *);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index fbb0d4cbda413..72a53bd198656 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1460,6 +1460,13 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		return length;
 	server->total_read += length;
 
+	if (server->ops->is_session_expired &&
+	    server->ops->is_session_expired(buf)) {
+		cifs_reconnect(server);
+		wake_up(&server->response_q);
+		return -1;
+	}
+
 	if (server->ops->is_status_pending &&
 	    server->ops->is_status_pending(buf, server, 0)) {
 		cifs_discard_remaining_data(server);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9365c0cf77adb..c59d77f64f741 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -812,6 +812,13 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		cifs_dump_mem("Bad SMB: ", buf,
 			min_t(unsigned int, server->total_read, 48));
 
+	if (server->ops->is_session_expired &&
+	    server->ops->is_session_expired(buf)) {
+		cifs_reconnect(server);
+		wake_up(&server->response_q);
+		return -1;
+	}
+
 	if (server->ops->is_status_pending &&
 	    server->ops->is_status_pending(buf, server, length))
 		return -1;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ed98daa0891cf..cfacf2c97e941 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1036,6 +1036,18 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
 	return true;
 }
 
+static bool
+smb2_is_session_expired(char *buf)
+{
+	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+
+	if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED)
+		return false;
+
+	cifs_dbg(FYI, "Session expired\n");
+	return true;
+}
+
 static int
 smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 		     struct cifsInodeInfo *cinode)
@@ -2217,6 +2229,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 		return -ENOTSUPP;
 	}
 
+	if (server->ops->is_session_expired &&
+	    server->ops->is_session_expired(buf)) {
+		cifs_reconnect(server);
+		wake_up(&server->response_q);
+		return -1;
+	}
+
 	if (server->ops->is_status_pending &&
 			server->ops->is_status_pending(buf, server, 0))
 		return -1;
@@ -2534,6 +2553,7 @@ struct smb_version_operations smb20_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
@@ -2622,6 +2642,7 @@ struct smb_version_operations smb21_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
@@ -2712,6 +2733,7 @@ struct smb_version_operations smb30_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
@@ -2812,6 +2834,7 @@ struct smb_version_operations smb311_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
-- 
GitLab


From 2a38e12053b760a8f5e85030eb89512660077c15 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Sat, 8 Jul 2017 18:48:15 -0500
Subject: [PATCH 0953/1958] [SMB3] Remove ifdef since SMB3 (and later) now
 STRONGLY preferred

Remove the CONFIG_CIFS_SMB2 ifdef and Kconfig option since they
must always be on now.

For various security reasons, SMB3 and later are STRONGLY preferred
over CIFS and older dialects, and SMB3 (and later) will now be
the default dialects so we do not want to allow them to be
ifdeffed out.

In the longer term, we may be able to make older CIFS support
disableable in Kconfig with a new set of #ifdef, but we always
want SMB3 and later support enabled.

Signed-off-by: Steven French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/Kconfig        | 83 ++++++++++++++++--------------------------
 fs/cifs/Makefile       |  7 ++--
 fs/cifs/cifs_unicode.c |  2 -
 fs/cifs/cifs_unicode.h |  2 -
 fs/cifs/cifsfs.c       | 13 ++-----
 fs/cifs/cifsglob.h     | 18 ---------
 fs/cifs/connect.c      | 28 --------------
 fs/cifs/ioctl.c        |  2 -
 fs/cifs/link.c         |  4 --
 fs/cifs/misc.c         | 11 +-----
 10 files changed, 39 insertions(+), 131 deletions(-)

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index afeefe79c25e7..f7243617316c0 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -1,5 +1,5 @@
 config CIFS
-	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
+	tristate "SMB3 and CIFS support (advanced network filesystem)"
 	depends on INET
 	select NLS
 	select CRYPTO
@@ -10,28 +10,35 @@ config CIFS
 	select CRYPTO_ECB
 	select CRYPTO_DES
 	help
-	  This is the client VFS module for the Common Internet File System
-	  (CIFS) protocol which is the successor to the Server Message Block
-	  (SMB) protocol, the native file sharing mechanism for most early
-	  PC operating systems.  The CIFS protocol is fully supported by
-	  file servers such as Windows 2000 (including Windows 2003, Windows 2008,
-	  NT 4 and Windows XP) as well by Samba (which provides excellent CIFS
+	  This is the client VFS module for the SMB3 family of NAS protocols,
+	  as well as for earlier dialects such as SMB2.1, SMB2 and the
+	  Common Internet File System (CIFS) protocol.  CIFS was the successor
+	  to the original dialect, the Server Message Block (SMB) protocol, the
+	  native file sharing mechanism for most early PC operating systems.
+
+	  The SMB3 protocol is supported by most modern operating systems and
+	  NAS appliances (e.g. Samba, Windows 8, Windows 2012, MacOS).
+	  The older CIFS protocol was included in Windows NT4, 2000 and XP (and
+	  later) as well by Samba (which provides excellent CIFS and SMB3
 	  server support for Linux and many other operating systems). Limited
-	  support for OS/2 and Windows ME and similar servers is provided as
-	  well.
+	  support for OS/2 and Windows ME and similar very old servers is
+	  provided as well.
 
-	  The module also provides optional support for the followon
-	  protocols for CIFS including SMB3, which enables
-	  useful performance and security features (see the description
-	  of CONFIG_CIFS_SMB2).
-
-	  The cifs module provides an advanced network file system
-	  client for mounting to CIFS compliant servers.  It includes
+	  The cifs module provides an advanced network file system client
+	  for mounting to SMB3 (and CIFS) compliant servers.  It includes
 	  support for DFS (hierarchical name space), secure per-user
 	  session establishment via Kerberos or NTLM or NTLMv2,
 	  safe distributed caching (oplock), optional packet
 	  signing, Unicode and other internationalization improvements.
-	  If you need to mount to Samba or Windows from this machine, say Y.
+
+	  In general, the default dialects, SMB3 and later, enable better
+	  performance, security and features, than would be possible with CIFS.
+	  Note that when mounting to Samba, due to the CIFS POSIX extensions,
+	  CIFS mounts can provide slightly better POSIX compatibility
+	  than SMB3 mounts. SMB2/SMB3 mount options are also
+	  slightly simpler (compared to CIFS) due to protocol improvements.
+
+	  If you need to mount to Samba, Macs or Windows from this machine, say Y.
 
 config CIFS_STATS
         bool "CIFS statistics"
@@ -89,7 +96,7 @@ config CIFS_UPCALL
 	  Enables an upcall mechanism for CIFS which accesses userspace helper
 	  utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets
 	  which are needed to mount to certain secure servers (for which more
-	  secure Kerberos authentication is required). If unsure, say N.
+	  secure Kerberos authentication is required). If unsure, say Y.
 
 config CIFS_XATTR
         bool "CIFS extended attributes"
@@ -105,7 +112,7 @@ config CIFS_XATTR
           (used by some filesystems to store ACLs) is not supported at
           this time.
 
-          If unsure, say N.
+          If unsure, say Y.
 
 config CIFS_POSIX
         bool "CIFS POSIX Extensions"
@@ -125,7 +132,7 @@ config CIFS_ACL
 	  help
 	    Allows fetching CIFS/NTFS ACL from the server.  The DACL blob
 	    is handed over to the application/caller.  See the man
-	    page for getcifsacl for more information.
+	    page for getcifsacl for more information.  If unsure, say Y.
 
 config CIFS_DEBUG
 	bool "Enable CIFS debugging routines"
@@ -148,12 +155,13 @@ config CIFS_DEBUG2
 
 config CIFS_DEBUG_DUMP_KEYS
 	bool "Dump encryption keys for offline decryption (Unsafe)"
-	depends on CIFS_DEBUG && CIFS_SMB2
+	depends on CIFS_DEBUG
 	help
 	   Enabling this will dump the encryption and decryption keys
 	   used to communicate on an encrypted share connection on the
 	   console. This allows Wireshark to decrypt and dissect
 	   encrypted network captures. Enable this carefully.
+	   If unsure, say N.
 
 config CIFS_DFS_UPCALL
 	  bool "DFS feature support"
@@ -166,7 +174,7 @@ config CIFS_DFS_UPCALL
 	    an upcall mechanism for CIFS which contacts userspace helper
 	    utilities to provide server name resolution (host names to
 	    IP addresses) which is needed for implicit mounts of DFS junction
-	    points. If unsure, say N.
+	    points. If unsure, say Y.
 
 config CIFS_NFSD_EXPORT
 	  bool "Allow nfsd to export CIFS file system"
@@ -174,38 +182,9 @@ config CIFS_NFSD_EXPORT
 	  help
 	   Allows NFS server to export a CIFS mounted share (nfsd over cifs)
 
-config CIFS_SMB2
-	bool "SMB2 and SMB3 network file system support"
-	depends on CIFS
-	select KEYS
-	select FSCACHE
-	select DNS_RESOLVER
-	select CRYPTO_AES
-	select CRYPTO_SHA256
-	select CRYPTO_CMAC
-	select CRYPTO_AEAD2
-	select CRYPTO_CCM
-
-	help
-	  This enables support for the Server Message Block version 2
-	  family of protocols, including SMB3.  SMB3 support is
-	  enabled on mount by specifying "vers=3.0" in the mount
-	  options. These protocols are the successors to the popular
-	  CIFS and SMB network file sharing protocols. SMB3 is the
-	  native file sharing mechanism for the more recent
-	  versions of Windows (Windows 8 and Windows 2012 and
-	  later) and Samba server and many others support SMB3 well.
-	  In general SMB3 enables better performance, security
-	  and features, than would be possible with CIFS (Note that
-	  when mounting to Samba, due to the CIFS POSIX extensions,
-	  CIFS mounts can provide slightly better POSIX compatibility
-	  than SMB3 mounts do though). Note that SMB2/SMB3 mount
-	  options are also slightly simpler (compared to CIFS) due
-	  to protocol improvements.
-
 config CIFS_SMB311
 	bool "SMB3.1.1 network file system support (Experimental)"
-	depends on CIFS_SMB2
+	depends on CIFS
 
 	help
 	  This enables experimental support for the newest, SMB3.1.1, dialect.
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index eed7eb09f46f3..5e853a395b92f 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
 	  link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
 	  cifs_unicode.o nterr.o cifsencrypt.o \
-	  readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o
+	  readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
+	  smb2ops.o smb2maperror.o smb2transport.o \
+	  smb2misc.o smb2pdu.o smb2inode.o smb2file.o
 
 cifs-$(CONFIG_CIFS_XATTR) += xattr.o
 cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
@@ -16,6 +18,3 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
 
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
-
-cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \
-			    smb2misc.o smb2pdu.o smb2inode.o smb2file.o
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index e0445e2075b2e..b380e0871372d 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -588,7 +588,6 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 	return j;
 }
 
-#ifdef CONFIG_CIFS_SMB2
 /*
  * cifs_local_to_utf16_bytes - how long will a string be after conversion?
  * @from - pointer to input string
@@ -647,4 +646,3 @@ cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
 	*utf16_len = len;
 	return dst;
 }
-#endif /* CONFIG_CIFS_SMB2 */
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 8a79a34e66b8b..8360b74530a9f 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -116,11 +116,9 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen,
 extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
 			      const struct nls_table *cp, int mapChars);
 extern int cifs_remap(struct cifs_sb_info *cifs_sb);
-#ifdef CONFIG_CIFS_SMB2
 extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
 				     int *utf16_len, const struct nls_table *cp,
 				     int remap);
-#endif /* CONFIG_CIFS_SMB2 */
 #endif
 
 wchar_t cifs_toupper(wchar_t in);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9a1667e0e8d60..556f480c6936a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -51,9 +51,7 @@
 #include <linux/key-type.h>
 #include "cifs_spnego.h"
 #include "fscache.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
-#endif
 
 int cifsFYI = 0;
 bool traceSMB;
@@ -277,9 +275,8 @@ cifs_alloc_inode(struct super_block *sb)
 	cifs_inode->uniqueid = 0;
 	cifs_inode->createtime = 0;
 	cifs_inode->epoch = 0;
-#ifdef CONFIG_CIFS_SMB2
 	generate_random_uuid(cifs_inode->lease_key);
-#endif
+
 	/*
 	 * Can not set i_flags here - they get immediately overwritten to zero
 	 * by the VFS.
@@ -1213,14 +1210,12 @@ cifs_destroy_inodecache(void)
 static int
 cifs_init_request_bufs(void)
 {
-	size_t max_hdr_size = MAX_CIFS_HDR_SIZE;
-#ifdef CONFIG_CIFS_SMB2
 	/*
 	 * SMB2 maximum header size is bigger than CIFS one - no problems to
 	 * allocate some more bytes for CIFS.
 	 */
-	max_hdr_size = MAX_SMB2_HDR_SIZE;
-#endif
+	size_t max_hdr_size = MAX_SMB2_HDR_SIZE;
+
 	if (CIFSMaxBufSize < 8192) {
 	/* Buffer size can not be smaller than 2 * PATH_MAX since maximum
 	Unicode path name has to fit in any SMB/CIFS path based frames */
@@ -1476,12 +1471,10 @@ MODULE_SOFTDEP("pre: hmac");
 MODULE_SOFTDEP("pre: md4");
 MODULE_SOFTDEP("pre: md5");
 MODULE_SOFTDEP("pre: nls");
-#ifdef CONFIG_CIFS_SMB2
 MODULE_SOFTDEP("pre: aes");
 MODULE_SOFTDEP("pre: cmac");
 MODULE_SOFTDEP("pre: sha256");
 MODULE_SOFTDEP("pre: aead2");
 MODULE_SOFTDEP("pre: ccm");
-#endif /* CONFIG_CIFS_SMB2 */
 module_init(init_cifs)
 module_exit(exit_cifs)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index fb482515a64b5..221693fe49ec8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -29,9 +29,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/scatterlist.h>
 #include <uapi/linux/cifs/cifs_mount.h>
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
-#endif
 
 #define CIFS_MAGIC_NUMBER 0xFF534D42      /* the first four bytes of SMB PDUs */
 
@@ -612,12 +610,10 @@ struct TCP_Server_Info {
 	__u16 sec_mode;
 	bool sign; /* is signing enabled on this connection? */
 	bool session_estab; /* mark when very first sess is established */
-#ifdef CONFIG_CIFS_SMB2
 	int echo_credits;  /* echo reserved slots */
 	int oplock_credits;  /* oplock break reserved slots */
 	bool echoes:1; /* enable echoes */
 	__u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */
-#endif
 	u16 dialect; /* dialect index that server chose */
 	bool oplocks:1; /* enable oplocks */
 	unsigned int maxReq;	/* Clients should submit no more */
@@ -661,13 +657,11 @@ struct TCP_Server_Info {
 	atomic_t in_send; /* requests trying to send */
 	atomic_t num_waiters;   /* blocked waiting to get in sendrecv */
 #endif
-#ifdef CONFIG_CIFS_SMB2
 	unsigned int	max_read;
 	unsigned int	max_write;
 	__u8		preauth_hash[512];
 	struct delayed_work reconnect; /* reconnect workqueue job */
 	struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
-#endif /* CONFIG_CIFS_SMB2 */
 	unsigned long echo_interval;
 };
 
@@ -849,13 +843,11 @@ struct cifs_ses {
 	bool sign;		/* is signing required? */
 	bool need_reconnect:1; /* connection reset, uid now invalid */
 	bool domainAuto:1;
-#ifdef CONFIG_CIFS_SMB2
 	__u16 session_flags;
 	__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
 	__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
 	__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
 	__u8 preauth_hash[512];
-#endif /* CONFIG_CIFS_SMB2 */
 };
 
 static inline bool
@@ -907,12 +899,10 @@ struct cifs_tcon {
 			atomic_t num_acl_get;
 			atomic_t num_acl_set;
 		} cifs_stats;
-#ifdef CONFIG_CIFS_SMB2
 		struct {
 			atomic_t smb2_com_sent[NUMBER_OF_SMB2_COMMANDS];
 			atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS];
 		} smb2_stats;
-#endif /* CONFIG_CIFS_SMB2 */
 	} stats;
 #ifdef CONFIG_CIFS_STATS2
 	unsigned long long time_writes;
@@ -948,7 +938,6 @@ struct cifs_tcon {
 	bool need_reopen_files:1; /* need to reopen tcon file handles */
 	bool use_resilient:1; /* use resilient instead of durable handles */
 	bool use_persistent:1; /* use persistent instead of durable handles */
-#ifdef CONFIG_CIFS_SMB2
 	bool print:1;		/* set if connection to printer share */
 	__le32 capabilities;
 	__u32 share_flags;
@@ -961,7 +950,6 @@ struct cifs_tcon {
 	__u32 max_chunks;
 	__u32 max_bytes_chunk;
 	__u32 max_bytes_copy;
-#endif /* CONFIG_CIFS_SMB2 */
 #ifdef CONFIG_CIFS_FSCACHE
 	u64 resource_id;		/* server resource id */
 	struct fscache_cookie *fscache;	/* cookie for share */
@@ -1064,12 +1052,10 @@ struct cifs_open_parms {
 
 struct cifs_fid {
 	__u16 netfid;
-#ifdef CONFIG_CIFS_SMB2
 	__u64 persistent_fid;	/* persist file id for smb2 */
 	__u64 volatile_fid;	/* volatile file id for smb2 */
 	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for smb2 */
 	__u8 create_guid[16];
-#endif
 	struct cifs_pending_open *pending_open;
 	unsigned int epoch;
 	bool purge_cache;
@@ -1107,10 +1093,8 @@ struct cifsFileInfo {
 
 struct cifs_io_parms {
 	__u16 netfid;
-#ifdef CONFIG_CIFS_SMB2
 	__u64 persistent_fid;	/* persist file id for smb2 */
 	__u64 volatile_fid;	/* volatile file id for smb2 */
-#endif
 	__u32 pid;
 	__u64 offset;
 	unsigned int length;
@@ -1236,9 +1220,7 @@ struct cifsInodeInfo {
 	u64  server_eof;		/* current file size on server -- protected by i_lock */
 	u64  uniqueid;			/* server inode number */
 	u64  createtime;		/* creation time on server */
-#ifdef CONFIG_CIFS_SMB2
 	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for this inode */
-#endif
 #ifdef CONFIG_CIFS_FSCACHE
 	struct fscache_cookie *fscache;
 #endif
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c59d77f64f741..6ab261cd0208e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -55,9 +55,7 @@
 #include "nterr.h"
 #include "rfc1002pdu.h"
 #include "fscache.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2proto.h"
-#endif
 
 #define CIFS_PORT 445
 #define RFC1001_PORT 139
@@ -341,9 +339,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 		server->tcpStatus = CifsNeedReconnect;
 	spin_unlock(&GlobalMid_Lock);
 	server->maxBuf = 0;
-#ifdef CONFIG_CIFS_SMB2
 	server->max_read = 0;
-#endif
 
 	cifs_dbg(FYI, "Reconnecting tcp session\n");
 
@@ -1129,7 +1125,6 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
 		vol->ops = &smb1_operations;
 		vol->vals = &smb1_values;
 		break;
-#ifdef CONFIG_CIFS_SMB2
 	case Smb_20:
 		vol->ops = &smb20_operations;
 		vol->vals = &smb20_values;
@@ -1152,7 +1147,6 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
 		vol->vals = &smb311_values;
 		break;
 #endif /* SMB311 */
-#endif
 	default:
 		cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
 		return 1;
@@ -2177,7 +2171,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 
 	cancel_delayed_work_sync(&server->echo);
 
-#ifdef CONFIG_CIFS_SMB2
 	if (from_reconnect)
 		/*
 		 * Avoid deadlock here: reconnect work calls
@@ -2188,7 +2181,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 		cancel_delayed_work(&server->reconnect);
 	else
 		cancel_delayed_work_sync(&server->reconnect);
-#endif
 
 	spin_lock(&GlobalMid_Lock);
 	server->tcpStatus = CifsExiting;
@@ -2254,17 +2246,13 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
 	INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
 	INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
-#ifdef CONFIG_CIFS_SMB2
 	INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
 	mutex_init(&tcp_ses->reconnect_mutex);
-#endif
 	memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
 	       sizeof(tcp_ses->srcaddr));
 	memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
 		sizeof(tcp_ses->dstaddr));
-#ifdef CONFIG_CIFS_SMB2
 	generate_random_uuid(tcp_ses->client_guid);
-#endif
 	/*
 	 * at this point we are the only ones with the pointer
 	 * to the struct since the kernel thread not created yet
@@ -2662,10 +2650,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 		return 0;
 	if (tcon->seal != volume_info->seal)
 		return 0;
-#ifdef CONFIG_CIFS_SMB2
 	if (tcon->snapshot_time != volume_info->snapshot_time)
 		return 0;
-#endif /* CONFIG_CIFS_SMB2 */
 	return 1;
 }
 
@@ -2740,7 +2726,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 	}
 
 	if (volume_info->snapshot_time) {
-#ifdef CONFIG_CIFS_SMB2
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 			     "Use SMB2 or later for snapshot mount option\n");
@@ -2748,11 +2733,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 			goto out_fail;
 		} else
 			tcon->snapshot_time = volume_info->snapshot_time;
-#else
-		cifs_dbg(VFS, "Snapshot mount option requires SMB2 support\n");
-		rc = -EOPNOTSUPP;
-		goto out_fail;
-#endif /* CONFIG_CIFS_SMB2 */
 	}
 
 	tcon->ses = ses;
@@ -2788,7 +2768,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 			     "SMB3 or later required for persistent handles\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#ifdef CONFIG_CIFS_SMB2
 		} else if (ses->server->capabilities &
 			   SMB2_GLOBAL_CAP_PERSISTENT_HANDLES)
 			tcon->use_persistent = true;
@@ -2797,15 +2776,12 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 				"Persistent handles not supported on share\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#endif /* CONFIG_CIFS_SMB2 */
 		}
-#ifdef CONFIG_CIFS_SMB2
 	} else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY)
 	     && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES)
 	     && (volume_info->nopersistent == false)) {
 		cifs_dbg(FYI, "enabling persistent handles\n");
 		tcon->use_persistent = true;
-#endif /* CONFIG_CIFS_SMB2 */
 	} else if (volume_info->resilient) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
@@ -2822,7 +2798,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 				 "SMB3 or later required for encryption\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#ifdef CONFIG_CIFS_SMB2
 		} else if (tcon->ses->server->capabilities &
 					SMB2_GLOBAL_CAP_ENCRYPTION)
 			tcon->seal = true;
@@ -2830,7 +2805,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 			cifs_dbg(VFS, "Encryption is not supported on share\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#endif /* CONFIG_CIFS_SMB2 */
 		}
 	}
 
@@ -3745,14 +3719,12 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
 		goto mount_fail_check;
 	}
 
-#ifdef CONFIG_CIFS_SMB2
 	if ((volume_info->persistent == true) && ((ses->server->capabilities &
 		SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) == 0)) {
 		cifs_dbg(VFS, "persistent handles not supported by server\n");
 		rc = -EOPNOTSUPP;
 		goto mount_fail_check;
 	}
-#endif /* CONFIG_CIFS_SMB2*/
 
 	/* search for existing tcon to this server share */
 	tcon = cifs_get_tcon(ses, volume_info);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 76fb0917dc8c3..54f32f9143a91 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -101,7 +101,6 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon,
 	fsinf->fs_attributes = le32_to_cpu(tcon->fsAttrInfo.Attributes);
 	fsinf->max_path_component =
 		le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength);
-#ifdef CONFIG_CIFS_SMB2
 	fsinf->vol_serial_number = tcon->vol_serial_number;
 	fsinf->vol_create_time = le64_to_cpu(tcon->vol_create_time);
 	fsinf->share_flags = tcon->share_flags;
@@ -110,7 +109,6 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon,
 	fsinf->optimal_sector_size = tcon->perf_sector_size;
 	fsinf->max_bytes_chunk = tcon->max_bytes_chunk;
 	fsinf->maximal_access = tcon->maximal_access;
-#endif /* SMB2 */
 	fsinf->cifs_posix_caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
 
 	if (copy_to_user(arg, fsinf, sizeof(struct smb_mnt_fs_info)))
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index c4d996f78e1c9..60b5a11ee11b3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -29,9 +29,7 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2proto.h"
-#endif
 
 /*
  * M-F Symlink Functions - Begin
@@ -402,7 +400,6 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 /*
  * SMB 2.1/SMB3 Protocol specific functions
  */
-#ifdef CONFIG_CIFS_SMB2
 int
 smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 		      struct cifs_sb_info *cifs_sb, const unsigned char *path,
@@ -525,7 +522,6 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 	kfree(utf16_path);
 	return rc;
 }
-#endif /* CONFIG_CIFS_SMB2 */
 
 /*
  * M-F Symlink Functions - End
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3b147dc6af634..eea93ac15ef03 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -30,9 +30,7 @@
 #include "smberr.h"
 #include "nterr.h"
 #include "cifs_unicode.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
-#endif
 
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
@@ -149,15 +147,12 @@ struct smb_hdr *
 cifs_buf_get(void)
 {
 	struct smb_hdr *ret_buf = NULL;
-	size_t buf_size = sizeof(struct smb_hdr);
-
-#ifdef CONFIG_CIFS_SMB2
 	/*
 	 * SMB2 header is bigger than CIFS one - no problems to clean some
 	 * more bytes for CIFS.
 	 */
-	buf_size = sizeof(struct smb2_hdr);
-#endif
+	size_t buf_size = sizeof(struct smb2_hdr);
+
 	/*
 	 * We could use negotiated size instead of max_msgsize -
 	 * but it may be more efficient to always alloc same size
@@ -620,9 +615,7 @@ void
 cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink,
 			     struct cifs_pending_open *open)
 {
-#ifdef CONFIG_CIFS_SMB2
 	memcpy(open->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE);
-#endif
 	open->oplock = CIFS_OPLOCK_NO_CHANGE;
 	open->tlink = tlink;
 	fid->pending_open = open;
-- 
GitLab


From eef914a9eb5eb83e60eb498315a491cd1edc13a1 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Sat, 8 Jul 2017 17:30:41 -0500
Subject: [PATCH 0954/1958] [SMB3] Improve security, move default dialect to
 SMB3 from old CIFS

Due to recent publicity about security vulnerabilities in the
much older CIFS dialect, move the default dialect to the
widely accepted (and quite secure) SMB3.0 dialect from the
old default of the CIFS dialect.

We do not want to be encouraging use of less secure dialects,
and both Microsoft and CERT now strongly recommend not using the
older CIFS dialect (SMB Security Best Practices
"recommends disabling SMBv1").

SMB3 is both secure and widely available: in Windows 8 and later,
Samba and Macs.

Users can still choose to explicitly mount with the less secure
dialect (for old servers) by choosing "vers=1.0" on the cifs
mount

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/connect.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6ab261cd0208e..59647eb72c5fb 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1272,9 +1272,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 	vol->actimeo = CIFS_DEF_ACTIMEO;
 
-	/* FIXME: add autonegotiation -- for now, SMB1 is default */
-	vol->ops = &smb1_operations;
-	vol->vals = &smb1_values;
+	/* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */
+	vol->ops = &smb30_operations; /* both secure and accepted widely */
+	vol->vals = &smb30_values;
 
 	vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
 
-- 
GitLab


From 3941dae15ed90437396389e8bb7d2d5b3e63ba4a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 20 Apr 2017 16:43:12 -0400
Subject: [PATCH 0955/1958] drm_dp_aux_dev: switch to read_iter/write_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/gpu/drm/drm_dp_aux_dev.c | 109 +++++++++++++------------------
 1 file changed, 46 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index ec1ed94b23902..d34e5096887a4 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -32,6 +32,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include <linux/uio.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drmP.h>
@@ -140,101 +141,83 @@ static loff_t auxdev_llseek(struct file *file, loff_t offset, int whence)
 	return fixed_size_llseek(file, offset, whence, AUX_MAX_OFFSET);
 }
 
-static ssize_t auxdev_read(struct file *file, char __user *buf, size_t count,
-			   loff_t *offset)
+static ssize_t auxdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	size_t bytes_pending, num_bytes_processed = 0;
-	struct drm_dp_aux_dev *aux_dev = file->private_data;
+	struct drm_dp_aux_dev *aux_dev = iocb->ki_filp->private_data;
+	loff_t pos = iocb->ki_pos;
 	ssize_t res = 0;
 
 	if (!atomic_inc_not_zero(&aux_dev->usecount))
 		return -ENODEV;
 
-	bytes_pending = min((loff_t)count, AUX_MAX_OFFSET - (*offset));
-
-	if (!access_ok(VERIFY_WRITE, buf, bytes_pending)) {
-		res = -EFAULT;
-		goto out;
-	}
+	iov_iter_truncate(to, AUX_MAX_OFFSET - pos);
 
-	while (bytes_pending > 0) {
-		uint8_t localbuf[DP_AUX_MAX_PAYLOAD_BYTES];
-		ssize_t todo = min_t(size_t, bytes_pending, sizeof(localbuf));
+	while (iov_iter_count(to)) {
+		uint8_t buf[DP_AUX_MAX_PAYLOAD_BYTES];
+		ssize_t todo = min(iov_iter_count(to), sizeof(buf));
 
 		if (signal_pending(current)) {
-			res = num_bytes_processed ?
-				num_bytes_processed : -ERESTARTSYS;
-			goto out;
+			res = -ERESTARTSYS;
+			break;
 		}
 
-		res = drm_dp_dpcd_read(aux_dev->aux, *offset, localbuf, todo);
-		if (res <= 0) {
-			res = num_bytes_processed ? num_bytes_processed : res;
-			goto out;
-		}
-		if (__copy_to_user(buf + num_bytes_processed, localbuf, res)) {
-			res = num_bytes_processed ?
-				num_bytes_processed : -EFAULT;
-			goto out;
+		res = drm_dp_dpcd_read(aux_dev->aux, pos, buf, todo);
+		if (res <= 0)
+			break;
+
+		if (copy_to_iter(buf, res, to) != res) {
+			res = -EFAULT;
+			break;
 		}
-		bytes_pending -= res;
-		*offset += res;
-		num_bytes_processed += res;
-		res = num_bytes_processed;
+
+		pos += res;
 	}
 
-out:
+	if (pos != iocb->ki_pos)
+		res = pos - iocb->ki_pos;
+	iocb->ki_pos = pos;
+
 	atomic_dec(&aux_dev->usecount);
 	wake_up_atomic_t(&aux_dev->usecount);
 	return res;
 }
 
-static ssize_t auxdev_write(struct file *file, const char __user *buf,
-			    size_t count, loff_t *offset)
+static ssize_t auxdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	size_t bytes_pending, num_bytes_processed = 0;
-	struct drm_dp_aux_dev *aux_dev = file->private_data;
+	struct drm_dp_aux_dev *aux_dev = iocb->ki_filp->private_data;
+	loff_t pos = iocb->ki_pos;
 	ssize_t res = 0;
 
 	if (!atomic_inc_not_zero(&aux_dev->usecount))
 		return -ENODEV;
 
-	bytes_pending = min((loff_t)count, AUX_MAX_OFFSET - *offset);
-
-	if (!access_ok(VERIFY_READ, buf, bytes_pending)) {
-		res = -EFAULT;
-		goto out;
-	}
+	iov_iter_truncate(from, AUX_MAX_OFFSET - pos);
 
-	while (bytes_pending > 0) {
-		uint8_t localbuf[DP_AUX_MAX_PAYLOAD_BYTES];
-		ssize_t todo = min_t(size_t, bytes_pending, sizeof(localbuf));
+	while (iov_iter_count(from)) {
+		uint8_t buf[DP_AUX_MAX_PAYLOAD_BYTES];
+		ssize_t todo = min(iov_iter_count(from), sizeof(buf));
 
 		if (signal_pending(current)) {
-			res = num_bytes_processed ?
-				num_bytes_processed : -ERESTARTSYS;
-			goto out;
+			res = -ERESTARTSYS;
+			break;
 		}
 
-		if (__copy_from_user(localbuf,
-				     buf + num_bytes_processed, todo)) {
-			res = num_bytes_processed ?
-				num_bytes_processed : -EFAULT;
-			goto out;
+		if (!copy_from_iter_full(buf, todo, from)) {
+			res = -EFAULT;
+			break;
 		}
 
-		res = drm_dp_dpcd_write(aux_dev->aux, *offset, localbuf, todo);
-		if (res <= 0) {
-			res = num_bytes_processed ? num_bytes_processed : res;
-			goto out;
-		}
-		bytes_pending -= res;
-		*offset += res;
-		num_bytes_processed += res;
-		res = num_bytes_processed;
+		res = drm_dp_dpcd_write(aux_dev->aux, pos, buf, todo);
+		if (res <= 0)
+			break;
+
+		pos += res;
 	}
 
-out:
+	if (pos != iocb->ki_pos)
+		res = pos - iocb->ki_pos;
+	iocb->ki_pos = pos;
+
 	atomic_dec(&aux_dev->usecount);
 	wake_up_atomic_t(&aux_dev->usecount);
 	return res;
@@ -251,8 +234,8 @@ static int auxdev_release(struct inode *inode, struct file *file)
 static const struct file_operations auxdev_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= auxdev_llseek,
-	.read		= auxdev_read,
-	.write		= auxdev_write,
+	.read_iter	= auxdev_read_iter,
+	.write_iter	= auxdev_write_iter,
 	.open		= auxdev_open,
 	.release	= auxdev_release,
 };
-- 
GitLab


From 0abd675e97e60d40e61d59532f8118b0e439034e Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Sun, 9 Jul 2017 00:13:07 +0800
Subject: [PATCH 0956/1958] f2fs: support plain user/group quota

This patch adds to support plain user/group quota.

Change Note by Jaegeuk Kim.

- Use f2fs page cache for quota files in order to consider garbage collection.
  so, quota files are not tolerable for sudden power-cuts, so user needs to do
  quotacheck.

- setattr() calls dquot_transfer which will transfer inode->i_blocks.
  We can't reclaim that during f2fs_evict_inode(). So, we need to count
  node blocks as well in order to match i_blocks with dquot's space.

  Note that, Chao wrote a patch to count inode->i_blocks without inode block.
  (f2fs: don't count inode block in in-memory inode.i_blocks)

- in f2fs_remount, we need to make RW in prior to dquot_resume.

- handle fault_injection case during f2fs_quota_off_umount

- TODO: Project quota

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt |   2 +
 fs/f2fs/data.c                     |  10 +-
 fs/f2fs/f2fs.h                     |  88 ++++++---
 fs/f2fs/file.c                     |  34 +++-
 fs/f2fs/inode.c                    |   5 +
 fs/f2fs/namei.c                    |  66 ++++++-
 fs/f2fs/node.c                     |   9 +-
 fs/f2fs/super.c                    | 280 +++++++++++++++++++++++++++++
 8 files changed, 454 insertions(+), 40 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 8b04a6359530b..273ccb26885e9 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -162,6 +162,8 @@ mode=%s                Control block allocation mode which supports "adaptive"
                        writes towards main area.
 io_bits=%u             Set the bit size of write IO requests. It should be set
                        with "mode=lfs".
+usrquota               Enable plain user disk quota accounting.
+grpquota               Enable plain group disk quota accounting.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7dd5fb647d435..2513568594765 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -491,14 +491,15 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
 int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+	int err;
 
 	if (!count)
 		return 0;
 
 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 		return -EPERM;
-	if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
-		return -ENOSPC;
+	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+		return err;
 
 	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
 						dn->ofs_in_node, count);
@@ -749,6 +750,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	struct node_info ni;
 	pgoff_t fofs;
 	blkcnt_t count = 1;
+	int err;
 
 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 		return -EPERM;
@@ -757,8 +759,8 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	if (dn->data_blkaddr == NEW_ADDR)
 		goto alloc;
 
-	if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
-		return -ENOSPC;
+	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+		return err;
 
 alloc:
 	get_node_info(sbi, dn->nid, &ni);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ced78035a4167..42c39f0bfd883 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/quotaops.h>
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
 #include <linux/fscrypt_supp.h>
 #else
@@ -88,6 +89,8 @@ extern char *fault_name[FAULT_MAX];
 #define F2FS_MOUNT_FAULT_INJECTION	0x00010000
 #define F2FS_MOUNT_ADAPTIVE		0x00020000
 #define F2FS_MOUNT_LFS			0x00040000
+#define F2FS_MOUNT_USRQUOTA		0x00080000
+#define F2FS_MOUNT_GRPQUOTA		0x00100000
 
 #define clear_opt(sbi, option)	((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)	((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -521,6 +524,12 @@ struct f2fs_inode_info {
 	nid_t i_xattr_nid;		/* node id that contains xattrs */
 	loff_t	last_disk_size;		/* lastly written file size */
 
+#ifdef CONFIG_QUOTA
+	struct dquot *i_dquot[MAXQUOTAS];
+
+	/* quota space reservation, managed internally by quota code */
+	qsize_t i_reserved_quota;
+#endif
 	struct list_head dirty_list;	/* dirty list for dirs and files */
 	struct list_head gdirty_list;	/* linked in global dirty list */
 	struct list_head inmem_pages;	/* inmemory pages managed by f2fs */
@@ -1376,17 +1385,23 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
 	return ofs == XATTR_NODE_OFFSET;
 }
 
-static inline void f2fs_i_blocks_write(struct inode *, block_t, bool);
-static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
+static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
+static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 				 struct inode *inode, blkcnt_t *count)
 {
-	blkcnt_t diff;
+	blkcnt_t diff = 0, release = 0;
 	block_t avail_user_block_count;
+	int ret;
+
+	ret = dquot_reserve_block(inode, *count);
+	if (ret)
+		return ret;
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (time_to_inject(sbi, FAULT_BLOCK)) {
 		f2fs_show_injection_info(FAULT_BLOCK);
-		return false;
+		release = *count;
+		goto enospc;
 	}
 #endif
 	/*
@@ -1401,17 +1416,24 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
 	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
 		diff = sbi->total_valid_block_count - avail_user_block_count;
 		*count -= diff;
+		release = diff;
 		sbi->total_valid_block_count = avail_user_block_count;
 		if (!*count) {
 			spin_unlock(&sbi->stat_lock);
 			percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
-			return false;
+			goto enospc;
 		}
 	}
 	spin_unlock(&sbi->stat_lock);
 
-	f2fs_i_blocks_write(inode, *count, true);
-	return true;
+	if (release)
+		dquot_release_reservation_block(inode, release);
+	f2fs_i_blocks_write(inode, *count, true, true);
+	return 0;
+
+enospc:
+	dquot_release_reservation_block(inode, release);
+	return -ENOSPC;
 }
 
 static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
@@ -1425,7 +1447,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
 	f2fs_bug_on(sbi, inode->i_blocks < sectors);
 	sbi->total_valid_block_count -= (block_t)count;
 	spin_unlock(&sbi->stat_lock);
-	f2fs_i_blocks_write(inode, count, false);
+	f2fs_i_blocks_write(inode, count, false, true);
 }
 
 static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1554,11 +1576,18 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
 	return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
 }
 
-static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
+static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 					struct inode *inode, bool is_inode)
 {
 	block_t	valid_block_count;
 	unsigned int valid_node_count;
+	bool quota = inode && !is_inode;
+
+	if (quota) {
+		int ret = dquot_reserve_block(inode, 1);
+		if (ret)
+			return ret;
+	}
 
 	spin_lock(&sbi->stat_lock);
 
@@ -1566,28 +1595,33 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
 	if (unlikely(valid_block_count + sbi->reserved_blocks >
 						sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
-		return false;
+		goto enospc;
 	}
 
 	valid_node_count = sbi->total_valid_node_count + 1;
 	if (unlikely(valid_node_count > sbi->total_node_count)) {
 		spin_unlock(&sbi->stat_lock);
-		return false;
+		goto enospc;
 	}
 
+	sbi->total_valid_node_count++;
+	sbi->total_valid_block_count++;
+	spin_unlock(&sbi->stat_lock);
+
 	if (inode) {
 		if (is_inode)
 			f2fs_mark_inode_dirty_sync(inode, true);
 		else
-			f2fs_i_blocks_write(inode, 1, true);
+			f2fs_i_blocks_write(inode, 1, true, true);
 	}
 
-	sbi->total_valid_node_count++;
-	sbi->total_valid_block_count++;
-	spin_unlock(&sbi->stat_lock);
-
 	percpu_counter_inc(&sbi->alloc_valid_block_count);
-	return true;
+	return 0;
+
+enospc:
+	if (quota)
+		dquot_release_reservation_block(inode, 1);
+	return -ENOSPC;
 }
 
 static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
@@ -1599,12 +1633,13 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
 	f2fs_bug_on(sbi, !sbi->total_valid_node_count);
 	f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
 
-	if (!is_inode)
-		f2fs_i_blocks_write(inode, 1, false);
 	sbi->total_valid_node_count--;
 	sbi->total_valid_block_count--;
 
 	spin_unlock(&sbi->stat_lock);
+
+	if (!is_inode)
+		f2fs_i_blocks_write(inode, 1, false, true);
 }
 
 static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
@@ -1879,14 +1914,21 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
 }
 
 static inline void f2fs_i_blocks_write(struct inode *inode,
-					block_t diff, bool add)
+					block_t diff, bool add, bool claim)
 {
 	bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE);
 	bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER);
-	blkcnt_t sectors = diff << F2FS_LOG_SECTORS_PER_BLOCK;
 
-	inode->i_blocks = add ? inode->i_blocks + sectors :
-				inode->i_blocks - sectors;
+	/* add = 1, claim = 1 should be dquot_reserve_block in pair */
+	if (add) {
+		if (claim)
+			dquot_claim_block(inode, diff);
+		else
+			dquot_alloc_block_nofail(inode, diff);
+	} else {
+		dquot_free_block(inode, diff);
+	}
+
 	f2fs_mark_inode_dirty_sync(inode, true);
 	if (clean || recover)
 		set_inode_flag(inode, FI_AUTO_RECOVER);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3f56b27e761b8..527c9f36d9719 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -442,11 +442,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int f2fs_file_open(struct inode *inode, struct file *filp)
 {
-	int ret = generic_file_open(inode, filp);
 	struct dentry *dir;
 
-	if (!ret && f2fs_encrypted_inode(inode)) {
-		ret = fscrypt_get_encryption_info(inode);
+	if (f2fs_encrypted_inode(inode)) {
+		int ret = fscrypt_get_encryption_info(inode);
 		if (ret)
 			return -EACCES;
 		if (!fscrypt_has_encryption_key(inode))
@@ -459,7 +458,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
 		return -EPERM;
 	}
 	dput(dir);
-	return ret;
+	return dquot_file_open(inode, filp);
 }
 
 int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -710,6 +709,20 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
+	if (is_quota_modification(inode, attr)) {
+		err = dquot_initialize(inode);
+		if (err)
+			return err;
+	}
+	if ((attr->ia_valid & ATTR_UID &&
+		!uid_eq(attr->ia_uid, inode->i_uid)) ||
+		(attr->ia_valid & ATTR_GID &&
+		!gid_eq(attr->ia_gid, inode->i_gid))) {
+		err = dquot_transfer(inode, attr);
+		if (err)
+			return err;
+	}
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		if (f2fs_encrypted_inode(inode)) {
 			err = fscrypt_get_encryption_info(inode);
@@ -996,9 +1009,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
 
 				if (do_replace[i]) {
 					f2fs_i_blocks_write(src_inode,
-								1, false);
+							1, false, false);
 					f2fs_i_blocks_write(dst_inode,
-								1, true);
+							1, true, false);
 					f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
 					blkaddr[i], ni.version, true, false);
 
@@ -1523,6 +1536,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
 	inode_lock(inode);
 
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode)) {
+		inode_unlock(inode);
+		ret = -EPERM;
+		goto unlock_out;
+	}
+
 	flags = f2fs_mask_flags(inode->i_mode, flags);
 
 	oldflags = fi->i_flags;
@@ -1542,7 +1562,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 	inode->i_ctime = current_time(inode);
 	f2fs_set_inode_flags(inode);
 	f2fs_mark_inode_dirty_sync(inode, false);
-
+unlock_out:
 	inode_unlock(inode);
 out:
 	mnt_drop_write_file(filp);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index e42a7a8805dc5..6cd312a17c695 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -373,6 +373,8 @@ void f2fs_evict_inode(struct inode *inode)
 	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
+	dquot_initialize(inode);
+
 	remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
 	remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
 
@@ -405,8 +407,11 @@ void f2fs_evict_inode(struct inode *inode)
 
 	if (err)
 		update_inode_page(inode);
+	dquot_free_inode(inode);
 	sb_end_intwrite(inode->i_sb);
 no_delete:
+	dquot_drop(inode);
+
 	stat_dec_inline_xattr(inode);
 	stat_dec_inline_dir(inode);
 	stat_dec_inline_inode(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b75dc2f4ad57f..760d85223c813 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -15,6 +15,7 @@
 #include <linux/ctype.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
+#include <linux/quotaops.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -42,6 +43,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 	}
 	f2fs_unlock_op(sbi);
 
+	nid_free = true;
+
 	inode_init_owner(inode, dir, mode);
 
 	inode->i_ino = ino;
@@ -52,10 +55,17 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 	err = insert_inode_locked(inode);
 	if (err) {
 		err = -EINVAL;
-		nid_free = true;
 		goto fail;
 	}
 
+	err = dquot_initialize(inode);
+	if (err)
+		goto fail_drop;
+
+	err = dquot_alloc_inode(inode);
+	if (err)
+		goto fail_drop;
+
 	/* If the directory encrypted, then we should encrypt the inode. */
 	if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
 		f2fs_set_encrypted_inode(inode);
@@ -85,6 +95,16 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 		set_inode_flag(inode, FI_FREE_NID);
 	iput(inode);
 	return ERR_PTR(err);
+fail_drop:
+	trace_f2fs_new_inode(inode, err);
+	dquot_drop(inode);
+	inode->i_flags |= S_NOQUOTA;
+	if (nid_free)
+		set_inode_flag(inode, FI_FREE_NID);
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+	return ERR_PTR(err);
 }
 
 static int is_multimedia_file(const unsigned char *s, const char *sub)
@@ -136,6 +156,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	nid_t ino = 0;
 	int err;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -180,6 +204,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 			!fscrypt_has_permitted_context(dir, inode))
 		return -EPERM;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	f2fs_balance_fs(sbi, true);
 
 	inode->i_ctime = current_time(inode);
@@ -347,6 +375,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 
 	trace_f2fs_unlink_enter(dir, dentry);
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	de = f2fs_find_entry(dir, &dentry->d_name, &page);
 	if (!de) {
 		if (IS_ERR(page))
@@ -413,6 +445,10 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 	if (disk_link.len > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -500,6 +536,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct inode *inode;
 	int err;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, S_IFDIR | mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -548,6 +588,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	int err = 0;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -583,6 +627,10 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	int err;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -676,6 +724,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto out;
 	}
 
+	err = dquot_initialize(old_dir);
+	if (err)
+		goto out;
+
+	err = dquot_initialize(new_dir);
+	if (err)
+		goto out;
+
 	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_entry) {
 		if (IS_ERR(old_page))
@@ -856,6 +912,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 			 !fscrypt_has_permitted_context(old_dir, new_inode)))
 		return -EPERM;
 
+	err = dquot_initialize(old_dir);
+	if (err)
+		goto out;
+
+	err = dquot_initialize(new_dir);
+	if (err)
+		goto out;
+
 	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_entry) {
 		if (IS_ERR(old_page))
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b9f14ba6441f5..3ed2f947f5da3 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1040,10 +1040,9 @@ struct page *new_node_page(struct dnode_of_data *dn,
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	if (unlikely(!inc_valid_node_count(sbi, dn->inode, !ofs))) {
-		err = -ENOSPC;
+	if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
 		goto fail;
-	}
+
 #ifdef CONFIG_F2FS_CHECK_FS
 	get_node_info(sbi, dn->nid, &new_ni);
 	f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
@@ -2210,7 +2209,7 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
 	/* 2: update xattr nid in inode */
 	remove_free_nid(sbi, new_xnid);
 	f2fs_i_xnid_write(inode, new_xnid);
-	if (unlikely(!inc_valid_node_count(sbi, inode, false)))
+	if (unlikely(inc_valid_node_count(sbi, inode, false)))
 		f2fs_bug_on(sbi, 1);
 	update_inode_page(inode);
 
@@ -2268,7 +2267,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
 	new_ni = old_ni;
 	new_ni.ino = ino;
 
-	if (unlikely(!inc_valid_node_count(sbi, NULL, true)))
+	if (unlikely(inc_valid_node_count(sbi, NULL, true)))
 		WARN_ON(1);
 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 	inc_valid_inode_count(sbi);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index af472f7968d09..dd92170e0d6d7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -22,6 +22,7 @@
 #include <linux/random.h>
 #include <linux/exportfs.h>
 #include <linux/blkdev.h>
+#include <linux/quotaops.h>
 #include <linux/f2fs_fs.h>
 #include <linux/sysfs.h>
 
@@ -106,6 +107,8 @@ enum {
 	Opt_fault_injection,
 	Opt_lazytime,
 	Opt_nolazytime,
+	Opt_usrquota,
+	Opt_grpquota,
 	Opt_err,
 };
 
@@ -141,6 +144,8 @@ static match_table_t f2fs_tokens = {
 	{Opt_fault_injection, "fault_injection=%u"},
 	{Opt_lazytime, "lazytime"},
 	{Opt_nolazytime, "nolazytime"},
+	{Opt_usrquota, "usrquota"},
+	{Opt_grpquota, "grpquota"},
 	{Opt_err, NULL},
 };
 
@@ -380,6 +385,20 @@ static int parse_options(struct super_block *sb, char *options)
 		case Opt_nolazytime:
 			sb->s_flags &= ~MS_LAZYTIME;
 			break;
+#ifdef CONFIG_QUOTA
+		case Opt_usrquota:
+			set_opt(sbi, USRQUOTA);
+			break;
+		case Opt_grpquota:
+			set_opt(sbi, GRPQUOTA);
+			break;
+#else
+		case Opt_usrquota:
+		case Opt_grpquota:
+			f2fs_msg(sb, KERN_INFO,
+					"quota operations not supported");
+			break;
+#endif
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -421,6 +440,10 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	init_rwsem(&fi->dio_rwsem[WRITE]);
 	init_rwsem(&fi->i_mmap_sem);
 
+#ifdef CONFIG_QUOTA
+	memset(&fi->i_dquot, 0, sizeof(fi->i_dquot));
+	fi->i_reserved_quota = 0;
+#endif
 	/* Will be used by directory only */
 	fi->i_dir_level = F2FS_SB(sb)->dir_level;
 	return &fi->vfs_inode;
@@ -561,11 +584,14 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 	kfree(sbi->devs);
 }
 
+static void f2fs_quota_off_umount(struct super_block *sb);
 static void f2fs_put_super(struct super_block *sb)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	int i;
 
+	f2fs_quota_off_umount(sb);
+
 	/* prevent remaining shrinker jobs */
 	mutex_lock(&sbi->umount_mutex);
 
@@ -782,6 +808,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",fault_injection=%u",
 				sbi->fault_info.inject_rate);
 #endif
+#ifdef CONFIG_QUOTA
+	if (test_opt(sbi, USRQUOTA))
+		seq_puts(seq, ",usrquota");
+	if (test_opt(sbi, GRPQUOTA))
+		seq_puts(seq, ",grpquota");
+#endif
 
 	return 0;
 }
@@ -822,6 +854,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct f2fs_mount_info org_mount_opt;
+	unsigned long old_sb_flags;
 	int err, active_logs;
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
@@ -835,6 +868,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * need to restore them.
 	 */
 	org_mount_opt = sbi->mount_opt;
+	old_sb_flags = sb->s_flags;
 	active_logs = sbi->active_logs;
 
 	/* recover superblocks we couldn't write due to previous RO mount */
@@ -860,6 +894,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
 		goto skip;
 
+	if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
+		err = dquot_suspend(sb, -1);
+		if (err < 0)
+			goto restore_opts;
+	} else {
+		/* dquot_resume needs RW */
+		sb->s_flags &= ~MS_RDONLY;
+		dquot_resume(sb, -1);
+	}
+
 	/* disallow enable/disable extent_cache dynamically */
 	if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
 		err = -EINVAL;
@@ -924,12 +968,237 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 restore_opts:
 	sbi->mount_opt = org_mount_opt;
 	sbi->active_logs = active_logs;
+	sb->s_flags = old_sb_flags;
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	sbi->fault_info = ffi;
 #endif
 	return err;
 }
 
+#ifdef CONFIG_QUOTA
+/* Read data from quotafile */
+static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
+			       size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	struct address_space *mapping = inode->i_mapping;
+	block_t blkidx = F2FS_BYTES_TO_BLK(off);
+	int offset = off & (sb->s_blocksize - 1);
+	int tocopy;
+	size_t toread;
+	loff_t i_size = i_size_read(inode);
+	struct page *page;
+	char *kaddr;
+
+	if (off > i_size)
+		return 0;
+
+	if (off + len > i_size)
+		len = i_size - off;
+	toread = len;
+	while (toread > 0) {
+		tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
+repeat:
+		page = read_mapping_page(mapping, blkidx, NULL);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		lock_page(page);
+
+		if (unlikely(page->mapping != mapping)) {
+			f2fs_put_page(page, 1);
+			goto repeat;
+		}
+		if (unlikely(!PageUptodate(page))) {
+			f2fs_put_page(page, 1);
+			return -EIO;
+		}
+
+		kaddr = kmap_atomic(page);
+		memcpy(data, kaddr + offset, tocopy);
+		kunmap_atomic(kaddr);
+		f2fs_put_page(page, 1);
+
+		offset = 0;
+		toread -= tocopy;
+		data += tocopy;
+		blkidx++;
+	}
+	return len;
+}
+
+/* Write to quotafile */
+static ssize_t f2fs_quota_write(struct super_block *sb, int type,
+				const char *data, size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	struct address_space *mapping = inode->i_mapping;
+	const struct address_space_operations *a_ops = mapping->a_ops;
+	int offset = off & (sb->s_blocksize - 1);
+	size_t towrite = len;
+	struct page *page;
+	char *kaddr;
+	int err = 0;
+	int tocopy;
+
+	while (towrite > 0) {
+		tocopy = min_t(unsigned long, sb->s_blocksize - offset,
+								towrite);
+
+		err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
+							&page, NULL);
+		if (unlikely(err))
+			break;
+
+		kaddr = kmap_atomic(page);
+		memcpy(kaddr + offset, data, tocopy);
+		kunmap_atomic(kaddr);
+		flush_dcache_page(page);
+
+		a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
+						page, NULL);
+		offset = 0;
+		towrite -= tocopy;
+		off += tocopy;
+		data += tocopy;
+		cond_resched();
+	}
+
+	if (len == towrite)
+		return err;
+	inode->i_version++;
+	inode->i_mtime = inode->i_ctime = current_time(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
+	return len - towrite;
+}
+
+static struct dquot **f2fs_get_dquots(struct inode *inode)
+{
+	return F2FS_I(inode)->i_dquot;
+}
+
+static qsize_t *f2fs_get_reserved_space(struct inode *inode)
+{
+	return &F2FS_I(inode)->i_reserved_quota;
+}
+
+static int f2fs_quota_sync(struct super_block *sb, int type)
+{
+	struct quota_info *dqopt = sb_dqopt(sb);
+	int cnt;
+	int ret;
+
+	ret = dquot_writeback_dquots(sb, type);
+	if (ret)
+		return ret;
+
+	/*
+	 * Now when everything is written we can discard the pagecache so
+	 * that userspace sees the changes.
+	 */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (type != -1 && cnt != type)
+			continue;
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+
+		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+		if (ret)
+			return ret;
+
+		inode_lock(dqopt->files[cnt]);
+		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
+		inode_unlock(dqopt->files[cnt]);
+	}
+	return 0;
+}
+
+static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
+							const struct path *path)
+{
+	struct inode *inode;
+	int err;
+
+	err = f2fs_quota_sync(sb, -1);
+	if (err)
+		return err;
+
+	err = dquot_quota_on(sb, type, format_id, path);
+	if (err)
+		return err;
+
+	inode = d_inode(path->dentry);
+
+	inode_lock(inode);
+	F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL;
+	inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
+					S_NOATIME | S_IMMUTABLE);
+	inode_unlock(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
+
+	return 0;
+}
+
+static int f2fs_quota_off(struct super_block *sb, int type)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	int err;
+
+	if (!inode || !igrab(inode))
+		return dquot_quota_off(sb, type);
+
+	f2fs_quota_sync(sb, -1);
+
+	err = dquot_quota_off(sb, type);
+	if (err)
+		goto out_put;
+
+	inode_lock(inode);
+	F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL);
+	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+	inode_unlock(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
+out_put:
+	iput(inode);
+	return err;
+}
+
+static void f2fs_quota_off_umount(struct super_block *sb)
+{
+	int type;
+
+	for (type = 0; type < MAXQUOTAS; type++)
+		f2fs_quota_off(sb, type);
+}
+
+static const struct dquot_operations f2fs_quota_operations = {
+	.get_reserved_space = f2fs_get_reserved_space,
+	.write_dquot	= dquot_commit,
+	.acquire_dquot	= dquot_acquire,
+	.release_dquot	= dquot_release,
+	.mark_dirty	= dquot_mark_dquot_dirty,
+	.write_info	= dquot_commit_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
+	.get_next_id	= dquot_get_next_id,
+};
+
+static const struct quotactl_ops f2fs_quotactl_ops = {
+	.quota_on	= f2fs_quota_on,
+	.quota_off	= f2fs_quota_off,
+	.quota_sync	= f2fs_quota_sync,
+	.get_state	= dquot_get_state,
+	.set_info	= dquot_set_dqinfo,
+	.get_dqblk	= dquot_get_dqblk,
+	.set_dqblk	= dquot_set_dqblk,
+	.get_nextdqblk	= dquot_get_next_dqblk,
+};
+#else
+static inline void f2fs_quota_off_umount(struct super_block *sb)
+{
+}
+#endif
+
 static struct super_operations f2fs_sops = {
 	.alloc_inode	= f2fs_alloc_inode,
 	.drop_inode	= f2fs_drop_inode,
@@ -937,6 +1206,11 @@ static struct super_operations f2fs_sops = {
 	.write_inode	= f2fs_write_inode,
 	.dirty_inode	= f2fs_dirty_inode,
 	.show_options	= f2fs_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= f2fs_quota_read,
+	.quota_write	= f2fs_quota_write,
+	.get_dquots	= f2fs_get_dquots,
+#endif
 	.evict_inode	= f2fs_evict_inode,
 	.put_super	= f2fs_put_super,
 	.sync_fs	= f2fs_sync_fs,
@@ -1679,6 +1953,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_max_links = F2FS_LINK_MAX;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 
+#ifdef CONFIG_QUOTA
+	sb->dq_op = &f2fs_quota_operations;
+	sb->s_qcop = &f2fs_quotactl_ops;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+#endif
+
 	sb->s_op = &f2fs_sops;
 	sb->s_cop = &f2fs_cryptops;
 	sb->s_xattr = f2fs_xattr_handlers;
-- 
GitLab


From 799ce1dbb9bba56ff21733838a05070787fdcde5 Mon Sep 17 00:00:00 2001
From: Tommy Nguyen <remyabel@gmail.com>
Date: Sun, 9 Jul 2017 10:25:43 +0100
Subject: [PATCH 0957/1958] befs: add kernel-doc formatting for
 befs_bt_read_super()

fs/befs/TODO mentions some comments needing conversion to Kernel-Doc
formatting. This patch changes the comment describing befs_bt_read_super().

Signed-off-by: Tommy Nguyen <remyabel@gmail.com>
Signed-off-by: Luis de Bethencourt <luisbg@kernel.org>
---
 fs/befs/btree.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index d509887c580ce..1b7e0f7128d6d 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -120,18 +120,15 @@ static int befs_compare_strings(const void *key1, int keylen1,
 				const void *key2, int keylen2);
 
 /**
- * befs_bt_read_super - read in btree superblock convert to cpu byteorder
- * @sb: Filesystem superblock
- * @ds: Datastream to read from
- * @sup: Buffer in which to place the btree superblock
+ * befs_bt_read_super() - read in btree superblock convert to cpu byteorder
+ * @sb:        Filesystem superblock
+ * @ds:        Datastream to read from
+ * @sup:       Buffer in which to place the btree superblock
  *
  * Calls befs_read_datastream to read in the btree superblock and
  * makes sure it is in cpu byteorder, byteswapping if necessary.
- *
- * On success, returns BEFS_OK and *@sup contains the btree superblock,
- * in cpu byte order.
- *
- * On failure, BEFS_ERR is returned.
+ * Return: BEFS_OK on success and if *@sup contains the btree superblock in cpu
+ * byte order. Otherwise return BEFS_ERR on error.
  */
 static int
 befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,
-- 
GitLab


From fca18a47cf3eb8425ec19c2dfc374f3d04f5219f Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Sat, 8 Jul 2017 00:27:30 +0530
Subject: [PATCH 0958/1958] trace/kprobes: Sanitize derived event names

When we derive event names, convert some expected symbols (such as ':'
used to specify module:name and '.' present in some symbols) into
underscores so that the event name is not rejected.

Before this patch:
    # echo 'p kobject_example:foo_store' > kprobe_events
    trace_kprobe: Failed to allocate trace_probe.(-22)
    -sh: write error: Invalid argument

After this patch:
    # echo 'p kobject_example:foo_store' > kprobe_events
    # cat kprobe_events
    p:kprobes/p_kobject_example_foo_store_0 kobject_example:foo_store

Link: http://lkml.kernel.org/r/66c189e09e71361aba91dd4a5bd146a1b62a7a51.1499453040.git.naveen.n.rao@linux.vnet.ibm.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c129fca6ec993..44fd819aa33da 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -598,6 +598,14 @@ static struct notifier_block trace_kprobe_module_nb = {
 	.priority = 1	/* Invoked after kprobe module callback */
 };
 
+/* Convert certain expected symbols into '_' when generating event names */
+static inline void sanitize_event_name(char *name)
+{
+	while (*name++ != '\0')
+		if (*name == ':' || *name == '.')
+			*name = '_';
+}
+
 static int create_trace_kprobe(int argc, char **argv)
 {
 	/*
@@ -740,6 +748,7 @@ static int create_trace_kprobe(int argc, char **argv)
 		else
 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
 				 is_return ? 'r' : 'p', addr);
+		sanitize_event_name(buf);
 		event = buf;
 	}
 	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
-- 
GitLab


From f7181e5aaab58d81c5453887e39a1e1041f6bb1a Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Sat, 8 Jul 2017 00:27:31 +0530
Subject: [PATCH 0959/1958] selftests/ftrace: Update multiple kprobes test for
 powerpc

KPROBES_ON_FTRACE is only available on powerpc64le. Update comment to
clarify this.

Also, we should use an offset of 8 to ensure that the probe does not
fall on ftrace location. The current offset of 4 will fall before the
function local entry point and won't fire, while an offset of 12 or 16
will fall on ftrace location. Offset 8 is currently guaranteed to not be
the ftrace location.

Link: http://lkml.kernel.org/r/3d32e8fa076070e83527476fdfa3a747bb9a1a3a.1499453040.git.naveen.n.rao@linux.vnet.ibm.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../selftests/ftrace/test.d/kprobe/multiple_kprobes.tc        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index f4d1ff785d67d..2a1cb99087461 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -2,10 +2,10 @@
 # description: Register/unregister many kprobe events
 
 # ftrace fentry skip size depends on the machine architecture.
-# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc
+# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
 case `uname -m` in
   x86_64|i[3456]86) OFFS=5;;
-  ppc*) OFFS=4;;
+  ppc64le) OFFS=8;;
   *) OFFS=0;;
 esac
 
-- 
GitLab


From ff431b1390cbf8764f8550b8e427eb28d4a6328e Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Sat, 8 Jul 2017 00:27:32 +0530
Subject: [PATCH 0960/1958] selftests/ftrace: Add a test to probe module
 functions

Add a kprobes test to ensure that we are able to add a probe on a
module function using 'p <mod>:<func>' format, with/without having to
specify a probe name.

Link: http://lkml.kernel.org/r/2d8087e25a7ad9206f3e2b7b4bb0c3c86eaa38af.1499453040.git.naveen.n.rao@linux.vnet.ibm.com

Suggested-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../ftrace/test.d/kprobe/kprobe_module.tc     | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
new file mode 100644
index 0000000000000..6d634e4b76805
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
@@ -0,0 +1,28 @@
+#!/bin/sh
+# description: Kprobe dynamic event - probing module
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on a module function without specifying event name" ;:
+
+MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "`
+FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "`
+[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved
+echo "p $MOD:$FUNC" > kprobe_events
+PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on a module function with new event name" ;:
+
+echo "p:event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on a module function with new event and group name" ;:
+
+echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes1/event1 || exit_failure
+
+echo > kprobe_events
-- 
GitLab


From 8cb0bc9e3f863f3e77522d02443a75c0e45a6ed0 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 8 Jul 2017 00:27:33 +0530
Subject: [PATCH 0961/1958] selftests/ftrace: Add a testcase for kprobe event
 naming

Add a testcase for kprobe event naming. This testcase checks whether the
kprobe events can automatically ganerate its event name on normal
function and dot-suffixed function.  Also it checks whether the kprobe
events can correctly define new event with given event name and group
name.

Link: http://lkml.kernel.org/r/61ae96fd1fcd14ee652c8b6525c218b8661bb0d2.1499453040.git.naveen.n.rao@linux.vnet.ibm.com

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
[Updated tests to use vfs_read and symbols with '.isra.',
added check for kprobe_events and a command to clear it on exit,
various additional checks and tests]
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../ftrace/test.d/kprobe/kprobe_eventname.tc  | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
new file mode 100644
index 0000000000000..b9302cc82c122
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -0,0 +1,36 @@
+#!/bin/sh
+# description: Kprobe event auto/manual naming
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on function without name" ;:
+
+FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+PROBE_NAME=`echo $FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on function with new name" ;:
+
+echo "p:event1 $FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on function with new name and group" ;:
+
+echo "p:kprobes2/event2 $FUNC" > kprobe_events
+test -d events/kprobes2/event2 || exit_failure
+
+:;: "Add an event on dot function without name" ;:
+
+FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
+[ "x" != "x$EVENT" ] || exit_failure
+test -d events/$EVENT || exit_failure
+
+echo > kprobe_events
-- 
GitLab


From 87ebb94e28ccac00bacba0d6a0dac0aac5b9c63b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:33 +0900
Subject: [PATCH 0962/1958] kbuild: remove useless $(gen) variable in
 Makefile.headersinst

We have no true case for the $(if $(gen), ...) conditional.  Drop it
to simplify the gendir calculation.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.headersinst | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index c583a1e1bd3c1..4e9287bfdf284 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -39,9 +39,6 @@ skip-inst := $(if $(filter %/uapi,$(obj)),1)
 
 ifeq ($(skip-inst),)
 
-# generated header directory
-gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
-
 # Kbuild file is optional
 kbuild-file := $(srctree)/$(obj)/Kbuild
 -include $(kbuild-file)
@@ -53,7 +50,7 @@ endif
 
 installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
 
-gendir        := $(objtree)/$(gen)
+gendir        := $(objtree)/$(subst include/,include/generated/,$(obj))
 header-files  := $(notdir $(wildcard $(srcdir)/*.h))
 header-files  += $(notdir $(wildcard $(srcdir)/*.agh))
 header-files  := $(filter-out $(no-export-headers), $(header-files))
-- 
GitLab


From 0f042eea0a4a4fb1b1999784d745a47c0bb2c2f9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:34 +0900
Subject: [PATCH 0963/1958] kbuild: fix comment about dst of headers_{install,
 check}_all

Commit 61562f981e92 ("uapi: export all arch specifics directories")
changed the dst from asm-<arch> to arch-<arch> for headers_install_all
or headers_check_all.  Update the comment.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 06ef9947cf7c0..ce58756287400 100644
--- a/Makefile
+++ b/Makefile
@@ -1133,7 +1133,7 @@ firmware_install:
 #Default location for installed headers
 export INSTALL_HDR_PATH = $(objtree)/usr
 
-# If we do an all arch process set dst to asm-$(hdr-arch)
+# If we do an all arch process set dst to include/arch-$(hdr-arch)
 hdr-dst = $(if $(KBUILD_HEADERS), dst=include/arch-$(hdr-arch), dst=include)
 
 PHONY += archheaders
-- 
GitLab


From a8ff49a1d92da4eb566d026adc43946852975129 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:35 +0900
Subject: [PATCH 0964/1958] kbuild: pass dst= to Makefile.headersinst from top
 Makefile

We can always pass dst= from the top Makefile.  This will simplify
the logic in Makefile.headersinst.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile                     | 4 ++--
 scripts/Makefile.headersinst | 8 ++------
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index ce58756287400..70c414b158590 100644
--- a/Makefile
+++ b/Makefile
@@ -1154,7 +1154,7 @@ PHONY += headers_install
 headers_install: __headers
 	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/uapi/asm/Kbuild),, \
 	  $(error Headers not exportable for the $(SRCARCH) architecture))
-	$(Q)$(MAKE) $(hdr-inst)=include/uapi
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi dst=include
 	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst)
 
 PHONY += headers_check_all
@@ -1163,7 +1163,7 @@ headers_check_all: headers_install_all
 
 PHONY += headers_check
 headers_check: headers_install
-	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi dst=include HDRCHECK=1
 	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst) HDRCHECK=1
 
 # ---------------------------------------------------------------------------
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 4e9287bfdf284..e9147f05ea772 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -23,15 +23,12 @@ subdirs       := $(patsubst $(srcdir)/%/,%,\
 		 $(filter-out $(srcdir)/,\
 		 $(sort $(dir $(wildcard $(srcdir)/*/)))))
 
-# caller may set destination dir (when installing to asm/)
-_dst          := $(if $(dst),$(dst),$(obj))
-
 # Recursion
 __headers: $(subdirs)
 
 .PHONY: $(subdirs)
 $(subdirs):
-	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@
+	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(dst)/$@
 
 # Skip header install/check for include/uapi and arch/$(hdr-arch)/include/uapi.
 # We have only sub-directories there.
@@ -48,8 +45,7 @@ ifneq ($(wildcard $(old-kbuild-file)),)
 include $(old-kbuild-file)
 endif
 
-installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
-
+installdir    := $(INSTALL_HDR_PATH)/$(dst)
 gendir        := $(objtree)/$(subst include/,include/generated/,$(obj))
 header-files  := $(notdir $(wildcard $(srcdir)/*.h))
 header-files  += $(notdir $(wildcard $(srcdir)/*.agh))
-- 
GitLab


From 7a7e98f4b341fc56cb352fd2bacfaecbc3f988fc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:36 +0900
Subject: [PATCH 0965/1958] arm64: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

For arm64, "generic-y += kvm_para.h" is doubled in asm/Kbuild and
uapi/asm/Kbuild.  So, the one in the former can be simply removed.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/Kbuild      | 17 -----------------
 arch/arm64/include/uapi/asm/Kbuild | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index a7a97a6080335..f81c7b685fc6f 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -6,41 +6,24 @@ generic-y += dma.h
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
 generic-y += msi.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
 generic-y += set_memory.h
-generic-y += shmbuf.h
 generic-y += simd.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += swab.h
 generic-y += switch_to.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index 13a97aa2285f7..fc28bd95c6d38 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -1,4 +1,20 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
 generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
-- 
GitLab


From ae884913ac1a76366f84561a5e89f483365c0fb7 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:37 +0900
Subject: [PATCH 0966/1958] ARM: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/arm/include/asm/Kbuild      | 16 ----------------
 arch/arm/include/uapi/asm/Kbuild | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index d8360501c0821..721ab5ecfb9b0 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -1,39 +1,23 @@
-
-
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mm-arch-hooks.h
-generic-y += msgbuf.h
 generic-y += msi.h
-generic-y += param.h
 generic-y += parport.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += seccomp.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
 generic-y += simd.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += timex.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 5fb3368e70cbc..8e17fe80b55bf 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -5,4 +5,18 @@ generated-y += unistd-common.h
 generated-y += unistd-oabi.h
 generated-y += unistd-eabi.h
 
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += termbits.h
+generic-y += termios.h
-- 
GitLab


From 3e4f9376659d8646302a317848ffb3a12101aa89 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:38 +0900
Subject: [PATCH 0967/1958] arc: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/arc/include/asm/Kbuild      | 24 ------------------------
 arch/arc/include/uapi/asm/Kbuild | 24 ++++++++++++++++++++++++
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 3e74ca5e6402a..353dae386b2fa 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,51 +1,27 @@
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
 generic-y += msi.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
-generic-y += sembuf.h
-generic-y += shmbuf.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index b55fc2ae1e8c1..fa6d0ff4ff894 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,4 +1,28 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From 6b4be36250b7fd14f717a61ae3e72f4ba6bdfe04 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:39 +0900
Subject: [PATCH 0968/1958] blackfin: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/blackfin/include/asm/Kbuild      | 24 +-----------------------
 arch/blackfin/include/uapi/asm/Kbuild | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index dc4ef9ac1e83b..fe736973630f7 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -1,50 +1,28 @@
-
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += extable.h
 generic-y += fb.h
 generic-y += futex.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += percpu.h
 generic-y += pgalloc.h
 generic-y += preempt.h
-generic-y += resource.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index b15bf6bc0e94f..aa624b4ab6557 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -1,2 +1,24 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From 97b81556f656594b343d516158ea305d4e0dcae5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:40 +0900
Subject: [PATCH 0969/1958] c6x: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/c6x/include/asm/Kbuild      | 28 +---------------------------
 arch/c6x/include/uapi/asm/Kbuild | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index a3c8d05c4cc76..d717329c8cf9d 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -1,8 +1,5 @@
-
 generic-y += atomic.h
-generic-y += auxvec.h
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += current.h
@@ -10,55 +7,32 @@ generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hw_irq.h
 generic-y += io.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += pgalloc.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += tlbflush.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 1c44d3b3eba03..67ee896a76a7f 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -1,5 +1,30 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
 generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From c6ec85cefb476e4ab914d18951fa4c04d96b3bb6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:41 +0900
Subject: [PATCH 0970/1958] cris: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h", "generic-y += emergency-restart.h"
up in order to keep the entries sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/cris/include/asm/Kbuild      | 21 ++-------------------
 arch/cris/include/uapi/asm/Kbuild | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index acc5781100c2d..460349cb147fa 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,48 +1,31 @@
 generic-y += atomic.h
-generic-y += auxvec.h
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += cmpxchg.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
-generic-y += errno.h
+generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
-generic-y += emergency-restart.h
-generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hardirq.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
-generic-y += sembuf.h
-generic-y += shmbuf.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += statfs.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index b55fc2ae1e8c1..3687b54bb18ed 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,4 +1,21 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += types.h
-- 
GitLab


From c26e2c0c6d2728377d7219095676dc372a421c1c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:42 +0900
Subject: [PATCH 0971/1958] h8300: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" and "generic-y += trace_clock.h"
in order to keep the entries sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/h8300/include/asm/Kbuild      | 30 ++----------------------------
 arch/h8300/include/uapi/asm/Kbuild | 26 ++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 99c824608a31a..bc077491d299c 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1,5 +1,4 @@
 generic-y += asm-offsets.h
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bugs.h
 generic-y += cacheflush.h
@@ -11,66 +10,41 @@ generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hash.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
 generic-y += module.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += pgalloc.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += scatterlist.h
 generic-y += sections.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
 generic-y += spinlock.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += swab.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += timex.h
 generic-y += tlbflush.h
-generic-y += trace_clock.h
 generic-y += topology.h
-generic-y += types.h
-generic-y += ucontext.h
+generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index b55fc2ae1e8c1..187aed820e71f 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,4 +1,30 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From e4c694a3b07de1f193387121c7cae717ff7f4483 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:43 +0900
Subject: [PATCH 0972/1958] hexagon: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/hexagon/include/asm/Kbuild      | 24 +-----------------------
 arch/hexagon/include/uapi/asm/Kbuild | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 0fc9cb04e6ada..34013683d123e 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -1,4 +1,3 @@
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bug.h
 generic-y += bugs.h
@@ -7,53 +6,32 @@ generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
 generic-y += iomap.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index b55fc2ae1e8c1..cb5df3aad3a84 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -1,4 +1,26 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From e18e340fd171fbb9bd38125c3d0ba5d3fd3ddfb9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:44 +0900
Subject: [PATCH 0973/1958] ia64: remove redundant generic-y += kvm_para.h from
 asm/Kbuild

"generic-y += kvm_para.h" is doubled in asm/Kbuild and uapi/asm/Kbuild.
The one in the former should be simply removed because kvm_para.h is
exported.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/ia64/include/asm/Kbuild | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 502a91d8dbbd8..1d7641f891e1c 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -1,8 +1,6 @@
-
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += irq_work.h
-generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
-- 
GitLab


From e64f6ead4b8a0d096ca47cf3e3f4ace070476c2b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:45 +0900
Subject: [PATCH 0974/1958] m32r: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/m32r/include/asm/Kbuild      | 4 +---
 arch/m32r/include/uapi/asm/Kbuild | 3 ++-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index c000ffac85861..7e11b125c35e1 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1,10 +1,9 @@
-
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += irq_work.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += module.h
@@ -12,4 +11,3 @@ generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
-generic-y += kprobes.h
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index c94ee54210bc4..1c44d3b3eba03 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-generic-y	+= siginfo.h
+generic-y += kvm_para.h
+generic-y += siginfo.h
-- 
GitLab


From 5608e72dfde31a6c2e051092f7aac026794971b8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:46 +0900
Subject: [PATCH 0975/1958] m68k: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

For m68k, "generic-y += termios.h" is doubled in asm/Kbuild and
uapi/asm/Kbuild.  So, the one in the former can be simply removed.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/Kbuild      | 13 +------------
 arch/m68k/include/uapi/asm/Kbuild | 10 ++++++++++
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 5ecf4e47b2e2e..59d6d0d38f67a 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -1,36 +1,25 @@
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += device.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += futex.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
-generic-y += shmparam.h
 generic-y += spinlock.h
-generic-y += statfs.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 68b45cc87e2cb..3717b64a620df 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -2,11 +2,21 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
 generic-y += msgbuf.h
+generic-y += resource.h
 generic-y += sembuf.h
 generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
 generic-y += socket.h
 generic-y += sockios.h
+generic-y += statfs.h
 generic-y += termbits.h
 generic-y += termios.h
+generic-y += types.h
-- 
GitLab


From d4442e42e5de2314084d0cca0d0eab08daac21af Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:47 +0900
Subject: [PATCH 0976/1958] metag: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/metag/include/asm/Kbuild      | 26 +-------------------------
 arch/metag/include/uapi/asm/Kbuild | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 8f940553a5796..3fba97ed9bb21 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -1,58 +1,34 @@
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += device.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
 generic-y += sections.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
 generic-y += switch_to.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += timex.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index b29731ebd7a93..6ac763d9a3e34 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -1,6 +1,30 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
 generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
 generic-y += resource.h
+generic-y += sembuf.h
 generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From 9ffd93852fc66f2bdf61a574db107f106e9fb2ff Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:48 +0900
Subject: [PATCH 0977/1958] microblaze: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

For microblaze, "generic-y += siginfo.h" is doubled in asm/Kbuild and
uapi/asm/Kbuild.  So, the one in the former can be simply removed.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/microblaze/include/asm/Kbuild      | 25 -------------------------
 arch/microblaze/include/uapi/asm/Kbuild | 25 ++++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 83a4ef3a2495a..9d66f77938419 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,22 +1,15 @@
-
 generic-y += barrier.h
 generic-y += bitops.h
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += hardirq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
@@ -27,31 +20,13 @@ generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += siginfo.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += swab.h
 generic-y += syscalls.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += ucontext.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index cb6784f4a427e..e77a596f3f1ef 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -1,5 +1,28 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-generic-y += types.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From c96396f0780e3b217f5194538e6dcd3c22d00122 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Date: Mon, 19 Jun 2017 11:36:20 +0200
Subject: [PATCH 0978/1958] tools: timer: add rtctest_setdate

This tool allow to set directly the time and date to a RTC device.

Unlike other tools isn't doens't use "struct timeval" or "time_t"
so it is safe for 32bits platforms when testing for y2038/2106 bug.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 tools/testing/selftests/timers/Makefile       |  2 +-
 .../selftests/timers/rtctest_setdate.c        | 86 +++++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/timers/rtctest_setdate.c

diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 5fa1d7e9a9158..54481f17518a9 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -9,7 +9,7 @@ TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
 
 TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 		      skew_consistency clocksource-switch leap-a-day \
-		      leapcrash set-tai set-2038 set-tz
+		      leapcrash set-tai set-2038 set-tz rtctest_setdate
 
 
 include ../lib.mk
diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/timers/rtctest_setdate.c
new file mode 100644
index 0000000000000..2cb78489eca46
--- /dev/null
+++ b/tools/testing/selftests/timers/rtctest_setdate.c
@@ -0,0 +1,86 @@
+/* Real Time Clock Driver Test
+ *	by: Benjamin Gaignard (benjamin.gaignard@linaro.org)
+ *
+ * To build
+ *	gcc rtctest_setdate.c -o rtctest_setdate
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+static const char default_time[] = "00:00:00";
+
+int main(int argc, char **argv)
+{
+	int fd, retval;
+	struct rtc_time new, current;
+	const char *rtc, *date;
+	const char *time = default_time;
+
+	switch (argc) {
+	case 4:
+		time = argv[3];
+		/* FALLTHROUGH */
+	case 3:
+		date = argv[2];
+		rtc = argv[1];
+		break;
+	default:
+		fprintf(stderr, "usage: rtctest_setdate <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n");
+		return 1;
+	}
+
+	fd = open(rtc, O_RDONLY);
+	if (fd == -1) {
+		perror(rtc);
+		exit(errno);
+	}
+
+	sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year);
+	new.tm_mon -= 1;
+	new.tm_year -= 1900;
+	sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec);
+
+	fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n",
+		new.tm_mday, new.tm_mon + 1, new.tm_year + 1900,
+		new.tm_hour, new.tm_min, new.tm_sec);
+
+	/* Write the new date in RTC */
+	retval = ioctl(fd, RTC_SET_TIME, &new);
+	if (retval == -1) {
+		perror("RTC_SET_TIME ioctl");
+		close(fd);
+		exit(errno);
+	}
+
+	/* Read back */
+	retval = ioctl(fd, RTC_RD_TIME, &current);
+	if (retval == -1) {
+		perror("RTC_RD_TIME ioctl");
+		exit(errno);
+	}
+
+	fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+		current.tm_mday, current.tm_mon + 1, current.tm_year + 1900,
+		current.tm_hour, current.tm_min, current.tm_sec);
+
+	close(fd);
+	return 0;
+}
-- 
GitLab


From 69c31226fc9f887dcd5c27a91f5cc0f244a6953e Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Date: Mon, 19 Jun 2017 11:36:21 +0200
Subject: [PATCH 0979/1958] rtc: rtctest: add check for problematic dates

Some dates could be problematic because they reach the limits of
RTC hardware capabilities.
This patch add various of them but since it will change RTC date
it will be activated only when 'd' args is set.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 tools/testing/selftests/timers/rtctest.c | 128 ++++++++++++++++++++++-
 1 file changed, 124 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
index 4230d3052e5d5..f61170f7b024a 100644
--- a/tools/testing/selftests/timers/rtctest.c
+++ b/tools/testing/selftests/timers/rtctest.c
@@ -21,6 +21,9 @@
 #include <stdlib.h>
 #include <errno.h>
 
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
 
 /*
  * This expects the new RTC class driver framework, working with
@@ -29,23 +32,84 @@
  */
 static const char default_rtc[] = "/dev/rtc0";
 
+static struct rtc_time cutoff_dates[] = {
+	{
+		.tm_year = 70, /* 1970 -1900 */
+		.tm_mday = 1,
+	},
+	/* signed time_t 19/01/2038 3:14:08 */
+	{
+		.tm_year = 138,
+		.tm_mday = 19,
+	},
+	{
+		.tm_year = 138,
+		.tm_mday = 20,
+	},
+	{
+		.tm_year = 199, /* 2099 -1900 */
+		.tm_mday = 1,
+	},
+	{
+		.tm_year = 200, /* 2100 -1900 */
+		.tm_mday = 1,
+	},
+	/* unsigned time_t 07/02/2106 7:28:15*/
+	{
+		.tm_year = 205,
+		.tm_mon = 1,
+		.tm_mday = 7,
+	},
+	{
+		.tm_year = 206,
+		.tm_mon = 1,
+		.tm_mday = 8,
+	},
+	/* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/
+	{
+		.tm_year = 362,
+		.tm_mon = 3,
+		.tm_mday = 12,
+	},
+	{
+		.tm_year = 362, /* 2262 -1900 */
+		.tm_mon = 3,
+		.tm_mday = 13,
+	},
+};
+
+static int compare_dates(struct rtc_time *a, struct rtc_time *b)
+{
+	if (a->tm_year != b->tm_year ||
+	    a->tm_mon != b->tm_mon ||
+	    a->tm_mday != b->tm_mday ||
+	    a->tm_hour != b->tm_hour ||
+	    a->tm_min != b->tm_min ||
+	    ((b->tm_sec - a->tm_sec) > 1))
+		return 1;
+
+	return 0;
+}
 
 int main(int argc, char **argv)
 {
-	int i, fd, retval, irqcount = 0;
+	int i, fd, retval, irqcount = 0, dangerous = 0;
 	unsigned long tmp, data;
 	struct rtc_time rtc_tm;
 	const char *rtc = default_rtc;
 	struct timeval start, end, diff;
 
 	switch (argc) {
+	case 3:
+		if (*argv[2] == 'd')
+			dangerous = 1;
 	case 2:
 		rtc = argv[1];
 		/* FALLTHROUGH */
 	case 1:
 		break;
 	default:
-		fprintf(stderr, "usage:  rtctest [rtcdev]\n");
+		fprintf(stderr, "usage:  rtctest [rtcdev] [d]\n");
 		return 1;
 	}
 
@@ -202,7 +266,7 @@ int main(int argc, char **argv)
 		/* not all RTCs support periodic IRQs */
 		if (errno == EINVAL) {
 			fprintf(stderr, "\nNo periodic IRQ support\n");
-			goto done;
+			goto test_DATE;
 		}
 		perror("RTC_IRQP_READ ioctl");
 		exit(errno);
@@ -221,7 +285,7 @@ int main(int argc, char **argv)
 			if (errno == EINVAL) {
 				fprintf(stderr,
 					"\n...Periodic IRQ rate is fixed\n");
-				goto done;
+				goto test_DATE;
 			}
 			perror("RTC_IRQP_SET ioctl");
 			exit(errno);
@@ -269,6 +333,62 @@ int main(int argc, char **argv)
 		}
 	}
 
+test_DATE:
+	if (!dangerous)
+		goto done;
+
+	fprintf(stderr, "\nTesting problematic dates\n");
+
+	for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) {
+		struct rtc_time current;
+
+		/* Write the new date in RTC */
+		retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]);
+		if (retval == -1) {
+			perror("RTC_SET_TIME ioctl");
+			close(fd);
+			exit(errno);
+		}
+
+		/* Read back */
+		retval = ioctl(fd, RTC_RD_TIME, &current);
+		if (retval == -1) {
+			perror("RTC_RD_TIME ioctl");
+			exit(errno);
+		}
+
+		if(compare_dates(&cutoff_dates[i], &current)) {
+			fprintf(stderr,"Setting date %d failed\n",
+			        cutoff_dates[i].tm_year + 1900);
+			goto done;
+		}
+
+		cutoff_dates[i].tm_sec += 5;
+
+		/* Write the new alarm in RTC */
+		retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]);
+		if (retval == -1) {
+			perror("RTC_ALM_SET ioctl");
+			close(fd);
+			exit(errno);
+		}
+
+		/* Read back */
+		retval = ioctl(fd, RTC_ALM_READ, &current);
+		if (retval == -1) {
+			perror("RTC_ALM_READ ioctl");
+			exit(errno);
+		}
+
+		if(compare_dates(&cutoff_dates[i], &current)) {
+			fprintf(stderr,"Setting alarm %d failed\n",
+			        cutoff_dates[i].tm_year + 1900);
+			goto done;
+		}
+
+		fprintf(stderr, "Setting year %d is OK \n",
+			cutoff_dates[i].tm_year + 1900);
+	}
 done:
 	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
 
-- 
GitLab


From f4b82d39e40fc9c3493826abb49c64c37c7025b9 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Date: Mon, 19 Jun 2017 11:36:22 +0200
Subject: [PATCH 0980/1958] rtc: st-lpc: make it robust against y2038/2106 bug

Make driver use u64 variables and functions to be sure that
it will support dates after year 2038.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Acked-by: Patrice Chotard <patrice.chotard@st.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 drivers/rtc/rtc-st-lpc.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c
index 74c0a336ceea0..82b0af159a282 100644
--- a/drivers/rtc/rtc-st-lpc.c
+++ b/drivers/rtc/rtc-st-lpc.c
@@ -99,7 +99,7 @@ static int st_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	lpt = ((unsigned long long)lpt_msb << 32) | lpt_lsb;
 	do_div(lpt, rtc->clkrate);
-	rtc_time_to_tm(lpt, tm);
+	rtc_time64_to_tm(lpt, tm);
 
 	return 0;
 }
@@ -107,13 +107,10 @@ static int st_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int st_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct st_rtc *rtc = dev_get_drvdata(dev);
-	unsigned long long lpt;
-	unsigned long secs, flags;
-	int ret;
+	unsigned long long lpt, secs;
+	unsigned long flags;
 
-	ret = rtc_tm_to_time(tm, &secs);
-	if (ret)
-		return ret;
+	secs = rtc_tm_to_time64(tm);
 
 	lpt = (unsigned long long)secs * rtc->clkrate;
 
@@ -161,13 +158,13 @@ static int st_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
 	struct st_rtc *rtc = dev_get_drvdata(dev);
 	struct rtc_time now;
-	unsigned long now_secs;
-	unsigned long alarm_secs;
+	unsigned long long now_secs;
+	unsigned long long alarm_secs;
 	unsigned long long lpa;
 
 	st_rtc_read_time(dev, &now);
-	rtc_tm_to_time(&now, &now_secs);
-	rtc_tm_to_time(&t->time, &alarm_secs);
+	now_secs = rtc_tm_to_time64(&now);
+	alarm_secs = rtc_tm_to_time64(&t->time);
 
 	/* Invalid alarm time */
 	if (now_secs > alarm_secs)
-- 
GitLab


From 49aac8204da5f344f52ed9b3eb8736ca7a60c4a8 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Thu, 29 Jun 2017 00:20:19 -0700
Subject: [PATCH 0981/1958] Input: xen-kbdfront - add multi-touch support

Extend xen_kbdfront to provide multi-touch support to unprivileged domains.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
[dtor: factor out various sub-protocols - multitouch, single touch, keys]
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/xen-kbdfront.c | 219 ++++++++++++++++++++++++------
 1 file changed, 179 insertions(+), 40 deletions(-)

diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index eb770613a9bd8..fa130e7b734c7 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/input.h>
+#include <linux/input/mt.h>
 #include <linux/slab.h>
 
 #include <asm/xen/hypervisor.h>
@@ -34,11 +35,14 @@
 struct xenkbd_info {
 	struct input_dev *kbd;
 	struct input_dev *ptr;
+	struct input_dev *mtouch;
 	struct xenkbd_page *page;
 	int gref;
 	int irq;
 	struct xenbus_device *xbdev;
 	char phys[32];
+	/* current MT slot/contact ID we are injecting events in */
+	int mtouch_cur_contact_id;
 };
 
 enum { KPARAM_X, KPARAM_Y, KPARAM_CNT };
@@ -56,6 +60,112 @@ static void xenkbd_disconnect_backend(struct xenkbd_info *);
  * to do that.
  */
 
+static void xenkbd_handle_motion_event(struct xenkbd_info *info,
+				       struct xenkbd_motion *motion)
+{
+	input_report_rel(info->ptr, REL_X, motion->rel_x);
+	input_report_rel(info->ptr, REL_Y, motion->rel_y);
+	if (motion->rel_z)
+		input_report_rel(info->ptr, REL_WHEEL, -motion->rel_z);
+	input_sync(info->ptr);
+}
+
+static void xenkbd_handle_position_event(struct xenkbd_info *info,
+					 struct xenkbd_position *pos)
+{
+	input_report_abs(info->ptr, ABS_X, pos->abs_x);
+	input_report_abs(info->ptr, ABS_Y, pos->abs_y);
+	if (pos->rel_z)
+		input_report_rel(info->ptr, REL_WHEEL, -pos->rel_z);
+	input_sync(info->ptr);
+}
+
+static void xenkbd_handle_key_event(struct xenkbd_info *info,
+				    struct xenkbd_key *key)
+{
+	struct input_dev *dev;
+
+	if (test_bit(key->keycode, info->ptr->keybit)) {
+		dev = info->ptr;
+	} else if (test_bit(key->keycode, info->kbd->keybit)) {
+		dev = info->kbd;
+	} else {
+		pr_warn("unhandled keycode 0x%x\n", key->keycode);
+		return;
+	}
+
+	input_report_key(dev, key->keycode, key->pressed);
+	input_sync(dev);
+}
+
+static void xenkbd_handle_mt_event(struct xenkbd_info *info,
+				   struct xenkbd_mtouch *mtouch)
+{
+	if (unlikely(!info->mtouch))
+		return;
+
+	if (mtouch->contact_id != info->mtouch_cur_contact_id) {
+		info->mtouch_cur_contact_id = mtouch->contact_id;
+		input_mt_slot(info->mtouch, mtouch->contact_id);
+	}
+
+	switch (mtouch->event_type) {
+	case XENKBD_MT_EV_DOWN:
+		input_mt_report_slot_state(info->mtouch, MT_TOOL_FINGER, true);
+		/* fall through */
+
+	case XENKBD_MT_EV_MOTION:
+		input_report_abs(info->mtouch, ABS_MT_POSITION_X,
+				 mtouch->u.pos.abs_x);
+		input_report_abs(info->mtouch, ABS_MT_POSITION_Y,
+				 mtouch->u.pos.abs_y);
+		break;
+
+	case XENKBD_MT_EV_SHAPE:
+		input_report_abs(info->mtouch, ABS_MT_TOUCH_MAJOR,
+				 mtouch->u.shape.major);
+		input_report_abs(info->mtouch, ABS_MT_TOUCH_MINOR,
+				 mtouch->u.shape.minor);
+		break;
+
+	case XENKBD_MT_EV_ORIENT:
+		input_report_abs(info->mtouch, ABS_MT_ORIENTATION,
+				 mtouch->u.orientation);
+		break;
+
+	case XENKBD_MT_EV_UP:
+		input_mt_report_slot_state(info->mtouch, MT_TOOL_FINGER, false);
+		break;
+
+	case XENKBD_MT_EV_SYN:
+		input_mt_sync_frame(info->mtouch);
+		input_sync(info->mtouch);
+		break;
+	}
+}
+
+static void xenkbd_handle_event(struct xenkbd_info *info,
+				union xenkbd_in_event *event)
+{
+	switch (event->type) {
+	case XENKBD_TYPE_MOTION:
+		xenkbd_handle_motion_event(info, &event->motion);
+		break;
+
+	case XENKBD_TYPE_KEY:
+		xenkbd_handle_key_event(info, &event->key);
+		break;
+
+	case XENKBD_TYPE_POS:
+		xenkbd_handle_position_event(info, &event->pos);
+		break;
+
+	case XENKBD_TYPE_MTOUCH:
+		xenkbd_handle_mt_event(info, &event->mtouch);
+		break;
+	}
+}
+
 static irqreturn_t input_handler(int rq, void *dev_id)
 {
 	struct xenkbd_info *info = dev_id;
@@ -66,44 +176,8 @@ static irqreturn_t input_handler(int rq, void *dev_id)
 	if (prod == page->in_cons)
 		return IRQ_HANDLED;
 	rmb();			/* ensure we see ring contents up to prod */
-	for (cons = page->in_cons; cons != prod; cons++) {
-		union xenkbd_in_event *event;
-		struct input_dev *dev;
-		event = &XENKBD_IN_RING_REF(page, cons);
-
-		dev = info->ptr;
-		switch (event->type) {
-		case XENKBD_TYPE_MOTION:
-			input_report_rel(dev, REL_X, event->motion.rel_x);
-			input_report_rel(dev, REL_Y, event->motion.rel_y);
-			if (event->motion.rel_z)
-				input_report_rel(dev, REL_WHEEL,
-						 -event->motion.rel_z);
-			break;
-		case XENKBD_TYPE_KEY:
-			dev = NULL;
-			if (test_bit(event->key.keycode, info->kbd->keybit))
-				dev = info->kbd;
-			if (test_bit(event->key.keycode, info->ptr->keybit))
-				dev = info->ptr;
-			if (dev)
-				input_report_key(dev, event->key.keycode,
-						 event->key.pressed);
-			else
-				pr_warn("unhandled keycode 0x%x\n",
-					event->key.keycode);
-			break;
-		case XENKBD_TYPE_POS:
-			input_report_abs(dev, ABS_X, event->pos.abs_x);
-			input_report_abs(dev, ABS_Y, event->pos.abs_y);
-			if (event->pos.rel_z)
-				input_report_rel(dev, REL_WHEEL,
-						 -event->pos.rel_z);
-			break;
-		}
-		if (dev)
-			input_sync(dev);
-	}
+	for (cons = page->in_cons; cons != prod; cons++)
+		xenkbd_handle_event(info, &XENKBD_IN_RING_REF(page, cons));
 	mb();			/* ensure we got ring contents */
 	page->in_cons = cons;
 	notify_remote_via_irq(info->irq);
@@ -115,9 +189,9 @@ static int xenkbd_probe(struct xenbus_device *dev,
 				  const struct xenbus_device_id *id)
 {
 	int ret, i;
-	unsigned int abs;
+	unsigned int abs, touch;
 	struct xenkbd_info *info;
-	struct input_dev *kbd, *ptr;
+	struct input_dev *kbd, *ptr, *mtouch;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
@@ -152,6 +226,17 @@ static int xenkbd_probe(struct xenbus_device *dev,
 		}
 	}
 
+	touch = xenbus_read_unsigned(dev->nodename,
+				     XENKBD_FIELD_FEAT_MTOUCH, 0);
+	if (touch) {
+		ret = xenbus_write(XBT_NIL, dev->nodename,
+				   XENKBD_FIELD_REQ_MTOUCH, "1");
+		if (ret) {
+			pr_warn("xenkbd: can't request multi-touch");
+			touch = 0;
+		}
+	}
+
 	/* keyboard */
 	kbd = input_allocate_device();
 	if (!kbd)
@@ -208,6 +293,58 @@ static int xenkbd_probe(struct xenbus_device *dev,
 	}
 	info->ptr = ptr;
 
+	/* multi-touch device */
+	if (touch) {
+		int num_cont, width, height;
+
+		mtouch = input_allocate_device();
+		if (!mtouch)
+			goto error_nomem;
+
+		num_cont = xenbus_read_unsigned(info->xbdev->nodename,
+						XENKBD_FIELD_MT_NUM_CONTACTS,
+						1);
+		width = xenbus_read_unsigned(info->xbdev->nodename,
+					     XENKBD_FIELD_MT_WIDTH,
+					     XENFB_WIDTH);
+		height = xenbus_read_unsigned(info->xbdev->nodename,
+					      XENKBD_FIELD_MT_HEIGHT,
+					      XENFB_HEIGHT);
+
+		mtouch->name = "Xen Virtual Multi-touch";
+		mtouch->phys = info->phys;
+		mtouch->id.bustype = BUS_PCI;
+		mtouch->id.vendor = 0x5853;
+		mtouch->id.product = 0xfffd;
+
+		input_set_abs_params(mtouch, ABS_MT_TOUCH_MAJOR,
+				     0, 255, 0, 0);
+		input_set_abs_params(mtouch, ABS_MT_POSITION_X,
+				     0, width, 0, 0);
+		input_set_abs_params(mtouch, ABS_MT_POSITION_Y,
+				     0, height, 0, 0);
+		input_set_abs_params(mtouch, ABS_MT_PRESSURE,
+				     0, 255, 0, 0);
+
+		ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT);
+		if (ret) {
+			input_free_device(mtouch);
+			xenbus_dev_fatal(info->xbdev, ret,
+					 "input_mt_init_slots");
+			goto error;
+		}
+
+		ret = input_register_device(mtouch);
+		if (ret) {
+			input_free_device(mtouch);
+			xenbus_dev_fatal(info->xbdev, ret,
+					 "input_register_device(mtouch)");
+			goto error;
+		}
+		info->mtouch_cur_contact_id = -1;
+		info->mtouch = mtouch;
+	}
+
 	ret = xenkbd_connect_backend(dev, info);
 	if (ret < 0)
 		goto error;
@@ -240,6 +377,8 @@ static int xenkbd_remove(struct xenbus_device *dev)
 		input_unregister_device(info->kbd);
 	if (info->ptr)
 		input_unregister_device(info->ptr);
+	if (info->mtouch)
+		input_unregister_device(info->mtouch);
 	free_page((unsigned long)info->page);
 	kfree(info);
 	return 0;
-- 
GitLab


From 0f107573da417c7f5c6d3a0160ebacc3adb019c4 Mon Sep 17 00:00:00 2001
From: Joseph Lo <josephl@nvidia.com>
Date: Sun, 2 Jul 2017 13:38:31 -0700
Subject: [PATCH 0982/1958] Input: gpio_keys - handle the missing key press
 event in resume phase

The GPIO key press event might be missed in the resume phase, if the key
had been released before the system had been resumed to the stage that it
could capture the press event. So we simulate the wakeup key press event
in case the key had been released by the time we got interrupt handler
to run.

Signed-off-by: Joseph Lo <josephl@nvidia.com>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/gpio_keys.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index da3d362f21b11..a047b9af83698 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -48,6 +48,7 @@ struct gpio_button_data {
 	spinlock_t lock;
 	bool disabled;
 	bool key_pressed;
+	bool suspended;
 };
 
 struct gpio_keys_drvdata {
@@ -396,8 +397,20 @@ static irqreturn_t gpio_keys_gpio_isr(int irq, void *dev_id)
 
 	BUG_ON(irq != bdata->irq);
 
-	if (bdata->button->wakeup)
+	if (bdata->button->wakeup) {
+		const struct gpio_keys_button *button = bdata->button;
+
 		pm_stay_awake(bdata->input->dev.parent);
+		if (bdata->suspended  &&
+		    (button->type == 0 || button->type == EV_KEY)) {
+			/*
+			 * Simulate wakeup key press in case the key has
+			 * already released by the time we got interrupt
+			 * handler to run.
+			 */
+			input_report_key(bdata->input, button->code, 1);
+		}
+	}
 
 	mod_delayed_work(system_wq,
 			 &bdata->work,
@@ -855,6 +868,7 @@ static int __maybe_unused gpio_keys_suspend(struct device *dev)
 			struct gpio_button_data *bdata = &ddata->data[i];
 			if (bdata->button->wakeup)
 				enable_irq_wake(bdata->irq);
+			bdata->suspended = true;
 		}
 	} else {
 		mutex_lock(&input->mutex);
@@ -878,6 +892,7 @@ static int __maybe_unused gpio_keys_resume(struct device *dev)
 			struct gpio_button_data *bdata = &ddata->data[i];
 			if (bdata->button->wakeup)
 				disable_irq_wake(bdata->irq);
+			bdata->suspended = false;
 		}
 	} else {
 		mutex_lock(&input->mutex);
-- 
GitLab


From de8c5221aa003935d6d31becf5850b247dff14a1 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Fri, 7 Jul 2017 14:20:00 -0500
Subject: [PATCH 0983/1958] tcmu: Fix dev_config_store

Currently when there is a reconfig, the uio_info->name
does not get updated to reflect the change in the dev_config
name change.

On restart tcmu-runner there will be a mismatch between
the dev_config string in uio and the tcmu structure that contains
the string. When this occurs it'll reload the one in uio
and you lose the reconfigured device path.

v2: Created a helper function for the updating of uio_info

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 32 +++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index cbbfba0c13527..2f1fa927682e3 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1418,19 +1418,14 @@ static int tcmu_netlink_event(struct tcmu_dev *udev, enum tcmu_genl_cmd cmd,
 	return ret;
 }
 
-static int tcmu_configure_device(struct se_device *dev)
+static int tcmu_update_uio_info(struct tcmu_dev *udev)
 {
-	struct tcmu_dev *udev = TCMU_DEV(dev);
 	struct tcmu_hba *hba = udev->hba->hba_ptr;
 	struct uio_info *info;
-	struct tcmu_mailbox *mb;
-	size_t size;
-	size_t used;
-	int ret = 0;
+	size_t size, used;
 	char *str;
 
 	info = &udev->uio_info;
-
 	size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
 			udev->dev_config);
 	size += 1; /* for \0 */
@@ -1439,12 +1434,27 @@ static int tcmu_configure_device(struct se_device *dev)
 		return -ENOMEM;
 
 	used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
-
 	if (udev->dev_config[0])
 		snprintf(str + used, size - used, "/%s", udev->dev_config);
 
 	info->name = str;
 
+	return 0;
+}
+
+static int tcmu_configure_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	struct uio_info *info;
+	struct tcmu_mailbox *mb;
+	int ret = 0;
+
+	ret = tcmu_update_uio_info(udev);
+	if (ret)
+		return ret;
+
+	info = &udev->uio_info;
+
 	udev->mb_addr = vzalloc(CMDR_SIZE);
 	if (!udev->mb_addr) {
 		ret = -ENOMEM;
@@ -1786,6 +1796,12 @@ static ssize_t tcmu_dev_config_store(struct config_item *item, const char *page,
 			pr_err("Unable to reconfigure device\n");
 			return ret;
 		}
+		strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);
+
+		ret = tcmu_update_uio_info(udev);
+		if (ret)
+			return ret;
+		return count;
 	}
 	strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);
 
-- 
GitLab


From 388fe6996bf658146e70c0df986981eae4be0385 Mon Sep 17 00:00:00 2001
From: Tang Wenji <tang.wenji@zte.com.cn>
Date: Sat, 8 Jul 2017 11:15:44 +0800
Subject: [PATCH 0984/1958] target: Fix cmd size for PR-OUT in
 passthrough_parse_cdb

The cmd size should be 4bytes form byte5 to byte8 when CDB opcode
is PERSISTENT_RESERVE_OUT in SPC3 and SPC4

(Also fix up the same in spc_parse_cdb - MNC)

Signed-off-by: Tang Wenji <tang.wenji@zte.com.cn>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 2 +-
 drivers/target/target_core_spc.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 1c7c57a3c52e1..e8dd6da164b28 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1183,7 +1183,7 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 		}
 		if (cdb[0] == PERSISTENT_RESERVE_OUT) {
 			cmd->execute_cmd = target_scsi3_emulate_pr_out;
-			size = get_unaligned_be16(&cdb[7]);
+			size = get_unaligned_be32(&cdb[5]);
 			return target_cmd_size_check(cmd, size);
 		}
 
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index f59ac76710318..cb0461a108080 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -1307,7 +1307,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		cmd->execute_cmd = target_scsi3_emulate_pr_in;
 		break;
 	case PERSISTENT_RESERVE_OUT:
-		*size = get_unaligned_be16(&cdb[7]);
+		*size = get_unaligned_be32(&cdb[5]);
 		cmd->execute_cmd = target_scsi3_emulate_pr_out;
 		break;
 	case RELEASE:
-- 
GitLab


From 7ee0031786dc3762f61d0ec9171bb75d56a5ac76 Mon Sep 17 00:00:00 2001
From: Tang Wenji <tang.wenji@zte.com.cn>
Date: Sat, 8 Jul 2017 11:28:25 +0800
Subject: [PATCH 0985/1958] target: Fix return sense reason in
 target_scsi3_emulate_pr_out

The sense reason should be TCM_PARAMETER_LIST_LENGTH_ERROR when
parmeter length error.

Also the cdb[1] & 0x1f has been assigned to local variable sa,
so use sa instead of it.

Signed-off-by: Tang Wenji <tang.wenji@zte.com.cn>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_pr.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 9921d4d6bb418..6d5def64db61d 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -3585,7 +3585,7 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	if (cmd->data_length < 24) {
 		pr_warn("SPC-PR: Received PR OUT parameter list"
 			" length too small: %u\n", cmd->data_length);
-		return TCM_INVALID_PARAMETER_LIST;
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
 	}
 
 	/*
@@ -3629,7 +3629,7 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	/*
 	 * SPEC_I_PT=1 is only valid for Service action: REGISTER
 	 */
-	if (spec_i_pt && ((cdb[1] & 0x1f) != PRO_REGISTER))
+	if (spec_i_pt && (sa != PRO_REGISTER))
 		return TCM_INVALID_PARAMETER_LIST;
 
 	/*
@@ -3641,11 +3641,11 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	 * the sense key set to ILLEGAL REQUEST, and the additional sense
 	 * code set to PARAMETER LIST LENGTH ERROR.
 	 */
-	if (!spec_i_pt && ((cdb[1] & 0x1f) != PRO_REGISTER_AND_MOVE) &&
+	if (!spec_i_pt && (sa != PRO_REGISTER_AND_MOVE) &&
 	    (cmd->data_length != 24)) {
 		pr_warn("SPC-PR: Received PR OUT illegal parameter"
 			" list length: %u\n", cmd->data_length);
-		return TCM_INVALID_PARAMETER_LIST;
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
 	}
 
 	/*
@@ -3685,7 +3685,7 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 		break;
 	default:
 		pr_err("Unknown PERSISTENT_RESERVE_OUT service"
-			" action: 0x%02x\n", cdb[1] & 0x1f);
+			" action: 0x%02x\n", sa);
 		return TCM_INVALID_CDB_FIELD;
 	}
 
-- 
GitLab


From 68a6afa7fad2644e6b575aba2f2fbda81e730e5b Mon Sep 17 00:00:00 2001
From: Christos Gkekas <chris.gekas@gmail.com>
Date: Sun, 9 Jul 2017 11:45:04 +0100
Subject: [PATCH 0986/1958] cifs: Clean up unused variables in smb2pdu.c

There are multiple unused variables struct TCP_Server_Info *server
defined in many methods in smb2pdu.c. They should be removed and related
logic simplified.

Signed-off-by: Christos Gkekas <chris.gekas@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2pdu.c | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c3c8eaff56dfd..5fb2fc2d0080b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1167,15 +1167,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	int rc = 0;
 	int resp_buftype;
 	int unc_path_len;
-	struct TCP_Server_Info *server;
 	__le16 *unc_path = NULL;
 	int flags = 0;
 
 	cifs_dbg(FYI, "TCON\n");
 
-	if ((ses->server) && tree)
-		server = ses->server;
-	else
+	if (!(ses->server) || !tree)
 		return -EIO;
 
 	unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL);
@@ -1294,15 +1291,12 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
 {
 	struct smb2_tree_disconnect_req *req; /* response is trivial */
 	int rc = 0;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	int flags = 0;
 
 	cifs_dbg(FYI, "Tree Disconnect\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
@@ -1794,7 +1788,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	struct smb2_ioctl_req *req;
 	struct smb2_ioctl_rsp *rsp;
 	struct smb2_sync_hdr *shdr;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct kvec iov[2];
 	struct kvec rsp_iov;
@@ -1817,9 +1810,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	else
 		return -EIO;
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req);
@@ -1977,7 +1968,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 {
 	struct smb2_close_req *req;
 	struct smb2_close_rsp *rsp;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	struct kvec iov[1];
 	struct kvec rsp_iov;
@@ -1987,9 +1977,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 
 	cifs_dbg(FYI, "Close\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_CLOSE, tcon, (void **) &req);
@@ -2091,15 +2079,12 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
 	struct kvec rsp_iov;
 	int rc = 0;
 	int resp_buftype;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	int flags = 0;
 
 	cifs_dbg(FYI, "Query Info\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req);
@@ -2311,7 +2296,6 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	   u64 volatile_fid)
 {
 	struct smb2_flush_req *req;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	struct kvec iov[1];
 	struct kvec rsp_iov;
@@ -2321,9 +2305,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 
 	cifs_dbg(FYI, "Flush\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req);
@@ -3011,13 +2993,10 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 	int rc = 0;
 	int resp_buftype;
 	unsigned int i;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	int flags = 0;
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	if (!num)
-- 
GitLab


From d1438ad8f3eec7207618b8e01f9f3eec7b6f67c4 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 7 Jul 2017 18:08:25 -0700
Subject: [PATCH 0987/1958] nvme_fc/nvmet_fc: revise Create Association
 descriptor length

Revises the Create Association LS for the amount of pad expected in 1.16.

Add defines for the minimum lengths that a target can accept (e.g. variable
pad lengths)

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/nvme-fc.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index bc711a10be05c..21c37e39e41a2 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -17,6 +17,7 @@
 
 /*
  * This file contains definitions relative to FC-NVME r1.14 (16-020vB).
+ * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
  */
 
 #ifndef _NVME_FC_H
@@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
 	uuid_t	hostid;
 	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
 	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
-	u8	rsvd632[384];
+	__be32	rsvd584[108];		/* pad to 1016 bytes,
+					 * which makes overall LS rqst
+					 * payload 1024 bytes
+					 */
 };
 
+#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN	\
+		offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584)
+
+#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN	\
+		(FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \
+		 offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio))
+
+
+
 /* FCNVME_LSDESC_CREATE_CONN_CMD */
 struct fcnvme_lsdesc_cr_conn_cmd {
 	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
@@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst {
 	struct fcnvme_lsdesc_cr_assoc_cmd	assoc_cmd;
 };
 
+#define FCNVME_LSDESC_CRA_RQST_MINLEN	\
+		(offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \
+			FCNVME_LSDESC_CRA_CMD_DESC_MINLEN)
+
+#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN	\
+		FCNVME_LSDESC_CRA_CMD_DESC_MINLEN
+
+
 struct fcnvme_ls_cr_assoc_acc {
 	struct fcnvme_ls_acc_hdr		hdr;
 	struct fcnvme_lsdesc_assoc_id		associd;
-- 
GitLab


From 4cb7ca8073e1f226487168155c9a887079d605e4 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 7 Jul 2017 18:08:26 -0700
Subject: [PATCH 0988/1958] nvmet_fc: Accept variable pad lengths on Create
 Association LS

Target validation of the Create Association LS revised to accept any
LS as long as all non-pad data has been received. This allows a (newer)
target to accept the LS from older initiators with varying pad lengths.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/fc.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 7692a96c9065b..1e6dcc241b3cf 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1164,18 +1164,24 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 
 	memset(acc, 0, sizeof(*acc));
 
-	if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst))
+	/*
+	 * FC-NVME spec changes. There are initiators sending different
+	 * lengths as padding sizes for Create Association Cmd descriptor
+	 * was incorrect.
+	 * Accept anything of "minimum" length. Assume format per 1.15
+	 * spec (with HOSTID reduced to 16 bytes), ignore how long the
+	 * trailing pad length is.
+	 */
+	if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN)
 		ret = VERR_CR_ASSOC_LEN;
-	else if (rqst->desc_list_len !=
-			fcnvme_lsdesc_len(
-				sizeof(struct fcnvme_ls_cr_assoc_rqst)))
+	else if (rqst->desc_list_len <
+			cpu_to_be32(FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN))
 		ret = VERR_CR_ASSOC_RQST_LEN;
 	else if (rqst->assoc_cmd.desc_tag !=
 			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD))
 		ret = VERR_CR_ASSOC_CMD;
-	else if (rqst->assoc_cmd.desc_len !=
-			fcnvme_lsdesc_len(
-				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)))
+	else if (rqst->assoc_cmd.desc_len <
+			cpu_to_be32(FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN))
 		ret = VERR_CR_ASSOC_CMD_LEN;
 	else if (!rqst->assoc_cmd.ersp_ratio ||
 		 (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >=
-- 
GitLab


From 2ee0e4ed5ca24c0642a7b72d75b4fe6dfc0a8db8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 6 Jul 2017 12:26:52 +0300
Subject: [PATCH 0989/1958] nvme-pci: compile warnings in nvme_alloc_host_mem()

"i" should be signed or it could cause a forever loop on the cleanup
path. "size" can be used uninitialized.

Fixes: 87ad72a59a38 ("nvme-pci: implement host memory buffer support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 882ed36771176..73fddf2c99f94 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1582,9 +1582,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
 	struct nvme_host_mem_buf_desc *descs;
-	u32 chunk_size, max_entries, i = 0;
+	u32 chunk_size, max_entries;
+	int i = 0;
 	void **bufs;
-	u64 size, tmp;
+	u64 size = 0, tmp;
 
 	/* start big and work our way down */
 	chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
-- 
GitLab


From 7581d5ca2bb269cfc2ce2d0cb489aac513167f6b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 22 Jun 2017 17:02:11 +0100
Subject: [PATCH 0990/1958] drm/i915/fbdev: Check for existence of ifbdev->vma
 before operations

Commit fabef825626d ("drm/i915: Drop struct_mutex around frontbuffer
flushes") adds a dependency to ifbdev->vma when flushing the framebufer,
but the checks are only against the existence of the ifbdev->fb and not
against ifbdev->vma. This leaves a window of opportunity where we may
try to operate on the fbdev prior to it being probed (thanks to
asynchronous booting).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101534
Fixes: fabef825626d ("drm/i915: Drop struct_mutex around frontbuffer flushes")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170622160211.783-1-chris@chris-wilson.co.uk
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: stable@vger.kernel.org
(cherry picked from commit 15727ed0d944ce1dec8b9e1082dd3df29a0fdf44)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_fbdev.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 03347c6ae5993..0c4cde6b2e6fc 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -535,13 +535,14 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	if (ifbdev->fb) {
+	if (ifbdev->vma) {
 		mutex_lock(&ifbdev->helper.dev->struct_mutex);
 		intel_unpin_fb_vma(ifbdev->vma);
 		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
+	}
 
+	if (ifbdev->fb)
 		drm_framebuffer_remove(&ifbdev->fb->base);
-	}
 
 	kfree(ifbdev);
 }
@@ -765,7 +766,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
 	struct intel_fbdev *ifbdev = dev_priv->fbdev;
 	struct fb_info *info;
 
-	if (!ifbdev || !ifbdev->fb)
+	if (!ifbdev || !ifbdev->vma)
 		return;
 
 	info = ifbdev->helper.fbdev;
@@ -812,7 +813,7 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev)
 {
 	struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
 
-	if (ifbdev && ifbdev->fb)
+	if (ifbdev && ifbdev->vma)
 		drm_fb_helper_hotplug_event(&ifbdev->helper);
 }
 
@@ -824,7 +825,7 @@ void intel_fbdev_restore_mode(struct drm_device *dev)
 		return;
 
 	intel_fbdev_sync(ifbdev);
-	if (!ifbdev->fb)
+	if (!ifbdev->vma)
 		return;
 
 	if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0)
-- 
GitLab


From b27c1e683d2c8cd666a042b02096d18237911a37 Mon Sep 17 00:00:00 2001
From: weiping zhang <zhangweiping@didichuxing.com>
Date: Mon, 10 Jul 2017 16:46:59 +0800
Subject: [PATCH 0991/1958] nvme-pci: add module parameter for io queue depth

Adjust io queue depth more easily, and make sure io queue depth >= 2.

Signed-off-by: weiping zhang <zhangweiping@didichuxing.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 73fddf2c99f94..48d3ed3d48d10 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -35,7 +35,6 @@
 
 #include "nvme.h"
 
-#define NVME_Q_DEPTH		1024
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 
@@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444);
 MODULE_PARM_DESC(max_host_mem_size_mb,
 	"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+	.set = io_queue_depth_set,
+	.get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+
 struct nvme_dev;
 struct nvme_queue;
 
@@ -104,6 +113,17 @@ struct nvme_dev {
 	void **host_mem_desc_bufs;
 };
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+	int n = 0, ret;
+
+	ret = kstrtoint(val, 10, &n);
+	if (ret != 0 || n < 2)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
 {
 	return qid * 2 * stride;
@@ -1893,7 +1913,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
 	dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
-				NVME_Q_DEPTH);
+				io_queue_depth);
 	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
 
-- 
GitLab


From 7e988b103d0d52190244517edc76e649071284bb Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 7 Jul 2017 15:49:00 +0200
Subject: [PATCH 0992/1958] KVM: use correct accessor function for
 __kvm_memslots

kvm memslots are protected by srcu and not by rcu. We must use
srcu_dereference_check instead of rcu_dereference_check.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b3ca77a96b2d7..648b34cabb382 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -568,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm);
 
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
-	return rcu_dereference_check(kvm->memslots[as_id],
-			srcu_read_lock_held(&kvm->srcu)
-			|| lockdep_is_held(&kvm->slots_lock));
+	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
+			lockdep_is_held(&kvm->slots_lock));
 }
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
-- 
GitLab


From 1e2a516e89fc412a754327522ab271b42f99c6b4 Mon Sep 17 00:00:00 2001
From: Balbir Singh <bsingharora@gmail.com>
Date: Thu, 29 Jun 2017 21:57:26 +1000
Subject: [PATCH 0993/1958] powerpc/kexec: Fix radix to hash kexec due to
 IAMR/AMOR

This patch fixes a crash seen while doing a kexec from radix mode to
hash mode. Key 0 is special in hash and used in the RPN by default, we
set the key values to 0 today. In radix mode key 0 is used to control
supervisor<->user access. In hash key 0 is used by default, so the
first instruction after the switch causes a crash on kexec.

Commit 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of
user space") introduced the setting of IAMR and AMOR values to prevent
execution of user mode instructions from supervisor mode. We need to
clean up these SPR's on kexec.

Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user space")
Cc: stable@vger.kernel.org # v4.10+
Reported-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/misc_64.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index c119044cad0d5..8ac0bd2bddb0c 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -614,6 +614,18 @@ _GLOBAL(kexec_sequence)
 	li	r0,0
 	std	r0,16(r1)
 
+BEGIN_FTR_SECTION
+	/*
+	 * This is the best time to turn AMR/IAMR off.
+	 * key 0 is used in radix for supervisor<->user
+	 * protection, but on hash key 0 is reserved
+	 * ideally we want to enter with a clean state.
+	 * NOTE, we rely on r0 being 0 from above.
+	 */
+	mtspr	SPRN_IAMR,r0
+	mtspr	SPRN_AMOR,r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
 	/* save regs for local vars on new stack.
 	 * yes, we won't go back, but ...
 	 */
-- 
GitLab


From 1c0eaf0f56d6128af7f0f252855173fcee85d202 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 30 Jun 2017 17:37:32 -0500
Subject: [PATCH 0994/1958] powerpc/powernv: Tell OPAL about our MMU mode on
 POWER9

That will allow OPAL to configure the CPU in an optimal way.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/opal-api.h    |  9 +++++++++
 arch/powerpc/platforms/powernv/opal.c  | 19 +++++++++++++++++--
 arch/powerpc/platforms/powernv/setup.c | 11 ++++++++++-
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index ef930ba500f94..3130a73652c70 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -876,6 +876,15 @@ struct OpalIoPhb4ErrorData {
 enum {
 	OPAL_REINIT_CPUS_HILE_BE	= (1 << 0),
 	OPAL_REINIT_CPUS_HILE_LE	= (1 << 1),
+
+	/* These two define the base MMU mode of the host on P9
+	 *
+	 * On P9 Nimbus DD2.0 and Cumlus (and later), KVM can still
+	 * create hash guests in "radix" mode with care (full core
+	 * switch only).
+	 */
+	OPAL_REINIT_CPUS_MMU_HASH	= (1 << 2),
+	OPAL_REINIT_CPUS_MMU_RADIX	= (1 << 3),
 };
 
 typedef struct oppanel_line {
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 59684b4af4d1d..9b87abb178f02 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -59,6 +59,8 @@ static struct task_struct *kopald_tsk;
 
 void opal_configure_cores(void)
 {
+	u64 reinit_flags = 0;
+
 	/* Do the actual re-init, This will clobber all FPRs, VRs, etc...
 	 *
 	 * It will preserve non volatile GPRs and HSPRG0/1. It will
@@ -66,11 +68,24 @@ void opal_configure_cores(void)
 	 * but it might clobber a bunch.
 	 */
 #ifdef __BIG_ENDIAN__
-	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+	reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
 #else
-	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
+	reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
 #endif
 
+	/*
+	 * POWER9 always support running hash:
+	 *  ie. Host hash  supports  hash guests
+	 *      Host radix supports  hash/radix guests
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
+		if (early_radix_enabled())
+			reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
+	}
+
+	opal_reinit_cpus(reinit_flags);
+
 	/* Restore some bits */
 	if (cur_cpu_spec->cpu_restore)
 		cur_cpu_spec->cpu_restore();
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 2dc7e5fb86c33..897aa1400eb83 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -225,6 +225,8 @@ static void pnv_kexec_wait_secondaries_down(void)
 
 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 {
+	u64 reinit_flags;
+
 	if (xive_enabled())
 		xive_kexec_teardown_cpu(secondary);
 	else
@@ -254,8 +256,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 		 * We might be running as little-endian - now that interrupts
 		 * are disabled, reset the HILE bit to big-endian so we don't
 		 * take interrupts in the wrong endian later
+		 *
+		 * We reinit to enable both radix and hash on P9 to ensure
+		 * the mode used by the next kernel is always supported.
 		 */
-		opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+		reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
+				OPAL_REINIT_CPUS_MMU_HASH;
+		opal_reinit_cpus(reinit_flags);
 	}
 }
 #endif /* CONFIG_KEXEC_CORE */
-- 
GitLab


From c6bb0b8d426a8cf865ca9c8a532cc3a2927cfceb Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Sat, 8 Jul 2017 07:45:32 -0500
Subject: [PATCH 0995/1958] powerpc/mm/radix: Properly clear process table
 entry

On radix, the process table entry we want to clear when destroying a
context is entry 0, not entry 1. This has no *immediate* consequence
on Power9, but it can cause other bugs to become worse.

Fixes: 7e381c0ff618 ("powerpc/mm/radix: Add mmu context handling callback for radix")
Cc: stable@vger.kernel.org # v4.7+
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_book3s64.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 71de2c6d88f34..ceff76262c7e8 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -223,9 +223,15 @@ void destroy_context(struct mm_struct *mm)
 	mm->context.cop_lockp = NULL;
 #endif /* CONFIG_PPC_ICSWX */
 
-	if (radix_enabled())
-		process_tb[mm->context.id].prtb1 = 0;
-	else
+	if (radix_enabled()) {
+		/*
+		 * Radix doesn't have a valid bit in the process table
+		 * entries. However we know that at least P9 implementation
+		 * will avoid caching an entry with an invalid RTS field,
+		 * and 0 is invalid. So this will do.
+		 */
+		process_tb[mm->context.id].prtb0 = 0;
+	} else
 		subpage_prot_free(mm);
 	destroy_pagetable_page(mm);
 	__destroy_context(mm->context.id);
-- 
GitLab


From 3a6a04706fd08eb5677fdfc086e26fcd5eb154f4 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 7 Jul 2017 16:12:16 -0500
Subject: [PATCH 0996/1958] powerpc/mm/radix: Synchronize updates to the
 process table

When writing to the process table, we need to ensure the store is
visible to a subsequent access by the MMU. We assume we never have
the PID active while doing the update, so a ptesync/isync pair
should hopefully be a big enough hammer for our purpose.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_book3s64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index ceff76262c7e8..abed1fe6992fa 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -138,6 +138,14 @@ static int radix__init_new_context(struct mm_struct *mm)
 	rts_field = radix__get_tree_size();
 	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
 
+	/*
+	 * Order the above store with subsequent update of the PID
+	 * register (at which point HW can start loading/caching
+	 * the entry) and the corresponding load by the MMU from
+	 * the L2 cache.
+	 */
+	asm volatile("ptesync;isync" : : : "memory");
+
 	mm->context.npu_context = NULL;
 
 	return index;
-- 
GitLab


From c43aeb198048f64abda8655fdcdebe71cf1877ba Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 10 Jul 2017 07:40:49 -0400
Subject: [PATCH 0997/1958] fix brown paperbag bug in inlined copy_..._iter()

"copied nothing" == "return 0", not "return full size".

Fixes: aa28de275a24 "iov_iter/hardening: move object size checks to inlined part"
Spotted-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 342d2dc225b95..8a642cda641c8 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -103,7 +103,7 @@ static __always_inline __must_check
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, true)))
-		return bytes;
+		return 0;
 	else
 		return _copy_to_iter(addr, bytes, i);
 }
@@ -112,7 +112,7 @@ static __always_inline __must_check
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter(addr, bytes, i);
 }
@@ -130,7 +130,7 @@ static __always_inline __must_check
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter_nocache(addr, bytes, i);
 }
@@ -160,7 +160,7 @@ static __always_inline __must_check
 size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter_flushcache(addr, bytes, i);
 }
-- 
GitLab


From 42a6e0996084972574e0a2b23e7326b78b0f64c5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 10 Jul 2017 13:22:50 +0200
Subject: [PATCH 0998/1958] nvmem: include linux/err.h from header

The new support for nvmem devices from the rtc layer caused a build
error in some configurations:

include/linux/nvmem-provider.h: In function 'nvmem_register':
include/linux/nvmem-provider.h:51:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration]

This adds the missing include to ensure we can always include
the header.

Fixes: 697e5a47aa12 ("rtc: add generic nvmem support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/nvmem-provider.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index cd93416d762ed..497706f5adcac 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -12,6 +12,9 @@
 #ifndef _LINUX_NVMEM_PROVIDER_H
 #define _LINUX_NVMEM_PROVIDER_H
 
+#include <linux/err.h>
+#include <linux/errno.h>
+
 struct nvmem_device;
 struct nvmem_cell_info;
 typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset,
-- 
GitLab


From b49defe83659cefbb1763d541e779da32594ab10 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 30 Jun 2017 13:25:45 +0200
Subject: [PATCH 0999/1958] kvm: avoid unused variable warning for UP builds

The uniprocessor version of smp_call_function_many does not evaluate
all of its argument, and the compiler emits a warning about "wait"
being unused.  This breaks the build on architectures for which
"-Werror" is enabled by default.

Work around it by moving the invocation of smp_call_function_many to
its own inline function.

Reported-by: Paul Mackerras <paulus@ozlabs.org>
Cc: stable@vger.kernel.org
Fixes: 7a97cec26b94c909f4cbad2dc3186af3e457a522
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 19f0ecb9b93e2..0d796c9a64824 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -187,12 +187,23 @@ static void ack_flush(void *_completed)
 {
 }
 
+static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+{
+	if (unlikely(!cpus))
+		cpus = cpu_online_mask;
+
+	if (cpumask_empty(cpus))
+		return false;
+
+	smp_call_function_many(cpus, ack_flush, NULL, wait);
+	return true;
+}
+
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
 	cpumask_var_t cpus;
-	bool called = true;
-	bool wait = req & KVM_REQUEST_WAIT;
+	bool called;
 	struct kvm_vcpu *vcpu;
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
@@ -207,14 +218,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 		if (cpus != NULL && cpu != -1 && cpu != me &&
 		    kvm_request_needs_ipi(vcpu, req))
-			cpumask_set_cpu(cpu, cpus);
+			__cpumask_set_cpu(cpu, cpus);
 	}
-	if (unlikely(cpus == NULL))
-		smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait);
-	else if (!cpumask_empty(cpus))
-		smp_call_function_many(cpus, ack_flush, NULL, wait);
-	else
-		called = false;
+	called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
 	put_cpu();
 	free_cpumask_var(cpus);
 	return called;
-- 
GitLab


From 70bcd708dfd1de453905212c9c5c755b1844772b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 5 Jul 2017 12:38:06 +0200
Subject: [PATCH 1000/1958] KVM: vmx: expose more information for
 KVM_INTERNAL_ERROR_DELIVERY_EV exits

This exit ended up being reported, but the currently exposed data does not provide
much of a starting point for debugging.  In the reported case, the vmexit was
an EPT misconfiguration (MMIO access).  Let userspace report ethe exit qualification
and, if relevant, the GPA.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f76efad248aba..7592a18ecc1ca 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8418,9 +8418,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 			exit_reason != EXIT_REASON_TASK_SWITCH)) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
-		vcpu->run->internal.ndata = 2;
+		vcpu->run->internal.ndata = 3;
 		vcpu->run->internal.data[0] = vectoring_info;
 		vcpu->run->internal.data[1] = exit_reason;
+		vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+		if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
+			vcpu->run->internal.ndata++;
+			vcpu->run->internal.data[3] =
+				vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+		}
 		return 0;
 	}
 
-- 
GitLab


From ff0fa73247e442518936baa43c3f037b17f10fa7 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Tue, 4 Jul 2017 22:33:07 -0500
Subject: [PATCH 1001/1958] btrfs: nowait aio: Correct assignment of pos

Assigning pos for usage early messes up in append mode, where the pos is
re-assigned in generic_write_checks(). Assign pos later to get the
correct position to write from iocb->ki_pos.

Since check_can_nocow also uses the value of pos, we shift
generic_write_checks() before check_can_nocow(). Checks with IOCB_DIRECT
are present in generic_write_checks(), so checking for IOCB_NOWAIT is
enough.

Also, put locking sequence in the fast path.

This fixes a user visible bug, as reported:

"apparently breaks several shell related features on my system.
In zsh history stopped working, because no new entries are added
anymore.
I fist noticed the issue when I tried to build mplayer. It uses a shell
script to generate a help_mp.h file:
[...]

Here is a simple testcase:

 % echo "foo" >> test
 % echo "foo" >> test
 % cat test
 foo
 %
"

Fixes: edf064e7c6fe ("btrfs: nowait aio support")
CC: Jens Axboe <axboe@kernel.dk>
Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Link: https://lkml.kernel.org/r/20170704042306.GA274@x4
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 59e2dccdf75b7..ad53832838b50 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1875,16 +1875,25 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	ssize_t num_written = 0;
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 	ssize_t err;
-	loff_t pos = iocb->ki_pos;
+	loff_t pos;
 	size_t count = iov_iter_count(from);
 	loff_t oldsize;
 	int clean_page = 0;
 
-	if ((iocb->ki_flags & IOCB_NOWAIT) &&
-			(iocb->ki_flags & IOCB_DIRECT)) {
-		/* Don't sleep on inode rwsem */
-		if (!inode_trylock(inode))
+	if (!inode_trylock(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
 			return -EAGAIN;
+		inode_lock(inode);
+	}
+
+	err = generic_write_checks(iocb, from);
+	if (err <= 0) {
+		inode_unlock(inode);
+		return err;
+	}
+
+	pos = iocb->ki_pos;
+	if (iocb->ki_flags & IOCB_NOWAIT) {
 		/*
 		 * We will allocate space in case nodatacow is not set,
 		 * so bail
@@ -1895,13 +1904,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 			inode_unlock(inode);
 			return -EAGAIN;
 		}
-	} else
-		inode_lock(inode);
-
-	err = generic_write_checks(iocb, from);
-	if (err <= 0) {
-		inode_unlock(inode);
-		return err;
 	}
 
 	current->backing_dev_info = inode_to_bdi(inode);
-- 
GitLab


From abe594c2cf55b46d4feb61c01fe9063afff2e50c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 9 Jul 2017 21:31:02 +0200
Subject: [PATCH 1002/1958] ALSA: pcm: Protect call to dma_mmap_coherent() by
 check for HAS_DMA

If NO_DMA=y:

    sound/core/pcm_native.o: In function `snd_pcm_lib_default_mmap':
    pcm_native.c:(.text+0x144c): undefined reference to `bad_dma_ops'
    pcm_native.c:(.text+0x1474): undefined reference to `dma_common_mmap'

Add a check for HAS_DMA to fix this.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/pcm_native.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 0d18343105313..b26c7e61d6004 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3502,7 +3502,7 @@ int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream,
 	}
 #endif /* CONFIG_GENERIC_ALLOCATOR */
 #ifndef CONFIG_X86 /* for avoiding warnings arch/x86/mm/pat.c */
-	if (!substream->ops->page &&
+	if (IS_ENABLED(CONFIG_HAS_DMA) && !substream->ops->page &&
 	    substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
 		return dma_mmap_coherent(substream->dma_buffer.dev.dev,
 					 area,
-- 
GitLab


From 85dc0f8554fa024b02eb50dcca71fa9881a45ea5 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 10 Jul 2017 16:05:58 +0200
Subject: [PATCH 1003/1958] ALSA: pcm: Simplify check for dma_mmap_coherent()
 availability

We check the availability of dma_mmap_coherent() in hw_support_mmap()
but with an ugly ifdef of lots of arch-checks.  Now we have a nice
CONFIG_ARCH_NO_COHERENT_DMA_MMAP kconfig, and this can be used
together with CONFIG_HAS_DMA check for a cleaner and more
comprehensive check.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/pcm_native.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index b26c7e61d6004..2a730f38e3756 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -238,10 +238,8 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
 {
 	if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
 		return false;
-	/* check architectures that return -EINVAL from dma_mmap_coherent() */
-	/* FIXME: this should be some global flag */
-#if defined(CONFIG_C6X) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) ||\
-	defined(CONFIG_PARISC) || defined(CONFIG_XTENSA)
+	/* architecture supports dma_mmap_coherent()? */
+#if defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP) || !defined(CONFIG_HAS_DMA)
 	if (!substream->ops->mmap &&
 	    substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
 		return false;
-- 
GitLab


From f01fc4177352614743ce3fe289687387fcf9e6c6 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 10 Jul 2017 13:34:08 +0100
Subject: [PATCH 1004/1958] ARM/PCI: Fix pcibios_init_resource() struct
 pci_host_bridge leak

Since commit 97ad2bdcbe85 ("ARM/PCI: Convert PCI scan API to
pci_scan_root_bus_bridge()") the space for struct pci_sys_data is allocated
by pci_alloc_host_bridge() as part of the struct pci_host_bridge.

Therefore, failure paths must deallocate the entire pci_host_bridge by
using pci_free_host_bridge().

Fixes: 97ad2bdcbe85 ("ARM/PCI: Convert PCI scan API to pci_scan_root_bus_bridge()")
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
[bhelgaas: changelog]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andrew Lunn <andrew@lunn.ch>
---
 arch/arm/kernel/bios32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 56dc1a3a33b43..c1809fb549ddd 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -480,7 +480,7 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 
 			ret = pcibios_init_resource(nr, sys, hw->io_optional);
 			if (ret)  {
-				kfree(sys);
+				pci_free_host_bridge(bridge);
 				break;
 			}
 
-- 
GitLab


From c2f30f08c115a6bb9061afb4ba314f7e41f20686 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 10 Jul 2017 17:24:02 +0300
Subject: [PATCH 1005/1958] nvmet: avoid unneeded assignment of submit_bio
 return value

We actually using the cookie returned from the last submit_bio
call.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/io-cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 40128793e6135..3b4d47a6abdb8 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -85,7 +85,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 			bio_set_op_attrs(bio, op, op_flags);
 
 			bio_chain(bio, prev);
-			cookie = submit_bio(prev);
+			submit_bio(prev);
 		}
 
 		sector += sg->length >> 9;
-- 
GitLab


From 4aaf7694f841edc96fe0f72958aabe59204b3611 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Tue, 4 Jul 2017 11:20:30 +0800
Subject: [PATCH 1006/1958] md/bitmap: don't read page from device with
 Bitmap_sync

The device owns Bitmap_sync flag needs recovery
to become in sync, and read page from this type
device could get stale status.

Also add comments for Bitmap_sync bit per the
suggestion from Shaohua and Neil.

Previous disscussion can be found here:
https://marc.info/?t=149760428900004&r=1&w=2

Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/bitmap.c | 3 ++-
 drivers/md/md.h     | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f4eace5ea1840..40f3cd7eab0fc 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -156,7 +156,8 @@ static int read_sb_page(struct mddev *mddev, loff_t offset,
 
 	rdev_for_each(rdev, mddev) {
 		if (! test_bit(In_sync, &rdev->flags)
-		    || test_bit(Faulty, &rdev->flags))
+		    || test_bit(Faulty, &rdev->flags)
+		    || test_bit(Bitmap_sync, &rdev->flags))
 			continue;
 
 		target = offset + index * (PAGE_SIZE/512);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 991f0fe2dcc68..b50eb4ac1b829 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -134,7 +134,9 @@ enum flag_bits {
 	Faulty,			/* device is known to have a fault */
 	In_sync,		/* device is in_sync with rest of array */
 	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
-				 * bitmap-based recovery to get fully in sync
+				 * bitmap-based recovery to get fully in sync.
+				 * The bit is only meaningful before device
+				 * has been passed to pers->hot_add_disk.
 				 */
 	WriteMostly,		/* Avoid reading if at all possible */
 	AutoDetected,		/* added by auto-detect */
-- 
GitLab


From b5d27718f38843a74552e9a93d32e2391fd3999f Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Wed, 5 Jul 2017 17:34:04 +0800
Subject: [PATCH 1007/1958] Raid5 should update rdev->sectors after reshape

The raid5 md device is created by the disks which we don't use the total size. For example,
the size of the device is 5G and it just uses 3G of the devices to create one raid5 device.
Then change the chunksize and wait reshape to finish. After reshape finishing stop the raid
and assemble it again. It fails.
mdadm -CR /dev/md0 -l5 -n3 /dev/loop[0-2] --size=3G --chunk=32 --assume-clean
mdadm /dev/md0 --grow --chunk=64
wait reshape to finish
mdadm -S /dev/md0
mdadm -As
The error messages:
[197519.814302] md: loop1 does not have a valid v1.2 superblock, not importing!
[197519.821686] md: md_import_device returned -22

After reshape the data offset is changed. It selects backwards direction in this condition.
In function super_1_load it compares the available space of the underlying device with
sb->data_size. The new data offset gets bigger after reshape. So super_1_load returns -EINVAL.
rdev->sectors is updated in md_finish_reshape. Then sb->data_size is set in super_1_sync based
on rdev->sectors. So add md_finish_reshape in end_reshape.

Signed-off-by: Xiao Ni <xni@redhat.com>
Acked-by: Guoqing Jiang <gqjiang@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2ceb338b094b2..aeeb8d6854e26 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7951,12 +7951,10 @@ static void end_reshape(struct r5conf *conf)
 {
 
 	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
-		struct md_rdev *rdev;
 
 		spin_lock_irq(&conf->device_lock);
 		conf->previous_raid_disks = conf->raid_disks;
-		rdev_for_each(rdev, conf->mddev)
-			rdev->data_offset = rdev->new_data_offset;
+		md_finish_reshape(conf->mddev);
 		smp_wmb();
 		conf->reshape_progress = MaxSector;
 		conf->mddev->reshape_position = MaxSector;
-- 
GitLab


From 6f6e0b217a93011f8e11b9a2d5521a08fcf36990 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 6 Jul 2017 16:58:33 -0500
Subject: [PATCH 1008/1958] drm/rockchip: fix NULL check on devm_kzalloc()
 return value

The right variable to check here is port, not dp.

This issue was detected using Coccinelle and the following semantic patch:

@@
expression x;
identifier fld;
@@

* x = devm_kzalloc(...);
  ... when != x == NULL
  x->fld

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Acked-by: Mark Yao <mark.yao@rock-chips.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170706215833.GA25411@embeddedgus
---
 drivers/gpu/drm/rockchip/cdn-dp-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index 14fa1f8351e8d..9b0b0588bbedb 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -1195,7 +1195,7 @@ static int cdn_dp_probe(struct platform_device *pdev)
 			continue;
 
 		port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
-		if (!dp)
+		if (!port)
 			return -ENOMEM;
 
 		port->extcon = extcon;
-- 
GitLab


From de92cd6cf4899c0876a8f5519769a786ad7cdeea Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 10 Jul 2017 11:37:51 +0200
Subject: [PATCH 1009/1958] net/mlx5: IPSec, fix 64-bit division correctly

The new IPSec offload code introduced a build error:

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.o: In function `mlx5e_ipsec_build_inverse_table':
ipsec_rxtx.c:(.text+0x556): undefined reference

Another patch was added on top to fix the build error, but
that introduced a new bug, as we now use the remainder of
the division rather than the result.

This makes it use the correct helper function instead.

Fixes: 5dfd87b67cd9 ("net/mlx5: IPSec, Fix 64-bit division on 32-bit builds")
Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 7d06c673851a5..4614ddfa91ebc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -363,7 +363,6 @@ void mlx5e_ipsec_build_inverse_table(void)
 {
 	u16 mss_inv;
 	u32 mss;
-	u64 n;
 
 	/* Calculate 1/x inverse table for use in GSO data path.
 	 * Using this table, we provide the IPSec accelerator with the value of
@@ -373,8 +372,7 @@ void mlx5e_ipsec_build_inverse_table(void)
 	 */
 	mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
 	for (mss = 2; mss < MAX_LSO_MSS; mss++) {
-		n = 1ULL << 32;
-		mss_inv = do_div(n, mss) >> 16;
+		mss_inv = div_u64(1ULL << 32, mss) >> 16;
 		mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
 	}
 }
-- 
GitLab


From 7cee9384cb3e25de33d75ecdbf08bb15b4ea9fa5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 10 Jul 2017 11:40:19 -0700
Subject: [PATCH 1010/1958] Fix up over-eager 'wait_queue_t' renaming

Commit ac6424b981bc ("sched/wait: Rename wait_queue_t =>
wait_queue_entry_t") had scripted the renaming incorrectly, and didn't
actually check that the 'wait_queue_t' was a full token.

As a result, it also triggered on 'wait_queue_token', and renamed that
to 'wait_queue_entry_token' entry in the autofs4 packet structure
definition too.  That was entirely incorrect, and not intended.

The end result built fine when building just the kernel - because
everything had been renamed consistently there - but caused problems in
user space because the "struct autofs_packet_missing" type is exported
as part of the uapi.

This scripts it all back again:

    git grep -lw wait_queue_entry_token |
        xargs sed -i 's/wait_queue_entry_token/wait_queue_token/g'

and checks the end result.

Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Fixes: ac6424b981bc ("sched/wait: Rename wait_queue_t => wait_queue_entry_t")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/autofs4.txt | 12 ++++++------
 fs/autofs4/autofs_i.h                 |  2 +-
 fs/autofs4/waitq.c                    | 18 +++++++++---------
 include/uapi/linux/auto_fs.h          |  4 ++--
 include/uapi/linux/auto_fs4.h         |  4 ++--
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
index 8444dc3d57e87..f10dd590f69fe 100644
--- a/Documentation/filesystems/autofs4.txt
+++ b/Documentation/filesystems/autofs4.txt
@@ -316,7 +316,7 @@ For version 5, the format of the message is:
         struct autofs_v5_packet {
                 int proto_version;                /* Protocol version */
                 int type;                        /* Type of packet */
-                autofs_wqt_t wait_queue_entry_token;
+                autofs_wqt_t wait_queue_token;
                 __u32 dev;
                 __u64 ino;
                 __u32 uid;
@@ -341,12 +341,12 @@ The pipe will be set to "packet mode" (equivalent to passing
 `O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
 most one packet, and any unread portion of a packet will be discarded.
 
-The `wait_queue_entry_token` is a unique number which can identify a
+The `wait_queue_token` is a unique number which can identify a
 particular request to be acknowledged.  When a message is sent over
 the pipe the affected dentry is marked as either "active" or
 "expiring" and other accesses to it block until the message is
 acknowledged using one of the ioctls below and the relevant
-`wait_queue_entry_token`.
+`wait_queue_token`.
 
 Communicating with autofs: root directory ioctls
 ------------------------------------------------
@@ -358,7 +358,7 @@ capability, or must be the automount daemon.
 The available ioctl commands are:
 
 - **AUTOFS_IOC_READY**: a notification has been handled.  The argument
-    to the ioctl command is the "wait_queue_entry_token" number
+    to the ioctl command is the "wait_queue_token" number
     corresponding to the notification being acknowledged.
 - **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
     the error code `ENOENT`.
@@ -382,14 +382,14 @@ The available ioctl commands are:
         struct autofs_packet_expire_multi {
                 int proto_version;              /* Protocol version */
                 int type;                       /* Type of packet */
-                autofs_wqt_t wait_queue_entry_token;
+                autofs_wqt_t wait_queue_token;
                 int len;
                 char name[NAME_MAX+1];
         };
 
      is required.  This is filled in with the name of something
      that can be unmounted or removed.  If nothing can be expired,
-     `errno` is set to `EAGAIN`.  Even though a `wait_queue_entry_token`
+     `errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
      is present in the structure, no "wait queue" is established
      and no acknowledgment is needed.
 - **AUTOFS_IOC_EXPIRE_MULTI**:  This is similar to
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 974f5346458a1..beef981aa54f3 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -83,7 +83,7 @@ struct autofs_info {
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
 	struct autofs_wait_queue *next;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	/* We use the following to see what we are waiting for */
 	struct qstr name;
 	u32 dev;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 7071895b06782..24a58bf9ca72c 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
-		 (unsigned long) wq->wait_queue_entry_token,
+		 (unsigned long) wq->wait_queue_token,
 		 wq->name.len, wq->name.name, type);
 
 	memset(&pkt, 0, sizeof(pkt)); /* For security reasons */
@@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*mp);
 
-		mp->wait_queue_entry_token = wq->wait_queue_entry_token;
+		mp->wait_queue_token = wq->wait_queue_token;
 		mp->len = wq->name.len;
 		memcpy(mp->name, wq->name.name, wq->name.len);
 		mp->name[wq->name.len] = '\0';
@@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*ep);
 
-		ep->wait_queue_entry_token = wq->wait_queue_entry_token;
+		ep->wait_queue_token = wq->wait_queue_token;
 		ep->len = wq->name.len;
 		memcpy(ep->name, wq->name.name, wq->name.len);
 		ep->name[wq->name.len] = '\0';
@@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*packet);
 
-		packet->wait_queue_entry_token = wq->wait_queue_entry_token;
+		packet->wait_queue_token = wq->wait_queue_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
 		packet->name[wq->name.len] = '\0';
@@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 			return -ENOMEM;
 		}
 
-		wq->wait_queue_entry_token = autofs4_next_wait_queue;
+		wq->wait_queue_token = autofs4_next_wait_queue;
 		if (++autofs4_next_wait_queue == 0)
 			autofs4_next_wait_queue = 1;
 		wq->next = sbi->queues;
@@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 		}
 
 		pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_token, wq->name.len,
 			 wq->name.name, notify);
 
 		/*
@@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 	} else {
 		wq->wait_ctr++;
 		pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_token, wq->name.len,
 			 wq->name.name, notify);
 		mutex_unlock(&sbi->wq_mutex);
 		kfree(qstr.name);
@@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 }
 
 
-int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status)
+int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
 {
 	struct autofs_wait_queue *wq, **wql;
 
 	mutex_lock(&sbi->wq_mutex);
 	for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
-		if (wq->wait_queue_entry_token == wait_queue_entry_token)
+		if (wq->wait_queue_token == wait_queue_token)
 			break;
 	}
 
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 1953f8d6063b3..aa63451ef20aa 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -26,7 +26,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	AUTOFS_PROTO_VERSION
 
 /*
- * The wait_queue_entry_token (autofs_wqt_t) is part of a structure which is passed
+ * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
  * back to the kernel via ioctl from userspace. On architectures where 32- and
  * 64-bit userspace binaries can be executed it's important that the size of
  * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
@@ -49,7 +49,7 @@ struct autofs_packet_hdr {
 
 struct autofs_packet_missing {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	int len;
 	char name[NAME_MAX+1];
 };	
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 65b72d0222e7c..7c6da423d54ee 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -108,7 +108,7 @@ enum autofs_notify {
 /* v4 multi expire (via pipe) */
 struct autofs_packet_expire_multi {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	int len;
 	char name[NAME_MAX+1];
 };
@@ -123,7 +123,7 @@ union autofs_packet_union {
 /* autofs v5 common packet struct */
 struct autofs_v5_packet {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	__u32 dev;
 	__u64 ino;
 	__u32 uid;
-- 
GitLab


From b222dd2fdd53a40dd8f1d3082ae98e52883cce0d Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Mon, 10 Jul 2017 11:40:17 -0700
Subject: [PATCH 1011/1958] block: call bio_uninit in bio_endio

bio_free isn't a good place to free cgroup info. There are a
lot of cases bio is allocated in special way (for example, in stack) and
never gets called by bio_put hence bio_free, we are leaking memory. This
patch moves the free to bio endio, which should be called anyway. The
bio_uninit call in bio_free is kept, in case the bio never gets called
bio endio.

This assumes ->bi_end_io() doesn't access cgroup info, which seems true
in my audit.

This along with Christoph's integrity patch should fix the memory leak
issue.

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index 9cabf5d0be209..9a63597aaaccd 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1833,6 +1833,8 @@ void bio_endio(struct bio *bio)
 	}
 
 	blk_throtl_bio_endio(bio);
+	/* release cgroup info */
+	bio_uninit(bio);
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio);
 }
-- 
GitLab


From d37a369790774af66a4aee61a188384d21b17a43 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 10 Jul 2017 16:08:07 -0300
Subject: [PATCH 1012/1958] perf evsel: Fix attr.exclude_kernel setting for
 default cycles:p

To allow probing the max attr.precise_ip setting for non-root users
we unconditionally set attr.exclude_kernel, which makes the detection
work but should be done only for !root, fix it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 97365e81366f ("perf evsel: Set attr.exclude_kernel when probing max attr.precise_ip")
Link: http://lkml.kernel.org/n/tip-bl6bbxzxloonzvm4nvt7oqgj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 87b4318866704..f2a1876d268ea 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -273,7 +273,7 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
 		.config	= PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel	= 1,
+		.exclude_kernel	= geteuid() != 0,
 	};
 	struct perf_evsel *evsel;
 
-- 
GitLab


From ede5626d303b721dd02246a3850380943c24e380 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 10 Jul 2017 16:19:25 -0300
Subject: [PATCH 1013/1958] perf evsel: State in the default event name if
 attr.exclude_kernel is set

When no event is specified perf will use the "cycles" hardware event
with the highest precision available in the processor, and excluding
kernel events for non-root users, so make that clear in the event name
by setting the "u" event modifier, i.e. "cycles:upp".

E.g.:

The default for root:

  # perf record usleep 1
  # perf evlist -v
  cycles:ppp: ..., precise_ip: 3, exclude_kernel: 0, ...
  #

And for !root:

  $ perf record usleep 1
  $ perf evlist -v
  cycles:uppp: ... , precise_ip: 3, exclude_kernel: 1, ...
  $

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-lf29zcdl422i9knrgde0uwy3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f2a1876d268ea..413f74df08de7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -298,8 +298,10 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 		goto out;
 
 	/* use asprintf() because free(evsel) assumes name is allocated */
-	if (asprintf(&evsel->name, "cycles%.*s",
-		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
+	if (asprintf(&evsel->name, "cycles%s%s%.*s",
+		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
+		     attr.exclude_kernel ? "u" : "",
+		     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
 		goto error_free;
 out:
 	return evsel;
-- 
GitLab


From 80f62589fa52f530cffc50e78c0b5a2ae572d61e Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Thu, 8 Jun 2017 14:01:44 +0800
Subject: [PATCH 1014/1958] perf annotate: Fix broken arrow at row 0 connecting
 jmp instruction to its target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the jump instruction is displayed at the row 0 in annotate view,
the arrow is broken. An example:

 16.86 │   ┌──je     82
  0.01 │      movsd  (%rsp),%xmm0
       │      movsd  0x8(%rsp),%xmm4
       │      movsd  0x8(%rsp),%xmm1
       │      movsd  (%rsp),%xmm3
       │      divsd  %xmm4,%xmm0
       │      divsd  %xmm3,%xmm1
       │      movsd  (%rsp),%xmm2
       │      addsd  %xmm1,%xmm0
       │      addsd  %xmm2,%xmm0
       │      movsd  %xmm0,(%rsp)
       │82:   sub    $0x1,%ebx
 83.03 │    ↑ jne    38
       │      add    $0x10,%rsp
       │      xor    %eax,%eax
       │      pop    %rbx
       │    ← retq

The patch increments the row number before checking with 0.

Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Fixes: 944e1abed9e1 ("perf ui browser: Add method to draw up/down arrow line")
Link: http://lkml.kernel.org/r/1496901704-30275-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index a4d3762cd8250..83874b0e266c5 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -704,7 +704,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser,
 		ui_browser__gotorc(browser, row, column + 1);
 		SLsmg_draw_hline(2);
 
-		if (row++ == 0)
+		if (++row == 0)
 			goto out;
 	} else
 		row = 0;
-- 
GitLab


From 94df7fe0d3e6418113783ff646507287c233e1e3 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 7 Jul 2017 11:17:31 +0200
Subject: [PATCH 1015/1958] um: v2: Use generic NOTES macro

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/asm/common.lds.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index 133055311dce1..9e6d5997cfc45 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -15,7 +15,7 @@
   PROVIDE (_unprotected_end = .);
 
   . = ALIGN(4096);
-  .note : { *(.note.*) }
+  NOTES
   EXCEPTION_TABLE(0)
 
   BUG_TABLE
-- 
GitLab


From 61e8d462457f202bf0c6393133425ad387825e22 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Thu, 6 Jul 2017 09:35:27 +0200
Subject: [PATCH 1016/1958] um: Correctly check for PTRACE_GETRESET/SETREGSET

When checking for PTRACE_GETRESET/SETREGSET, make sure that
the correct header file is included. We need linux/ptrace.h
which contains all ptrace UAPI related defines.
Otherwise #if defined(PTRACE_GETRESET) is always false.

Cc: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/user-offsets.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 8af0fb5d27807..ae4cd58c0c7a4 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -5,7 +5,7 @@
 #include <sys/mman.h>
 #include <sys/user.h>
 #define __FRAME_OFFSETS
-#include <asm/ptrace.h>
+#include <linux/ptrace.h>
 #include <asm/types.h>
 
 #ifdef __i386__
-- 
GitLab


From 3c778a7fcfaa4bb51aefb4f2c7364a5e2c4d0b66 Mon Sep 17 00:00:00 2001
From: Benson Leung <bleung@chromium.org>
Date: Wed, 16 Nov 2016 10:19:25 -0800
Subject: [PATCH 1017/1958] platform/chrome : Add myself as Maintainer

I'll be taking over maintainership of platform/chrome from Olof,
so let's add me to the list of maintainers.

Signed-off-by: Benson Leung <bleung@chromium.org>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Olof Johansson <olof@lixom.net>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 09b5ab6a8a5ce..e1eeb2919d6ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3265,9 +3265,10 @@ F:	Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
 F:	drivers/input/touchscreen/chipone_icn8318.c
 
 CHROME HARDWARE PLATFORM SUPPORT
+M:	Benson Leung <bleung@chromium.org>
 M:	Olof Johansson <olof@lixom.net>
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/olof/chrome-platform.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bleung/chrome-platform.git
 F:	drivers/platform/chrome/
 
 CISCO VIC ETHERNET NIC DRIVER
-- 
GitLab


From 23955622ff8d231bcc9650b3d06583f117a6e3ba Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Mon, 10 Jul 2017 15:47:11 -0700
Subject: [PATCH 1018/1958] swap: add block io poll in swapin path

For fast flash disk, async IO could introduce overhead because of
context switch.  block-mq now supports IO poll, which improves
performance and latency a lot.  swapin is a good place to use this
technique, because the task is waiting for the swapin page to continue
execution.

In my virtual machine, directly read 4k data from a NVMe with iopoll is
about 60% better than that without poll.  With iopoll support in swapin
patch, my microbenchmark (a task does random memory write) is about
10%~25% faster.  CPU utilization increases a lot though, 2x and even 3x
CPU utilization.  This will depend on disk speed.

While iopoll in swapin isn't intended for all usage cases, it's a win
for latency sensistive workloads with high speed swap disk.  block layer
has knob to control poll in runtime.  If poll isn't enabled in block
layer, there should be no noticeable change in swapin.

I got a chance to run the same test in a NVMe with DRAM as the media.
In simple fio IO test, blkpoll boosts 50% performance in single thread
test and ~20% in 8 threads test.  So this is the base line.  In above
swap test, blkpoll boosts ~27% performance in single thread test.
blkpoll uses 2x CPU time though.

If we enable hybid polling, the performance gain has very slight drop
but CPU time is only 50% worse than that without blkpoll.  Also we can
adjust parameter of hybid poll, with it, the CPU time penality is
reduced further.  In 8 threads test, blkpoll doesn't help though.  The
performance is similar to that without blkpoll, but cpu utilization is
similar too.  There is lock contention in swap path.  The cpu time
spending on blkpoll isn't high.  So overall, blkpoll swapin isn't worse
than that without it.

The swapin readahead might read several pages in in the same time and
form a big IO request.  Since the IO will take longer time, it doesn't
make sense to do poll, so the patch only does iopoll for single page
swapin.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/070c3c3e40b711e7b1390002c991e86a-b5408f0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c
Signed-off-by: Shaohua Li <shli@fb.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  5 +++--
 mm/madvise.c         |  4 ++--
 mm/page_io.c         | 23 +++++++++++++++++++++--
 mm/swap_state.c      | 10 ++++++----
 mm/swapfile.c        |  2 +-
 5 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5ab1c98c7d274..61e7180cee21a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -331,7 +331,7 @@ extern void kswapd_stop(int nid);
 #include <linux/blk_types.h> /* for bio_end_io_t */
 
 /* linux/mm/page_io.c */
-extern int swap_readpage(struct page *);
+extern int swap_readpage(struct page *page, bool do_poll);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern void end_swap_bio_write(struct bio *bio);
 extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -362,7 +362,8 @@ extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
 extern struct page *lookup_swap_cache(swp_entry_t);
 extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
-			struct vm_area_struct *vma, unsigned long addr);
+			struct vm_area_struct *vma, unsigned long addr,
+			bool do_poll);
 extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr,
 			bool *new_page_allocated);
diff --git a/mm/madvise.c b/mm/madvise.c
index 25b78ee4fc2c7..8eda1841c576a 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -205,7 +205,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 			continue;
 
 		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
-								vma, index);
+							vma, index, false);
 		if (page)
 			put_page(page);
 	}
@@ -246,7 +246,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		}
 		swap = radix_to_swp_entry(page);
 		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
-								NULL, 0);
+							NULL, 0, false);
 		if (page)
 			put_page(page);
 	}
diff --git a/mm/page_io.c b/mm/page_io.c
index 2da71e627812e..b6c4ac388209c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -117,6 +117,7 @@ static void swap_slot_free_notify(struct page *page)
 static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
+	struct task_struct *waiter = bio->bi_private;
 
 	if (bio->bi_status) {
 		SetPageError(page);
@@ -132,7 +133,9 @@ static void end_swap_bio_read(struct bio *bio)
 	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
+	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
+	wake_up_process(waiter);
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -329,11 +332,13 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	return ret;
 }
 
-int swap_readpage(struct page *page)
+int swap_readpage(struct page *page, bool do_poll)
 {
 	struct bio *bio;
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
+	blk_qc_t qc;
+	struct block_device *bdev;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -372,9 +377,23 @@ int swap_readpage(struct page *page)
 		ret = -ENOMEM;
 		goto out;
 	}
+	bdev = bio->bi_bdev;
+	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 	count_vm_event(PSWPIN);
-	submit_bio(bio);
+	bio_get(bio);
+	qc = submit_bio(bio);
+	while (do_poll) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!READ_ONCE(bio->bi_private))
+			break;
+
+		if (!blk_mq_poll(bdev_get_queue(bdev), qc))
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	bio_put(bio);
+
 out:
 	return ret;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 9c71b6b2562fe..b68c93014f50c 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -412,14 +412,14 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  * the swap entry is no longer in use.
  */
 struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
-			struct vm_area_struct *vma, unsigned long addr)
+		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
 {
 	bool page_was_allocated;
 	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
 			vma, addr, &page_was_allocated);
 
 	if (page_was_allocated)
-		swap_readpage(retpage);
+		swap_readpage(retpage, do_poll);
 
 	return retpage;
 }
@@ -496,11 +496,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long start_offset, end_offset;
 	unsigned long mask;
 	struct blk_plug plug;
+	bool do_poll = true;
 
 	mask = swapin_nr_pages(offset) - 1;
 	if (!mask)
 		goto skip;
 
+	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -511,7 +513,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	for (offset = start_offset; offset <= end_offset ; offset++) {
 		/* Ok, do the async read-ahead now */
 		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
-						gfp_mask, vma, addr);
+						gfp_mask, vma, addr, false);
 		if (!page)
 			continue;
 		if (offset != entry_offset && likely(!PageTransCompound(page)))
@@ -522,7 +524,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
 skip:
-	return read_swap_cache_async(entry, gfp_mask, vma, addr);
+	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
 }
 
 int init_swap_address_space(unsigned int type, unsigned long nr_pages)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 811d90e1c929a..6ba4aab2db0b5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1868,7 +1868,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
 		swap_map = &si->swap_map[i];
 		entry = swp_entry(type, i);
 		page = read_swap_cache_async(entry,
-					GFP_HIGHUSER_MOVABLE, NULL, 0);
+					GFP_HIGHUSER_MOVABLE, NULL, 0, false);
 		if (!page) {
 			/*
 			 * Either swap_duplicate() failed because entry
-- 
GitLab


From 7a8f58f3918869dda0d71b2e9245baedbbe7bc5e Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 10 Jul 2017 15:47:14 -0700
Subject: [PATCH 1019/1958] mm, page_alloc: fallback to smallest page when not
 stealing whole pageblock

Since commit 3bc48f96cf11 ("mm, page_alloc: split smallest stolen page
in fallback") we pick the smallest (but sufficient) page of all that
have been stolen from a pageblock of different migratetype.  However,
there are cases when we decide not to steal the whole pageblock.

Practically in the current implementation it means that we are trying to
fallback for a MIGRATE_MOVABLE allocation of order X, go through the
freelists from MAX_ORDER-1 down to X, and find free page of order Y.  If
Y is less than pageblock_order / 2, we decide not to steal all pages
from the pageblock.  When Y > X, it means we are potentially splitting a
larger page than we need, as there might be other pages of order Z,
where X <= Z < Y.  Since Y is already too small to steal whole
pageblock, picking smallest available Z will result in the same decision
and we avoid splitting a higher-order page in a MIGRATE_UNMOVABLE or
MIGRATE_RECLAIMABLE pageblock.

This patch therefore changes the fallback algorithm so that in the
situation described above, we switch the fallback search strategy to go
from order X upwards to find the smallest suitable fallback.  In theory
there shouldn't be a downside of this change wrt fragmentation.

This has been tested with mmtests' stress-highalloc performing
GFP_KERNEL order-4 allocations, here is the relevant extfrag tracepoint
statistics:

                                                        4.12.0-rc2      4.12.0-rc2
                                                         1-kernel4       2-kernel4
  Page alloc extfrag event                                  25640976    69680977
  Extfrag fragmenting                                       25621086    69661364
  Extfrag fragmenting for unmovable                            74409       73204
  Extfrag fragmenting unmovable placed with movable            69003       67684
  Extfrag fragmenting unmovable placed with reclaim.            5406        5520
  Extfrag fragmenting for reclaimable                           6398        8467
  Extfrag fragmenting reclaimable placed with movable            869         884
  Extfrag fragmenting reclaimable placed with unmov.            5529        7583
  Extfrag fragmenting for movable                           25540279    69579693

Since we force movable allocations to steal the smallest available page
(which we then practially always split), we steal less per fallback, so
the number of fallbacks increases and steals potentially happen from
different pageblocks.  This is however not an issue for movable pages
that can be compacted.

Importantly, the "unmovable placed with movable" statistics is lower,
which is the result of less fragmentation in the unmovable pageblocks.
The effect on reclaimable allocation is a bit unclear.

Link: http://lkml.kernel.org/r/20170529093947.22618-1-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 53 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd65b60939b61..8690357170480 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2216,7 +2216,11 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 	int fallback_mt;
 	bool can_steal;
 
-	/* Find the largest possible block of pages in the other list */
+	/*
+	 * Find the largest available free page in the other list. This roughly
+	 * approximates finding the pageblock with the most free pages, which
+	 * would be too costly to do exactly.
+	 */
 	for (current_order = MAX_ORDER-1;
 				current_order >= order && current_order <= MAX_ORDER-1;
 				--current_order) {
@@ -2226,19 +2230,50 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 		if (fallback_mt == -1)
 			continue;
 
-		page = list_first_entry(&area->free_list[fallback_mt],
-						struct page, lru);
+		/*
+		 * We cannot steal all free pages from the pageblock and the
+		 * requested migratetype is movable. In that case it's better to
+		 * steal and split the smallest available page instead of the
+		 * largest available page, because even if the next movable
+		 * allocation falls back into a different pageblock than this
+		 * one, it won't cause permanent fragmentation.
+		 */
+		if (!can_steal && start_migratetype == MIGRATE_MOVABLE
+					&& current_order > order)
+			goto find_smallest;
 
-		steal_suitable_fallback(zone, page, start_migratetype,
-								can_steal);
+		goto do_steal;
+	}
 
-		trace_mm_page_alloc_extfrag(page, order, current_order,
-			start_migratetype, fallback_mt);
+	return false;
 
-		return true;
+find_smallest:
+	for (current_order = order; current_order < MAX_ORDER;
+							current_order++) {
+		area = &(zone->free_area[current_order]);
+		fallback_mt = find_suitable_fallback(area, current_order,
+				start_migratetype, false, &can_steal);
+		if (fallback_mt != -1)
+			break;
 	}
 
-	return false;
+	/*
+	 * This should not happen - we already found a suitable fallback
+	 * when looking for the largest page.
+	 */
+	VM_BUG_ON(current_order == MAX_ORDER);
+
+do_steal:
+	page = list_first_entry(&area->free_list[fallback_mt],
+							struct page, lru);
+
+	steal_suitable_fallback(zone, page, start_migratetype, can_steal);
+
+	trace_mm_page_alloc_extfrag(page, order, current_order,
+		start_migratetype, fallback_mt);
+
+	return true;
+
 }
 
 /*
-- 
GitLab


From 0a1345f8fed962958047dc0148f94d9bed160824 Mon Sep 17 00:00:00 2001
From: Yevgen Pronenko <y.pronenko@gmail.com>
Date: Mon, 10 Jul 2017 15:47:17 -0700
Subject: [PATCH 1020/1958] mm/memory.c: convert to DEFINE_DEBUGFS_ATTRIBUTE

The preferred strategy to define debugfs attributes is to use the
DEFINE_DEBUGFS_ATTRIBUTE() macro and to use debugfs_create_file_unsafe().

Link: http://lkml.kernel.org/r/20170528145948.32127-1-y.pronenko@gmail.com
Signed-off-by: Yevgen Pronenko <y.pronenko@gmail.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index e31dd97e61143..cbb57194687e3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3262,14 +3262,14 @@ static int fault_around_bytes_set(void *data, u64 val)
 		fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */
 	return 0;
 }
-DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops,
+DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops,
 		fault_around_bytes_get, fault_around_bytes_set, "%llu\n");
 
 static int __init fault_around_debugfs(void)
 {
 	void *ret;
 
-	ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL,
+	ret = debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
 			&fault_around_bytes_fops);
 	if (!ret)
 		pr_warn("Failed to create fault_around_bytes in debugfs");
-- 
GitLab


From 06226226773d56685de6ee9eb3f5d668e9f772ee Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 10 Jul 2017 15:47:20 -0700
Subject: [PATCH 1021/1958] mm, vmscan: avoid thrashing anon lru when free +
 file is low

The purpose of the code that commit 623762517e23 ("revert 'mm: vmscan:
do not swap anon pages just because free+file is low'") reintroduces is
to prefer swapping anonymous memory rather than trashing the file lru.

If the anonymous inactive lru for the set of eligible zones is
considered low, however, or the length of the list for the given reclaim
priority does not allow for effective anonymous-only reclaiming, then
avoid forcing SCAN_ANON.  Forcing SCAN_ANON will end up thrashing the
small list and leave unreclaimed memory on the file lrus.

If the inactive list is insufficient, fallback to balanced reclaim so
the file lru doesn't remain untouched.

[akpm@linux-foundation.org: fix build]
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1705011432220.137835@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Suggested-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9e95fafc026b4..e9210f825219c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2228,8 +2228,17 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 		}
 
 		if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) {
-			scan_balance = SCAN_ANON;
-			goto out;
+			/*
+			 * Force SCAN_ANON if there are enough inactive
+			 * anonymous pages on the LRU in eligible zones.
+			 * Otherwise, the small LRU gets thrashed.
+			 */
+			if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+			    lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
+					>> sc->priority) {
+				scan_balance = SCAN_ANON;
+				goto out;
+			}
 		}
 	}
 
-- 
GitLab


From dbac61a3f2afac562efa51b0a196ed71b6b8d109 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Mon, 10 Jul 2017 15:47:23 -0700
Subject: [PATCH 1022/1958] mm/memory_hotplug.c: add NULL check to avoid
 potential NULL pointer dereference

The NULL check at line 1226: if (!pgdat), implies that pointer pgdat
might be NULL.

rollback_node_hotadd() dereferences this pointer.  Add NULL check to
avoid a potential NULL pointer dereference.

Addresses-Coverity-ID: 1369133
Link: http://lkml.kernel.org/r/20170530212436.GA6195@embeddedgus
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f79aac7a12b5c..7b1311ac5f7ba 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1268,7 +1268,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 
 error:
 	/* rollback pgdat allocation and others */
-	if (new_pgdat)
+	if (new_pgdat && pgdat)
 		rollback_node_hotadd(nid, pgdat);
 	memblock_remove(start, size);
 
-- 
GitLab


From 3457f4147675108aa83f9f33c136f06bb9f8518f Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nick.desaulniers@gmail.com>
Date: Mon, 10 Jul 2017 15:47:26 -0700
Subject: [PATCH 1023/1958] mm/zsmalloc.c: fix -Wunneeded-internal-declaration
 warning

is_first_page() is only called from the macro VM_BUG_ON_PAGE() which is
only compiled in as a runtime check when CONFIG_DEBUG_VM is set,
otherwise is checked at compile time and not actually compiled in.

Fixes the following warning, found with Clang:

  mm/zsmalloc.c:472:12: warning: function 'is_first_page' is not needed and will not be emitted [-Wunneeded-internal-declaration]
  static int is_first_page(struct page *page)
           ^

Link: http://lkml.kernel.org/r/20170524053859.29059-1-nick.desaulniers@gmail.com
Signed-off-by: Nick Desaulniers <nick.desaulniers@gmail.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/zsmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index d41edd28298b6..15959d35fc26c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -469,7 +469,7 @@ static bool is_zspage_isolated(struct zspage *zspage)
 	return zspage->isolated;
 }
 
-static int is_first_page(struct page *page)
+static __maybe_unused int is_first_page(struct page *page)
 {
 	return PagePrivate(page);
 }
-- 
GitLab


From 241f01fbeda2521f802eeef4de0261387e6e9c1d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 10 Jul 2017 15:47:29 -0700
Subject: [PATCH 1024/1958] fs/buffer.c: make bh_lru_install() more efficient

To install a buffer_head into the cpu's LRU queue, bh_lru_install()
would construct a new copy of the queue and then memcpy it over the real
queue.  But it's easily possible to do the update in-place, which is
faster and simpler.  Some work can also be skipped if the buffer_head
was already in the queue.

As a microbenchmark I timed how long it takes to run sb_getblk()
10,000,000 times alternating between BH_LRU_SIZE + 1 blocks.
Effectively, this benchmarks looking up buffer_heads that are in the
page cache but not in the LRU:

	Before this patch: 1.758s
	After this patch: 1.653s

This patch also removes about 350 bytes of compiled code (on x86_64),
partly due to removal of the memcpy() which was being inlined+unrolled.

Link: http://lkml.kernel.org/r/20161229193445.1913-1-ebiggers3@gmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c | 43 +++++++++++++++----------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index ea0e05ec29169..5715dac7821fe 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1281,44 +1281,31 @@ static inline void check_irqs_on(void)
 }
 
 /*
- * The LRU management algorithm is dopey-but-simple.  Sorry.
+ * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
+ * inserted at the front, and the buffer_head at the back if any is evicted.
+ * Or, if already in the LRU it is moved to the front.
  */
 static void bh_lru_install(struct buffer_head *bh)
 {
-	struct buffer_head *evictee = NULL;
+	struct buffer_head *evictee = bh;
+	struct bh_lru *b;
+	int i;
 
 	check_irqs_on();
 	bh_lru_lock();
-	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
-		struct buffer_head *bhs[BH_LRU_SIZE];
-		int in;
-		int out = 0;
-
-		get_bh(bh);
-		bhs[out++] = bh;
-		for (in = 0; in < BH_LRU_SIZE; in++) {
-			struct buffer_head *bh2 =
-				__this_cpu_read(bh_lrus.bhs[in]);
 
-			if (bh2 == bh) {
-				__brelse(bh2);
-			} else {
-				if (out >= BH_LRU_SIZE) {
-					BUG_ON(evictee != NULL);
-					evictee = bh2;
-				} else {
-					bhs[out++] = bh2;
-				}
-			}
+	b = this_cpu_ptr(&bh_lrus);
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		swap(evictee, b->bhs[i]);
+		if (evictee == bh) {
+			bh_lru_unlock();
+			return;
 		}
-		while (out < BH_LRU_SIZE)
-			bhs[out++] = NULL;
-		memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
 	}
-	bh_lru_unlock();
 
-	if (evictee)
-		__brelse(evictee);
+	get_bh(bh);
+	bh_lru_unlock();
+	brelse(evictee);
 }
 
 /*
-- 
GitLab


From 243abd5b7803d540280f029bc5224a4a2892579a Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:32 -0700
Subject: [PATCH 1025/1958] mm: hugetlb: prevent reuse of hwpoisoned free
 hugepages

Patch series "mm: hwpoison: fixlet for hugetlb migration".

This patchset updates the hwpoison/hugetlb code to address 2 reported
issues.

One is madvise(MADV_HWPOISON) failure reported by Intel's lkp robot (see
http://lkml.kernel.org/r/20170417055948.GM31394@yexl-desktop.) First
half was already fixed in mainline, and another half about hugetlb cases
are solved in this series.

Another issue is "narrow-down error affected region into a single 4kB
page instead of a whole hugetlb page" issue, which was tried by Anshuman
(http://lkml.kernel.org/r/20170420110627.12307-1-khandual@linux.vnet.ibm.com)
and I updated it to apply it more widely.

This patch (of 9):

We no longer use MIGRATE_ISOLATE to prevent reuse of hwpoison hugepages
as we did before.  So current dequeue_huge_page_node() doesn't work as
intended because it still uses is_migrate_isolate_page() for this check.
This patch fixes it with PageHWPoison flag.

Link: http://lkml.kernel.org/r/1496305019-5493-2-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c        | 3 +--
 mm/memory-failure.c | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1a88006ec6343..41a1b48cefbfc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,7 +22,6 @@
 #include <linux/rmap.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
-#include <linux/page-isolation.h>
 #include <linux/jhash.h>
 
 #include <asm/page.h>
@@ -872,7 +871,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 	struct page *page;
 
 	list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
-		if (!is_migrate_isolate_page(page))
+		if (!PageHWPoison(page))
 			break;
 	/*
 	 * if 'non-isolated free hugepage' not found on the list,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dbe3e50c9aa5e..e3bf6432ed258 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -49,7 +49,6 @@
 #include <linux/swap.h>
 #include <linux/backing-dev.h>
 #include <linux/migrate.h>
-#include <linux/page-isolation.h>
 #include <linux/suspend.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
-- 
GitLab


From 09612fa6532a838e159d3b3aacfa8641698a89d2 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:35 -0700
Subject: [PATCH 1026/1958] mm: hugetlb: return immediately for hugetlb page in
 __delete_from_page_cache()

We avoid calling __mod_node_page_state(NR_FILE_PAGES) for hugetlb page
now, but it's not enough because later code doesn't handle hugetlb
properly.  Actually in our testing, WARN_ON_ONCE(PageDirty(page)) at the
end of this function fires for hugetlb, which makes no sense.  So we
should return immediately for hugetlb pages.

Link: http://lkml.kernel.org/r/1496305019-5493-3-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 3247b42080347..a49702445ce05 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -239,14 +239,16 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	/* Leave page->index set: truncation lookup relies upon it */
 
 	/* hugetlb pages do not participate in page cache accounting. */
-	if (!PageHuge(page))
-		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
+	if (PageHuge(page))
+		return;
+
+	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
 	if (PageSwapBacked(page)) {
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
 		if (PageTransHuge(page))
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 	} else {
-		VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
+		VM_BUG_ON_PAGE(PageTransHuge(page), page);
 	}
 
 	/*
-- 
GitLab


From b37ff71cc626a0c1b5e098ff9a0b723815f6aaeb Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:38 -0700
Subject: [PATCH 1027/1958] mm: hwpoison: change PageHWPoison behavior on
 hugetlb pages

We'd like to narrow down the error region in memory error on hugetlb
pages.  However, currently we set PageHWPoison flags on all subpages in
the error hugepage and add # of subpages to num_hwpoison_pages, which
doesn't fit our purpose.

So this patch changes the behavior and we only set PageHWPoison on the
head page then increase num_hwpoison_pages only by 1.  This is a
preparation for narrow-down part which comes in later patches.

Link: http://lkml.kernel.org/r/1496305019-5493-4-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h |  9 -----
 mm/memory-failure.c     | 87 ++++++++++++-----------------------------
 2 files changed, 24 insertions(+), 72 deletions(-)

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 5c3a5f3e7eec6..c5ff7b217ee6e 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void)
 	atomic_long_dec(&num_poisoned_pages);
 }
 
-static inline void num_poisoned_pages_add(long num)
-{
-	atomic_long_add(num, &num_poisoned_pages);
-}
-
-static inline void num_poisoned_pages_sub(long num)
-{
-	atomic_long_sub(num, &num_poisoned_pages);
-}
 #else
 
 static inline swp_entry_t make_hwpoison_entry(struct page *page)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e3bf6432ed258..a9ddb0e72f5bb 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1009,22 +1009,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	return unmap_success;
 }
 
-static void set_page_hwpoison_huge_page(struct page *hpage)
-{
-	int i;
-	int nr_pages = 1 << compound_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		SetPageHWPoison(hpage + i);
-}
-
-static void clear_page_hwpoison_huge_page(struct page *hpage)
-{
-	int i;
-	int nr_pages = 1 << compound_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		ClearPageHWPoison(hpage + i);
-}
-
 /**
  * memory_failure - Handle memory failure of a page.
  * @pfn: Page Number of the corrupted page
@@ -1050,7 +1034,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	struct page *hpage;
 	struct page *orig_head;
 	int res;
-	unsigned int nr_pages;
 	unsigned long page_flags;
 
 	if (!sysctl_memory_failure_recovery)
@@ -1064,24 +1047,23 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 
 	p = pfn_to_page(pfn);
 	orig_head = hpage = compound_head(p);
+
+	/* tmporary check code, to be updated in later patches */
+	if (PageHuge(p)) {
+		if (TestSetPageHWPoison(hpage)) {
+			pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+			return 0;
+		}
+		goto tmp;
+	}
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
 		return 0;
 	}
 
-	/*
-	 * Currently errors on hugetlbfs pages are measured in hugepage units,
-	 * so nr_pages should be 1 << compound_order.  OTOH when errors are on
-	 * transparent hugepages, they are supposed to be split and error
-	 * measurement is done in normal page units.  So nr_pages should be one
-	 * in this case.
-	 */
-	if (PageHuge(p))
-		nr_pages = 1 << compound_order(hpage);
-	else /* normal page or thp */
-		nr_pages = 1;
-	num_poisoned_pages_add(nr_pages);
+tmp:
+	num_poisoned_pages_inc();
 
 	/*
 	 * We need/can do nothing about count=0 pages.
@@ -1109,12 +1091,11 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 			if (PageHWPoison(hpage)) {
 				if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
 				    || (p != hpage && TestSetPageHWPoison(hpage))) {
-					num_poisoned_pages_sub(nr_pages);
+					num_poisoned_pages_dec();
 					unlock_page(hpage);
 					return 0;
 				}
 			}
-			set_page_hwpoison_huge_page(hpage);
 			res = dequeue_hwpoisoned_huge_page(hpage);
 			action_result(pfn, MF_MSG_FREE_HUGE,
 				      res ? MF_IGNORED : MF_DELAYED);
@@ -1137,7 +1118,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 				pr_err("Memory failure: %#lx: thp split failed\n",
 					pfn);
 			if (TestClearPageHWPoison(p))
-				num_poisoned_pages_sub(nr_pages);
+				num_poisoned_pages_dec();
 			put_hwpoison_page(p);
 			return -EBUSY;
 		}
@@ -1193,14 +1174,14 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 */
 	if (!PageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
-		num_poisoned_pages_sub(nr_pages);
+		num_poisoned_pages_dec();
 		unlock_page(hpage);
 		put_hwpoison_page(hpage);
 		return 0;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
-			num_poisoned_pages_sub(nr_pages);
+			num_poisoned_pages_dec();
 		unlock_page(hpage);
 		put_hwpoison_page(hpage);
 		return 0;
@@ -1219,14 +1200,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 		put_hwpoison_page(hpage);
 		return 0;
 	}
-	/*
-	 * Set PG_hwpoison on all pages in an error hugepage,
-	 * because containment is done in hugepage unit for now.
-	 * Since we have done TestSetPageHWPoison() for the head page with
-	 * page lock held, we can safely set PG_hwpoison bits on tail pages.
-	 */
-	if (PageHuge(p))
-		set_page_hwpoison_huge_page(hpage);
 
 	/*
 	 * It's very difficult to mess with pages currently under IO
@@ -1397,7 +1370,6 @@ int unpoison_memory(unsigned long pfn)
 	struct page *page;
 	struct page *p;
 	int freeit = 0;
-	unsigned int nr_pages;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
 
@@ -1442,8 +1414,6 @@ int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	nr_pages = 1 << compound_order(page);
-
 	if (!get_hwpoison_page(p)) {
 		/*
 		 * Since HWPoisoned hugepage should have non-zero refcount,
@@ -1473,10 +1443,8 @@ int unpoison_memory(unsigned long pfn)
 	if (TestClearPageHWPoison(page)) {
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 pfn, &unpoison_rs);
-		num_poisoned_pages_sub(nr_pages);
+		num_poisoned_pages_dec();
 		freeit = 1;
-		if (PageHuge(page))
-			clear_page_hwpoison_huge_page(page);
 	}
 	unlock_page(page);
 
@@ -1608,14 +1576,10 @@ static int soft_offline_huge_page(struct page *page, int flags)
 			ret = -EIO;
 	} else {
 		/* overcommit hugetlb page will be freed to buddy */
-		if (PageHuge(page)) {
-			set_page_hwpoison_huge_page(hpage);
+		SetPageHWPoison(page);
+		if (PageHuge(page))
 			dequeue_hwpoisoned_huge_page(hpage);
-			num_poisoned_pages_add(1 << compound_order(hpage));
-		} else {
-			SetPageHWPoison(page);
-			num_poisoned_pages_inc();
-		}
+		num_poisoned_pages_inc();
 	}
 	return ret;
 }
@@ -1731,15 +1695,12 @@ static int soft_offline_in_use_page(struct page *page, int flags)
 
 static void soft_offline_free_page(struct page *page)
 {
-	if (PageHuge(page)) {
-		struct page *hpage = compound_head(page);
+	struct page *head = compound_head(page);
 
-		set_page_hwpoison_huge_page(hpage);
-		if (!dequeue_hwpoisoned_huge_page(hpage))
-			num_poisoned_pages_add(1 << compound_order(hpage));
-	} else {
-		if (!TestSetPageHWPoison(page))
-			num_poisoned_pages_inc();
+	if (!TestSetPageHWPoison(head)) {
+		num_poisoned_pages_inc();
+		if (PageHuge(head))
+			dequeue_hwpoisoned_huge_page(head);
 	}
 }
 
-- 
GitLab


From c3114a84f7f96c9d5c73c8bfa7e21ff42fda97e2 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Mon, 10 Jul 2017 15:47:41 -0700
Subject: [PATCH 1028/1958] mm: hugetlb: soft-offline: dissolve source hugepage
 after successful migration

Currently hugepage migrated by soft-offline (i.e.  due to correctable
memory errors) is contained as a hugepage, which means many non-error
pages in it are unreusable, i.e.  wasted.

This patch solves this issue by dissolving source hugepages into buddy.
As done in previous patch, PageHWPoison is set only on a head page of
the error hugepage.  Then in dissoliving we move the PageHWPoison flag
to the raw error page so that all healthy subpages return back to buddy.

[arnd@arndb.de: fix warnings: replace some macros with inline functions]
  Link: http://lkml.kernel.org/r/20170609102544.2947326-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/1496305019-5493-5-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 31 +++++++++++++++++++++++++++----
 mm/hugetlb.c            | 10 +++++++++-
 mm/memory-failure.c     |  5 +----
 mm/migrate.c            |  2 ++
 4 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 46bfb702e7d62..668ab1742ef6e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -472,6 +472,7 @@ static inline pgoff_t basepage_index(struct page *page)
 	return __basepage_index(page);
 }
 
+extern int dissolve_free_huge_page(struct page *page);
 extern int dissolve_free_huge_pages(unsigned long start_pfn,
 				    unsigned long end_pfn);
 static inline bool hugepage_migration_supported(struct hstate *h)
@@ -550,15 +551,37 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
 {
 	return 1;
 }
-#define hstate_index_to_shift(index) 0
-#define hstate_index(h) 0
+
+static inline unsigned hstate_index_to_shift(unsigned index)
+{
+	return 0;
+}
+
+static inline int hstate_index(struct hstate *h)
+{
+	return 0;
+}
 
 static inline pgoff_t basepage_index(struct page *page)
 {
 	return page->index;
 }
-#define dissolve_free_huge_pages(s, e)	0
-#define hugepage_migration_supported(h)	false
+
+static inline int dissolve_free_huge_page(struct page *page)
+{
+	return 0;
+}
+
+static inline int dissolve_free_huge_pages(unsigned long start_pfn,
+					   unsigned long end_pfn)
+{
+	return 0;
+}
+
+static inline bool hugepage_migration_supported(struct hstate *h)
+{
+	return false;
+}
 
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					   struct mm_struct *mm, pte_t *pte)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 41a1b48cefbfc..b2d44363837a6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1459,7 +1459,7 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
  * number of free hugepages would be reduced below the number of reserved
  * hugepages.
  */
-static int dissolve_free_huge_page(struct page *page)
+int dissolve_free_huge_page(struct page *page)
 {
 	int rc = 0;
 
@@ -1472,6 +1472,14 @@ static int dissolve_free_huge_page(struct page *page)
 			rc = -EBUSY;
 			goto out;
 		}
+		/*
+		 * Move PageHWPoison flag from head page to the raw error page,
+		 * which makes any subpages rather than the error page reusable.
+		 */
+		if (PageHWPoison(head) && page != head) {
+			SetPageHWPoison(page);
+			ClearPageHWPoison(head);
+		}
 		list_del(&head->lru);
 		h->free_huge_pages--;
 		h->free_huge_pages_node[nid]--;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a9ddb0e72f5bb..42c5803e62758 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1575,11 +1575,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
 		if (ret > 0)
 			ret = -EIO;
 	} else {
-		/* overcommit hugetlb page will be freed to buddy */
-		SetPageHWPoison(page);
 		if (PageHuge(page))
-			dequeue_hwpoisoned_huge_page(hpage);
-		num_poisoned_pages_inc();
+			dissolve_free_huge_page(page);
 	}
 	return ret;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 051cc1555d36e..8935cbe362ce7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1252,6 +1252,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 out:
 	if (rc != -EAGAIN)
 		putback_active_hugepage(hpage);
+	if (reason == MR_MEMORY_FAILURE && !test_set_page_hwpoison(hpage))
+		num_poisoned_pages_inc();
 
 	/*
 	 * If migration was not successful and there's a freeing callback, use
-- 
GitLab


From d4a3a60b37bf4609f9b17961a0db2f6e7ec746cd Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:44 -0700
Subject: [PATCH 1029/1958] mm: soft-offline: dissolve free hugepage if
 soft-offlined

Now we have code to rescue most of healthy pages from a hwpoisoned
hugepage.  So let's apply it to soft_offline_free_page too.

Link: http://lkml.kernel.org/r/1496305019-5493-6-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 42c5803e62758..8a7b39486b9d0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1697,7 +1697,7 @@ static void soft_offline_free_page(struct page *page)
 	if (!TestSetPageHWPoison(head)) {
 		num_poisoned_pages_inc();
 		if (PageHuge(head))
-			dequeue_hwpoisoned_huge_page(head);
+			dissolve_free_huge_page(page);
 	}
 }
 
-- 
GitLab


From 761ad8d7c7b5485bb66fd5bccb58a891fe784544 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:47 -0700
Subject: [PATCH 1030/1958] mm: hwpoison: introduce memory_failure_hugetlb()

memory_failure() is a big function and hard to maintain.  Handling
hugetlb- and non-hugetlb- case in a single function is not good, so this
patch separates PageHuge() branch into a new function, which saves many
PageHuge() check.

Link: http://lkml.kernel.org/r/1496305019-5493-7-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 134 +++++++++++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 52 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8a7b39486b9d0..5db3827f0d369 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1009,6 +1009,76 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	return unmap_success;
 }
 
+static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags)
+{
+	struct page_state *ps;
+	struct page *p = pfn_to_page(pfn);
+	struct page *head = compound_head(p);
+	int res;
+	unsigned long page_flags;
+
+	if (TestSetPageHWPoison(head)) {
+		pr_err("Memory failure: %#lx: already hardware poisoned\n",
+		       pfn);
+		return 0;
+	}
+
+	num_poisoned_pages_inc();
+
+	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
+		/*
+		 * Check "filter hit" and "race with other subpage."
+		 */
+		lock_page(head);
+		if (PageHWPoison(head)) {
+			if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+			    || (p != head && TestSetPageHWPoison(head))) {
+				num_poisoned_pages_dec();
+				unlock_page(head);
+				return 0;
+			}
+		}
+		unlock_page(head);
+		dissolve_free_huge_page(p);
+		action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
+		return 0;
+	}
+
+	lock_page(head);
+	page_flags = head->flags;
+
+	if (!PageHWPoison(head)) {
+		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
+		num_poisoned_pages_dec();
+		unlock_page(head);
+		put_hwpoison_page(head);
+		return 0;
+	}
+
+	if (!hwpoison_user_mappings(p, pfn, trapno, flags, &head)) {
+		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
+		res = -EBUSY;
+		goto out;
+	}
+
+	res = -EBUSY;
+
+	for (ps = error_states;; ps++)
+		if ((p->flags & ps->mask) == ps->res)
+			break;
+
+	page_flags |= (p->flags & (1UL << PG_dirty));
+
+	if (!ps->mask)
+		for (ps = error_states;; ps++)
+			if ((page_flags & ps->mask) == ps->res)
+				break;
+	res = page_action(ps, p, pfn);
+out:
+	unlock_page(head);
+	return res;
+}
+
 /**
  * memory_failure - Handle memory failure of a page.
  * @pfn: Page Number of the corrupted page
@@ -1046,33 +1116,22 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 	p = pfn_to_page(pfn);
-	orig_head = hpage = compound_head(p);
-
-	/* tmporary check code, to be updated in later patches */
-	if (PageHuge(p)) {
-		if (TestSetPageHWPoison(hpage)) {
-			pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
-			return 0;
-		}
-		goto tmp;
-	}
+	if (PageHuge(p))
+		return memory_failure_hugetlb(pfn, trapno, flags);
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
 		return 0;
 	}
 
-tmp:
+	orig_head = hpage = compound_head(p);
 	num_poisoned_pages_inc();
 
 	/*
 	 * We need/can do nothing about count=0 pages.
 	 * 1) it's a free page, and therefore in safe hand:
 	 *    prep_new_page() will be the gate keeper.
-	 * 2) it's a free hugepage, which is also safe:
-	 *    an affected hugepage will be dequeued from hugepage freelist,
-	 *    so there's no concern about reusing it ever after.
-	 * 3) it's part of a non-compound high order page.
+	 * 2) it's part of a non-compound high order page.
 	 *    Implies some kernel user: cannot stop them from
 	 *    R/W the page; let's pray that the page has been
 	 *    used and will be freed some time later.
@@ -1083,31 +1142,13 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 		if (is_free_buddy_page(p)) {
 			action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
 			return 0;
-		} else if (PageHuge(hpage)) {
-			/*
-			 * Check "filter hit" and "race with other subpage."
-			 */
-			lock_page(hpage);
-			if (PageHWPoison(hpage)) {
-				if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
-				    || (p != hpage && TestSetPageHWPoison(hpage))) {
-					num_poisoned_pages_dec();
-					unlock_page(hpage);
-					return 0;
-				}
-			}
-			res = dequeue_hwpoisoned_huge_page(hpage);
-			action_result(pfn, MF_MSG_FREE_HUGE,
-				      res ? MF_IGNORED : MF_DELAYED);
-			unlock_page(hpage);
-			return res;
 		} else {
 			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
 			return -EBUSY;
 		}
 	}
 
-	if (!PageHuge(p) && PageTransHuge(hpage)) {
+	if (PageTransHuge(hpage)) {
 		lock_page(p);
 		if (!PageAnon(p) || unlikely(split_huge_page(p))) {
 			unlock_page(p);
@@ -1145,7 +1186,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 		return 0;
 	}
 
-	lock_page(hpage);
+	lock_page(p);
 
 	/*
 	 * The page could have changed compound pages during the locking.
@@ -1175,32 +1216,21 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	if (!PageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
 		num_poisoned_pages_dec();
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
+		unlock_page(p);
+		put_hwpoison_page(p);
 		return 0;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
+		unlock_page(p);
+		put_hwpoison_page(p);
 		return 0;
 	}
 
-	if (!PageHuge(p) && !PageTransTail(p) && !PageLRU(p))
+	if (!PageTransTail(p) && !PageLRU(p))
 		goto identify_page_state;
 
-	/*
-	 * For error on the tail page, we should set PG_hwpoison
-	 * on the head page to show that the hugepage is hwpoisoned
-	 */
-	if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
-		action_result(pfn, MF_MSG_POISONED_HUGE, MF_IGNORED);
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
-		return 0;
-	}
-
 	/*
 	 * It's very difficult to mess with pages currently under IO
 	 * and in many cases impossible, so we just avoid it here.
@@ -1248,7 +1278,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 				break;
 	res = page_action(ps, p, pfn);
 out:
-	unlock_page(hpage);
+	unlock_page(p);
 	return res;
 }
 EXPORT_SYMBOL_GPL(memory_failure);
-- 
GitLab


From 78bb920344b8a6f04b79a7c254041723b931c94f Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:50 -0700
Subject: [PATCH 1031/1958] mm: hwpoison: dissolve in-use hugepage in
 unrecoverable memory error

Currently me_huge_page() relies on dequeue_hwpoisoned_huge_page() to
keep the error hugepage away from the system, which is OK but not good
enough because the hugepage still has a refcount and unpoison doesn't
work on the error hugepage (PageHWPoison flags are cleared but pages are
still leaked.) And there's "wasting health subpages" issue too.  This
patch reworks on me_huge_page() to solve these issues.

For hugetlb file, recently we have truncating code so let's use it in
hugetlbfs specific ->error_remove_page().

For anonymous hugepage, it's helpful to dissolve the error page after
freeing it into free hugepage list.  Migration entry and PageHWPoison in
the head page prevent the access to it.

TODO: dissolve_free_huge_page() can fail but we don't considered it yet.
It's not critical (and at least no worse that now) because in such case
the error hugepage just stays in free hugepage list without being
dissolved.  By virtue of PageHWPoison in head page, it's never allocated
to processes.

[akpm@linux-foundation.org: fix unused var warnings]
Fixes: 23a003bfd23ea9ea0b7756b920e51f64b284b468 ("mm/madvise: pass return code of memory_failure() to userspace")
Link: http://lkml.kernel.org/r/20170417055948.GM31394@yexl-desktop
Link: http://lkml.kernel.org/r/1496305019-5493-8-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c | 11 ++++++
 mm/memory-failure.c  | 93 +++++++++++++++++++++++++-------------------
 2 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d44f5456eb9ba..52388611635e2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -851,6 +851,16 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
 	return MIGRATEPAGE_SUCCESS;
 }
 
+static int hugetlbfs_error_remove_page(struct address_space *mapping,
+				struct page *page)
+{
+	struct inode *inode = mapping->host;
+
+	remove_huge_page(page);
+	hugetlb_fix_reserve_counts(inode);
+	return 0;
+}
+
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -966,6 +976,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 	.write_end	= hugetlbfs_write_end,
 	.set_page_dirty	= hugetlbfs_set_page_dirty,
 	.migratepage    = hugetlbfs_migrate_page,
+	.error_remove_page	= hugetlbfs_error_remove_page,
 };
 
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5db3827f0d369..6f8f69f4a9866 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -554,6 +554,39 @@ static int delete_from_lru_cache(struct page *p)
 	return -EIO;
 }
 
+static int truncate_error_page(struct page *p, unsigned long pfn,
+				struct address_space *mapping)
+{
+	int ret = MF_FAILED;
+
+	if (mapping->a_ops->error_remove_page) {
+		int err = mapping->a_ops->error_remove_page(mapping, p);
+
+		if (err != 0) {
+			pr_info("Memory failure: %#lx: Failed to punch page: %d\n",
+				pfn, err);
+		} else if (page_has_private(p) &&
+			   !try_to_release_page(p, GFP_NOIO)) {
+			pr_info("Memory failure: %#lx: failed to release buffers\n",
+				pfn);
+		} else {
+			ret = MF_RECOVERED;
+		}
+	} else {
+		/*
+		 * If the file system doesn't support it just invalidate
+		 * This fails on dirty or anything with private pages
+		 */
+		if (invalidate_inode_page(p))
+			ret = MF_RECOVERED;
+		else
+			pr_info("Memory failure: %#lx: Failed to invalidate\n",
+				pfn);
+	}
+
+	return ret;
+}
+
 /*
  * Error hit kernel page.
  * Do nothing, try to be lucky and not touch this instead. For a few cases we
@@ -578,8 +611,6 @@ static int me_unknown(struct page *p, unsigned long pfn)
  */
 static int me_pagecache_clean(struct page *p, unsigned long pfn)
 {
-	int err;
-	int ret = MF_FAILED;
 	struct address_space *mapping;
 
 	delete_from_lru_cache(p);
@@ -611,30 +642,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 *
 	 * Open: to take i_mutex or not for this? Right now we don't.
 	 */
-	if (mapping->a_ops->error_remove_page) {
-		err = mapping->a_ops->error_remove_page(mapping, p);
-		if (err != 0) {
-			pr_info("Memory failure: %#lx: Failed to punch page: %d\n",
-				pfn, err);
-		} else if (page_has_private(p) &&
-				!try_to_release_page(p, GFP_NOIO)) {
-			pr_info("Memory failure: %#lx: failed to release buffers\n",
-				pfn);
-		} else {
-			ret = MF_RECOVERED;
-		}
-	} else {
-		/*
-		 * If the file system doesn't support it just invalidate
-		 * This fails on dirty or anything with private pages
-		 */
-		if (invalidate_inode_page(p))
-			ret = MF_RECOVERED;
-		else
-			pr_info("Memory failure: %#lx: Failed to invalidate\n",
-				pfn);
-	}
-	return ret;
+	return truncate_error_page(p, pfn, mapping);
 }
 
 /*
@@ -740,24 +748,29 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 {
 	int res = 0;
 	struct page *hpage = compound_head(p);
+	struct address_space *mapping;
 
 	if (!PageHuge(hpage))
 		return MF_DELAYED;
 
-	/*
-	 * We can safely recover from error on free or reserved (i.e.
-	 * not in-use) hugepage by dequeuing it from freelist.
-	 * To check whether a hugepage is in-use or not, we can't use
-	 * page->lru because it can be used in other hugepage operations,
-	 * such as __unmap_hugepage_range() and gather_surplus_pages().
-	 * So instead we use page_mapping() and PageAnon().
-	 */
-	if (!(page_mapping(hpage) || PageAnon(hpage))) {
-		res = dequeue_hwpoisoned_huge_page(hpage);
-		if (!res)
-			return MF_RECOVERED;
+	mapping = page_mapping(hpage);
+	if (mapping) {
+		res = truncate_error_page(hpage, pfn, mapping);
+	} else {
+		unlock_page(hpage);
+		/*
+		 * migration entry prevents later access on error anonymous
+		 * hugepage, so we can free and dissolve it into buddy to
+		 * save healthy subpages.
+		 */
+		if (PageAnon(hpage))
+			put_page(hpage);
+		dissolve_free_huge_page(p);
+		res = MF_RECOVERED;
+		lock_page(hpage);
 	}
-	return MF_DELAYED;
+
+	return res;
 }
 
 /*
@@ -856,7 +869,7 @@ static int page_action(struct page_state *ps, struct page *p,
 	count = page_count(p) - 1;
 	if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
 		count--;
-	if (count != 0) {
+	if (count > 0) {
 		pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
 		       pfn, action_page_types[ps->type], count);
 		result = MF_FAILED;
-- 
GitLab


From ddd40d8a2c4ef8f2152ea6d227e11475cf7e5bfa Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:53 -0700
Subject: [PATCH 1032/1958] mm: hugetlb: delete dequeue_hwpoisoned_huge_page()

dequeue_hwpoisoned_huge_page() is no longer used, so let's remove it.

Link: http://lkml.kernel.org/r/1496305019-5493-9-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  5 -----
 mm/hugetlb.c            | 34 ----------------------------------
 mm/memory-failure.c     | 11 -----------
 3 files changed, 50 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 668ab1742ef6e..57f700ac127ed 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -116,7 +116,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						vm_flags_t vm_flags);
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
-int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
 void free_huge_page(struct page *page);
@@ -192,10 +191,6 @@ static inline void hugetlb_show_meminfo(void)
 #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
 				src_addr, pagep)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address, sz)	0
-static inline int dequeue_hwpoisoned_huge_page(struct page *page)
-{
-	return 0;
-}
 
 static inline bool isolate_huge_page(struct page *page, struct list_head *list)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b2d44363837a6..8254e8f6db6b6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4746,40 +4746,6 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
 	return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
 }
 
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * This function is called from memory failure code.
- */
-int dequeue_hwpoisoned_huge_page(struct page *hpage)
-{
-	struct hstate *h = page_hstate(hpage);
-	int nid = page_to_nid(hpage);
-	int ret = -EBUSY;
-
-	spin_lock(&hugetlb_lock);
-	/*
-	 * Just checking !page_huge_active is not enough, because that could be
-	 * an isolated/hwpoisoned hugepage (which have >0 refcount).
-	 */
-	if (!page_huge_active(hpage) && !page_count(hpage)) {
-		/*
-		 * Hwpoisoned hugepage isn't linked to activelist or freelist,
-		 * but dangling hpage->lru can trigger list-debug warnings
-		 * (this happens when we call unpoison_memory() on it),
-		 * so let it point to itself with list_del_init().
-		 */
-		list_del_init(&hpage->lru);
-		set_page_refcounted(hpage);
-		h->free_huge_pages--;
-		h->free_huge_pages_node[nid]--;
-		ret = 0;
-	}
-	spin_unlock(&hugetlb_lock);
-	return ret;
-}
-#endif
-
 bool isolate_huge_page(struct page *page, struct list_head *list)
 {
 	bool ret = true;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6f8f69f4a9866..2aec57c07652e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1458,17 +1458,6 @@ int unpoison_memory(unsigned long pfn)
 	}
 
 	if (!get_hwpoison_page(p)) {
-		/*
-		 * Since HWPoisoned hugepage should have non-zero refcount,
-		 * race between memory failure and unpoison seems to happen.
-		 * In such case unpoison fails and memory failure runs
-		 * to the end.
-		 */
-		if (PageHuge(page)) {
-			unpoison_pr_info("Unpoison: Memory failure is now running on free hugepage %#lx\n",
-					 pfn, &unpoison_rs);
-			return 0;
-		}
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
-- 
GitLab


From 0348d2ebec9b00ea87b42dffdb3f393007303b82 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Mon, 10 Jul 2017 15:47:56 -0700
Subject: [PATCH 1033/1958] mm: hwpoison: introduce idenfity_page_state

Factoring duplicate code into a function.

Link: http://lkml.kernel.org/r/1496305019-5493-10-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 57 ++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2aec57c07652e..e2e0cb0e1d0f0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1022,9 +1022,31 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	return unmap_success;
 }
 
-static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags)
+static int identify_page_state(unsigned long pfn, struct page *p,
+				unsigned long page_flags)
 {
 	struct page_state *ps;
+
+	/*
+	 * The first check uses the current page flags which may not have any
+	 * relevant information. The second check with the saved page flags is
+	 * carried out only if the first check can't determine the page status.
+	 */
+	for (ps = error_states;; ps++)
+		if ((p->flags & ps->mask) == ps->res)
+			break;
+
+	page_flags |= (p->flags & (1UL << PG_dirty));
+
+	if (!ps->mask)
+		for (ps = error_states;; ps++)
+			if ((page_flags & ps->mask) == ps->res)
+				break;
+	return page_action(ps, p, pfn);
+}
+
+static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags)
+{
 	struct page *p = pfn_to_page(pfn);
 	struct page *head = compound_head(p);
 	int res;
@@ -1074,19 +1096,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags)
 		goto out;
 	}
 
-	res = -EBUSY;
-
-	for (ps = error_states;; ps++)
-		if ((p->flags & ps->mask) == ps->res)
-			break;
-
-	page_flags |= (p->flags & (1UL << PG_dirty));
-
-	if (!ps->mask)
-		for (ps = error_states;; ps++)
-			if ((page_flags & ps->mask) == ps->res)
-				break;
-	res = page_action(ps, p, pfn);
+	res = identify_page_state(pfn, p, page_flags);
 out:
 	unlock_page(head);
 	return res;
@@ -1112,7 +1122,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags)
  */
 int memory_failure(unsigned long pfn, int trapno, int flags)
 {
-	struct page_state *ps;
 	struct page *p;
 	struct page *hpage;
 	struct page *orig_head;
@@ -1273,23 +1282,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 identify_page_state:
-	res = -EBUSY;
-	/*
-	 * The first check uses the current page flags which may not have any
-	 * relevant information. The second check with the saved page flagss is
-	 * carried out only if the first check can't determine the page status.
-	 */
-	for (ps = error_states;; ps++)
-		if ((p->flags & ps->mask) == ps->res)
-			break;
-
-	page_flags |= (p->flags & (1UL << PG_dirty));
-
-	if (!ps->mask)
-		for (ps = error_states;; ps++)
-			if ((page_flags & ps->mask) == ps->res)
-				break;
-	res = page_action(ps, p, pfn);
+	res = identify_page_state(pfn, p, page_flags);
 out:
 	unlock_page(p);
 	return res;
-- 
GitLab


From b6bb9811496d26744c5a91a14c851face8b9f1e7 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 10 Jul 2017 15:47:59 -0700
Subject: [PATCH 1034/1958] mm, vmpressure: pass-through notification support

By default, vmpressure events are not pass-through, i.e.  they propagate
up through the memcg hierarchy until an event notifier is found for any
threshold level.

This presents a difficulty when a thread waiting on a read(2) for a
vmpressure event cannot distinguish between local memory pressure and
memory pressure in a descendant memcg, especially when that thread may
not control the memcg hierarchy.

Consider a user-controlled child memcg with a smaller limit than a
top-level memcg controlled by the "Activity Manager" specified in
Documentation/cgroup-v1/memory.txt.  It may register for memory pressure
notification for descendant memcgs to make a policy decision: oom kill a
low priority job, increase the limit, decrease other limits, etc.  If it
registers for memory pressure notification on the top-level memcg, it
currently cannot distinguish between memory pressure in its own memcg or
a descendant memcg, which is user-controlled.

Conversely, if a user registers for memory pressure notification on
their own descendant memcg, the Activity Manager does not receive any
pressure notification for that child memcg hierarchy.  Vmpressure events
are not received for ancestor memcgs if the memcg experiencing pressure
have notifiers registered, perhaps outside the knowledge of the thread
waiting on read(2) at the top level.

Both of these are consequences of vmpressure notification not being
pass-through.

This implements a pass-through behavior for vmpressure events.  When
writing to control.event_control, vmpressure event handlers may
optionally specify a mode.  There are two new modes:

 - "hierarchy": always propagate memory pressure events up the hierarchy
   regardless if descendant memcgs have their own notifiers registered,
   and

 - "local": only receive notifications when the memcg for which the
   event is registered experiences memory pressure.

Of course, processes may register for one notification of "low,local",
for example, and another for "low".

If no mode is specified, the current behavior is maintained for
backwards compatibility.

See the change to Documentation/cgroup-v1/memory.txt for full
specification.

[dan.carpenter@oracle.com: free the same pointer we allocated]
  Link: http://lkml.kernel.org/r/20170613191820.GA20003@elgon.mountain
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1705311421320.8946@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Anton Vorontsov <anton@enomsg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroup-v1/memory.txt |  47 ++++++++---
 mm/vmpressure.c                    | 122 ++++++++++++++++++++++-------
 2 files changed, 128 insertions(+), 41 deletions(-)

diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index 946e69103cdd7..cefb636390700 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -789,23 +789,46 @@ way to trigger. Applications should do whatever they can to help the
 system. It might be too late to consult with vmstat or any other
 statistics, so it's advisable to take an immediate action.
 
-The events are propagated upward until the event is handled, i.e. the
-events are not pass-through. Here is what this means: for example you have
-three cgroups: A->B->C. Now you set up an event listener on cgroups A, B
-and C, and suppose group C experiences some pressure. In this situation,
-only group C will receive the notification, i.e. groups A and B will not
-receive it. This is done to avoid excessive "broadcasting" of messages,
-which disturbs the system and which is especially bad if we are low on
-memory or thrashing. So, organize the cgroups wisely, or propagate the
-events manually (or, ask us to implement the pass-through events,
-explaining why would you need them.)
+By default, events are propagated upward until the event is handled, i.e. the
+events are not pass-through. For example, you have three cgroups: A->B->C. Now
+you set up an event listener on cgroups A, B and C, and suppose group C
+experiences some pressure. In this situation, only group C will receive the
+notification, i.e. groups A and B will not receive it. This is done to avoid
+excessive "broadcasting" of messages, which disturbs the system and which is
+especially bad if we are low on memory or thrashing. Group B, will receive
+notification only if there are no event listers for group C.
+
+There are three optional modes that specify different propagation behavior:
+
+ - "default": this is the default behavior specified above. This mode is the
+   same as omitting the optional mode parameter, preserved by backwards
+   compatibility.
+
+ - "hierarchy": events always propagate up to the root, similar to the default
+   behavior, except that propagation continues regardless of whether there are
+   event listeners at each level, with the "hierarchy" mode. In the above
+   example, groups A, B, and C will receive notification of memory pressure.
+
+ - "local": events are pass-through, i.e. they only receive notifications when
+   memory pressure is experienced in the memcg for which the notification is
+   registered. In the above example, group C will receive notification if
+   registered for "local" notification and the group experiences memory
+   pressure. However, group B will never receive notification, regardless if
+   there is an event listener for group C or not, if group B is registered for
+   local notification.
+
+The level and event notification mode ("hierarchy" or "local", if necessary) are
+specified by a comma-delimited string, i.e. "low,hierarchy" specifies
+hierarchical, pass-through, notification for all ancestor memcgs. Notification
+that is the default, non pass-through behavior, does not specify a mode.
+"medium,local" specifies pass-through notification for the medium level.
 
 The file memory.pressure_level is only used to setup an eventfd. To
 register a notification, an application must:
 
 - create an eventfd using eventfd(2);
 - open memory.pressure_level;
-- write string like "<event_fd> <fd of memory.pressure_level> <level>"
+- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
   to cgroup.event_control.
 
 Application will be notified through eventfd when memory pressure is at
@@ -821,7 +844,7 @@ Test:
    # cd /sys/fs/cgroup/memory/
    # mkdir foo
    # cd foo
-   # cgroup_event_listener memory.pressure_level low &
+   # cgroup_event_listener memory.pressure_level low,hierarchy &
    # echo 8000000 > memory.limit_in_bytes
    # echo 8000000 > memory.memsw.limit_in_bytes
    # echo $$ > tasks
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index ce0618bfa8d06..85350ce2d25d7 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -93,12 +93,25 @@ enum vmpressure_levels {
 	VMPRESSURE_NUM_LEVELS,
 };
 
+enum vmpressure_modes {
+	VMPRESSURE_NO_PASSTHROUGH = 0,
+	VMPRESSURE_HIERARCHY,
+	VMPRESSURE_LOCAL,
+	VMPRESSURE_NUM_MODES,
+};
+
 static const char * const vmpressure_str_levels[] = {
 	[VMPRESSURE_LOW] = "low",
 	[VMPRESSURE_MEDIUM] = "medium",
 	[VMPRESSURE_CRITICAL] = "critical",
 };
 
+static const char * const vmpressure_str_modes[] = {
+	[VMPRESSURE_NO_PASSTHROUGH] = "default",
+	[VMPRESSURE_HIERARCHY] = "hierarchy",
+	[VMPRESSURE_LOCAL] = "local",
+};
+
 static enum vmpressure_levels vmpressure_level(unsigned long pressure)
 {
 	if (pressure >= vmpressure_level_critical)
@@ -141,27 +154,31 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
 struct vmpressure_event {
 	struct eventfd_ctx *efd;
 	enum vmpressure_levels level;
+	enum vmpressure_modes mode;
 	struct list_head node;
 };
 
 static bool vmpressure_event(struct vmpressure *vmpr,
-			     enum vmpressure_levels level)
+			     const enum vmpressure_levels level,
+			     bool ancestor, bool signalled)
 {
 	struct vmpressure_event *ev;
-	bool signalled = false;
+	bool ret = false;
 
 	mutex_lock(&vmpr->events_lock);
-
 	list_for_each_entry(ev, &vmpr->events, node) {
-		if (level >= ev->level) {
-			eventfd_signal(ev->efd, 1);
-			signalled = true;
-		}
+		if (ancestor && ev->mode == VMPRESSURE_LOCAL)
+			continue;
+		if (signalled && ev->mode == VMPRESSURE_NO_PASSTHROUGH)
+			continue;
+		if (level < ev->level)
+			continue;
+		eventfd_signal(ev->efd, 1);
+		ret = true;
 	}
-
 	mutex_unlock(&vmpr->events_lock);
 
-	return signalled;
+	return ret;
 }
 
 static void vmpressure_work_fn(struct work_struct *work)
@@ -170,6 +187,8 @@ static void vmpressure_work_fn(struct work_struct *work)
 	unsigned long scanned;
 	unsigned long reclaimed;
 	enum vmpressure_levels level;
+	bool ancestor = false;
+	bool signalled = false;
 
 	spin_lock(&vmpr->sr_lock);
 	/*
@@ -194,12 +213,9 @@ static void vmpressure_work_fn(struct work_struct *work)
 	level = vmpressure_calc_level(scanned, reclaimed);
 
 	do {
-		if (vmpressure_event(vmpr, level))
-			break;
-		/*
-		 * If not handled, propagate the event upward into the
-		 * hierarchy.
-		 */
+		if (vmpressure_event(vmpr, level, ancestor, signalled))
+			signalled = true;
+		ancestor = true;
 	} while ((vmpr = vmpressure_parent(vmpr)));
 }
 
@@ -326,17 +342,40 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
 	vmpressure(gfp, memcg, true, vmpressure_win, 0);
 }
 
+static enum vmpressure_levels str_to_level(const char *arg)
+{
+	enum vmpressure_levels level;
+
+	for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++)
+		if (!strcmp(vmpressure_str_levels[level], arg))
+			return level;
+	return -1;
+}
+
+static enum vmpressure_modes str_to_mode(const char *arg)
+{
+	enum vmpressure_modes mode;
+
+	for (mode = 0; mode < VMPRESSURE_NUM_MODES; mode++)
+		if (!strcmp(vmpressure_str_modes[mode], arg))
+			return mode;
+	return -1;
+}
+
+#define MAX_VMPRESSURE_ARGS_LEN	(strlen("critical") + strlen("hierarchy") + 2)
+
 /**
  * vmpressure_register_event() - Bind vmpressure notifications to an eventfd
  * @memcg:	memcg that is interested in vmpressure notifications
  * @eventfd:	eventfd context to link notifications with
- * @args:	event arguments (used to set up a pressure level threshold)
+ * @args:	event arguments (pressure level threshold, optional mode)
  *
  * This function associates eventfd context with the vmpressure
  * infrastructure, so that the notifications will be delivered to the
- * @eventfd. The @args parameter is a string that denotes pressure level
- * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
- * "critical").
+ * @eventfd. The @args parameter is a comma-delimited string that denotes a
+ * pressure level threshold (one of vmpressure_str_levels, i.e. "low", "medium",
+ * or "critical") and an optional mode (one of vmpressure_str_modes, i.e.
+ * "hierarchy" or "local").
  *
  * To be used as memcg event method.
  */
@@ -345,28 +384,53 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
 {
 	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
 	struct vmpressure_event *ev;
-	int level;
+	enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH;
+	enum vmpressure_levels level = -1;
+	char *spec, *spec_orig;
+	char *token;
+	int ret = 0;
+
+	spec_orig = spec = kzalloc(MAX_VMPRESSURE_ARGS_LEN + 1, GFP_KERNEL);
+	if (!spec) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	strncpy(spec, args, MAX_VMPRESSURE_ARGS_LEN);
 
-	for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) {
-		if (!strcmp(vmpressure_str_levels[level], args))
-			break;
+	/* Find required level */
+	token = strsep(&spec, ",");
+	level = str_to_level(token);
+	if (level == -1) {
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (level >= VMPRESSURE_NUM_LEVELS)
-		return -EINVAL;
+	/* Find optional mode */
+	token = strsep(&spec, ",");
+	if (token) {
+		mode = str_to_mode(token);
+		if (mode == -1) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
 
 	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
-	if (!ev)
-		return -ENOMEM;
+	if (!ev) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	ev->efd = eventfd;
 	ev->level = level;
+	ev->mode = mode;
 
 	mutex_lock(&vmpr->events_lock);
 	list_add(&ev->node, &vmpr->events);
 	mutex_unlock(&vmpr->events_lock);
-
-	return 0;
+out:
+	kfree(spec_orig);
+	return ret;
 }
 
 /**
-- 
GitLab


From 1860033237d4be09c5d7382585f0c7229367a534 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:02 -0700
Subject: [PATCH 1035/1958] mm: make PR_SET_THP_DISABLE immediately active

PR_SET_THP_DISABLE has a rather subtle semantic.  It doesn't affect any
existing mapping because it only updated mm->def_flags which is a
template for new mappings.

The mappings created after prctl(PR_SET_THP_DISABLE) have VM_NOHUGEPAGE
flag set.  This can be quite surprising for all those applications which
do not do prctl(); fork() & exec() and want to control their own THP
behavior.

Another usecase when the immediate semantic of the prctl might be useful
is a combination of pre- and post-copy migration of containers with
CRIU.  In this case CRIU populates a part of a memory region with data
that was saved during the pre-copy stage.  Afterwards, the region is
registered with userfaultfd and CRIU expects to get page faults for the
parts of the region that were not yet populated.  However, khugepaged
collapses the pages and the expected page faults do not occur.

In more general case, the prctl(PR_SET_THP_DISABLE) could be used as a
temporary mechanism for enabling/disabling THP process wide.

Implementation wise, a new MMF_DISABLE_THP flag is added.  This flag is
tested when decision whether to use huge pages is taken either during
page fault of at the time of THP collapse.

It should be noted, that the new implementation makes PR_SET_THP_DISABLE
master override to any per-VMA setting, which was not the case
previously.

Fixes: a0715cc22601 ("mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE")
Link: http://lkml.kernel.org/r/1496415802-30944-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h        | 1 +
 include/linux/khugepaged.h     | 3 ++-
 include/linux/sched/coredump.h | 5 ++++-
 kernel/sys.c                   | 6 +++---
 mm/khugepaged.c                | 3 ++-
 mm/shmem.c                     | 8 +++++---
 6 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d3b3e8fcc717d..40d7b7dd2653f 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -92,6 +92,7 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 	   (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
 	   ((__vma)->vm_flags & VM_HUGEPAGE))) &&			\
 	 !((__vma)->vm_flags & VM_NOHUGEPAGE) &&			\
+	 !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) &&		\
 	 !is_vma_temporary_stack(__vma))
 #define transparent_hugepage_use_zero_page()				\
 	(transparent_hugepage_flags &					\
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 5d9a400af5091..f0d7335336cd6 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -48,7 +48,8 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
 		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
-		    !(vm_flags & VM_NOHUGEPAGE))
+		    !(vm_flags & VM_NOHUGEPAGE) &&
+		    !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 69eedcef8f03f..98ae0d05aa32e 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -68,7 +68,10 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
 #define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
 #define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
+#define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
-#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
+				 MMF_DISABLE_THP_MASK)
 
 #endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 47d901586b4e0..73fc0af147d02 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2360,7 +2360,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_THP_DISABLE:
 		if (arg2 || arg3 || arg4 || arg5)
 			return -EINVAL;
-		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+		error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
 		break;
 	case PR_SET_THP_DISABLE:
 		if (arg3 || arg4 || arg5)
@@ -2368,9 +2368,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		if (down_write_killable(&me->mm->mmap_sem))
 			return -EINTR;
 		if (arg2)
-			me->mm->def_flags |= VM_NOHUGEPAGE;
+			set_bit(MMF_DISABLE_THP, &me->mm->flags);
 		else
-			me->mm->def_flags &= ~VM_NOHUGEPAGE;
+			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
 		up_write(&me->mm->mmap_sem);
 		break;
 	case PR_MPX_ENABLE_MANAGEMENT:
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index df4ebdb2b10a3..c01f177a1120a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -816,7 +816,8 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
 static bool hugepage_vma_check(struct vm_area_struct *vma)
 {
 	if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-	    (vma->vm_flags & VM_NOHUGEPAGE))
+	    (vma->vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
 	if (shmem_file(vma->vm_file)) {
 		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
diff --git a/mm/shmem.c b/mm/shmem.c
index 9418f5a9bc468..b0aa6075d164d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1977,10 +1977,12 @@ static int shmem_fault(struct vm_fault *vmf)
 	}
 
 	sgp = SGP_CACHE;
-	if (vma->vm_flags & VM_HUGEPAGE)
-		sgp = SGP_HUGE;
-	else if (vma->vm_flags & VM_NOHUGEPAGE)
+
+	if ((vma->vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		sgp = SGP_NOHUGE;
+	else if (vma->vm_flags & VM_HUGEPAGE)
+		sgp = SGP_HUGE;
 
 	error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
 				  gfp, vma, vmf, &ret);
-- 
GitLab


From 34c8105792731118b69582562e1178417493e65a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Mon, 10 Jul 2017 15:48:05 -0700
Subject: [PATCH 1036/1958] mm/memcontrol: exclude @root from checks in
 mem_cgroup_low

Make @root exclusive in mem_cgroup_low; it is never considered low when
looked at directly and is not checked when traversing the tree.  In
effect, @root is handled identically to how root_mem_cgroup was
previously handled by mem_cgroup_low.

If @root is not excluded from the checks, a cgroup underneath @root will
never be considered low during targeted reclaim of @root, e.g.  due to
memory.current > memory.high, unless @root is misconfigured to have
memory.low > memory.high.

Excluding @root enables using memory.low to prioritize memory usage
between cgroups within a subtree of the hierarchy that is limited by
memory.high or memory.max, e.g.  when ROOT owns @root's controls but
delegates the @root directory to a USER so that USER can create and
administer children of @root.

For example, given cgroup A with children B and C:

    A
   / \
  B   C

and

  1. A/memory.current > A/memory.high
  2. A/B/memory.current < A/B/memory.low
  3. A/C/memory.current >= A/C/memory.low

As 'A' is high, i.e.  triggers reclaim from 'A', and 'B' is low, we
should reclaim from 'C' until 'A' is no longer high or until we can no
longer reclaim from 'C'.  If 'A', i.e.  @root, isn't excluded by
mem_cgroup_low when reclaming from 'A', then 'B' won't be considered low
and we will reclaim indiscriminately from both 'B' and 'C'.

Here is the test I used to confirm the bug and the patch.

20:00:55@sjchrist-vm ? ~ $ cat ~/.bin/memcg_low_test
#!/bin/bash

x62mb=$((62<<20))
x66mb=$((66<<20))
x94mb=$((94<<20))
x98mb=$((98<<20))

setup() {
    set -e

    if [[ -n $DEBUG ]]; then
        set -x
    fi

    trap teardown EXIT HUP INT TERM

    if [[ ! -e /mnt/1gb.swap ]]; then
        sudo fallocate -l 1G /mnt/1gb.swap > /dev/null
        sudo mkswap /mnt/1gb.swap > /dev/null
    fi
    if ! swapon --show=NAME | grep -q "/mnt/1gb.swap"; then
        sudo swapon /mnt/1gb.swap
    fi

    if [[ ! -e /cgroup/cgroup.controllers ]]; then
        sudo mount -t cgroup2 none /cgroup
    fi

    grep -q memory /cgroup/cgroup.controllers

    sudo sh -c "echo '+memory' > /cgroup/cgroup.subtree_control"

    sudo mkdir /cgroup/A && sudo chown $USER:$USER /cgroup/A
    sudo sh -c "echo '+memory' > /cgroup/A/cgroup.subtree_control"
    sudo sh -c "echo '96m' > /cgroup/A/memory.high"

    mkdir /cgroup/A/0
    mkdir /cgroup/A/1

    echo 64m > /cgroup/A/0/memory.low
}

teardown() {
    set +e

    trap - EXIT HUP INT TERM

    if [[ -z $1 ]]; then
        printf "\n"
        printf "%0.s*" {1..35}
        printf "\nFAILED!\n\n"
        tail /cgroup/A/**/memory.current
        printf "%0.s*" {1..35}
        printf "\n\n"
    fi

    ps | grep stress | tr -s ' ' | cut -f 2 -d ' ' | xargs -I % kill %

    sleep 2

    if [[ -e /cgroup/A/0 ]]; then
        rmdir /cgroup/A/0
    fi
    if [[ -e /cgroup/A/1 ]]; then
        rmdir /cgroup/A/1
    fi
    if [[ -e /cgroup/A ]]; then
        sudo rmdir /cgroup/A
    fi
}

stress_test() {
    sudo sh -c "echo $$ > /cgroup/A/$1/cgroup.procs"
    stress --vm 1 --vm-bytes 64M --vm-keep > /dev/null &

    sudo sh -c "echo $$ > /cgroup/A/$2/cgroup.procs"
    stress --vm 1 --vm-bytes 64M --vm-keep > /dev/null &

    sudo sh -c "echo $$ > /cgroup/cgroup.procs"

    sleep 1

    # A/0 should be consuming more memory than A/1
    [[ $(cat /cgroup/A/0/memory.current) -ge $(cat /cgroup/A/1/memory.current) ]]

    # A/0 should be consuming ~64mb
    [[ $(cat /cgroup/A/0/memory.current) -ge $x62mb ]] && [[ $(cat /cgroup/A/0/memory.current) -le $x66mb ]]

    # A should cumulatively be consuming ~96mb
    [[ $(cat /cgroup/A/memory.current) -ge $x94mb ]] && [[ $(cat /cgroup/A/memory.current) -le $x98mb ]]

    # Stop the stressors
    ps | grep stress | tr -s ' ' | cut -f 2 -d ' ' | xargs -I % kill %
}

teardown 1
setup

for ((i=1;i<=$1;i++)); do
    printf "ITERATION $i of $1 - stress_test 0 1"
    stress_test 0 1
    printf "\x1b[2K\r"

    printf "ITERATION $i of $1 - stress_test 1 0"
    stress_test 1 0
    printf "\x1b[2K\r"

    printf "ITERATION $i of $1 - PASSED\n"
done

teardown 1

echo PASSED!

20:11:26@sjchrist-vm ? ~ $ memcg_low_test 10

Link: http://lkml.kernel.org/r/1496434412-21005-1-git-send-email-sean.j.christopherson@intel.com
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 50 +++++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 425aa0caa712d..a6eef3603b724 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5317,38 +5317,52 @@ struct cgroup_subsys memory_cgrp_subsys = {
 
 /**
  * mem_cgroup_low - check if memory consumption is below the normal range
- * @root: the highest ancestor to consider
+ * @root: the top ancestor of the sub-tree being checked
  * @memcg: the memory cgroup to check
  *
  * Returns %true if memory consumption of @memcg, and that of all
- * configurable ancestors up to @root, is below the normal range.
+ * ancestors up to (but not including) @root, is below the normal range.
+ *
+ * @root is exclusive; it is never low when looked at directly and isn't
+ * checked when traversing the hierarchy.
+ *
+ * Excluding @root enables using memory.low to prioritize memory usage
+ * between cgroups within a subtree of the hierarchy that is limited by
+ * memory.high or memory.max.
+ *
+ * For example, given cgroup A with children B and C:
+ *
+ *    A
+ *   / \
+ *  B   C
+ *
+ * and
+ *
+ *  1. A/memory.current > A/memory.high
+ *  2. A/B/memory.current < A/B/memory.low
+ *  3. A/C/memory.current >= A/C/memory.low
+ *
+ * As 'A' is high, i.e. triggers reclaim from 'A', and 'B' is low, we
+ * should reclaim from 'C' until 'A' is no longer high or until we can
+ * no longer reclaim from 'C'.  If 'A', i.e. @root, isn't excluded by
+ * mem_cgroup_low when reclaming from 'A', then 'B' won't be considered
+ * low and we will reclaim indiscriminately from both 'B' and 'C'.
  */
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
 {
 	if (mem_cgroup_disabled())
 		return false;
 
-	/*
-	 * The toplevel group doesn't have a configurable range, so
-	 * it's never low when looked at directly, and it is not
-	 * considered an ancestor when assessing the hierarchy.
-	 */
-
-	if (memcg == root_mem_cgroup)
-		return false;
-
-	if (page_counter_read(&memcg->memory) >= memcg->low)
+	if (!root)
+		root = root_mem_cgroup;
+	if (memcg == root)
 		return false;
 
-	while (memcg != root) {
-		memcg = parent_mem_cgroup(memcg);
-
-		if (memcg == root_mem_cgroup)
-			break;
-
+	for (; memcg != root; memcg = parent_mem_cgroup(memcg)) {
 		if (page_counter_read(&memcg->memory) >= memcg->low)
 			return false;
 	}
+
 	return true;
 }
 
-- 
GitLab


From 78c72746f56b212ecf768a7e67cee3b7cf89238c Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Mon, 10 Jul 2017 15:48:09 -0700
Subject: [PATCH 1037/1958] vmalloc: show lazy-purged vma info in vmallocinfo

When ioremap a 67112960 bytes vm_area with the vmallocinfo:
 [..]
 0xec79b000-0xec7fa000  389120 ftl_add_mtd+0x4d0/0x754 pages=94 vmalloc
 0xec800000-0xecbe1000 4067328 kbox_proc_mem_write+0x104/0x1c4 phys=8b520000 ioremap

we get the result:
 0xf1000000-0xf5001000 67112960 devm_ioremap+0x38/0x7c phys=40000000 ioremap

For the align for ioremap must be less than '1 << IOREMAP_MAX_ORDER':

	if (flags & VM_IOREMAP)
		align = 1ul << clamp_t(int, get_count_order_long(size),
			PAGE_SHIFT, IOREMAP_MAX_ORDER);

So it makes idiot like me a litte puzzled why this was a jump the
vm_area from 0xec800000-0xecbe1000 to 0xf1000000-0xf5001000, and leaving
0xed000000-0xf1000000 as a big hole.

This patch is to show all of vm_area, including vmas which are freeing
but still in the vmap_area_list, to make it more clear about why we will
get 0xf1000000-0xf5001000 in the above case.  And we will get a
vmallocinfo like:

 [..]
 0xec79b000-0xec7fa000  389120 ftl_add_mtd+0x4d0/0x754 pages=94 vmalloc
 0xec800000-0xecbe1000 4067328 kbox_proc_mem_write+0x104/0x1c4 phys=8b520000 ioremap
 [..]
 0xece7c000-0xece7e000    8192 unpurged vm_area
 0xece7e000-0xece83000   20480 vm_map_ram
 0xf0099000-0xf00aa000   69632 vm_map_ram

after this patch.

Link: http://lkml.kernel.org/r/1496649682-20710-1-git-send-email-xieyisheng1@huawei.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: zijun_hu <zijun_hu@htc.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6211a807cb31e..6016ab079e2bd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -325,6 +325,7 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 
 /*** Global kva allocator ***/
 
+#define VM_LAZY_FREE	0x02
 #define VM_VM_AREA	0x04
 
 static DEFINE_SPINLOCK(vmap_area_lock);
@@ -1497,6 +1498,7 @@ struct vm_struct *remove_vm_area(const void *addr)
 		spin_lock(&vmap_area_lock);
 		va->vm = NULL;
 		va->flags &= ~VM_VM_AREA;
+		va->flags |= VM_LAZY_FREE;
 		spin_unlock(&vmap_area_lock);
 
 		vmap_debug_free_range(va->va_start, va->va_end);
@@ -2704,8 +2706,14 @@ static int s_show(struct seq_file *m, void *p)
 	 * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
 	 * behalf of vmap area is being tear down or vm_map_ram allocation.
 	 */
-	if (!(va->flags & VM_VM_AREA))
+	if (!(va->flags & VM_VM_AREA)) {
+		seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
+			(void *)va->va_start, (void *)va->va_end,
+			va->va_end - va->va_start,
+			va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
+
 		return 0;
+	}
 
 	v = va->vm;
 
-- 
GitLab


From e35ef6397b8273a0059a2b837c26f92b0ecf8596 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Mon, 10 Jul 2017 15:48:12 -0700
Subject: [PATCH 1038/1958] mm/cma.c: warn if the CMA area could not be
 activated

While activating a CMA area we check to make sure that all the PFNs in
the range are inside the same zone.  This is a requirement for
alloc_contig_range() to work.  Any CMA area failing the check is
disabled for good.  This happens silently right now making all future
cma_alloc() allocations failure inevitable.

Here we add an error message stating that the CMA area could not be
activated which makes it easier to explain any future cma_alloc()
failures on it.  While in there, change the bail out goto label from
'err' to 'not_in_zone' which makes more sense.

Link: http://lkml.kernel.org/r/20170605023729.26303-1-khandual@linux.vnet.ibm.com
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/cma.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/cma.c b/mm/cma.c
index 978b4a1441ef7..9e45491917895 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -127,7 +127,7 @@ static int __init cma_activate_area(struct cma *cma)
 			 * to be in the same zone.
 			 */
 			if (page_zone(pfn_to_page(pfn)) != zone)
-				goto err;
+				goto not_in_zone;
 		}
 		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
 	} while (--i);
@@ -141,7 +141,8 @@ static int __init cma_activate_area(struct cma *cma)
 
 	return 0;
 
-err:
+not_in_zone:
+	pr_err("CMA area %s could not be activated\n", cma->name);
 	kfree(cma->bitmap);
 	cma->count = 0;
 	return -EINVAL;
-- 
GitLab


From d715cf804a0318e83c75c0a7abd1a4b9ce13e8da Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
Date: Mon, 10 Jul 2017 15:48:15 -0700
Subject: [PATCH 1039/1958] mm/hugetlb.c: warn the user when issues arise on
 boot due to hugepages

When the user specifies too many hugepages or an invalid
default_hugepagesz the communication to the user is implicit in the
allocation message.  This patch adds a warning when the desired page
count is not allocated and prints an error when the default_hugepagesz
is invalid on boot.

During boot hugepages will allocate until there is a fraction of the
hugepage size left.  That is, we allocate until either the request is
satisfied or memory for the pages is exhausted.  When memory for the
pages is exhausted, it will most likely lead to the system failing with
the OOM manager not finding enough (or anything) to kill (unless you're
using really big hugepages in the order of 100s of MB or in the GBs).
The user will most likely see the OOM messages much later in the boot
sequence than the implicitly stated message.  Worse yet, you may even
get an OOM for each processor which causes many pages of OOMs on modern
systems.  Although these messages will be printed earlier than the OOM
messages, at least giving the user errors and warnings will highlight
the configuration as an issue.  I'm trying to point the user in the
right direction by providing a more robust statement of what is failing.

During the sysctl or echo command, the user can check the results much
easier than if the system hangs during boot and the scenario of having
nothing to OOM for kernel memory is highly unlikely.

Mike said:
 "Before sending out this patch, I asked Liam off list why he was doing
  it. Was it something he just thought would be useful? Or, was there
  some type of user situation/need. He said that he had been called in
  to assist on several occasions when a system OOMed during boot. In
  almost all of these situations, the user had grossly misconfigured
  huge pages.

  DB users want to pre-allocate just the right amount of huge pages, but
  sometimes they can be really off. In such situations, the huge page
  init code just allocates as many huge pages as it can and reports the
  number allocated. There is no indication that it quit allocating
  because it ran out of memory. Of course, a user could compare the
  number in the message to what they requested on the command line to
  determine if they got all the huge pages they requested. The thought
  was that it would be useful to at least flag this situation. That way,
  the user might be able to better relate the huge page allocation
  failure to the OOM.

  I'm not sure if the e-mail discussion made it obvious that this is
  something he has seen on several occasions.

  I see Michal's point that this will only flag the situation where
  someone configures huge pages very badly. And, a more extensive look
  at the situation of misconfiguring huge pages might be in order. But,
  this has happened on several occasions which led to the creation of
  this patch"

[akpm@linux-foundation.org: reposition memfmt() to avoid forward declaration]
Link: http://lkml.kernel.org/r/20170603005413.10380-1-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: zhongjiang <zhongjiang@huawei.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8254e8f6db6b6..761a669d0b62d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -70,6 +70,17 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 
+static char * __init memfmt(char *buf, unsigned long n)
+{
+	if (n >= (1UL << 30))
+		sprintf(buf, "%lu GB", n >> 30);
+	else if (n >= (1UL << 20))
+		sprintf(buf, "%lu MB", n >> 20);
+	else
+		sprintf(buf, "%lu KB", n >> 10);
+	return buf;
+}
+
 static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
 {
 	bool free = (spool->count == 0) && (spool->used_hpages == 0);
@@ -2212,7 +2223,14 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 					 &node_states[N_MEMORY]))
 			break;
 	}
-	h->max_huge_pages = i;
+	if (i < h->max_huge_pages) {
+		char buf[32];
+
+		memfmt(buf, huge_page_size(h)),
+		pr_warn("HugeTLB: allocating %lu of page size %s failed.  Only allocated %lu hugepages.\n",
+			h->max_huge_pages, buf, i);
+		h->max_huge_pages = i;
+	}
 }
 
 static void __init hugetlb_init_hstates(void)
@@ -2230,17 +2248,6 @@ static void __init hugetlb_init_hstates(void)
 	VM_BUG_ON(minimum_order == UINT_MAX);
 }
 
-static char * __init memfmt(char *buf, unsigned long n)
-{
-	if (n >= (1UL << 30))
-		sprintf(buf, "%lu GB", n >> 30);
-	else if (n >= (1UL << 20))
-		sprintf(buf, "%lu MB", n >> 20);
-	else
-		sprintf(buf, "%lu KB", n >> 10);
-	return buf;
-}
-
 static void __init report_hugepages(void)
 {
 	struct hstate *h;
@@ -2808,6 +2815,11 @@ static int __init hugetlb_init(void)
 		return 0;
 
 	if (!size_to_hstate(default_hstate_size)) {
+		if (default_hstate_size != 0) {
+			pr_err("HugeTLB: unsupported default_hugepagesz %lu. Reverting to %lu\n",
+			       default_hstate_size, HPAGE_SIZE);
+		}
+
 		default_hstate_size = HPAGE_SIZE;
 		if (!size_to_hstate(default_hstate_size))
 			hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
-- 
GitLab


From 7ab0e50ad0831e714dcdc3de44a7fe3887732b7c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 10 Jul 2017 15:48:18 -0700
Subject: [PATCH 1040/1958] oom, trace: remove ENUM evaluation of
 COMPACTION_FEEDBACK

After enabling CONFIG_TRACE_ENUM_MAP_FILE (which will soon be renamed to
CONFIG_TRACE_EVAL_MAP_FILE), I am able to examine the enums that have
been evaluated:

 # cat /sys/kernel/debug/tracing/enum_map

(which will soon be renamed to eval_map)

And it showed some interesting results:

  [..]
  ZONE_MOVABLE 3 (oom)
  ZONE_NORMAL 2 (oom)
  ZONE_DMA32 1 (oom)
  ZONE_DMA 0 (oom)
  3 3 (oom)
  2 2 (oom)
  1 1 (oom)
  COMPACT_PRIO_ASYNC 2 (oom)
  COMPACT_PRIO_SYNC_LIGHT 1 (oom)
  COMPACT_PRIO_SYNC_FULL 0 (oom)
  [..]
  ZONE_DMA 0 (vmscan)
  3 3 (vmscan)
  2 2 (vmscan)
  1 1 (vmscan)
  COMPACT_PRIO_ASYNC 2 (vmscan)
  [..]
  ZONE_DMA 0 (kmem)
  3 3 (kmem)
  2 2 (kmem)
  1 1 (kmem)
  COMPACT_PRIO_ASYNC 2 (kmem)
  [..]
  ZONE_DMA 0 (compaction)
  3 3 (compaction)
  2 2 (compaction)
  1 1 (compaction)
  COMPACT_PRIO_ASYNC 2 (compaction)
  [..]

The name within the parenthesis are the trace systems that the enum/eval
maps are associated with. When there's a number evaluated to another
number, that tells me that the TRACE_DEFINE_ENUM() was used on a #define
and not an enum. As #defines get converted normally, they are not needed
to be evaluated.

Each of the above trace systems with the number to number evaluation
included the file include/trace/events/mmflags.h which has:

 /* High-level compaction status feedback */
 #define COMPACTION_FAILED       1
 #define COMPACTION_WITHDRAWN    2
 #define COMPACTION_PROGRESS     3

[..]

 #define COMPACTION_FEEDBACK             \
        EM(COMPACTION_FAILED,           "failed")       \
        EM(COMPACTION_WITHDRAWN,        "withdrawn")    \
        EMe(COMPACTION_PROGRESS,        "progress")

Which is still needed for the __print_symbolic() usage in the
trace_event.  But it is not needed to be evaluated.

Removing the evaluation part removes the unnecessary evaluations of
numbers to numbers.

Link: http://lkml.kernel.org/r/20170615074944.7be9a647@gandalf.local.home
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/mmflags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 304ff94363b21..10e3663a75a67 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -257,7 +257,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 
 COMPACTION_STATUS
 COMPACTION_PRIORITY
-COMPACTION_FEEDBACK
+/* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
 ZONE_TYPE
 LRU_NAMES
 
-- 
GitLab


From 16981d763501c0e06e434cf6b59f964c520e0ccc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 10 Jul 2017 15:48:22 -0700
Subject: [PATCH 1041/1958] mm: improve readability of
 transparent_hugepage_enabled()

Turn the macro into a static inline and rewrite the condition checks for
better readability in preparation for adding another condition.

[ross.zwisler@linux.intel.com: fix logic to make conversion equivalent]
[akpm@linux-foundation.org: resolve vs mm-make-pr_set_thp_disable-immediately-active.patch]
[akpm@linux-foundation.org: include coredump.h for MMF_DISABLE_THP]
Link: http://lkml.kernel.org/r/149739530612.20686.14760671150202647861.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 40d7b7dd2653f..f4239d3c9c730 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
+#include <linux/sched/coredump.h>
+
 extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
@@ -85,15 +87,29 @@ extern struct kobj_attribute shmem_enabled_attr;
 
 extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
-#define transparent_hugepage_enabled(__vma)				\
-	((transparent_hugepage_flags &					\
-	  (1<<TRANSPARENT_HUGEPAGE_FLAG) ||				\
-	  (transparent_hugepage_flags &					\
-	   (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
-	   ((__vma)->vm_flags & VM_HUGEPAGE))) &&			\
-	 !((__vma)->vm_flags & VM_NOHUGEPAGE) &&			\
-	 !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) &&		\
-	 !is_vma_temporary_stack(__vma))
+extern unsigned long transparent_hugepage_flags;
+
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_NOHUGEPAGE)
+		return false;
+
+	if (is_vma_temporary_stack(vma))
+		return false;
+
+	if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+		return false;
+
+	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
+		return true;
+
+	if (transparent_hugepage_flags &
+				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+		return !!(vma->vm_flags & VM_HUGEPAGE);
+
+	return false;
+}
+
 #define transparent_hugepage_use_zero_page()				\
 	(transparent_hugepage_flags &					\
 	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
@@ -105,8 +121,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 #define transparent_hugepage_debug_cow() 0
 #endif /* CONFIG_DEBUG_VM */
 
-extern unsigned long transparent_hugepage_flags;
-
 extern unsigned long thp_get_unmapped_area(struct file *filp,
 		unsigned long addr, unsigned long len, unsigned long pgoff,
 		unsigned long flags);
@@ -225,7 +239,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
 
 #define hpage_nr_pages(x) 1
 
-#define transparent_hugepage_enabled(__vma) 0
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+	return false;
+}
 
 static inline void prep_transhuge_page(struct page *page) {}
 
-- 
GitLab


From baabda261424517110ea98c6651f632ebf2561e3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 10 Jul 2017 15:48:25 -0700
Subject: [PATCH 1042/1958] mm: always enable thp for dax mappings

The madvise policy for transparent huge pages is meant to avoid unwanted
allocations of transparent huge pages.  It allows a policy of disabling
the extra memory pressure and effort to arrange for a huge page when it
is not needed.

DAX by definition never incurs this overhead since it is statically
allocated.  The policy choice makes even less sense for device-dax which
tries to guarantee a given tlb-fault size.  Specifically, the following
setting:

	echo never > /sys/kernel/mm/transparent_hugepage/enabled

...violates that guarantee and silently disables all device-dax
instances with a 2M or 1G alignment.  So, let's avoid that non-obvious
side effect by force enabling thp for dax mappings in all cases.

It is worth noting that the reason this uses vma_is_dax(), and the
resulting header include changes, is that previous attempts to add a
VM_DAX flag were NAKd.

Link: http://lkml.kernel.org/r/149739531127.20686.15813586620597484283.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dax.h     | 5 -----
 include/linux/fs.h      | 6 ++++++
 include/linux/huge_mm.h | 5 +++++
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8f39db7439c3c..7948118757328 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -154,11 +154,6 @@ static inline unsigned int dax_radix_order(void *entry)
 #endif
 int dax_pfn_mkwrite(struct vm_fault *vmf);
 
-static inline bool vma_is_dax(struct vm_area_struct *vma)
-{
-	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
-}
-
 static inline bool dax_mapping(struct address_space *mapping)
 {
 	return mapping->host && IS_DAX(mapping->host);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cfa47125d524..78e1dbbe4cfd6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,6 +18,7 @@
 #include <linux/bug.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/mm_types.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
 #include <linux/fcntl.h>
@@ -3127,6 +3128,11 @@ static inline bool io_is_direct(struct file *filp)
 	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
+static inline bool vma_is_dax(struct vm_area_struct *vma)
+{
+	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
+}
+
 static inline int iocb_flags(struct file *file)
 {
 	int res = 0;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f4239d3c9c730..ee696347f928f 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -3,6 +3,8 @@
 
 #include <linux/sched/coredump.h>
 
+#include <linux/fs.h> /* only for vma_is_dax() */
+
 extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
@@ -103,6 +105,9 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
 	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
 		return true;
 
+	if (vma_is_dax(vma))
+		return true;
+
 	if (transparent_hugepage_flags &
 				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
 		return !!(vma->vm_flags & VM_HUGEPAGE);
-- 
GitLab


From 108a7ac448caff8e35e8c3f92f65faad893e5aca Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Jul 2017 15:48:28 -0700
Subject: [PATCH 1043/1958] include/linux/page_ref.h: ensure page_ref_unfreeze
 is ordered against prior accesses

page_ref_freeze and page_ref_unfreeze are designed to be used as a pair,
wrapping a critical section where struct pages can be modified without
having to worry about consistency for a concurrent fast-GUP.

Whilst page_ref_freeze has full barrier semantics due to its use of
atomic_cmpxchg, page_ref_unfreeze is implemented using atomic_set, which
doesn't provide any barrier semantics and allows the operation to be
reordered with respect to page modifications in the critical section.

This patch ensures that page_ref_unfreeze is ordered after any critical
section updates, by invoking smp_mb() prior to the atomic_set.

Link: http://lkml.kernel.org/r/1497349722-6731-3-git-send-email-will.deacon@arm.com
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Steve Capper <steve.capper@arm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ref.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 610e132719184..1fd71733aa68e 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -174,6 +174,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
 	VM_BUG_ON_PAGE(page_count(page) != 0, page);
 	VM_BUG_ON(count == 0);
 
+	smp_mb();
 	atomic_set(&page->_refcount, count);
 	if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
 		__page_ref_unfreeze(page, count);
-- 
GitLab


From f4e177d12686bf98b5a047b5187121a71ee0dd8c Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Jul 2017 15:48:31 -0700
Subject: [PATCH 1044/1958] mm/migrate.c: stabilise page count when migrating
 transparent hugepages

When migrating a transparent hugepage, migrate_misplaced_transhuge_page
guards itself against a concurrent fastgup of the page by checking that
the page count is equal to 2 before and after installing the new pmd.

If the page count changes, then the pmd is reverted back to the original
entry, however there is a small window where the new (possibly writable)
pmd is installed and the underlying page could be written by userspace.
Restoring the old pmd could therefore result in loss of data.

This patch fixes the problem by freezing the page count whilst updating
the page tables, which protects against a concurrent fastgup without the
need to restore the old pmd in the failure case (since the page count
can no longer change under our feet).

Link: http://lkml.kernel.org/r/1497349722-6731-4-git-send-email-will.deacon@arm.com
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 8935cbe362ce7..6276715518735 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1916,7 +1916,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	int page_lru = page_is_file_cache(page);
 	unsigned long mmun_start = address & HPAGE_PMD_MASK;
 	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
-	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1959,8 +1958,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	/* Recheck the target PMD */
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
-fail_putback:
+	if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
 		spin_unlock(ptl);
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
@@ -1982,7 +1980,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_unlock;
 	}
 
-	orig_entry = *pmd;
 	entry = mk_huge_pmd(new_page, vma->vm_page_prot);
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
@@ -1999,15 +1996,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	set_pmd_at(mm, mmun_start, pmd, entry);
 	update_mmu_cache_pmd(vma, address, &entry);
 
-	if (page_count(page) != 2) {
-		set_pmd_at(mm, mmun_start, pmd, orig_entry);
-		flush_pmd_tlb_range(vma, mmun_start, mmun_end);
-		mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
-		update_mmu_cache_pmd(vma, address, &entry);
-		page_remove_rmap(new_page, true);
-		goto fail_putback;
-	}
-
+	page_ref_unfreeze(page, 2);
 	mlock_migrate_page(new_page, page);
 	page_remove_rmap(page, true);
 	set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
-- 
GitLab


From ed8a555323a6fddadfd9e259fc4e9c41c191ca6c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2017 15:48:34 -0700
Subject: [PATCH 1045/1958] zram: use __sysfs_match_string() helper

Use __sysfs_match_string() helper instead of open coded variant.

Link: http://lkml.kernel.org/r/20170609120835.22156-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zcomp.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 12046f4f00e4c..5b8992beffec8 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -68,13 +68,11 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
 
 bool zcomp_available_algorithm(const char *comp)
 {
-	int i = 0;
+	int i;
 
-	while (backends[i]) {
-		if (sysfs_streq(comp, backends[i]))
-			return true;
-		i++;
-	}
+	i = __sysfs_match_string(backends, -1, comp);
+	if (i >= 0)
+		return true;
 
 	/*
 	 * Crypto does not ignore a trailing new line symbol,
-- 
GitLab


From 9f123ab544df1c92acd6a029067e8bde44780740 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@kernel.org>
Date: Mon, 10 Jul 2017 15:48:37 -0700
Subject: [PATCH 1046/1958] mm, memory_hotplug: support movable_node for
 hotpluggable nodes

movable_node kernel parameter allows making hotpluggable NUMA nodes to
put all the hotplugable memory into movable zone which allows more or
less reliable memory hotremove.  At least this is the case for the NUMA
nodes present during the boot (see find_zone_movable_pfns_for_nodes).

This is not the case for the memory hotplug, though.

	echo online > /sys/devices/system/memory/memoryXYZ/state

will default to a kernel zone (usually ZONE_NORMAL) unless the
particular memblock is already in the movable zone range which is not
the case normally when onlining the memory from the udev rule context
for a freshly hotadded NUMA node.  The only option currently is to have
a special udev rule to echo online_movable to all memblocks belonging to
such a node which is rather clumsy.  Not to mention this is inconsistent
as well because what ended up in the movable zone during the boot will
end up in a kernel zone after hotremove & hotadd without special care.

It would be nice to reuse memblock_is_hotpluggable but the runtime
hotplug doesn't have that information available because the boot and
hotplug paths are not shared and it would be really non trivial to make
them use the same code path because the runtime hotplug doesn't play
with the memblock allocator at all.

Teach move_pfn_range that MMOP_ONLINE_KEEP can use the movable zone if
movable_node is enabled and the range doesn't overlap with the existing
normal zone.  This should provide a reasonable default onlining
strategy.

Strictly speaking the semantic is not identical with the boot time
initialization because find_zone_movable_pfns_for_nodes covers only the
hotplugable range as described by the BIOS/FW.  From my experience this
is usually a full node though (except for Node0 which is special and
never goes away completely).  If this turns out to be a problem in the
real life we can tweak the code to store hotplug flag into memblocks but
let's keep this simple now.

Link: http://lkml.kernel.org/r/20170612111227.GI7476@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Cc: <qiuxishi@huawei.com>
Cc: Kani Toshimitsu <toshi.kani@hpe.com>
Cc: <slaoub@gmail.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Daniel Kiper <daniel.kiper@oracle.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/memory-hotplug.txt | 12 +++++++++---
 mm/memory_hotplug.c              | 19 ++++++++++++++++---
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 670f3ded0802d..5c628e19d6cd4 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -282,20 +282,26 @@ offlined it is possible to change the individual block's state by writing to the
 % echo online > /sys/devices/system/memory/memoryXXX/state
 
 This onlining will not change the ZONE type of the target memory block,
-If the memory block is in ZONE_NORMAL, you can change it to ZONE_MOVABLE:
+If the memory block doesn't belong to any zone an appropriate kernel zone
+(usually ZONE_NORMAL) will be used unless movable_node kernel command line
+option is specified when ZONE_MOVABLE will be used.
+
+You can explicitly request to associate it with ZONE_MOVABLE by
 
 % echo online_movable > /sys/devices/system/memory/memoryXXX/state
 (NOTE: current limit: this memory block must be adjacent to ZONE_MOVABLE)
 
-And if the memory block is in ZONE_MOVABLE, you can change it to ZONE_NORMAL:
+Or you can explicitly request a kernel zone (usually ZONE_NORMAL) by:
 
 % echo online_kernel > /sys/devices/system/memory/memoryXXX/state
 (NOTE: current limit: this memory block must be adjacent to ZONE_NORMAL)
 
+An explicit zone onlining can fail (e.g. when the range is already within
+and existing and incompatible zone already).
+
 After this, memory block XXX's state will be 'online' and the amount of
 available memory will be increased.
 
-Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
 This may be changed in future.
 
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7b1311ac5f7ba..32abde2e2472d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -934,6 +934,19 @@ struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
 	return &pgdat->node_zones[ZONE_NORMAL];
 }
 
+static inline bool movable_pfn_range(int nid, struct zone *default_zone,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	if (!allow_online_pfn_range(nid, start_pfn, nr_pages,
+				MMOP_ONLINE_KERNEL))
+		return true;
+
+	if (!movable_node_is_enabled())
+		return false;
+
+	return !zone_intersects(default_zone, start_pfn, nr_pages);
+}
+
 /*
  * Associates the given pfn range with the given node and the zone appropriate
  * for the given online type.
@@ -949,10 +962,10 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
 		/*
 		 * MMOP_ONLINE_KEEP defaults to MMOP_ONLINE_KERNEL but use
 		 * movable zone if that is not possible (e.g. we are within
-		 * or past the existing movable zone)
+		 * or past the existing movable zone). movable_node overrides
+		 * this default and defaults to movable zone
 		 */
-		if (!allow_online_pfn_range(nid, start_pfn, nr_pages,
-					MMOP_ONLINE_KERNEL))
+		if (movable_pfn_range(nid, zone, start_pfn, nr_pages))
 			zone = movable_zone;
 	} else if (online_type == MMOP_ONLINE_MOVABLE) {
 		zone = &pgdat->node_zones[ZONE_MOVABLE];
-- 
GitLab


From 7f252f277b66854c61d3abdd4c196d6dc64fa333 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:41 -0700
Subject: [PATCH 1047/1958] mm, memory_hotplug: simplify empty node mask
 handling in new_node_page

new_node_page tries to allocate the target page on a different NUMA node
than the source page.  This makes sense in most cases during the hotplug
because we are likely to offline the whole numa node.  But there are
cases where there are no other nodes to fallback (e.g.  when offlining
parts of the only existing node) and we have to fallback to allocating
from the source node.  The current code does that but it can be
simplified by checking the nmask and updating it before we even try to
allocate rather than special casing it.

This patch shouldn't introduce any functional change.

Link: http://lkml.kernel.org/r/20170608074553.22152-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: zhong jiang <zhongjiang@huawei.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 32abde2e2472d..f42a8ef93ec47 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1436,7 +1436,15 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 	int nid = page_to_nid(page);
 	nodemask_t nmask = node_states[N_MEMORY];
-	struct page *new_page = NULL;
+
+	/*
+	 * try to allocate from a different node but reuse this node if there
+	 * are no other online nodes to be used (e.g. we are offlining a part
+	 * of the only existing node)
+	 */
+	node_clear(nid, nmask);
+	if (nodes_empty(nmask))
+		node_set(nid, nmask);
 
 	/*
 	 * TODO: allocate a destination hugepage from a nearest neighbor node,
@@ -1447,18 +1455,11 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 		return alloc_huge_page_node(page_hstate(compound_head(page)),
 					next_node_in(nid, nmask));
 
-	node_clear(nid, nmask);
-
 	if (PageHighMem(page)
 	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
 		gfp_mask |= __GFP_HIGHMEM;
 
-	if (!nodes_empty(nmask))
-		new_page = __alloc_pages_nodemask(gfp_mask, 0, nid, &nmask);
-	if (!new_page)
-		new_page = __alloc_pages(gfp_mask, 0, nid);
-
-	return new_page;
+	return __alloc_pages_nodemask(gfp_mask, 0, nid, &nmask);
 }
 
 #define NR_OFFLINE_AT_ONCE_PAGES	(256)
-- 
GitLab


From 4db9b2efe94967be34e3b136a93251a3c1736dd5 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:44 -0700
Subject: [PATCH 1048/1958] hugetlb, memory_hotplug: prefer to use reserved
 pages for migration

new_node_page will try to use the origin's next NUMA node as the
migration destination for hugetlb pages.  If such a node doesn't have
any preallocated pool it falls back to __alloc_buddy_huge_page_no_mpol
to allocate a surplus page instead.  This is quite subotpimal for any
configuration when hugetlb pages are no distributed to all NUMA nodes
evenly.  Say we have a hotplugable node 4 and spare hugetlb pages are
node 0

  /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages:10000
  /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node3/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node4/hugepages/hugepages-2048kB/nr_hugepages:10000
  /sys/devices/system/node/node5/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node6/hugepages/hugepages-2048kB/nr_hugepages:0
  /sys/devices/system/node/node7/hugepages/hugepages-2048kB/nr_hugepages:0

Now we consume the whole pool on node 4 and try to offline this node.
All the allocated pages should be moved to node0 which has enough
preallocated pages to hold them.  With the current implementation
offlining very likely fails because hugetlb allocations during runtime
are much less reliable.

Fix this by reusing the nodemask which excludes migration source and try
to find a first node which has a page in the preallocated pool first and
fall back to __alloc_buddy_huge_page_no_mpol only when the whole pool is
consumed.

[akpm@linux-foundation.org: remove bogus arg from alloc_huge_page_nodemask() stub]
Link: http://lkml.kernel.org/r/20170608074553.22152-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: zhong jiang <zhongjiang@huawei.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  2 ++
 mm/hugetlb.c            | 27 +++++++++++++++++++++++++++
 mm/memory_hotplug.c     |  9 ++-------
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 57f700ac127ed..8fd0725d3f304 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -349,6 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
 struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
+struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
@@ -524,6 +525,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
 #define alloc_huge_page_node(h, nid) NULL
+#define alloc_huge_page_nodemask(h, nmask) NULL
 #define alloc_huge_page_noerr(v, a, r) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 761a669d0b62d..01c11ceb47d6c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1723,6 +1723,33 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	return page;
 }
 
+struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
+{
+	struct page *page = NULL;
+	int node;
+
+	spin_lock(&hugetlb_lock);
+	if (h->free_huge_pages - h->resv_huge_pages > 0) {
+		for_each_node_mask(node, *nmask) {
+			page = dequeue_huge_page_node_exact(h, node);
+			if (page)
+				break;
+		}
+	}
+	spin_unlock(&hugetlb_lock);
+	if (page)
+		return page;
+
+	/* No reservations, try to overcommit */
+	for_each_node_mask(node, *nmask) {
+		page = __alloc_buddy_huge_page_no_mpol(h, node);
+		if (page)
+			return page;
+	}
+
+	return NULL;
+}
+
 /*
  * Increase the hugetlb pool such that it can accommodate a reservation
  * of size 'delta'.
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f42a8ef93ec47..1cf3404bd0652 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1446,14 +1446,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 	if (nodes_empty(nmask))
 		node_set(nid, nmask);
 
-	/*
-	 * TODO: allocate a destination hugepage from a nearest neighbor node,
-	 * accordance with memory policy of the user process if possible. For
-	 * now as a simple work-around, we use the next node for destination.
-	 */
 	if (PageHuge(page))
-		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					next_node_in(nid, nmask));
+		return alloc_huge_page_nodemask(
+				page_hstate(compound_head(page)), &nmask);
 
 	if (PageHighMem(page)
 	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
-- 
GitLab


From 8b9132388964df2cfe151a88fd1dd8219dabf23c Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:47 -0700
Subject: [PATCH 1049/1958] mm: unify new_node_page and alloc_migrate_target

Commit 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest
neighbor node when mem-offline") has duplicated a large part of
alloc_migrate_target with some hotplug specific special casing.

To be more precise it tried to enfore the allocation from a different
node than the original page.  As a result the two function diverged in
their shared logic, e.g.  the hugetlb allocation strategy.

Let's unify the two and express different NUMA requirements by the given
nodemask.  new_node_page will simply exclude the node it doesn't care
about and alloc_migrate_target will use all the available nodes.
alloc_migrate_target will then learn to migrate hugetlb pages more
sanely and use preallocated pool when possible.

Please note that alloc_migrate_target used to call alloc_page resp.
alloc_pages_current so the memory policy of the current context which is
quite strange when we consider that it is used in the context of
alloc_contig_range which just tries to migrate pages which stand in the
way.

Link: http://lkml.kernel.org/r/20170608074553.22152-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: zhong jiang <zhongjiang@huawei.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 16 ++++++++++++++++
 mm/memory_hotplug.c     | 11 +----------
 mm/page_isolation.c     | 18 ++----------------
 3 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 48e24844b3c50..d9675b665cc4e 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -4,6 +4,7 @@
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
 #include <linux/migrate_mode.h>
+#include <linux/hugetlb.h>
 
 typedef struct page *new_page_t(struct page *page, unsigned long private,
 				int **reason);
@@ -30,6 +31,21 @@ enum migrate_reason {
 /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
 extern char *migrate_reason_names[MR_TYPES];
 
+static inline struct page *new_page_nodemask(struct page *page,
+				int preferred_nid, nodemask_t *nodemask)
+{
+	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+
+	if (PageHuge(page))
+		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
+				nodemask);
+
+	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
+		gfp_mask |= __GFP_HIGHMEM;
+
+	return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask);
+}
+
 #ifdef CONFIG_MIGRATION
 
 extern void putback_movable_pages(struct list_head *l);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1cf3404bd0652..203c46306a746 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1433,7 +1433,6 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 static struct page *new_node_page(struct page *page, unsigned long private,
 		int **result)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 	int nid = page_to_nid(page);
 	nodemask_t nmask = node_states[N_MEMORY];
 
@@ -1446,15 +1445,7 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 	if (nodes_empty(nmask))
 		node_set(nid, nmask);
 
-	if (PageHuge(page))
-		return alloc_huge_page_nodemask(
-				page_hstate(compound_head(page)), &nmask);
-
-	if (PageHighMem(page)
-	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return __alloc_pages_nodemask(gfp_mask, 0, nid, &nmask);
+	return new_page_nodemask(page, nid, &nmask);
 }
 
 #define NR_OFFLINE_AT_ONCE_PAGES	(256)
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 3606104893e0b..757410d9f758a 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -8,6 +8,7 @@
 #include <linux/memory.h>
 #include <linux/hugetlb.h>
 #include <linux/page_owner.h>
+#include <linux/migrate.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -294,20 +295,5 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 struct page *alloc_migrate_target(struct page *page, unsigned long private,
 				  int **resultp)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
-
-	/*
-	 * TODO: allocate a destination hugepage from a nearest neighbor node,
-	 * accordance with memory policy of the user process if possible. For
-	 * now as a simple work-around, we use the next node for destination.
-	 */
-	if (PageHuge(page))
-		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					    next_node_in(page_to_nid(page),
-							 node_online_map));
-
-	if (PageHighMem(page))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_mask);
+	return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
 }
-- 
GitLab


From 69ed779a1454d9a57ba3738135eec9be550e0613 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 10 Jul 2017 15:48:50 -0700
Subject: [PATCH 1050/1958] mm, hugetlb: schedule when potentially allocating
 many hugepages

A few hugetlb allocators loop while calling the page allocator and can
potentially prevent rescheduling if the page allocator slowpath is not
utilized.

Conditionally schedule when large numbers of hugepages can be allocated.

Anshuman:
 "Fixes a task which was getting hung while writing like 10000 hugepages
  (16MB on POWER8) into /proc/sys/vm/nr_hugepages."

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1706091535300.66176@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Tested-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 01c11ceb47d6c..176e0318960f9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1781,6 +1781,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 			break;
 		}
 		list_add(&page->lru, &surplus_list);
+		cond_resched();
 	}
 	allocated += i;
 
@@ -2249,6 +2250,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 		} else if (!alloc_fresh_huge_page(h,
 					 &node_states[N_MEMORY]))
 			break;
+		cond_resched();
 	}
 	if (i < h->max_huge_pages) {
 		char buf[32];
-- 
GitLab


From 6a1a8b80728c3ae327a82a6cd772e0d554eebf2e Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:48:53 -0700
Subject: [PATCH 1051/1958] mm, memcg: fix potential undefined behavior in
 mem_cgroup_event_ratelimit()

Alice has reported the following UBSAN splat:

  UBSAN: Undefined behaviour in mm/memcontrol.c:661:17
  signed integer overflow:
  -2147483644 - 2147483525 cannot be represented in type 'long int'
  CPU: 1 PID: 11758 Comm: mybibtex2filena Tainted: P           O 4.9.25-gentoo #4
  Hardware name: XXXXXX, BIOS YYYYYY
  Call Trace:
    dump_stack+0x59/0x87
    ubsan_epilogue+0xe/0x40
    handle_overflow+0xbb/0xf0
    __ubsan_handle_sub_overflow+0x12/0x20
    memcg_check_events.isra.36+0x223/0x360
    mem_cgroup_commit_charge+0x55/0x140
    wp_page_copy+0x34e/0xb80
    do_wp_page+0x1e6/0x1300
    handle_mm_fault+0x88b/0x1990
    __do_page_fault+0x2de/0x8a0
    do_page_fault+0x1a/0x20
    error_code+0x67/0x6c

The reason is that we subtract two signed types.  Let's fix this by
truly mimicing time_after and cast the result of the subtraction.

Link: http://lkml.kernel.org/r/20170616150057.GQ30580@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Alice Ferrazzi <alicef@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a6eef3603b724..3df3c04d73ab0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -631,7 +631,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	val = __this_cpu_read(memcg->stat->nr_page_events);
 	next = __this_cpu_read(memcg->stat->targets[target]);
 	/* from time_after() in jiffies.h */
-	if ((long)next - (long)val < 0) {
+	if ((long)(next - val) < 0) {
 		switch (target) {
 		case MEM_CGROUP_TARGET_THRESH:
 			next = val + THRESHOLDS_EVENTS_TARGET;
-- 
GitLab


From c6247f72d46457408d98969c4dfb78adc95053e4 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 10 Jul 2017 15:48:56 -0700
Subject: [PATCH 1052/1958] mm/hugetlb.c: replace memfmt with string_get_size

The hugetlb code has its own function to report human-readable sizes.
Convert it to use the shared string_get_size() function.  This will lead
to a minor difference in user visible output (MiB/GiB instead of MB/GB),
but some would argue that's desirable anyway.

Link: http://lkml.kernel.org/r/20170606190350.GA20010@bombadil.infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: zhong jiang <zhongjiang@huawei.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 176e0318960f9..9077865818121 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -20,6 +20,7 @@
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
+#include <linux/string_helpers.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/jhash.h>
@@ -70,17 +71,6 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 
-static char * __init memfmt(char *buf, unsigned long n)
-{
-	if (n >= (1UL << 30))
-		sprintf(buf, "%lu GB", n >> 30);
-	else if (n >= (1UL << 20))
-		sprintf(buf, "%lu MB", n >> 20);
-	else
-		sprintf(buf, "%lu KB", n >> 10);
-	return buf;
-}
-
 static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
 {
 	bool free = (spool->count == 0) && (spool->used_hpages == 0);
@@ -2255,7 +2245,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 	if (i < h->max_huge_pages) {
 		char buf[32];
 
-		memfmt(buf, huge_page_size(h)),
+		string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
 		pr_warn("HugeTLB: allocating %lu of page size %s failed.  Only allocated %lu hugepages.\n",
 			h->max_huge_pages, buf, i);
 		h->max_huge_pages = i;
@@ -2283,9 +2273,10 @@ static void __init report_hugepages(void)
 
 	for_each_hstate(h) {
 		char buf[32];
+
+		string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
 		pr_info("HugeTLB registered %s page size, pre-allocated %ld pages\n",
-			memfmt(buf, huge_page_size(h)),
-			h->free_huge_pages);
+			buf, h->free_huge_pages);
 	}
 }
 
-- 
GitLab


From 76b6f9b7edccdfcc7ad1588f24ba01e19db725f3 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 10 Jul 2017 15:48:59 -0700
Subject: [PATCH 1053/1958] mm/truncate.c: fix THP handling in
 invalidate_mapping_pages()

The condition checking for THP straddling end of invalidated range is
wrong - it checks 'index' against 'end' but 'index' has been already
advanced to point to the end of THP and thus the condition can never be
true.  As a result THP straddling 'end' has been fully invalidated.
Given the nature of invalidate_mapping_pages(), this could be only
performance issue.  In fact, we are lucky the condition is wrong because
if it was ever true, we'd leave locked page behind.

Fix the condition checking for THP straddling 'end' and also properly
unlock the page.  Also update the comment before the condition to
explain why we decide not to invalidate the page as it was not clear to
me and I had to ask Kirill.

Link: http://lkml.kernel.org/r/20170619124723.21656-1-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/truncate.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 6479ed2afc53f..2330223841fbb 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -530,9 +530,15 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			} else if (PageTransHuge(page)) {
 				index += HPAGE_PMD_NR - 1;
 				i += HPAGE_PMD_NR - 1;
-				/* 'end' is in the middle of THP */
-				if (index ==  round_down(end, HPAGE_PMD_NR))
+				/*
+				 * 'end' is in the middle of THP. Don't
+				 * invalidate the page as the part outside of
+				 * 'end' could be still useful.
+				 */
+				if (index > end) {
+					unlock_page(page);
 					continue;
+				}
 			}
 
 			ret = invalidate_inode_page(page);
-- 
GitLab


From 230ca982ba69ae63294017a3800800ad79d5f003 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Mon, 10 Jul 2017 15:49:02 -0700
Subject: [PATCH 1054/1958] userfaultfd: non-cooperative: add madvise() event
 for MADV_FREE request

MADV_FREE is identical to MADV_DONTNEED from the point of view of uffd
monitor.  The monitor has to stop handling #PF events in the range being
freed.  We are reusing userfaultfd_remove callback along with the logic
required to re-get and re-validate the VMA which may change or disappear
because userfaultfd_remove releases mmap_sem.

Link: http://lkml.kernel.org/r/1497876311-18615-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/madvise.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 8eda1841c576a..9976852f1e1cb 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -451,9 +451,6 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	struct mmu_gather tlb;
 
-	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
-		return -EINVAL;
-
 	/* MADV_FREE works for only anon vma at the moment */
 	if (!vma_is_anonymous(vma))
 		return -EINVAL;
@@ -477,14 +474,6 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 	return 0;
 }
 
-static long madvise_free(struct vm_area_struct *vma,
-			     struct vm_area_struct **prev,
-			     unsigned long start, unsigned long end)
-{
-	*prev = vma;
-	return madvise_free_single_vma(vma, start, end);
-}
-
 /*
  * Application no longer needs these pages.  If the pages are dirty,
  * it's OK to just throw them away.  The app will be more careful about
@@ -504,9 +493,17 @@ static long madvise_free(struct vm_area_struct *vma,
  * An interface that causes the system to free clean pages and flush
  * dirty pages is already available as msync(MS_INVALIDATE).
  */
-static long madvise_dontneed(struct vm_area_struct *vma,
-			     struct vm_area_struct **prev,
-			     unsigned long start, unsigned long end)
+static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
+					unsigned long start, unsigned long end)
+{
+	zap_page_range(vma, start, end - start);
+	return 0;
+}
+
+static long madvise_dontneed_free(struct vm_area_struct *vma,
+				  struct vm_area_struct **prev,
+				  unsigned long start, unsigned long end,
+				  int behavior)
 {
 	*prev = vma;
 	if (!can_madv_dontneed_vma(vma))
@@ -526,7 +523,8 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 			 * is also < vma->vm_end. If start <
 			 * vma->vm_start it means an hole materialized
 			 * in the user address space within the
-			 * virtual range passed to MADV_DONTNEED.
+			 * virtual range passed to MADV_DONTNEED
+			 * or MADV_FREE.
 			 */
 			return -ENOMEM;
 		}
@@ -537,7 +535,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 			 * Don't fail if end > vma->vm_end. If the old
 			 * vma was splitted while the mmap_sem was
 			 * released the effect of the concurrent
-			 * operation may not cause MADV_DONTNEED to
+			 * operation may not cause madvise() to
 			 * have an undefined result. There may be an
 			 * adjacent next vma that we'll walk
 			 * next. userfaultfd_remove() will generate an
@@ -549,8 +547,13 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 		}
 		VM_WARN_ON(start >= end);
 	}
-	zap_page_range(vma, start, end - start);
-	return 0;
+
+	if (behavior == MADV_DONTNEED)
+		return madvise_dontneed_single_vma(vma, start, end);
+	else if (behavior == MADV_FREE)
+		return madvise_free_single_vma(vma, start, end);
+	else
+		return -EINVAL;
 }
 
 /*
@@ -656,9 +659,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	case MADV_WILLNEED:
 		return madvise_willneed(vma, prev, start, end);
 	case MADV_FREE:
-		return madvise_free(vma, prev, start, end);
 	case MADV_DONTNEED:
-		return madvise_dontneed(vma, prev, start, end);
+		return madvise_dontneed_free(vma, prev, start, end, behavior);
 	default:
 		return madvise_behavior(vma, prev, start, end, behavior);
 	}
-- 
GitLab


From 422580c3cea7faaca67f6199375b79565d3d8ebd Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 10 Jul 2017 15:49:05 -0700
Subject: [PATCH 1055/1958] mm/oom_kill.c: add tracepoints for oom
 reaper-related events

During the debugging of the problem described in
https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa in
https://lkml.org/lkml/2017/5/19/383 , I've found that the existing debug
output is not really useful to understand issues related to the oom
reaper.

So, I assume, that adding some tracepoints might help with debugging of
similar issues.

Trace the following events:
 1) a process is marked as an oom victim,
 2) a process is added to the oom reaper list,
 3) the oom reaper starts reaping process's mm,
 4) the oom reaper finished reaping,
 5) the oom reaper skips reaping.

How it works in practice? Below is an example which show how the problem
mentioned above can be found: one process is added twice to the
oom_reaper list:

  $ cd /sys/kernel/debug/tracing
  $ echo "oom:mark_victim" > set_event
  $ echo "oom:wake_reaper" >> set_event
  $ echo "oom:skip_task_reaping" >> set_event
  $ echo "oom:start_task_reaping" >> set_event
  $ echo "oom:finish_task_reaping" >> set_event
  $ cat trace_pipe
          allocate-502   [001] ....    91.836405: mark_victim: pid=502
          allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
          allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
        oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
        oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
        oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502

Link: http://lkml.kernel.org/r/20170530185231.GA13412@castle
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++
 mm/oom_kill.c              |  7 ++++
 2 files changed, 87 insertions(+)

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 38baeb27221a9..c3c19d47ae5e3 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
 			__entry->wmark_check)
 );
 
+TRACE_EVENT(mark_victim,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(wake_reaper,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(start_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(finish_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(skip_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0e2c925e7826f..9e8b4f030c1c4 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		ret = false;
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
@@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	if (!mmget_not_zero(mm)) {
 		up_read(&mm->mmap_sem);
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
+	trace_start_task_reaping(tsk->pid);
+
 	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
 	 * is no longer stable. No barriers really needed because unmapping
@@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 * put the oom_reaper out of the way.
 	 */
 	mmput_async(mm);
+	trace_finish_task_reaping(tsk->pid);
 unlock_oom:
 	mutex_unlock(&oom_lock);
 	return ret;
@@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
 	tsk->oom_reaper_list = oom_reaper_list;
 	oom_reaper_list = tsk;
 	spin_unlock(&oom_reaper_lock);
+	trace_wake_reaper(tsk->pid);
 	wake_up(&oom_reaper_wait);
 }
 
@@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 	 */
 	__thaw_task(tsk);
 	atomic_inc(&oom_victims);
+	trace_mark_victim(tsk->pid);
 }
 
 /**
-- 
GitLab


From aaf14e40a33a2c9350471387031ca40c00f5a006 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:49:08 -0700
Subject: [PATCH 1056/1958] mm, hugetlb: unclutter hugetlb allocation layers

Patch series "mm, hugetlb: allow proper node fallback dequeue".

While working on a hugetlb migration issue addressed in a separate
patchset[1] I have noticed that the hugetlb allocations from the
preallocated pool are quite subotimal.

 [1] //lkml.kernel.org/r/20170608074553.22152-1-mhocko@kernel.org

There is no fallback mechanism implemented and no notion of preferred
node.  I have tried to work around it but Vlastimil was right to push
back for a more robust solution.  It seems that such a solution is to
reuse zonelist approach we use for the page alloctor.

This series has 3 patches.  The first one tries to make hugetlb
allocation layers more clear.  The second one implements the zonelist
hugetlb pool allocation and introduces a preferred node semantic which
is used by the migration callbacks.  The last patch is a clean up.

This patch (of 3):

Hugetlb allocation path for fresh huge pages is unnecessarily complex
and it mixes different interfaces between layers.

__alloc_buddy_huge_page is the central place to perform a new
allocation.  It checks for the hugetlb overcommit and then relies on
__hugetlb_alloc_buddy_huge_page to invoke the page allocator.  This is
all good except that __alloc_buddy_huge_page pushes vma and address down
the callchain and so __hugetlb_alloc_buddy_huge_page has to deal with
two different allocation modes - one for memory policy and other node
specific (or to make it more obscure node non-specific) requests.

This just screams for a reorganization.

This patch pulls out all the vma specific handling up to
__alloc_buddy_huge_page_with_mpol where it belongs.
__alloc_buddy_huge_page will get nodemask argument and
__hugetlb_alloc_buddy_huge_page will become a trivial wrapper over the
page allocator.

In short:
__alloc_buddy_huge_page_with_mpol - memory policy handling
  __alloc_buddy_huge_page - overcommit handling and accounting
    __hugetlb_alloc_buddy_huge_page - page allocator layer

Also note that __hugetlb_alloc_buddy_huge_page and its cpuset retry loop
is not really needed because the page allocator already handles the
cpusets update.

Finally __hugetlb_alloc_buddy_huge_page had a special case for node
specific allocations (when no policy is applied and there is a node
given).  This has relied on __GFP_THISNODE to not fallback to a different
node.  alloc_huge_page_node is the only caller which relies on this
behavior so move the __GFP_THISNODE there.

Not only does this remove quite some code it also should make those
layers easier to follow and clear wrt responsibilities.

Link: http://lkml.kernel.org/r/20170622193034.28972-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |   2 +-
 mm/hugetlb.c            | 133 +++++++++-------------------------------
 2 files changed, 30 insertions(+), 105 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8fd0725d3f304..66b621469f52f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -349,7 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
 struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask);
+struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9077865818121..fd6e0c50f949b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1521,82 +1521,19 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 	return rc;
 }
 
-/*
- * There are 3 ways this can get called:
- * 1. With vma+addr: we use the VMA's memory policy
- * 2. With !vma, but nid=NUMA_NO_NODE:  We try to allocate a huge
- *    page from any node, and let the buddy allocator itself figure
- *    it out.
- * 3. With !vma, but nid!=NUMA_NO_NODE.  We allocate a huge page
- *    strictly from 'nid'
- */
 static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
-		struct vm_area_struct *vma, unsigned long addr, int nid)
+		gfp_t gfp_mask, int nid, nodemask_t *nmask)
 {
 	int order = huge_page_order(h);
-	gfp_t gfp = htlb_alloc_mask(h)|__GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
-	unsigned int cpuset_mems_cookie;
 
-	/*
-	 * We need a VMA to get a memory policy.  If we do not
-	 * have one, we use the 'nid' argument.
-	 *
-	 * The mempolicy stuff below has some non-inlined bits
-	 * and calls ->vm_ops.  That makes it hard to optimize at
-	 * compile-time, even when NUMA is off and it does
-	 * nothing.  This helps the compiler optimize it out.
-	 */
-	if (!IS_ENABLED(CONFIG_NUMA) || !vma) {
-		/*
-		 * If a specific node is requested, make sure to
-		 * get memory from there, but only when a node
-		 * is explicitly specified.
-		 */
-		if (nid != NUMA_NO_NODE)
-			gfp |= __GFP_THISNODE;
-		/*
-		 * Make sure to call something that can handle
-		 * nid=NUMA_NO_NODE
-		 */
-		return alloc_pages_node(nid, gfp, order);
-	}
-
-	/*
-	 * OK, so we have a VMA.  Fetch the mempolicy and try to
-	 * allocate a huge page with it.  We will only reach this
-	 * when CONFIG_NUMA=y.
-	 */
-	do {
-		struct page *page;
-		struct mempolicy *mpol;
-		int nid;
-		nodemask_t *nodemask;
-
-		cpuset_mems_cookie = read_mems_allowed_begin();
-		nid = huge_node(vma, addr, gfp, &mpol, &nodemask);
-		mpol_cond_put(mpol);
-		page = __alloc_pages_nodemask(gfp, order, nid, nodemask);
-		if (page)
-			return page;
-	} while (read_mems_allowed_retry(cpuset_mems_cookie));
-
-	return NULL;
+	gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+	return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
 }
 
-/*
- * There are two ways to allocate a huge page:
- * 1. When you have a VMA and an address (like a fault)
- * 2. When you have no VMA (like when setting /proc/.../nr_hugepages)
- *
- * 'vma' and 'addr' are only for (1).  'nid' is always NUMA_NO_NODE in
- * this case which signifies that the allocation should be done with
- * respect for the VMA's memory policy.
- *
- * For (2), we ignore 'vma' and 'addr' and use 'nid' exclusively. This
- * implies that memory policies will not be taken in to account.
- */
-static struct page *__alloc_buddy_huge_page(struct hstate *h,
-		struct vm_area_struct *vma, unsigned long addr, int nid)
+static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask)
 {
 	struct page *page;
 	unsigned int r_nid;
@@ -1604,15 +1541,6 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	if (hstate_is_gigantic(h))
 		return NULL;
 
-	/*
-	 * Make sure that anyone specifying 'nid' is not also specifying a VMA.
-	 * This makes sure the caller is picking _one_ of the modes with which
-	 * we can call this function, not both.
-	 */
-	if (vma || (addr != -1)) {
-		VM_WARN_ON_ONCE(addr == -1);
-		VM_WARN_ON_ONCE(nid != NUMA_NO_NODE);
-	}
 	/*
 	 * Assume we will successfully allocate the surplus page to
 	 * prevent racing processes from causing the surplus to exceed
@@ -1646,7 +1574,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	page = __hugetlb_alloc_buddy_huge_page(h, vma, addr, nid);
+	page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
@@ -1670,19 +1598,6 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	return page;
 }
 
-/*
- * Allocate a huge page from 'nid'.  Note, 'nid' may be
- * NUMA_NO_NODE, which means that it may be allocated
- * anywhere.
- */
-static
-struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
-{
-	unsigned long addr = -1;
-
-	return __alloc_buddy_huge_page(h, NULL, addr, nid);
-}
-
 /*
  * Use the VMA's mpolicy to allocate a huge page from the buddy.
  */
@@ -1690,7 +1605,17 @@ static
 struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	return __alloc_buddy_huge_page(h, vma, addr, NUMA_NO_NODE);
+	struct page *page;
+	struct mempolicy *mpol;
+	gfp_t gfp_mask = htlb_alloc_mask(h);
+	int nid;
+	nodemask_t *nodemask;
+
+	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
+	page = __alloc_buddy_huge_page(h, gfp_mask, nid, nodemask);
+	mpol_cond_put(mpol);
+
+	return page;
 }
 
 /*
@@ -1700,21 +1625,26 @@ struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
  */
 struct page *alloc_huge_page_node(struct hstate *h, int nid)
 {
+	gfp_t gfp_mask = htlb_alloc_mask(h);
 	struct page *page = NULL;
 
+	if (nid != NUMA_NO_NODE)
+		gfp_mask |= __GFP_THISNODE;
+
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0)
 		page = dequeue_huge_page_node(h, nid);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
-		page = __alloc_buddy_huge_page_no_mpol(h, nid);
+		page = __alloc_buddy_huge_page(h, gfp_mask, nid, NULL);
 
 	return page;
 }
 
-struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
+struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask)
 {
+	gfp_t gfp_mask = htlb_alloc_mask(h);
 	struct page *page = NULL;
 	int node;
 
@@ -1731,13 +1661,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
 		return page;
 
 	/* No reservations, try to overcommit */
-	for_each_node_mask(node, *nmask) {
-		page = __alloc_buddy_huge_page_no_mpol(h, node);
-		if (page)
-			return page;
-	}
-
-	return NULL;
+	return __alloc_buddy_huge_page(h, gfp_mask, NUMA_NO_NODE, nmask);
 }
 
 /*
@@ -1765,7 +1689,8 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = __alloc_buddy_huge_page_no_mpol(h, NUMA_NO_NODE);
+		page = __alloc_buddy_huge_page(h, htlb_alloc_mask(h),
+				NUMA_NO_NODE, NULL);
 		if (!page) {
 			alloc_ok = false;
 			break;
-- 
GitLab


From 3e59fcb0e8c1c40aecb60fa4c2d1543d6a097184 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:49:11 -0700
Subject: [PATCH 1057/1958] hugetlb: add support for preferred node to
 alloc_huge_page_nodemask

alloc_huge_page_nodemask tries to allocate from any numa node in the
allowed node mask starting from lower numa nodes.  This might lead to
filling up those low NUMA nodes while others are not used.  We can
reduce this risk by introducing a concept of the preferred node similar
to what we have in the regular page allocator.  We will start allocating
from the preferred nid and then iterate over all allowed nodes in the
zonelist order until we try them all.

This is mimicing the page allocator logic except it operates on per-node
mempools.  dequeue_huge_page_vma already does this so distill the
zonelist logic into a more generic dequeue_huge_page_nodemask and use it
in alloc_huge_page_nodemask.

This will allow us to use proper per numa distance fallback also for
alloc_huge_page_node which can use alloc_huge_page_nodemask now and we
can get rid of alloc_huge_page_node helper which doesn't have any user
anymore.

Link: http://lkml.kernel.org/r/20170622193034.28972-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  5 ++-
 include/linux/migrate.h |  2 +-
 mm/hugetlb.c            | 88 ++++++++++++++++++++---------------------
 3 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 66b621469f52f..8d9fe131a2409 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -349,7 +349,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
 struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask);
+struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+				nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
@@ -525,7 +526,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
 #define alloc_huge_page_node(h, nid) NULL
-#define alloc_huge_page_nodemask(h, nmask) NULL
+#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL
 #define alloc_huge_page_noerr(v, a, r) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index d9675b665cc4e..4634da521238f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -38,7 +38,7 @@ static inline struct page *new_page_nodemask(struct page *page,
 
 	if (PageHuge(page))
 		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
-				nodemask);
+				preferred_nid, nodemask);
 
 	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
 		gfp_mask |= __GFP_HIGHMEM;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fd6e0c50f949b..1e516520433dc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -887,19 +887,39 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 	return page;
 }
 
-static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
+		nodemask_t *nmask)
 {
-	struct page *page;
-	int node;
+	unsigned int cpuset_mems_cookie;
+	struct zonelist *zonelist;
+	struct zone *zone;
+	struct zoneref *z;
+	int node = -1;
 
-	if (nid != NUMA_NO_NODE)
-		return dequeue_huge_page_node_exact(h, nid);
+	zonelist = node_zonelist(nid, gfp_mask);
+
+retry_cpuset:
+	cpuset_mems_cookie = read_mems_allowed_begin();
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
+		struct page *page;
+
+		if (!cpuset_zone_allowed(zone, gfp_mask))
+			continue;
+		/*
+		 * no need to ask again on the same node. Pool is node rather than
+		 * zone aware
+		 */
+		if (zone_to_nid(zone) == node)
+			continue;
+		node = zone_to_nid(zone);
 
-	for_each_online_node(node) {
 		page = dequeue_huge_page_node_exact(h, node);
 		if (page)
 			return page;
 	}
+	if (unlikely(read_mems_allowed_retry(cpuset_mems_cookie)))
+		goto retry_cpuset;
+
 	return NULL;
 }
 
@@ -917,15 +937,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				unsigned long address, int avoid_reserve,
 				long chg)
 {
-	struct page *page = NULL;
+	struct page *page;
 	struct mempolicy *mpol;
-	nodemask_t *nodemask;
 	gfp_t gfp_mask;
+	nodemask_t *nodemask;
 	int nid;
-	struct zonelist *zonelist;
-	struct zone *zone;
-	struct zoneref *z;
-	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * A child process with MAP_PRIVATE mappings created by their parent
@@ -940,32 +956,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 	if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
 		goto err;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	zonelist = node_zonelist(nid, gfp_mask);
-
-	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-						MAX_NR_ZONES - 1, nodemask) {
-		if (cpuset_zone_allowed(zone, gfp_mask)) {
-			page = dequeue_huge_page_node(h, zone_to_nid(zone));
-			if (page) {
-				if (avoid_reserve)
-					break;
-				if (!vma_has_reserves(vma, chg))
-					break;
-
-				SetPagePrivate(page);
-				h->resv_huge_pages--;
-				break;
-			}
-		}
+	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
+		SetPagePrivate(page);
+		h->resv_huge_pages--;
 	}
 
 	mpol_cond_put(mpol);
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-		goto retry_cpuset;
 	return page;
 
 err:
@@ -1633,7 +1632,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0)
-		page = dequeue_huge_page_node(h, nid);
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
@@ -1642,26 +1641,27 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 	return page;
 }
 
-struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask)
+
+struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+		nodemask_t *nmask)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h);
-	struct page *page = NULL;
-	int node;
 
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0) {
-		for_each_node_mask(node, *nmask) {
-			page = dequeue_huge_page_node_exact(h, node);
-			if (page)
-				break;
+		struct page *page;
+
+		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
+		if (page) {
+			spin_unlock(&hugetlb_lock);
+			return page;
 		}
 	}
 	spin_unlock(&hugetlb_lock);
-	if (page)
-		return page;
 
 	/* No reservations, try to overcommit */
-	return __alloc_buddy_huge_page(h, gfp_mask, NUMA_NO_NODE, nmask);
+
+	return __alloc_buddy_huge_page(h, gfp_mask, preferred_nid, nmask);
 }
 
 /*
-- 
GitLab


From ef77ba5ce6b0e6b657036ee9fc455fc164b215f8 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:49:14 -0700
Subject: [PATCH 1058/1958] mm, hugetlb, soft_offline: use new_page_nodemask
 for soft offline migration

new_page is yet another duplication of the migration callback which has
to handle hugetlb migration specially.  We can safely use the generic
new_page_nodemask for the same purpose.

Please note that gigantic hugetlb pages do not need any special handling
because alloc_huge_page_nodemask will make sure to check pages in all
per node pools.  The reason this was done previously was that
alloc_huge_page_node treated NO_NUMA_NODE and a specific node
differently and so alloc_huge_page_node(nid) would check on this
specific node.

Link: http://lkml.kernel.org/r/20170622193034.28972-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e2e0cb0e1d0f0..1cd3b3569af8a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1484,16 +1484,8 @@ EXPORT_SYMBOL(unpoison_memory);
 static struct page *new_page(struct page *p, unsigned long private, int **x)
 {
 	int nid = page_to_nid(p);
-	if (PageHuge(p)) {
-		struct hstate *hstate = page_hstate(compound_head(p));
 
-		if (hstate_is_gigantic(hstate))
-			return alloc_huge_page_node(hstate, NUMA_NO_NODE);
-
-		return alloc_huge_page_node(hstate, nid);
-	} else {
-		return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
-	}
+	return new_page_nodemask(p, nid, &node_states[N_MEMORY]);
 }
 
 /*
-- 
GitLab


From 727c080f03e7e2e20e868efd461d4f1022b61d9b Mon Sep 17 00:00:00 2001
From: Vinayak Menon <vinmenon@codeaurora.org>
Date: Mon, 10 Jul 2017 15:49:17 -0700
Subject: [PATCH 1059/1958] mm: avoid taking zone lock in
 pagetypeinfo_showmixed()

pagetypeinfo_showmixedcount_print is found to take a lot of time to
complete and it does this holding the zone lock and disabling
interrupts.  In some cases it is found to take more than a second (On a
2.4GHz,8Gb RAM,arm64 cpu).

Avoid taking the zone lock similar to what is done by read_page_owner,
which means possibility of inaccurate results.

Link: http://lkml.kernel.org/r/1498045643-12257-1-git-send-email-vinmenon@codeaurora.org
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: zhongjiang <zhongjiang@huawei.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_owner.c |  6 +++++-
 mm/vmstat.c     | 24 ++++++++++++++----------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/mm/page_owner.c b/mm/page_owner.c
index 60634dc53a885..0fd9dcf2c5dc1 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -281,7 +281,11 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 				continue;
 
 			if (PageBuddy(page)) {
-				pfn += (1UL << page_order(page)) - 1;
+				unsigned long freepage_order;
+
+				freepage_order = page_order_unsafe(page);
+				if (freepage_order < MAX_ORDER)
+					pfn += (1UL << freepage_order) - 1;
 				continue;
 			}
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 744ceaeb42a06..9a4441bbeef26 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1130,7 +1130,7 @@ static void frag_stop(struct seq_file *m, void *arg)
  * If @assert_populated is true, only use callback for zones that are populated.
  */
 static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
-		bool assert_populated,
+		bool assert_populated, bool nolock,
 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
 {
 	struct zone *zone;
@@ -1141,9 +1141,11 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 		if (assert_populated && !populated_zone(zone))
 			continue;
 
-		spin_lock_irqsave(&zone->lock, flags);
+		if (!nolock)
+			spin_lock_irqsave(&zone->lock, flags);
 		print(m, pgdat, zone);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		if (!nolock)
+			spin_unlock_irqrestore(&zone->lock, flags);
 	}
 }
 #endif
@@ -1166,7 +1168,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
 static int frag_show(struct seq_file *m, void *arg)
 {
 	pg_data_t *pgdat = (pg_data_t *)arg;
-	walk_zones_in_node(m, pgdat, true, frag_show_print);
+	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
 	return 0;
 }
 
@@ -1207,7 +1209,7 @@ static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
 		seq_printf(m, "%6d ", order);
 	seq_putc(m, '\n');
 
-	walk_zones_in_node(m, pgdat, true, pagetypeinfo_showfree_print);
+	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
 
 	return 0;
 }
@@ -1258,7 +1260,8 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
 		seq_printf(m, "%12s ", migratetype_names[mtype]);
 	seq_putc(m, '\n');
-	walk_zones_in_node(m, pgdat, true, pagetypeinfo_showblockcount_print);
+	walk_zones_in_node(m, pgdat, true, false,
+		pagetypeinfo_showblockcount_print);
 
 	return 0;
 }
@@ -1284,7 +1287,8 @@ static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
 		seq_printf(m, "%12s ", migratetype_names[mtype]);
 	seq_putc(m, '\n');
 
-	walk_zones_in_node(m, pgdat, true, pagetypeinfo_showmixedcount_print);
+	walk_zones_in_node(m, pgdat, true, true,
+		pagetypeinfo_showmixedcount_print);
 #endif /* CONFIG_PAGE_OWNER */
 }
 
@@ -1446,7 +1450,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 static int zoneinfo_show(struct seq_file *m, void *arg)
 {
 	pg_data_t *pgdat = (pg_data_t *)arg;
-	walk_zones_in_node(m, pgdat, false, zoneinfo_show_print);
+	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
 	return 0;
 }
 
@@ -1852,7 +1856,7 @@ static int unusable_show(struct seq_file *m, void *arg)
 	if (!node_state(pgdat->node_id, N_MEMORY))
 		return 0;
 
-	walk_zones_in_node(m, pgdat, true, unusable_show_print);
+	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
 
 	return 0;
 }
@@ -1904,7 +1908,7 @@ static int extfrag_show(struct seq_file *m, void *arg)
 {
 	pg_data_t *pgdat = (pg_data_t *)arg;
 
-	walk_zones_in_node(m, pgdat, true, extfrag_show_print);
+	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
 
 	return 0;
 }
-- 
GitLab


From a7be6e5a7f8da433065b23f98ff68f445113080a Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Mon, 10 Jul 2017 15:49:20 -0700
Subject: [PATCH 1060/1958] mm: drop useless local parameters of
 __register_one_node()

__register_one_node() initializes local parameters "p_node" & "parent"
for register_node().

But, register_node() does not use them.

Remove the related code of "parent" node, cleanup __register_one_node()
and register_node().

Link: http://lkml.kernel.org/r/1498013846-20149-1-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 73d39bc58c42d..d8dc83017d8dc 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -288,7 +288,7 @@ static void node_device_release(struct device *dev)
  *
  * Initialize and register the node device.
  */
-static int register_node(struct node *node, int num, struct node *parent)
+static int register_node(struct node *node, int num)
 {
 	int error;
 
@@ -567,19 +567,14 @@ static void init_node_hugetlb_work(int nid) { }
 
 int __register_one_node(int nid)
 {
-	int p_node = parent_node(nid);
-	struct node *parent = NULL;
 	int error;
 	int cpu;
 
-	if (p_node != nid)
-		parent = node_devices[p_node];
-
 	node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
 	if (!node_devices[nid])
 		return -ENOMEM;
 
-	error = register_node(node_devices[nid], nid, parent);
+	error = register_node(node_devices[nid], nid);
 
 	/* link cpu under this node */
 	for_each_present_cpu(cpu) {
-- 
GitLab


From 8c03cc85a035ae7a208c28c4382ecfeb6adf79a6 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Mon, 10 Jul 2017 15:49:23 -0700
Subject: [PATCH 1061/1958] fs/proc/task_mmu.c: remove obsolete comment in
 show_map_vma()

After commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas")
we do not hide stack guard page in /proc/<pid>/maps

Link: http://lkml.kernel.org/r/211f3c2a-f7ef-7c13-82bf-46fd426f6e1b@virtuozzo.com
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 520802da059c2..b836fd61ed878 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -298,7 +298,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
 	}
 
-	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
 	end = vma->vm_end;
 
-- 
GitLab


From b002529d256307602c669d1886c0b953b52b8700 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Mon, 10 Jul 2017 15:49:26 -0700
Subject: [PATCH 1062/1958] mm/page_alloc.c: eliminate unsigned confusion in
 __rmqueue_fallback

Since current_order starts as MAX_ORDER-1 and is then only decremented,
the second half of the loop condition seems superfluous.  However, if
order is 0, we may decrement current_order past 0, making it UINT_MAX.
This is obviously too subtle ([1], [2]).

Since we need to add some comment anyway, change the two variables to
signed, making the counting-down for loop look more familiar, and
apparently also making gcc generate slightly smaller code.

[1] https://lkml.org/lkml/2016/6/20/493
[2] https://lkml.org/lkml/2017/6/19/345

[akpm@linux-foundation.org: fix up reject fixupping]
Link: http://lkml.kernel.org/r/20170621185529.2265-1-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reported-by: Hao Lee <haolee.swjtu@gmail.com>
Acked-by: Wei Yang <weiyang@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8690357170480..d90c31951b901 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2206,12 +2206,16 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
  * list of requested migratetype, possibly along with other pages from the same
  * block, depending on fragmentation avoidance heuristics. Returns true if
  * fallback was found so that __rmqueue_smallest() can grab it.
+ *
+ * The use of signed ints for order and current_order is a deliberate
+ * deviation from the rest of this file, to make the for loop
+ * condition simpler.
  */
 static inline bool
-__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 {
 	struct free_area *area;
-	unsigned int current_order;
+	int current_order;
 	struct page *page;
 	int fallback_mt;
 	bool can_steal;
@@ -2221,8 +2225,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 	 * approximates finding the pageblock with the most free pages, which
 	 * would be too costly to do exactly.
 	 */
-	for (current_order = MAX_ORDER-1;
-				current_order >= order && current_order <= MAX_ORDER-1;
+	for (current_order = MAX_ORDER - 1; current_order >= order;
 				--current_order) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
-- 
GitLab


From f07e0f849a921ec8c0b509c8b8030455a2b21f7f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 10 Jul 2017 15:49:29 -0700
Subject: [PATCH 1063/1958] mm/swap_slots.c: don't disable preemption while
 taking the per-CPU cache

get_cpu_var() disables preemption and returns the per-CPU version of the
variable.  Disabling preemption is useful to ensure atomic access to the
variable within the critical section.

In this case however, after the per-CPU version of the variable is
obtained the ->free_lock is acquired.  For that reason it seems the raw
accessor could be used.  It only seems that ->slots_ret should be
retested (because with disabled preemption this variable can not be set
to NULL otherwise).

This popped up during PREEMPT-RT testing because it tries to take
spinlocks in a preempt disabled section.  In RT, spinlocks can sleep.

Link: http://lkml.kernel.org/r/20170623114755.2ebxdysacvgxzott@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ying Huang <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_slots.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 90c1032a8ac30..13a174006b912 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -273,11 +273,11 @@ int free_swap_slot(swp_entry_t entry)
 {
 	struct swap_slots_cache *cache;
 
-	cache = &get_cpu_var(swp_slots);
+	cache = raw_cpu_ptr(&swp_slots);
 	if (use_swap_slot_cache && cache->slots_ret) {
 		spin_lock_irq(&cache->free_lock);
 		/* Swap slots cache may be deactivated before acquiring lock */
-		if (!use_swap_slot_cache) {
+		if (!use_swap_slot_cache || !cache->slots_ret) {
 			spin_unlock_irq(&cache->free_lock);
 			goto direct_free;
 		}
@@ -297,7 +297,6 @@ int free_swap_slot(swp_entry_t entry)
 direct_free:
 		swapcache_free_entries(&entry, 1);
 	}
-	put_cpu_var(swp_slots);
 
 	return 0;
 }
-- 
GitLab


From 618b8c20d03c9ea06711bd36d906322ba35c0add Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 10 Jul 2017 15:49:32 -0700
Subject: [PATCH 1064/1958] include/linux/mmzone.h: remove ancient/ambiguous
 comment

Currently pg_data_t is just a struct which describes a NUMA node memory
layout.  Let's keep the comment simple and remove ambiguity.

Link: http://lkml.kernel.org/r/1498220534-22717-1-git-send-email-nborisov@suse.com
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7e8f100cb56d3..16532fa0bb64c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -603,12 +603,9 @@ extern struct page *mem_map;
 #endif
 
 /*
- * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
- * (mostly NUMA machines?) to denote a higher-level memory zone than the
- * zone denotes.
- *
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
- * it's memory layout.
+ * it's memory layout. On UMA machines there is a single pglist_data which
+ * describes the whole memory.
  *
  * Memory statistics and page replacement data structures are maintained on a
  * per-zone basis.
-- 
GitLab


From e3d3910a57ab9c70cddb2522ae711ff9bff89e7c Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 10 Jul 2017 15:49:35 -0700
Subject: [PATCH 1065/1958] include/linux/backing-dev.h: simplify wb_stat_sum

wb_stat_sum() disables interrupts and calls __wb_stat_sum() which
eventually calls __percpu_counter_sum().  However, the percpu routine is
already irq-safe.  Simplify the code a bit by making wb_stat_sum()
directly call percpu_counter_sum_positive() and not disable interrupts.

Also remove the now-uneeded __wb_stat_sum() which was just a wrapper
over percpu_counter_sum_positive().

Link: http://lkml.kernel.org/r/1498230681-29103-1-git-send-email-nborisov@suse.com
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ace73f96eb1ee..334165c911f0a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -104,22 +104,9 @@ static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 	return percpu_counter_read_positive(&wb->stat[item]);
 }
 
-static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
-				enum wb_stat_item item)
-{
-	return percpu_counter_sum_positive(&wb->stat[item]);
-}
-
 static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	s64 sum;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	sum = __wb_stat_sum(wb, item);
-	local_irq_restore(flags);
-
-	return sum;
+	return percpu_counter_sum_positive(&wb->stat[item]);
 }
 
 extern void wb_writeout_inc(struct bdi_writeback *wb);
-- 
GitLab


From d09b6468824d9e41dc30708475f954e455cf7146 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:49:38 -0700
Subject: [PATCH 1066/1958] mm: document highmem_is_dirtyable sysctl

It seems that there are still people using 32b kernels which a lot of
memory and the IO tend to suck a lot for them by default.  Mostly
because writers are throttled too when the lowmem is used.  We have
highmem_is_dirtyable to work around that issue but it seems we never
bothered to document it.  Let's do it now, finally.

Link: http://lkml.kernel.org/r/20170626093200.18958-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Alkis Georgopoulos <alkisg@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/vm.txt | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index b4ad97f10b8e6..48244c42ff521 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -240,6 +240,26 @@ fragmentation index is <= extfrag_threshold. The default value is 500.
 
 ==============================================================
 
+highmem_is_dirtyable
+
+Available only for systems with CONFIG_HIGHMEM enabled (32b systems).
+
+This parameter controls whether the high memory is considered for dirty
+writers throttling.  This is not the case by default which means that
+only the amount of memory directly visible/usable by the kernel can
+be dirtied. As a result, on systems with a large amount of memory and
+lowmem basically depleted writers might be throttled too early and
+streaming writes can get very slow.
+
+Changing the value to non zero would allow more memory to be dirtied
+and thus allow writers to write more data which can be flushed to the
+storage more effectively. Note this also comes with a risk of pre-mature
+OOM killer because some writers (e.g. direct block device writes) can
+only use the low memory and they can fill it up with dirty data without
+any throttling.
+
+==============================================================
+
 hugepages_treat_as_movable
 
 This parameter controls whether we can allocate hugepages from ZONE_MOVABLE
-- 
GitLab


From a52149f129bd161818fd7a0b6450aaa30a2cbd77 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 10 Jul 2017 15:49:41 -0700
Subject: [PATCH 1067/1958] mm/memory_hotplug.c: remove unused local zone_type
 from __remove_zone()

__remove_zone() sets up up zone_type, but never uses it for anything.
This does not cause a warning, due to the (necessary) use of
-Wno-unused-but-set-variable.  However, it's noise, so just delete it.

Link: http://lkml.kernel.org/r/20170624043421.24465-2-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 203c46306a746..7cd4377ac83ee 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -580,11 +580,8 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	int nr_pages = PAGES_PER_SECTION;
-	int zone_type;
 	unsigned long flags;
 
-	zone_type = zone - pgdat->node_zones;
-
 	pgdat_resize_lock(zone->zone_pgdat, &flags);
 	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
 	shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
-- 
GitLab


From e048cb32f69038aa1c8f11e5c1b331be4181659d Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Mon, 10 Jul 2017 15:49:44 -0700
Subject: [PATCH 1068/1958] cma: fix calculation of aligned offset

The align_offset parameter is used by bitmap_find_next_zero_area_off()
to represent the offset of map's base from the previous alignment
boundary; the function ensures that the returned index, plus the
align_offset, honors the specified align_mask.

The logic introduced by commit b5be83e308f7 ("mm: cma: align to physical
address, not CMA region position") has the cma driver calculate the
offset to the *next* alignment boundary.  In most cases, the base
alignment is greater than that specified when making allocations,
resulting in a zero offset whether we align up or down.  In the example
given with the commit, the base alignment (8MB) was half the requested
alignment (16MB) so the math also happened to work since the offset is
8MB in both directions.  However, when requesting allocations with an
alignment greater than twice that of the base, the returned index would
not be correctly aligned.

Also, the align_order arguments of cma_bitmap_aligned_mask() and
cma_bitmap_aligned_offset() should not be negative so the argument type
was made unsigned.

Fixes: b5be83e308f7 ("mm: cma: align to physical address, not CMA region position")
Link: http://lkml.kernel.org/r/20170628170742.2895-1-opendmb@gmail.com
Signed-off-by: Angus Clark <angus@angusclark.org>
Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Gregory Fong <gregory.0xf0@gmail.com>
Cc: Doug Berger <opendmb@gmail.com>
Cc: Angus Clark <angus@angusclark.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shiraz Hashim <shashim@codeaurora.org>
Cc: Jaewon Kim <jaewon31.kim@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/cma.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/mm/cma.c b/mm/cma.c
index 9e45491917895..c0da318c020e6 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -59,7 +59,7 @@ const char *cma_get_name(const struct cma *cma)
 }
 
 static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
-					     int align_order)
+					     unsigned int align_order)
 {
 	if (align_order <= cma->order_per_bit)
 		return 0;
@@ -67,17 +67,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
 }
 
 /*
- * Find a PFN aligned to the specified order and return an offset represented in
- * order_per_bits.
+ * Find the offset of the base PFN from the specified align_order.
+ * The value returned is represented in order_per_bits.
  */
 static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
-					       int align_order)
+					       unsigned int align_order)
 {
-	if (align_order <= cma->order_per_bit)
-		return 0;
-
-	return (ALIGN(cma->base_pfn, (1UL << align_order))
-		- cma->base_pfn) >> cma->order_per_bit;
+	return (cma->base_pfn & ((1UL << align_order) - 1))
+		>> cma->order_per_bit;
 }
 
 static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
-- 
GitLab


From bb01b64cfab7c22f3848cb73dc0c2b46b8d38499 Mon Sep 17 00:00:00 2001
From: "zhenwei.pi" <zhenwei.pi@youruncloud.com>
Date: Mon, 10 Jul 2017 15:49:47 -0700
Subject: [PATCH 1069/1958] mm/balloon_compaction.c: enqueue zero page to
 balloon device

presently pages in the balloon device have random value, and these pages
will be scanned by ksmd on the host.  They usually cannot be merged.
Enqueue zero pages will resolve this problem.

Link: http://lkml.kernel.org/r/1498698637-26389-1-git-send-email-zhenwei.pi@youruncloud.com
Signed-off-by: zhenwei.pi <zhenwei.pi@youruncloud.com>
Cc: Gioh Kim <gi-oh.kim@profitbricks.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Rafael Aquini <aquini@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/balloon_compaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index da91df50ba31a..9075aa54e9551 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -24,7 +24,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 {
 	unsigned long flags;
 	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
-					__GFP_NOMEMALLOC | __GFP_NORETRY);
+				__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_ZERO);
 	if (!page)
 		return NULL;
 
-- 
GitLab


From 561b5e0709e4a248c67d024d4d94b6e31e3edf2f Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:49:51 -0700
Subject: [PATCH 1070/1958] mm/mmap.c: do not blow on PROT_NONE MAP_FIXED holes
 in the stack

Commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") has
introduced a regression in some rust and Java environments which are
trying to implement their own stack guard page.  They are punching a new
MAP_FIXED mapping inside the existing stack Vma.

This will confuse expand_{downwards,upwards} into thinking that the
stack expansion would in fact get us too close to an existing non-stack
vma which is a correct behavior wrt safety.  It is a real regression on
the other hand.

Let's work around the problem by considering PROT_NONE mapping as a part
of the stack.  This is a gros hack but overflowing to such a mapping
would trap anyway an we only can hope that usespace knows what it is
doing and handle it propely.

Fixes: 1be7107fbe18 ("mm: larger stack guard gap, between vmas")
Link: http://lkml.kernel.org/r/20170705182849.GA18027@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Debugged-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 7f8cfe9d9b4d0..d1902872414f8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2244,7 +2244,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		gap_addr = TASK_SIZE;
 
 	next = vma->vm_next;
-	if (next && next->vm_start < gap_addr) {
+	if (next && next->vm_start < gap_addr &&
+			(next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
 		if (!(next->vm_flags & VM_GROWSUP))
 			return -ENOMEM;
 		/* Check that both stack segments have the same anon_vma? */
@@ -2328,7 +2329,8 @@ int expand_downwards(struct vm_area_struct *vma,
 	if (gap_addr > address)
 		return -ENOMEM;
 	prev = vma->vm_prev;
-	if (prev && prev->vm_end > gap_addr) {
+	if (prev && prev->vm_end > gap_addr &&
+			(prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
 		if (!(prev->vm_flags & VM_GROWSDOWN))
 			return -ENOMEM;
 		/* Check that both stack segments have the same anon_vma? */
-- 
GitLab


From 32e4e6d5cbb0c0e427391635991fe65e17797af8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 10 Jul 2017 15:49:54 -0700
Subject: [PATCH 1071/1958] mm/mmap.c: expand_downwards: don't require the gap
 if !vm_prev

expand_stack(vma) fails if address < stack_guard_gap even if there is no
vma->vm_prev.  I don't think this makes sense, and we didn't do this
before the recent commit 1be7107fbe18 ("mm: larger stack guard gap,
between vmas").

We do not need a gap in this case, any address is fine as long as
security_mmap_addr() doesn't object.

This also simplifies the code, we know that address >= prev->vm_end and
thus underflow is not possible.

Link: http://lkml.kernel.org/r/20170628175258.GA24881@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index d1902872414f8..49c56b811b03f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2316,7 +2316,6 @@ int expand_downwards(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *prev;
-	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
@@ -2325,15 +2324,12 @@ int expand_downwards(struct vm_area_struct *vma,
 		return error;
 
 	/* Enforce stack_guard_gap */
-	gap_addr = address - stack_guard_gap;
-	if (gap_addr > address)
-		return -ENOMEM;
 	prev = vma->vm_prev;
-	if (prev && prev->vm_end > gap_addr &&
+	/* Check that both stack segments have the same anon_vma? */
+	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
 			(prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
+		if (address - prev->vm_end < stack_guard_gap)
 			return -ENOMEM;
-		/* Check that both stack segments have the same anon_vma? */
 	}
 
 	/* We must make sure the anon_vma is allocated. */
-- 
GitLab


From 2c80cd57c74339889a8752b20862a16c28929c3a Mon Sep 17 00:00:00 2001
From: Sahitya Tummala <stummala@codeaurora.org>
Date: Mon, 10 Jul 2017 15:49:57 -0700
Subject: [PATCH 1072/1958] mm/list_lru.c: fix list_lru_count_node() to be race
 free

list_lru_count_node() iterates over all memcgs to get the total number of
entries on the node but it can race with memcg_drain_all_list_lrus(),
which migrates the entries from a dead cgroup to another.  This can return
incorrect number of entries from list_lru_count_node().

Fix this by keeping track of entries per node and simply return it in
list_lru_count_node().

Link: http://lkml.kernel.org/r/1498707555-30525-1-git-send-email-stummala@codeaurora.org
Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Alexander Polakov <apolyakov@beget.ru>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h |  1 +
 mm/list_lru.c            | 14 ++++++--------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index cb0ba9f2a9a29..fa7fd03cb5f96 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -44,6 +44,7 @@ struct list_lru_node {
 	/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
 	struct list_lru_memcg	*memcg_lrus;
 #endif
+	long nr_items;
 } ____cacheline_aligned_in_smp;
 
 struct list_lru {
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 234676e31edd3..7a40fa2be858a 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_add_tail(item, &l->list);
 		l->nr_items++;
+		nlru->nr_items++;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_del_init(item);
 		l->nr_items--;
+		nlru->nr_items--;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one);
 
 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
 {
-	long count = 0;
-	int memcg_idx;
+	struct list_lru_node *nlru;
 
-	count += __list_lru_count_one(lru, nid, -1);
-	if (list_lru_memcg_aware(lru)) {
-		for_each_memcg_cache_index(memcg_idx)
-			count += __list_lru_count_one(lru, nid, memcg_idx);
-	}
-	return count;
+	nlru = &lru->node[nid];
+	return nlru->nr_items;
 }
 EXPORT_SYMBOL_GPL(list_lru_count_node);
 
@@ -226,6 +223,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
 			assert_spin_locked(&nlru->lock);
 		case LRU_REMOVED:
 			isolated++;
+			nlru->nr_items--;
 			/*
 			 * If the lru lock has been dropped, our list
 			 * traversal is now invalid and so we have to
-- 
GitLab


From b17c070fb624cf10162cf92ea5e1ec25cd8ac176 Mon Sep 17 00:00:00 2001
From: Sahitya Tummala <stummala@codeaurora.org>
Date: Mon, 10 Jul 2017 15:50:00 -0700
Subject: [PATCH 1073/1958] fs/dcache.c: fix spin lockup issue on nlru->lock

__list_lru_walk_one() acquires nlru spin lock (nlru->lock) for longer
duration if there are more number of items in the lru list.  As per the
current code, it can hold the spin lock for upto maximum UINT_MAX
entries at a time.  So if there are more number of items in the lru
list, then "BUG: spinlock lockup suspected" is observed in the below
path:

  spin_bug+0x90
  do_raw_spin_lock+0xfc
  _raw_spin_lock+0x28
  list_lru_add+0x28
  dput+0x1c8
  path_put+0x20
  terminate_walk+0x3c
  path_lookupat+0x100
  filename_lookup+0x6c
  user_path_at_empty+0x54
  SyS_faccessat+0xd0
  el0_svc_naked+0x24

This nlru->lock is acquired by another CPU in this path -

  d_lru_shrink_move+0x34
  dentry_lru_isolate_shrink+0x48
  __list_lru_walk_one.isra.10+0x94
  list_lru_walk_node+0x40
  shrink_dcache_sb+0x60
  do_remount_sb+0xbc
  do_emergency_remount+0xb0
  process_one_work+0x228
  worker_thread+0x2e0
  kthread+0xf4
  ret_from_fork+0x10

Fix this lockup by reducing the number of entries to be shrinked from
the lru list to 1024 at once.  Also, add cond_resched() before
processing the lru list again.

Link: http://marc.info/?t=149722864900001&r=1&w=2
Link: http://lkml.kernel.org/r/1498707575-2472-1-git-send-email-stummala@codeaurora.org
Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Suggested-by: Jan Kara <jack@suse.cz>
Suggested-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Alexander Polakov <apolyakov@beget.ru>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 7ece68d0d4dbd..6c30be668487c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1160,11 +1160,12 @@ void shrink_dcache_sb(struct super_block *sb)
 		LIST_HEAD(dispose);
 
 		freed = list_lru_walk(&sb->s_dentry_lru,
-			dentry_lru_isolate_shrink, &dispose, UINT_MAX);
+			dentry_lru_isolate_shrink, &dispose, 1024);
 
 		this_cpu_sub(nr_dentry_unused, freed);
 		shrink_dentry_list(&dispose);
-	} while (freed > 0);
+		cond_resched();
+	} while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
-- 
GitLab


From 24c79d8e0a46bbd010ca9e0dc988a23981bcd423 Mon Sep 17 00:00:00 2001
From: Krzysztof Opasiak <k.opasiak@samsung.com>
Date: Mon, 10 Jul 2017 15:50:03 -0700
Subject: [PATCH 1074/1958] mm: use dedicated helper to access rlimit value

Use rlimit() helper instead of manually writing whole chain from current
task to rlim_cur.

Link: http://lkml.kernel.org/r/20170705172811.8027-1-k.opasiak@samsung.com
Signed-off-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 49c56b811b03f..7fa6759322d1f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2177,7 +2177,6 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct rlimit *rlim = current->signal->rlim;
 	unsigned long new_start;
 
 	/* address space limit tests */
@@ -2185,7 +2184,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > rlimit(RLIMIT_STACK))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -2193,7 +2192,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 		unsigned long locked;
 		unsigned long limit;
 		locked = mm->locked_vm + grow;
-		limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+		limit = rlimit(RLIMIT_MEMLOCK);
 		limit >>= PAGE_SHIFT;
 		if (locked > limit && !capable(CAP_IPC_LOCK))
 			return -ENOMEM;
-- 
GitLab


From a47fed5b5b014f5a13878b90ef2c3a7dc294189f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 10 Jul 2017 15:50:06 -0700
Subject: [PATCH 1075/1958] mm: swap: provide lru_add_drain_all_cpuslocked()

The rework of the cpu hotplug locking unearthed potential deadlocks with
the memory hotplug locking code.

The solution for these is to rework the memory hotplug locking code as
well and take the cpu hotplug lock before the memory hotplug lock in
mem_hotplug_begin(), but this will cause a recursive locking of the cpu
hotplug lock when the memory hotplug code calls lru_add_drain_all().

Split out the inner workings of lru_add_drain_all() into
lru_add_drain_all_cpuslocked() so this function can be invoked from the
memory hotplug code with the cpu hotplug lock held.

Link: http://lkml.kernel.org/r/20170704093421.419329357@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  1 +
 mm/swap.c            | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 61e7180cee21a..d83d28e53e623 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -277,6 +277,7 @@ extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
+extern void lru_add_drain_all_cpuslocked(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
diff --git a/mm/swap.c b/mm/swap.c
index 4f44dbd7f780b..60b1d2a758522 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -688,7 +688,7 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 
 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
-void lru_add_drain_all(void)
+void lru_add_drain_all_cpuslocked(void)
 {
 	static DEFINE_MUTEX(lock);
 	static struct cpumask has_work;
@@ -702,7 +702,6 @@ void lru_add_drain_all(void)
 		return;
 
 	mutex_lock(&lock);
-	get_online_cpus();
 	cpumask_clear(&has_work);
 
 	for_each_online_cpu(cpu) {
@@ -722,10 +721,16 @@ void lru_add_drain_all(void)
 	for_each_cpu(cpu, &has_work)
 		flush_work(&per_cpu(lru_add_drain_work, cpu));
 
-	put_online_cpus();
 	mutex_unlock(&lock);
 }
 
+void lru_add_drain_all(void)
+{
+	get_online_cpus();
+	lru_add_drain_all_cpuslocked();
+	put_online_cpus();
+}
+
 /**
  * release_pages - batched put_page()
  * @pages: array of pages to release
-- 
GitLab


From 3f906ba23689a3f824424c50f3ae937c2c70f676 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 10 Jul 2017 15:50:09 -0700
Subject: [PATCH 1076/1958] mm/memory-hotplug: switch locking to a percpu rwsem

Andrey reported a potential deadlock with the memory hotplug lock and
the cpu hotplug lock.

The reason is that memory hotplug takes the memory hotplug lock and then
calls stop_machine() which calls get_online_cpus().  That's the reverse
lock order to get_online_cpus(); get_online_mems(); in mm/slub_common.c

The problem has been there forever.  The reason why this was never
reported is that the cpu hotplug locking had this homebrewn recursive
reader writer semaphore construct which due to the recursion evaded the
full lock dep coverage.  The memory hotplug code copied that construct
verbatim and therefor has similar issues.

Three steps to fix this:

1) Convert the memory hotplug locking to a per cpu rwsem so the
   potential issues get reported proper by lockdep.

2) Lock the online cpus in mem_hotplug_begin() before taking the memory
   hotplug rwsem and use stop_machine_cpuslocked() in the page_alloc
   code to avoid recursive locking.

3) The cpu hotpluck locking in #2 causes a recursive locking of the cpu
   hotplug lock via __offline_pages() -> lru_add_drain_all(). Solve this
   by invoking lru_add_drain_all_cpuslocked() instead.

Link: http://lkml.kernel.org/r/20170704093421.506836322@linutronix.de
Reported-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 89 ++++++++-------------------------------------
 mm/page_alloc.c     |  2 +-
 2 files changed, 16 insertions(+), 75 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7cd4377ac83ee..8dccc317aac2a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -52,32 +52,17 @@ static void generic_online_page(struct page *page);
 static online_page_callback_t online_page_callback = generic_online_page;
 static DEFINE_MUTEX(online_page_callback_lock);
 
-/* The same as the cpu_hotplug lock, but for memory hotplug. */
-static struct {
-	struct task_struct *active_writer;
-	struct mutex lock; /* Synchronizes accesses to refcount, */
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing mem hotplug operation.
-	 */
-	int refcount;
+DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} mem_hotplug = {
-	.active_writer = NULL,
-	.lock = __MUTEX_INITIALIZER(mem_hotplug.lock),
-	.refcount = 0,
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = {.name = "mem_hotplug.lock" },
-#endif
-};
+void get_online_mems(void)
+{
+	percpu_down_read(&mem_hotplug_lock);
+}
 
-/* Lockdep annotations for get/put_online_mems() and mem_hotplug_begin/end() */
-#define memhp_lock_acquire_read() lock_map_acquire_read(&mem_hotplug.dep_map)
-#define memhp_lock_acquire()      lock_map_acquire(&mem_hotplug.dep_map)
-#define memhp_lock_release()      lock_map_release(&mem_hotplug.dep_map)
+void put_online_mems(void)
+{
+	percpu_up_read(&mem_hotplug_lock);
+}
 
 bool movable_node_enabled = false;
 
@@ -99,60 +84,16 @@ static int __init setup_memhp_default_state(char *str)
 }
 __setup("memhp_default_state=", setup_memhp_default_state);
 
-void get_online_mems(void)
-{
-	might_sleep();
-	if (mem_hotplug.active_writer == current)
-		return;
-	memhp_lock_acquire_read();
-	mutex_lock(&mem_hotplug.lock);
-	mem_hotplug.refcount++;
-	mutex_unlock(&mem_hotplug.lock);
-
-}
-
-void put_online_mems(void)
-{
-	if (mem_hotplug.active_writer == current)
-		return;
-	mutex_lock(&mem_hotplug.lock);
-
-	if (WARN_ON(!mem_hotplug.refcount))
-		mem_hotplug.refcount++; /* try to fix things up */
-
-	if (!--mem_hotplug.refcount && unlikely(mem_hotplug.active_writer))
-		wake_up_process(mem_hotplug.active_writer);
-	mutex_unlock(&mem_hotplug.lock);
-	memhp_lock_release();
-
-}
-
-/* Serializes write accesses to mem_hotplug.active_writer. */
-static DEFINE_MUTEX(memory_add_remove_lock);
-
 void mem_hotplug_begin(void)
 {
-	mutex_lock(&memory_add_remove_lock);
-
-	mem_hotplug.active_writer = current;
-
-	memhp_lock_acquire();
-	for (;;) {
-		mutex_lock(&mem_hotplug.lock);
-		if (likely(!mem_hotplug.refcount))
-			break;
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&mem_hotplug.lock);
-		schedule();
-	}
+	cpus_read_lock();
+	percpu_down_write(&mem_hotplug_lock);
 }
 
 void mem_hotplug_done(void)
 {
-	mem_hotplug.active_writer = NULL;
-	mutex_unlock(&mem_hotplug.lock);
-	memhp_lock_release();
-	mutex_unlock(&memory_add_remove_lock);
+	percpu_up_write(&mem_hotplug_lock);
+	cpus_read_unlock();
 }
 
 /* add this memory to iomem resource */
@@ -1725,7 +1666,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 		goto failed_removal;
 	ret = 0;
 	if (drain) {
-		lru_add_drain_all();
+		lru_add_drain_all_cpuslocked();
 		cond_resched();
 		drain_all_pages(zone);
 	}
@@ -1746,7 +1687,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 		}
 	}
 	/* drain all zone's lru pagevec, this is asynchronous... */
-	lru_add_drain_all();
+	lru_add_drain_all_cpuslocked();
 	yield();
 	/* drain pcp pages, this is synchronous. */
 	drain_all_pages(zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d90c31951b901..64b7d82a9b1ab 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5278,7 +5278,7 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 #endif
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine(__build_all_zonelists, pgdat, NULL);
+		stop_machine_cpuslocked(__build_all_zonelists, pgdat, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
-- 
GitLab


From 9d1f4b3f5b29bea431525e528a3ff2dc806ad904 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:50:12 -0700
Subject: [PATCH 1077/1958] mm: disallow early_pfn_to_nid on configurations
 which do not implement it

early_pfn_to_nid will return node 0 if both HAVE_ARCH_EARLY_PFN_TO_NID
and HAVE_MEMBLOCK_NODE_MAP are disabled.  It seems we are safe now
because all architectures which support NUMA define one of them (with an
exception of alpha which however has CONFIG_NUMA marked as broken) so
this works as expected.  It can get silently and subtly broken too
easily, though.  Make sure we fail the compilation if NUMA is enabled
and there is no proper implementation for this function.  If that ever
happens we know that either the specific configuration is invalid and
the fix should either disable NUMA or enable one of the above configs.

Link: http://lkml.kernel.org/r/20170704075803.15979-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 16532fa0bb64c..fc14b8b3f6ce8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1055,6 +1055,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 	!defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
 static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 {
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
 	return 0;
 }
 #endif
-- 
GitLab


From bc1bb362334ebc4c65dd4301f10fb70902b3db7d Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 10 Jul 2017 15:50:15 -0700
Subject: [PATCH 1078/1958] zram: constify attribute_group structures.

attribute_groups are not supposed to change at runtime.  All functions
working with attribute_groups provided by <linux/sysfs.h> work with
const attribute_group.  So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   8293	    841	      4	   9138	   23b2	drivers/block/zram/zram_drv.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   8357	    777	      4	   9138	   23b2	drivers/block/zram/zram_drv.o

Link: http://lkml.kernel.org/r/65680c1c4d85818f7094cbfa31c91bf28185ba1b.1499061182.git.arvind.yadav.cs@gmail.com
Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d3e3af22a088a..856d5dc02451d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1124,7 +1124,7 @@ static struct attribute *zram_disk_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group zram_disk_attr_group = {
+static const struct attribute_group zram_disk_attr_group = {
 	.attrs = zram_disk_attrs,
 };
 
-- 
GitLab


From cf8e0fedf0784ef4bc1889380b09eda295e3d109 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Mon, 10 Jul 2017 15:50:18 -0700
Subject: [PATCH 1079/1958] mm/zsmalloc: simplify zs_max_alloc_size handling

Commit 40f9fb8cffc6 ("mm/zsmalloc: support allocating obj with size of
ZS_MAX_ALLOC_SIZE") fixes a size calculation error that prevented
zsmalloc to allocate an object of the maximal size (ZS_MAX_ALLOC_SIZE).
I think however the fix is unneededly complicated.

This patch replaces the dynamic calculation of zs_size_classes at init
time by a compile time calculation that uses the DIV_ROUND_UP() macro
already used in get_size_class_index().

[akpm@linux-foundation.org: use min_t]
Link: http://lkml.kernel.org/r/20170630114859.1979-1-jmarchan@redhat.com
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Mahendran Ganesh <opensource.ganesh@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/zsmalloc.c | 52 +++++++++++++++------------------------------------
 1 file changed, 15 insertions(+), 37 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 15959d35fc26c..013eea76685e1 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -116,6 +116,11 @@
 #define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
 #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 
+#define FULLNESS_BITS	2
+#define CLASS_BITS	8
+#define ISOLATED_BITS	3
+#define MAGIC_VAL_BITS	8
+
 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
 /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
 #define ZS_MIN_ALLOC_SIZE \
@@ -137,6 +142,8 @@
  *  (reason above)
  */
 #define ZS_SIZE_CLASS_DELTA	(PAGE_SIZE >> CLASS_BITS)
+#define ZS_SIZE_CLASSES	(DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE, \
+				      ZS_SIZE_CLASS_DELTA) + 1)
 
 enum fullness_group {
 	ZS_EMPTY,
@@ -168,11 +175,6 @@ static struct dentry *zs_stat_root;
 static struct vfsmount *zsmalloc_mnt;
 #endif
 
-/*
- * number of size_classes
- */
-static int zs_size_classes;
-
 /*
  * We assign a page to ZS_ALMOST_EMPTY fullness group when:
  *	n <= N / f, where
@@ -244,7 +246,7 @@ struct link_free {
 struct zs_pool {
 	const char *name;
 
-	struct size_class **size_class;
+	struct size_class *size_class[ZS_SIZE_CLASSES];
 	struct kmem_cache *handle_cachep;
 	struct kmem_cache *zspage_cachep;
 
@@ -268,11 +270,6 @@ struct zs_pool {
 #endif
 };
 
-#define FULLNESS_BITS	2
-#define CLASS_BITS	8
-#define ISOLATED_BITS	3
-#define MAGIC_VAL_BITS	8
-
 struct zspage {
 	struct {
 		unsigned int fullness:FULLNESS_BITS;
@@ -551,7 +548,7 @@ static int get_size_class_index(int size)
 		idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
 				ZS_SIZE_CLASS_DELTA);
 
-	return min(zs_size_classes - 1, idx);
+	return min_t(int, ZS_SIZE_CLASSES - 1, idx);
 }
 
 static inline void zs_stat_inc(struct size_class *class,
@@ -610,7 +607,7 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
 			"obj_allocated", "obj_used", "pages_used",
 			"pages_per_zspage", "freeable");
 
-	for (i = 0; i < zs_size_classes; i++) {
+	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
 		class = pool->size_class[i];
 
 		if (class->index != i)
@@ -1294,17 +1291,6 @@ static int zs_cpu_dead(unsigned int cpu)
 	return 0;
 }
 
-static void __init init_zs_size_classes(void)
-{
-	int nr;
-
-	nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
-	if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
-		nr += 1;
-
-	zs_size_classes = nr;
-}
-
 static bool can_merge(struct size_class *prev, int pages_per_zspage,
 					int objs_per_zspage)
 {
@@ -2145,7 +2131,7 @@ static void async_free_zspage(struct work_struct *work)
 	struct zs_pool *pool = container_of(work, struct zs_pool,
 					free_work);
 
-	for (i = 0; i < zs_size_classes; i++) {
+	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
 		class = pool->size_class[i];
 		if (class->index != i)
 			continue;
@@ -2263,7 +2249,7 @@ unsigned long zs_compact(struct zs_pool *pool)
 	int i;
 	struct size_class *class;
 
-	for (i = zs_size_classes - 1; i >= 0; i--) {
+	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		class = pool->size_class[i];
 		if (!class)
 			continue;
@@ -2309,7 +2295,7 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker,
 	struct zs_pool *pool = container_of(shrinker, struct zs_pool,
 			shrinker);
 
-	for (i = zs_size_classes - 1; i >= 0; i--) {
+	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		class = pool->size_class[i];
 		if (!class)
 			continue;
@@ -2361,12 +2347,6 @@ struct zs_pool *zs_create_pool(const char *name)
 		return NULL;
 
 	init_deferred_free(pool);
-	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
-			GFP_KERNEL);
-	if (!pool->size_class) {
-		kfree(pool);
-		return NULL;
-	}
 
 	pool->name = kstrdup(name, GFP_KERNEL);
 	if (!pool->name)
@@ -2379,7 +2359,7 @@ struct zs_pool *zs_create_pool(const char *name)
 	 * Iterate reversely, because, size of size_class that we want to use
 	 * for merging should be larger or equal to current size.
 	 */
-	for (i = zs_size_classes - 1; i >= 0; i--) {
+	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		int size;
 		int pages_per_zspage;
 		int objs_per_zspage;
@@ -2453,7 +2433,7 @@ void zs_destroy_pool(struct zs_pool *pool)
 	zs_unregister_migration(pool);
 	zs_pool_stat_destroy(pool);
 
-	for (i = 0; i < zs_size_classes; i++) {
+	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
 		int fg;
 		struct size_class *class = pool->size_class[i];
 
@@ -2492,8 +2472,6 @@ static int __init zs_init(void)
 	if (ret)
 		goto hp_setup_fail;
 
-	init_zs_size_classes();
-
 #ifdef CONFIG_ZPOOL
 	zpool_register_driver(&zs_zpool_driver);
 #endif
-- 
GitLab


From 458f7920f9b1e6c313944d498c440f8599b8a136 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Mon, 10 Jul 2017 15:50:21 -0700
Subject: [PATCH 1080/1958] mm/kasan/kasan_init.c: use kasan_zero_pud for p4d
 table

There is missing optimization in zero_p4d_populate() that can save some
memory when mapping zero shadow.  Implement it like as others.

Link: http://lkml.kernel.org/r/1494829255-23946-1-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/kasan_init.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index b96a5f773d880..554e4c0f23a2c 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -118,6 +118,18 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 
 	do {
 		next = p4d_addr_end(addr, end);
+		if (IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
+			pud_t *pud;
+			pmd_t *pmd;
+
+			p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud));
+			pud = pud_offset(p4d, addr);
+			pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
+			pmd = pmd_offset(pud, addr);
+			pmd_populate_kernel(&init_mm, pmd,
+						lm_alias(kasan_zero_pte));
+			continue;
+		}
 
 		if (p4d_none(*p4d)) {
 			p4d_populate(&init_mm, p4d,
-- 
GitLab


From c634d807d98e3e7def43e72d28528c84c612ab98 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 10 Jul 2017 15:50:24 -0700
Subject: [PATCH 1081/1958] mm/kasan: get rid of speculative shadow checks

For some unaligned memory accesses we have to check additional byte of
the shadow memory.  Currently we load that byte speculatively to have
only single load + branch on the optimistic fast path.

However, this approach has some downsides:

 - It's unaligned access, so this prevents porting KASAN on
   architectures which doesn't support unaligned accesses.

 - We have to map additional shadow page to prevent crash if speculative
   load happens near the end of the mapped memory. This would
   significantly complicate upcoming memory hotplug support.

I wasn't able to notice any performance degradation with this patch.  So
these speculative loads is just a pain with no gain, let's remove them.

Link: http://lkml.kernel.org/r/20170601162338.23540-1-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/kasan.c | 98 ++++++++----------------------------------------
 1 file changed, 16 insertions(+), 82 deletions(-)

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index c81549d5c8330..212bc62041de9 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -134,94 +134,30 @@ static __always_inline bool memory_is_poisoned_1(unsigned long addr)
 	return false;
 }
 
-static __always_inline bool memory_is_poisoned_2(unsigned long addr)
+static __always_inline bool memory_is_poisoned_2_4_8(unsigned long addr,
+						unsigned long size)
 {
-	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
-
-	if (unlikely(*shadow_addr)) {
-		if (memory_is_poisoned_1(addr + 1))
-			return true;
-
-		/*
-		 * If single shadow byte covers 2-byte access, we don't
-		 * need to do anything more. Otherwise, test the first
-		 * shadow byte.
-		 */
-		if (likely(((addr + 1) & KASAN_SHADOW_MASK) != 0))
-			return false;
-
-		return unlikely(*(u8 *)shadow_addr);
-	}
+	u8 *shadow_addr = (u8 *)kasan_mem_to_shadow((void *)addr);
 
-	return false;
-}
-
-static __always_inline bool memory_is_poisoned_4(unsigned long addr)
-{
-	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
-
-	if (unlikely(*shadow_addr)) {
-		if (memory_is_poisoned_1(addr + 3))
-			return true;
-
-		/*
-		 * If single shadow byte covers 4-byte access, we don't
-		 * need to do anything more. Otherwise, test the first
-		 * shadow byte.
-		 */
-		if (likely(((addr + 3) & KASAN_SHADOW_MASK) >= 3))
-			return false;
-
-		return unlikely(*(u8 *)shadow_addr);
-	}
-
-	return false;
-}
-
-static __always_inline bool memory_is_poisoned_8(unsigned long addr)
-{
-	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
-
-	if (unlikely(*shadow_addr)) {
-		if (memory_is_poisoned_1(addr + 7))
-			return true;
-
-		/*
-		 * If single shadow byte covers 8-byte access, we don't
-		 * need to do anything more. Otherwise, test the first
-		 * shadow byte.
-		 */
-		if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
-			return false;
-
-		return unlikely(*(u8 *)shadow_addr);
-	}
+	/*
+	 * Access crosses 8(shadow size)-byte boundary. Such access maps
+	 * into 2 shadow bytes, so we need to check them both.
+	 */
+	if (unlikely(((addr + size - 1) & KASAN_SHADOW_MASK) < size - 1))
+		return *shadow_addr || memory_is_poisoned_1(addr + size - 1);
 
-	return false;
+	return memory_is_poisoned_1(addr + size - 1);
 }
 
 static __always_inline bool memory_is_poisoned_16(unsigned long addr)
 {
-	u32 *shadow_addr = (u32 *)kasan_mem_to_shadow((void *)addr);
-
-	if (unlikely(*shadow_addr)) {
-		u16 shadow_first_bytes = *(u16 *)shadow_addr;
-
-		if (unlikely(shadow_first_bytes))
-			return true;
-
-		/*
-		 * If two shadow bytes covers 16-byte access, we don't
-		 * need to do anything more. Otherwise, test the last
-		 * shadow byte.
-		 */
-		if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
-			return false;
+	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
 
-		return memory_is_poisoned_1(addr + 15);
-	}
+	/* Unaligned 16-bytes access maps into 3 shadow bytes. */
+	if (unlikely(!IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
+		return *shadow_addr || memory_is_poisoned_1(addr + 15);
 
-	return false;
+	return *shadow_addr;
 }
 
 static __always_inline unsigned long bytes_is_zero(const u8 *start,
@@ -292,11 +228,9 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size)
 		case 1:
 			return memory_is_poisoned_1(addr);
 		case 2:
-			return memory_is_poisoned_2(addr);
 		case 4:
-			return memory_is_poisoned_4(addr);
 		case 8:
-			return memory_is_poisoned_8(addr);
+			return memory_is_poisoned_2_4_8(addr, size);
 		case 16:
 			return memory_is_poisoned_16(addr);
 		default:
-- 
GitLab


From 4d461333f144456b80d9eabd7cee7ac02fa5d0ee Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 10 Jul 2017 15:50:27 -0700
Subject: [PATCH 1082/1958] x86/kasan: don't allocate extra shadow memory

We used to read several bytes of the shadow memory in advance.
Therefore additional shadow memory mapped to prevent crash if
speculative load would happen near the end of the mapped shadow memory.

Now we don't have such speculative loads, so we no longer need to map
additional shadow memory.

Link: http://lkml.kernel.org/r/20170601162338.23540-2-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/kasan_init_64.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 88215ac16b24b..02c9d75534091 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -23,12 +23,7 @@ static int __init map_range(struct range *range)
 	start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
 	end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
 
-	/*
-	 * end + 1 here is intentional. We check several shadow bytes in advance
-	 * to slightly speed up fastpath. In some rare cases we could cross
-	 * boundary of mapped shadow, so we just map some more here.
-	 */
-	return vmemmap_populate(start, end + 1, NUMA_NO_NODE);
+	return vmemmap_populate(start, end, NUMA_NO_NODE);
 }
 
 static void __init clear_pgds(unsigned long start,
-- 
GitLab


From 3f9ec80f7b22ec46272a32408c5cd91ae2f8c4c0 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 10 Jul 2017 15:50:31 -0700
Subject: [PATCH 1083/1958] arm64/kasan: don't allocate extra shadow memory

We used to read several bytes of the shadow memory in advance.
Therefore additional shadow memory mapped to prevent crash if
speculative load would happen near the end of the mapped shadow memory.

Now we don't have such speculative loads, so we no longer need to map
additional shadow memory.

Link: http://lkml.kernel.org/r/20170601162338.23540-3-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/kasan_init.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 687a358a37337..81f03959a4ab2 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -191,14 +191,8 @@ void __init kasan_init(void)
 		if (start >= end)
 			break;
 
-		/*
-		 * end + 1 here is intentional. We check several shadow bytes in
-		 * advance to slightly speed up fastpath. In some rare cases
-		 * we could cross boundary of mapped shadow, so we just map
-		 * some more here.
-		 */
 		vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
-				(unsigned long)kasan_mem_to_shadow(end) + 1,
+				(unsigned long)kasan_mem_to_shadow(end),
 				pfn_to_nid(virt_to_pfn(start)));
 	}
 
-- 
GitLab


From fa69b5989bb0031efa96f2f4ff49caf6532a0970 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 10 Jul 2017 15:50:34 -0700
Subject: [PATCH 1084/1958] mm/kasan: add support for memory hotplug

KASAN doesn't happen work with memory hotplug because hotplugged memory
doesn't have any shadow memory.  So any access to hotplugged memory
would cause a crash on shadow check.

Use memory hotplug notifier to allocate and map shadow memory when the
hotplugged memory is going online and free shadow after the memory
offlined.

Link: http://lkml.kernel.org/r/20170601162338.23540-4-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/Kconfig       |  1 -
 mm/kasan/kasan.c | 40 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index 46ef77d5c3326..48b1af447fa74 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -161,7 +161,6 @@ config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
 	depends on ARCH_ENABLE_MEMORY_HOTPLUG
-	depends on COMPILE_TEST || !KASAN
 
 config MEMORY_HOTPLUG_SPARSE
 	def_bool y
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 212bc62041de9..509974a1911e7 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -737,17 +737,47 @@ void __asan_unpoison_stack_memory(const void *addr, size_t size)
 EXPORT_SYMBOL(__asan_unpoison_stack_memory);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static int kasan_mem_notifier(struct notifier_block *nb,
+static int __meminit kasan_mem_notifier(struct notifier_block *nb,
 			unsigned long action, void *data)
 {
-	return (action == MEM_GOING_ONLINE) ? NOTIFY_BAD : NOTIFY_OK;
+	struct memory_notify *mem_data = data;
+	unsigned long nr_shadow_pages, start_kaddr, shadow_start;
+	unsigned long shadow_end, shadow_size;
+
+	nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
+	start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
+	shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
+	shadow_size = nr_shadow_pages << PAGE_SHIFT;
+	shadow_end = shadow_start + shadow_size;
+
+	if (WARN_ON(mem_data->nr_pages % KASAN_SHADOW_SCALE_SIZE) ||
+		WARN_ON(start_kaddr % (KASAN_SHADOW_SCALE_SIZE << PAGE_SHIFT)))
+		return NOTIFY_BAD;
+
+	switch (action) {
+	case MEM_GOING_ONLINE: {
+		void *ret;
+
+		ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
+					shadow_end, GFP_KERNEL,
+					PAGE_KERNEL, VM_NO_GUARD,
+					pfn_to_nid(mem_data->start_pfn),
+					__builtin_return_address(0));
+		if (!ret)
+			return NOTIFY_BAD;
+
+		kmemleak_ignore(ret);
+		return NOTIFY_OK;
+	}
+	case MEM_OFFLINE:
+		vfree((void *)shadow_start);
+	}
+
+	return NOTIFY_OK;
 }
 
 static int __init kasan_memhotplug_init(void)
 {
-	pr_info("WARNING: KASAN doesn't support memory hot-add\n");
-	pr_info("Memory hot-add will be disabled\n");
-
 	hotplug_memory_notifier(kasan_mem_notifier, 0);
 
 	return 0;
-- 
GitLab


From f5bd62cd44119a11a30976f6a97f4aab2c35a097 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Mon, 10 Jul 2017 15:50:37 -0700
Subject: [PATCH 1085/1958] mm/kasan/kasan.c: rename XXX_is_zero to
 XXX_is_nonzero

They return positive value, that is, true, if non-zero value is found.
Rename them to reduce confusion.

Link: http://lkml.kernel.org/r/20170516012350.GA16015@js1304-desktop
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/kasan.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 509974a1911e7..ca11bc4ce2050 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -160,7 +160,7 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr)
 	return *shadow_addr;
 }
 
-static __always_inline unsigned long bytes_is_zero(const u8 *start,
+static __always_inline unsigned long bytes_is_nonzero(const u8 *start,
 					size_t size)
 {
 	while (size) {
@@ -173,7 +173,7 @@ static __always_inline unsigned long bytes_is_zero(const u8 *start,
 	return 0;
 }
 
-static __always_inline unsigned long memory_is_zero(const void *start,
+static __always_inline unsigned long memory_is_nonzero(const void *start,
 						const void *end)
 {
 	unsigned int words;
@@ -181,11 +181,11 @@ static __always_inline unsigned long memory_is_zero(const void *start,
 	unsigned int prefix = (unsigned long)start % 8;
 
 	if (end - start <= 16)
-		return bytes_is_zero(start, end - start);
+		return bytes_is_nonzero(start, end - start);
 
 	if (prefix) {
 		prefix = 8 - prefix;
-		ret = bytes_is_zero(start, prefix);
+		ret = bytes_is_nonzero(start, prefix);
 		if (unlikely(ret))
 			return ret;
 		start += prefix;
@@ -194,12 +194,12 @@ static __always_inline unsigned long memory_is_zero(const void *start,
 	words = (end - start) / 8;
 	while (words) {
 		if (unlikely(*(u64 *)start))
-			return bytes_is_zero(start, 8);
+			return bytes_is_nonzero(start, 8);
 		start += 8;
 		words--;
 	}
 
-	return bytes_is_zero(start, (end - start) % 8);
+	return bytes_is_nonzero(start, (end - start) % 8);
 }
 
 static __always_inline bool memory_is_poisoned_n(unsigned long addr,
@@ -207,7 +207,7 @@ static __always_inline bool memory_is_poisoned_n(unsigned long addr,
 {
 	unsigned long ret;
 
-	ret = memory_is_zero(kasan_mem_to_shadow((void *)addr),
+	ret = memory_is_nonzero(kasan_mem_to_shadow((void *)addr),
 			kasan_mem_to_shadow((void *)addr + size - 1) + 1);
 
 	if (unlikely(ret)) {
-- 
GitLab


From 822d5ec25884b4e4436c819d03035fc0dd689309 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 10 Jul 2017 15:50:40 -0700
Subject: [PATCH 1086/1958] kasan: make get_wild_bug_type() static

The helper function get_wild_bug_type() does not need to be in global
scope, so make it static.

Cleans up sparse warning:

  "symbol 'get_wild_bug_type' was not declared. Should it be static?"

Link: http://lkml.kernel.org/r/20170622090049.10658-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/report.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index beee0e980e2dd..04bb1d3eb9ece 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -107,7 +107,7 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info)
 	return bug_type;
 }
 
-const char *get_wild_bug_type(struct kasan_access_info *info)
+static const char *get_wild_bug_type(struct kasan_access_info *info)
 {
 	const char *bug_type = "unknown-crash";
 
-- 
GitLab


From a371e2fa3ab993c915eaa713333d8981823dffe8 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 10 Jul 2017 15:50:43 -0700
Subject: [PATCH 1087/1958] frv: remove wrapper header for asm/device.h

frv's asm/device.h is merely including asm-generic/device.h.  Thus, the
arch specific header can be omitted and the generic header can be used
directly.

Link: http://lkml.kernel.org/r/20170517124915.26904-1-tklauser@distanz.ch
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/include/asm/Kbuild   | 1 +
 arch/frv/include/asm/device.h | 7 -------
 2 files changed, 1 insertion(+), 7 deletions(-)
 delete mode 100644 arch/frv/include/asm/device.h

diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index cce3bc3603ea8..69f612a556347 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -1,5 +1,6 @@
 
 generic-y += clkdev.h
+generic-y += device.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += irq_work.h
diff --git a/arch/frv/include/asm/device.h b/arch/frv/include/asm/device.h
deleted file mode 100644
index d8f9872b0e2dc..0000000000000
--- a/arch/frv/include/asm/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
-- 
GitLab


From 31b7fb448c2bda757f3386c3eb50c2718bf6d776 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 10 Jul 2017 15:50:46 -0700
Subject: [PATCH 1088/1958] frv: use generic fb.h

The arch uses a verbatim copy of the asm-generic version and does not
add any own implementations to the header, so use asm-generic/fb.h
instead of duplicating code.

Link: http://lkml.kernel.org/r/20170517083307.1697-1-tklauser@distanz.ch
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/include/asm/Kbuild |  1 +
 arch/frv/include/asm/fb.h   | 12 ------------
 2 files changed, 1 insertion(+), 12 deletions(-)
 delete mode 100644 arch/frv/include/asm/fb.h

diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 69f612a556347..2cf7648787b28 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += clkdev.h
 generic-y += device.h
 generic-y += exec.h
 generic-y += extable.h
+generic-y += fb.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/frv/include/asm/fb.h b/arch/frv/include/asm/fb.h
deleted file mode 100644
index c7df380309920..0000000000000
--- a/arch/frv/include/asm/fb.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
-- 
GitLab


From b6d0f14abb7c1d9d522c064633c820aaeb9bbfbf Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Jul 2017 15:50:49 -0700
Subject: [PATCH 1089/1958] frv: cmpxchg: implement cmpxchg64()

FRV supports 64-bit cmpxchg, which is provided by the arch code as
__cmpxchg_64 and subsequently used to implement atomic64_cmpxchg.

This patch hooks up the generic cmpxchg64 API using the same function,
which also provides default definitions of the relaxed, acquire and
release variants.  This fixes the build when COMPILE_TEST=y and
IOMMU_IO_PGTABLE_LPAE=y.

Link: http://lkml.kernel.org/r/1499084670-6996-1-git-send-email-will.deacon@arm.com
Signed-off-by: Will Deacon <will.deacon@arm.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/include/asm/cmpxchg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/frv/include/asm/cmpxchg.h b/arch/frv/include/asm/cmpxchg.h
index a899765102ea4..ad1f11cfa92a3 100644
--- a/arch/frv/include/asm/cmpxchg.h
+++ b/arch/frv/include/asm/cmpxchg.h
@@ -76,6 +76,7 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
  * - if (*ptr != test) then orig = *ptr;
  */
 extern uint64_t __cmpxchg_64(uint64_t test, uint64_t new, volatile uint64_t *v);
+#define cmpxchg64(p, o, n)	__cmpxchg_64((o), (n), (p))
 
 #ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
 
-- 
GitLab


From cde1b69389772ca8855fabdfe5d9ae9ba1d7d33a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 10 Jul 2017 15:50:52 -0700
Subject: [PATCH 1090/1958] fs/proc/generic.c: switch to ida_simple_get/remove

The code can be much simplified by switching to ida_simple_get/remove.

Link: http://lkml.kernel.org/r/8d1cc9f7-5115-c9dc-028e-c0770b6bfe1f@gmail.com
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c | 32 +++++++-------------------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9425c0d97262b..e3cda0b5968fa 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -180,7 +180,6 @@ static int xlate_proc_name(const char *name, struct proc_dir_entry **ret,
 }
 
 static DEFINE_IDA(proc_inum_ida);
-static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 
 #define PROC_DYNAMIC_FIRST 0xF0000000U
 
@@ -190,37 +189,20 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
  */
 int proc_alloc_inum(unsigned int *inum)
 {
-	unsigned int i;
-	int error;
+	int i;
 
-retry:
-	if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
-		return -ENOMEM;
+	i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1,
+			   GFP_KERNEL);
+	if (i < 0)
+		return i;
 
-	spin_lock_irq(&proc_inum_lock);
-	error = ida_get_new(&proc_inum_ida, &i);
-	spin_unlock_irq(&proc_inum_lock);
-	if (error == -EAGAIN)
-		goto retry;
-	else if (error)
-		return error;
-
-	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
-		spin_lock_irq(&proc_inum_lock);
-		ida_remove(&proc_inum_ida, i);
-		spin_unlock_irq(&proc_inum_lock);
-		return -ENOSPC;
-	}
-	*inum = PROC_DYNAMIC_FIRST + i;
+	*inum = PROC_DYNAMIC_FIRST + (unsigned int)i;
 	return 0;
 }
 
 void proc_free_inum(unsigned int inum)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&proc_inum_lock, flags);
-	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
-	spin_unlock_irqrestore(&proc_inum_lock, flags);
+	ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
 }
 
 /*
-- 
GitLab


From 0b396923ee9bdcb4a208df2148712b79b6dee73e Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:50:55 -0700
Subject: [PATCH 1091/1958] asm-generic/bug.h: declare struct pt_regs; before
 function prototype

This series of patches splits BUILD_BUG related macros out of
"include/linux/bug.h" into new file "include/linux/build_bug.h" (patch
5), and changes the pointer type checking in the `container_of()` macro
to deal with pointers of array type better (patch 6).  Patches 1 to 4
are prerequisites.

Patches 2, 3, 4, and 5 have been inserted since the previous version of
this patch series.  Patch 6 here corresponds to v3 and v4's patch 2.

Patch 1 was a prerequisite in v3 of this series to avoid a lot of
warnings when <linux/bug.h> was included by <linux/kernel.h>.  That is
no longer relevant for v5 of the series, but I left it in because it was
acked by a Arnd Bergmann and Michal Nazarewicz.

Patches 2, 3, and 4 are some checkpatch clean-ups on
"include/linux/bug.h" before splitting out the BUILD_BUG stuff in patch
5.

Patch 5 splits the BUILD_BUG related macros out of "include/linux/bug.h"
into new file "include/linux/build_bug.h" because including
<linux/bug.h> in "include/linux/kernel.h" would result in build failures
due to circular dependencies.

Patch 6 changes the pointer type checking by `container_of()` to avoid
some incompatible pointer warnings when the dereferenced pointer has
array type.

1) asm-generic/bug.h: declare struct pt_regs; before function prototype
2) linux/bug.h: correct formatting of block comment
3) linux/bug.h: correct "(foo*)" should be "(foo *)"
4) linux/bug.h: correct "space required before that '-'"
5) bug: split BUILD_BUG stuff out into <linux/build_bug.h>
6) kernel.h: handle pointers to arrays better in container_of()

This patch (of 6):

The declaration of `__warn()` has `struct pt_regs *regs` as one of its
parameters.  This can result in compiler warnings if `struct regs` is not
already declared.  Add an empty declaration of `struct pt_regs` to avoid
the warnings.

Link: http://lkml.kernel.org/r/20170525120316.24473-2-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index d6f4aed479a12..87191357d303c 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -97,6 +97,7 @@ extern void warn_slowpath_null(const char *file, const int line);
 
 /* used internally by panic.c */
 struct warn_args;
+struct pt_regs;
 
 void __warn(const char *file, int line, void *caller, unsigned taint,
 	    struct pt_regs *regs, struct warn_args *args);
-- 
GitLab


From e9d5a48499391fe5b0615610858665ba8149e255 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:50:58 -0700
Subject: [PATCH 1092/1958] linux/bug.h: correct formatting of block comment

Correct these checkpatch.pl warnings:

|WARNING: Block comments use * on subsequent lines
|#34: FILE: include/linux/bug.h:34:
|+/* Force a compilation error if condition is true, but also produce a
|+   result (of value 0 and type size_t), so the expression can be used

|WARNING: Block comments use a trailing */ on a separate line
|#36: FILE: include/linux/bug.h:36:
|+   aren't permitted). */

Link: http://lkml.kernel.org/r/20170525120316.24473-3-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 687b557fc5eb9..ca24007e2dc36 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -30,10 +30,12 @@ struct pt_regs;
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
 	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
 
-/* Force a compilation error if condition is true, but also produce a
-   result (of value 0 and type size_t), so the expression can be used
-   e.g. in a structure initializer (or where-ever else comma expressions
-   aren't permitted). */
+/*
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type size_t), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 
-- 
GitLab


From 8cdd7cca9287abf4c849c01e2a4e8207ad3e3a82 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:51:01 -0700
Subject: [PATCH 1093/1958] linux/bug.h: correct "(foo*)" should be "(foo *)"

Correct this checkpatch.pl error:

|ERROR: "(foo*)" should be "(foo *)"
|#19: FILE: include/linux/bug.h:19:
|+#define BUILD_BUG_ON_NULL(e) ((void*)0)

Link: http://lkml.kernel.org/r/20170525120316.24473-4-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/bug.h b/include/linux/bug.h
index ca24007e2dc36..216a1b79653d4 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -16,7 +16,7 @@ struct pt_regs;
 #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_ZERO(e) (0)
-#define BUILD_BUG_ON_NULL(e) ((void*)0)
+#define BUILD_BUG_ON_NULL(e) ((void *)0)
 #define BUILD_BUG_ON_INVALID(e) (0)
 #define BUILD_BUG_ON_MSG(cond, msg) (0)
 #define BUILD_BUG_ON(condition) (0)
-- 
GitLab


From 47e81e59d98b90727a02ceb486407eeed5eb8727 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:51:04 -0700
Subject: [PATCH 1094/1958] linux/bug.h: correct "space required before that
 '-'"

Correct these checkpatch.pl errors:

|ERROR: space required before that '-' (ctx:OxO)
|#37: FILE: include/linux/bug.h:37:
|+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))

|ERROR: space required before that '-' (ctx:OxO)
|#38: FILE: include/linux/bug.h:38:
|+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))

I decided to wrap the bitfield expressions that begin with minus signs
in parentheses rather than insert spaces before the minus signs.

Link: http://lkml.kernel.org/r/20170525120316.24473-5-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 216a1b79653d4..483207cb99fb8 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -36,8 +36,8 @@ struct pt_regs;
  * e.g. in a structure initializer (or where-ever else comma expressions
  * aren't permitted).
  */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
-#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
 
 /*
  * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
-- 
GitLab


From bc6245e5efd70c41eaf9334b1b5e646745cb0fb3 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 10 Jul 2017 15:51:07 -0700
Subject: [PATCH 1095/1958] bug: split BUILD_BUG stuff out into
 <linux/build_bug.h>

Including <linux/bug.h> pulls in a lot of bloat from <asm/bug.h> and
<asm-generic/bug.h> that is not needed to call the BUILD_BUG() family of
macros.  Split them out into their own header, <linux/build_bug.h>.

Also correct some checkpatch.pl errors for the BUILD_BUG_ON_ZERO() and
BUILD_BUG_ON_NULL() macros by adding parentheses around the bitfield
widths that begin with a minus sign.

Link: http://lkml.kernel.org/r/20170525120316.24473-6-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h       | 74 +---------------------------------
 include/linux/build_bug.h | 84 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 73 deletions(-)
 create mode 100644 include/linux/build_bug.h

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 483207cb99fb8..5d5554c874fd6 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -3,6 +3,7 @@
 
 #include <asm/bug.h>
 #include <linux/compiler.h>
+#include <linux/build_bug.h>
 
 enum bug_trap_type {
 	BUG_TRAP_TYPE_NONE = 0,
@@ -13,82 +14,9 @@ enum bug_trap_type {
 struct pt_regs;
 
 #ifdef __CHECKER__
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_ZERO(e) (0)
-#define BUILD_BUG_ON_NULL(e) ((void *)0)
-#define BUILD_BUG_ON_INVALID(e) (0)
-#define BUILD_BUG_ON_MSG(cond, msg) (0)
-#define BUILD_BUG_ON(condition) (0)
-#define BUILD_BUG() (0)
 #define MAYBE_BUILD_BUG_ON(cond) (0)
 #else /* __CHECKER__ */
 
-/* Force a compilation error if a constant expression is not a power of 2 */
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
-	BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
-	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
-
-/*
- * Force a compilation error if condition is true, but also produce a
- * result (of value 0 and type size_t), so the expression can be used
- * e.g. in a structure initializer (or where-ever else comma expressions
- * aren't permitted).
- */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
-#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
-
-/*
- * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
- * expression but avoids the generation of any code, even if that expression
- * has side-effects.
- */
-#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
-
-/**
- * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
- *		      error message.
- * @condition: the condition which the compiler should know is false.
- *
- * See BUILD_BUG_ON for description.
- */
-#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
-
-/**
- * BUILD_BUG_ON - break compile if a condition is true.
- * @condition: the condition which the compiler should know is false.
- *
- * If you have some code which relies on certain constants being equal, or
- * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
- * detect if someone changes it.
- *
- * The implementation uses gcc's reluctance to create a negative array, but gcc
- * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
- * inline functions).  Luckily, in 4.3 they added the "error" function
- * attribute just for this type of case.  Thus, we use a negative sized array
- * (should always create an error on gcc versions older than 4.4) and then call
- * an undefined function with the error attribute (should always create an
- * error on gcc 4.3 and later).  If for some reason, neither creates a
- * compile-time error, we'll still have a link-time error, which is harder to
- * track down.
- */
-#ifndef __OPTIMIZE__
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#else
-#define BUILD_BUG_ON(condition) \
-	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
-#endif
-
-/**
- * BUILD_BUG - break compile if used.
- *
- * If you have some code that you expect the compiler to eliminate at
- * build time, you should use BUILD_BUG to detect if it is
- * unexpectedly used.
- */
-#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
-
 #define MAYBE_BUILD_BUG_ON(cond)			\
 	do {						\
 		if (__builtin_constant_p((cond)))       \
diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
new file mode 100644
index 0000000000000..b7d22d60008a9
--- /dev/null
+++ b/include/linux/build_bug.h
@@ -0,0 +1,84 @@
+#ifndef _LINUX_BUILD_BUG_H
+#define _LINUX_BUILD_BUG_H
+
+#include <linux/compiler.h>
+
+#ifdef __CHECKER__
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_ZERO(e) (0)
+#define BUILD_BUG_ON_NULL(e) ((void *)0)
+#define BUILD_BUG_ON_INVALID(e) (0)
+#define BUILD_BUG_ON_MSG(cond, msg) (0)
+#define BUILD_BUG_ON(condition) (0)
+#define BUILD_BUG() (0)
+#else /* __CHECKER__ */
+
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
+	BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
+	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
+/*
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type size_t), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
+
+/*
+ * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
+ * expression but avoids the generation of any code, even if that expression
+ * has side-effects.
+ */
+#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
+
+/**
+ * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
+ *		      error message.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * See BUILD_BUG_ON for description.
+ */
+#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative array, but gcc
+ * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
+ * inline functions).  Luckily, in 4.3 they added the "error" function
+ * attribute just for this type of case.  Thus, we use a negative sized array
+ * (should always create an error on gcc versions older than 4.4) and then call
+ * an undefined function with the error attribute (should always create an
+ * error on gcc 4.3 and later).  If for some reason, neither creates a
+ * compile-time error, we'll still have a link-time error, which is harder to
+ * track down.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+#define BUILD_BUG_ON(condition) \
+	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
+#endif
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
+
+#endif	/* __CHECKER__ */
+
+#endif	/* _LINUX_BUILD_BUG_H */
-- 
GitLab


From 287f3ca563d8ba0ede4ac0cec84218a1ea5e848f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Mon, 10 Jul 2017 15:51:10 -0700
Subject: [PATCH 1096/1958] ARM: fix rd_size declaration

The global variable 'rd_size' is declared as 'int' in source file
arch/arm/kernel/atags_parse.c and as 'unsigned long' in
drivers/block/brd.c.  Fix this inconsistency.

Additionally, remove the declarations of rd_image_start, rd_prompt and
rd_doload from parse_tag_ramdisk() since these duplicate existing
declarations in <linux/initrd.h>.

Link: http://lkml.kernel.org/r/20170627065024.12347-1-bart.vanassche@wdc.com
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Yan <yanaijie@huawei.com>
Cc: Zhaohongjiang <zhaohongjiang@huawei.com>
Cc: Miao Xie <miaoxie@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/atags_parse.c | 3 +--
 drivers/block/brd.c           | 1 +
 include/linux/initrd.h        | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 68c6ae0b9e4ca..98fbfd235ac87 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/initrd.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/root_dev.h>
@@ -91,8 +92,6 @@ __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext);
 #ifdef CONFIG_BLK_DEV_RAM
 static int __init parse_tag_ramdisk(const struct tag *tag)
 {
-	extern int rd_size, rd_image_start, rd_prompt, rd_doload;
-
 	rd_image_start = tag->u.ramdisk.start;
 	rd_doload = (tag->u.ramdisk.flags & 1) == 0;
 	rd_prompt = (tag->u.ramdisk.flags & 2) == 0;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 17723fd50a53b..104b71c0490dd 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/major.h>
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 55289d261b4fa..bc67b767f9ce5 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -10,6 +10,9 @@ extern int rd_prompt;
 /* starting block # of image */
 extern int rd_image_start;
 
+/* size of a single RAM disk */
+extern unsigned long rd_size;
+
 /* 1 if it is not an error if initrd_start < memory_start */
 extern int initrd_below_start_ok;
 
-- 
GitLab


From 9dcdcea11491f6eee65bd1b352293ca01e4b7997 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 10 Jul 2017 15:51:14 -0700
Subject: [PATCH 1097/1958] kernel/ksysfs.c: constify attribute_group
 structures.

attribute_groups are not supposed to change at runtime.  All functions
working with attribute_groups provided by <linux/sysfs.h> work with
const attribute_group.  So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   1120	    544	     16	   1680	    690	kernel/ksysfs.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   1160	    480	     16	   1656	    678	kernel/ksysfs.o

Link: http://lkml.kernel.org/r/aa224b3cc923fdbb3edd0c41b2c639c85408c9e8.1498737347.git.arvind.yadav.cs@gmail.com
Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Dave Young <dyoung@redhat.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Petr Tesarik <ptesarik@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/ksysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 23cd706512383..df1a9aa602a08 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -234,7 +234,7 @@ static struct attribute * kernel_attrs[] = {
 	NULL
 };
 
-static struct attribute_group kernel_attr_group = {
+static const struct attribute_group kernel_attr_group = {
 	.attrs = kernel_attrs,
 };
 
-- 
GitLab


From b7b2562f7252878e18de60c24f320052076f9de8 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Mon, 10 Jul 2017 15:51:17 -0700
Subject: [PATCH 1098/1958] kernel/groups.c: use sort library function

setgroups is not exactly a hot path, so we might as well use the library
function instead of open-coding the sorting.  Saves ~150 bytes.

Link: http://lkml.kernel.org/r/1497301378-22739-1-git-send-email-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/groups.c | 35 +++++++++++------------------------
 1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/kernel/groups.c b/kernel/groups.c
index d09727692a2af..434f6665f187d 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -5,6 +5,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/security.h>
+#include <linux/sort.h>
 #include <linux/syscalls.h>
 #include <linux/user_namespace.h>
 #include <linux/vmalloc.h>
@@ -76,32 +77,18 @@ static int groups_from_user(struct group_info *group_info,
 	return 0;
 }
 
-/* a simple Shell sort */
+static int gid_cmp(const void *_a, const void *_b)
+{
+	kgid_t a = *(kgid_t *)_a;
+	kgid_t b = *(kgid_t *)_b;
+
+	return gid_gt(a, b) - gid_lt(a, b);
+}
+
 static void groups_sort(struct group_info *group_info)
 {
-	int base, max, stride;
-	int gidsetsize = group_info->ngroups;
-
-	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
-		; /* nothing */
-	stride /= 3;
-
-	while (stride) {
-		max = gidsetsize - stride;
-		for (base = 0; base < max; base++) {
-			int left = base;
-			int right = left + stride;
-			kgid_t tmp = group_info->gid[right];
-
-			while (left >= 0 && gid_gt(group_info->gid[left], tmp)) {
-				group_info->gid[right] = group_info->gid[left];
-				right = left;
-				left -= stride;
-			}
-			group_info->gid[right] = tmp;
-		}
-		stride /= 3;
-	}
+	sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
+	     gid_cmp, NULL);
 }
 
 /* a simple bsearch */
-- 
GitLab


From 63b23e2cbc8e80de3e40184ecb2c3bfb705776fa Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 15:51:20 -0700
Subject: [PATCH 1099/1958] kernel/kallsyms.c: replace all_var with
 IS_ENABLED(CONFIG_KALLSYMS_ALL)

'all_var' looks like a variable, but is actually a macro.  Use
IS_ENABLED(CONFIG_KALLSYMS_ALL) for clarification.

Link: http://lkml.kernel.org/r/1497577591-3434-1-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kallsyms.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6a3b249a2ae10..127e7cfafa552 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -28,12 +28,6 @@
 
 #include <asm/sections.h>
 
-#ifdef CONFIG_KALLSYMS_ALL
-#define all_var 1
-#else
-#define all_var 0
-#endif
-
 /*
  * These will be re-linked against their real values
  * during the second link stage.
@@ -82,7 +76,7 @@ static inline int is_kernel(unsigned long addr)
 
 static int is_ksym_addr(unsigned long addr)
 {
-	if (all_var)
+	if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
 		return is_kernel(addr);
 
 	return is_kernel_text(addr) || is_kernel_inittext(addr);
@@ -280,7 +274,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	if (!symbol_end) {
 		if (is_kernel_inittext(addr))
 			symbol_end = (unsigned long)_einittext;
-		else if (all_var)
+		else if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
 			symbol_end = (unsigned long)_end;
 		else
 			symbol_end = (unsigned long)_etext;
-- 
GitLab


From b689d4a72fae7a8c4f4d097ef2f6e56643933bfd Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 10 Jul 2017 15:51:23 -0700
Subject: [PATCH 1100/1958] MAINTAINERS: give proc sysctl some maintainer love

We poke at proc sysctl enough that really we should declare it
maintained.  We'll just be Cc'd and sending updates / ACK'ing changes
through akpm's tree.

Link: http://lkml.kernel.org/r/20170524231305.8649-1-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ad8107b47dba..d8eab9322ba28 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10559,6 +10559,17 @@ W:	http://wireless.kernel.org/en/users/Drivers/p54
 S:	Obsolete
 F:	drivers/net/wireless/intersil/prism54/
 
+PROC SYSCTL
+M:	"Luis R. Rodriguez" <mcgrof@kernel.org>
+M:	Kees Cook <keescook@chromium.org>
+L:	linux-kernel@vger.kernel.org
+L:	linux-fsdevel@vger.kernel.org
+S:	Maintained
+F:	fs/proc/proc_sysctl.c
+F:	include/linux/sysctl.h
+F:	kernel/sysctl.c
+F:	tools/testing/selftests/sysctl/
+
 PS3 NETWORK SUPPORT
 M:	Geoff Levand <geoff@infradead.org>
 L:	netdev@vger.kernel.org
-- 
GitLab


From 3cc78125a081bb79eb38f3e9a585e5a81bb81cb1 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 10 Jul 2017 15:51:26 -0700
Subject: [PATCH 1101/1958] lib/test_bitmap.c: add optimisation tests

Patch series "Bitmap optimisations", v2.

These three bitmap patches use more efficient specialisations when the
compiler can figure out that it's safe to do so.  Thanks to Rasmus's
eagle eyes, a nasty bug in v1 was avoided, and I've added a test case
which would have caught it.

This patch (of 4):

This version of the test is actually a no-op; the next patch will enable
it.

Link: http://lkml.kernel.org/r/20170628153221.11322-2-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index e2cbd43d193cf..252d3bddbe7de 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -333,10 +333,42 @@ static void __init test_bitmap_u32_array_conversions(void)
 	}
 }
 
+#define __bitmap_set(a, b, c)	bitmap_set(a, b, c)
+#define __bitmap_clear(a, b, c)	bitmap_clear(a, b, c)
+
+static void noinline __init test_mem_optimisations(void)
+{
+	DECLARE_BITMAP(bmap1, 1024);
+	DECLARE_BITMAP(bmap2, 1024);
+	unsigned int start, nbits;
+
+	for (start = 0; start < 1024; start += 8) {
+		memset(bmap1, 0x5a, sizeof(bmap1));
+		memset(bmap2, 0x5a, sizeof(bmap2));
+		for (nbits = 0; nbits < 1024 - start; nbits += 8) {
+			bitmap_set(bmap1, start, nbits);
+			__bitmap_set(bmap2, start, nbits);
+			if (!bitmap_equal(bmap1, bmap2, 1024))
+				printk("set not equal %d %d\n", start, nbits);
+			if (!__bitmap_equal(bmap1, bmap2, 1024))
+				printk("set not __equal %d %d\n", start, nbits);
+
+			bitmap_clear(bmap1, start, nbits);
+			__bitmap_clear(bmap2, start, nbits);
+			if (!bitmap_equal(bmap1, bmap2, 1024))
+				printk("clear not equal %d %d\n", start, nbits);
+			if (!__bitmap_equal(bmap1, bmap2, 1024))
+				printk("clear not __equal %d %d\n", start,
+									nbits);
+		}
+	}
+}
+
 static int __init test_bitmap_init(void)
 {
 	test_zero_fill_copy();
 	test_bitmap_u32_array_conversions();
+	test_mem_optimisations();
 
 	if (failed_tests == 0)
 		pr_info("all %u tests passed\n", total_tests);
-- 
GitLab


From e5af323c9badd5dc09af7ccf9d45616ebffc623c Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 10 Jul 2017 15:51:29 -0700
Subject: [PATCH 1102/1958] bitmap: optimise bitmap_set and bitmap_clear of a
 single bit

We have eight users calling bitmap_clear for a single bit and seventeen
calling bitmap_set for a single bit.  Rather than fix all of them to
call __clear_bit or __set_bit, turn bitmap_clear and bitmap_set into
inline functions and make this special case efficient.

Link: http://lkml.kernel.org/r/20170628153221.11322-3-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 23 ++++++++++++++++++++---
 lib/bitmap.c           |  8 ++++----
 lib/test_bitmap.c      |  3 ---
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3b77588a93602..4e0f0c8167af7 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -112,9 +112,8 @@ extern int __bitmap_intersects(const unsigned long *bitmap1,
 extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-
-extern void bitmap_set(unsigned long *map, unsigned int start, int len);
-extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_clear(unsigned long *map, unsigned int start, int len);
 
 extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
 						    unsigned long size,
@@ -315,6 +314,24 @@ static __always_inline int bitmap_weight(const unsigned long *src, unsigned int
 	return __bitmap_weight(src, nbits);
 }
 
+static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__set_bit(start, map);
+	else
+		__bitmap_set(map, start, nbits);
+}
+
+static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
 				unsigned int shift, int nbits)
 {
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 08c6ef3a2b6fc..9a532805364b6 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -251,7 +251,7 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, unsigned int start, int len)
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -270,9 +270,9 @@ void bitmap_set(unsigned long *map, unsigned int start, int len)
 		*p |= mask_to_set;
 	}
 }
-EXPORT_SYMBOL(bitmap_set);
+EXPORT_SYMBOL(__bitmap_set);
 
-void bitmap_clear(unsigned long *map, unsigned int start, int len)
+void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -291,7 +291,7 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len)
 		*p &= ~mask_to_clear;
 	}
 }
-EXPORT_SYMBOL(bitmap_clear);
+EXPORT_SYMBOL(__bitmap_clear);
 
 /**
  * bitmap_find_next_zero_area_off - find a contiguous aligned zero area
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 252d3bddbe7de..2526a2975c51b 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -333,9 +333,6 @@ static void __init test_bitmap_u32_array_conversions(void)
 	}
 }
 
-#define __bitmap_set(a, b, c)	bitmap_set(a, b, c)
-#define __bitmap_clear(a, b, c)	bitmap_clear(a, b, c)
-
 static void noinline __init test_mem_optimisations(void)
 {
 	DECLARE_BITMAP(bmap1, 1024);
-- 
GitLab


From 2a98dc028f911a7c59c87d11d4eed6626be1605b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 10 Jul 2017 15:51:32 -0700
Subject: [PATCH 1103/1958] include/linux/bitmap.h: turn bitmap_set and
 bitmap_clear into memset when possible

Several callers have constant 'start' and an 'nbits' that is a multiple
of 8, so we can turn them into calls to memset.  We don't need the
entirety of 'start' and 'nbits' to be constant, we just need to know
whether they're divisible by 8.

Link: http://lkml.kernel.org/r/20170628153221.11322-4-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 4e0f0c8167af7..c04c9d155e59c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -319,6 +319,9 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
 {
 	if (__builtin_constant_p(nbits) && nbits == 1)
 		__set_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0xff, nbits / 8);
 	else
 		__bitmap_set(map, start, nbits);
 }
@@ -328,6 +331,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
 {
 	if (__builtin_constant_p(nbits) && nbits == 1)
 		__clear_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
 	else
 		__bitmap_clear(map, start, nbits);
 }
-- 
GitLab


From 2c6deb01525ac11cc03c44fe31e3f45ce2cadaf9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 10 Jul 2017 15:51:35 -0700
Subject: [PATCH 1104/1958] bitmap: use memcmp optimisation in more situations

Commit 7dd968163f7c ("bitmap: bitmap_equal memcmp optimization") was
rather more restrictive than necessary; we can use memcmp() to implement
bitmap_equal() as long as the number of bits can be proved to be a
multiple of 8.  And architectures other than s390 may be able to make
good use of this optimisation.

[arnd@arndb.de: fix build: add a memcmp() declaration]
  Link: http://lkml.kernel.org/r/20170630153908.3439707-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170628153221.11322-5-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/boot/compressed/decompress.c | 1 +
 include/linux/bitmap.h                | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index ea7832702a8f4..f3a4bedd1afc1 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -33,6 +33,7 @@ extern void error(char *);
 /* Not needed, but used in some headers pulled in by decompressors */
 extern char * strstr(const char * s1, const char *s2);
 extern size_t strlen(const char *s);
+extern int memcmp(const void *cs, const void *ct, size_t count);
 
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index c04c9d155e59c..5797ca6fdfe2e 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -266,10 +266,8 @@ static inline int bitmap_equal(const unsigned long *src1,
 {
 	if (small_const_nbits(nbits))
 		return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
-#ifdef CONFIG_S390
-	if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0)
+	if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
 		return !memcmp(src1, src2, nbits / 8);
-#endif
 	return __bitmap_equal(src1, src2, nbits);
 }
 
-- 
GitLab


From 512750ef8b06290a55d749239f956f9c21d7daca Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 10 Jul 2017 15:51:38 -0700
Subject: [PATCH 1105/1958] lib/kstrtox.c: delete end-of-string test

Standard "while (*s)" test is unnecessary because NUL won't pass
valid-digit test anyway.  Save one branch per parsed character.

Link: http://lkml.kernel.org/r/20170514193756.GA32563@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/kstrtox.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index bf85e05ce8581..90013f4841c77 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -51,7 +51,7 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 
 	res = 0;
 	rv = 0;
-	while (*s) {
+	while (1) {
 		unsigned int val;
 
 		if ('0' <= *s && *s <= '9')
-- 
GitLab


From be5f3c7774a158c5bd08de22d54b0612f954dfa8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 10 Jul 2017 15:51:41 -0700
Subject: [PATCH 1106/1958] lib/kstrtox.c: use "unsigned int" more

gcc does generates stupid code sign extending data back and forth.  Help
by using "unsigned int".

	add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-61 (-61)
	function                                     old     new   delta
	_parse_integer                               128     123      -5

It _still_ does generate useless MOVSX but I don't know how to delete it:

0000000000000070 <_parse_integer>:
			...
  a0:   89 c2                   mov    edx,eax
  a2:   83 e8 30                sub    eax,0x30
  a5:   83 f8 09                cmp    eax,0x9
  a8:   76 11                   jbe    bb <_parse_integer+0x4b>
  aa:   83 ca 20                or     edx,0x20
  ad:   0f be c2      ===>      movsx  eax,dl         <===
			useless
  b0:   8d 50 9f                lea    edx,[rax-0x61]
  b3:   83 fa 05                cmp    edx,0x5

Patch also helps on embedded archs which generally only like "int".  On
arm "and 0xff" is generated which is waste because all values used in
comparisons are positive.

Link: http://lkml.kernel.org/r/20170514194720.GB32563@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/kstrtox.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index 90013f4841c77..720144075c1ea 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -52,12 +52,14 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 	res = 0;
 	rv = 0;
 	while (1) {
+		unsigned int c = *s;
+		unsigned int lc = c | 0x20; /* don't tolower() this line */
 		unsigned int val;
 
-		if ('0' <= *s && *s <= '9')
-			val = *s - '0';
-		else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
-			val = _tolower(*s) - 'a' + 10;
+		if ('0' <= c && c <= '9')
+			val = c - '0';
+		else if ('a' <= lc && lc <= 'f')
+			val = lc - 'a' + 10;
 		else
 			break;
 
-- 
GitLab


From 0f789b67647205b77dee56fcc27a7d8de3fcd52e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 10 Jul 2017 15:51:43 -0700
Subject: [PATCH 1107/1958] lib/interval_tree_test.c: allow the module to be
 compiled-in

Patch series "lib/interval_tree_test: some debugging improvements".

Here are some patches that update the interval_tree_test module allowing
users to pass finer grained options to run the actual test.

This patch (of 4):

It is a tristate after all, and also serves well for quick debugging.

Link: http://lkml.kernel.org/r/20170518174936.20265-2-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ca9460f049b82..e20fc079bebd6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1594,7 +1594,7 @@ config RBTREE_TEST
 
 config INTERVAL_TREE_TEST
 	tristate "Interval tree test"
-	depends on m && DEBUG_KERNEL
+	depends on DEBUG_KERNEL
 	select INTERVAL_TREE
 	help
 	  A benchmark measuring the performance of the interval tree library
-- 
GitLab


From a54dae0338b7f01eb0f9c7571fb9b74f791d1c6b Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 10 Jul 2017 15:51:46 -0700
Subject: [PATCH 1108/1958] lib/interval_tree_test.c: make test options module
 parameters

Allows for more flexible debugging.

Link: http://lkml.kernel.org/r/20170518174936.20265-3-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/interval_tree_test.c | 57 ++++++++++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index 245900b98c8e4..1093f0496d5e5 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -1,16 +1,25 @@
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/interval_tree.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <asm/timex.h>
 
-#define NODES        100
-#define PERF_LOOPS   100000
-#define SEARCHES     100
-#define SEARCH_LOOPS 10000
+#define __param(type, name, init, msg)		\
+	static type name = init;		\
+	module_param(name, type, 0444);		\
+	MODULE_PARM_DESC(name, msg);
+
+__param(int, nnodes, 100, "Number of nodes in the interval tree");
+__param(int, perf_loops, 100000, "Number of iterations modifying the tree");
+
+__param(int, nsearches, 100, "Number of searches to the interval tree");
+__param(int, search_loops, 10000, "Number of iterations searching the tree");
+
 
 static struct rb_root root = RB_ROOT;
-static struct interval_tree_node nodes[NODES];
-static u32 queries[SEARCHES];
+static struct interval_tree_node *nodes = NULL;
+static u32 *queries = NULL;
 
 static struct rnd_state rnd;
 
@@ -29,7 +38,8 @@ search(unsigned long query, struct rb_root *root)
 static void init(void)
 {
 	int i;
-	for (i = 0; i < NODES; i++) {
+
+	for (i = 0; i < nnodes; i++) {
 		u32 a = prandom_u32_state(&rnd);
 		u32 b = prandom_u32_state(&rnd);
 		if (a <= b) {
@@ -40,7 +50,7 @@ static void init(void)
 			nodes[i].last = a;
 		}
 	}
-	for (i = 0; i < SEARCHES; i++)
+	for (i = 0; i < nsearches; i++)
 		queries[i] = prandom_u32_state(&rnd);
 }
 
@@ -50,6 +60,16 @@ static int interval_tree_test_init(void)
 	unsigned long results;
 	cycles_t time1, time2, time;
 
+	nodes = kmalloc(nnodes * sizeof(struct interval_tree_node), GFP_KERNEL);
+	if (!nodes)
+		return -ENOMEM;
+
+	queries = kmalloc(nsearches * sizeof(int), GFP_KERNEL);
+	if (!queries) {
+		kfree(nodes);
+		return -ENOMEM;
+	}
+
 	printk(KERN_ALERT "interval tree insert/remove");
 
 	prandom_seed_state(&rnd, 3141592653589793238ULL);
@@ -57,39 +77,42 @@ static int interval_tree_test_init(void)
 
 	time1 = get_cycles();
 
-	for (i = 0; i < PERF_LOOPS; i++) {
-		for (j = 0; j < NODES; j++)
+	for (i = 0; i < perf_loops; i++) {
+		for (j = 0; j < nnodes; j++)
 			interval_tree_insert(nodes + j, &root);
-		for (j = 0; j < NODES; j++)
+		for (j = 0; j < nnodes; j++)
 			interval_tree_remove(nodes + j, &root);
 	}
 
 	time2 = get_cycles();
 	time = time2 - time1;
 
-	time = div_u64(time, PERF_LOOPS);
+	time = div_u64(time, perf_loops);
 	printk(" -> %llu cycles\n", (unsigned long long)time);
 
 	printk(KERN_ALERT "interval tree search");
 
-	for (j = 0; j < NODES; j++)
+	for (j = 0; j < nnodes; j++)
 		interval_tree_insert(nodes + j, &root);
 
 	time1 = get_cycles();
 
 	results = 0;
-	for (i = 0; i < SEARCH_LOOPS; i++)
-		for (j = 0; j < SEARCHES; j++)
+	for (i = 0; i < search_loops; i++)
+		for (j = 0; j < nsearches; j++)
 			results += search(queries[j], &root);
 
 	time2 = get_cycles();
 	time = time2 - time1;
 
-	time = div_u64(time, SEARCH_LOOPS);
-	results = div_u64(results, SEARCH_LOOPS);
+	time = div_u64(time, search_loops);
+	results = div_u64(results, search_loops);
 	printk(" -> %llu cycles (%lu results)\n",
 	       (unsigned long long)time, results);
 
+	kfree(queries);
+	kfree(nodes);
+
 	return -EAGAIN; /* Fail will directly unload the module */
 }
 
-- 
GitLab


From a8ec14d4f6aa8e245efacc992c8ee6ea0464ce2a Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 10 Jul 2017 15:51:49 -0700
Subject: [PATCH 1109/1958] lib/interval_tree_test.c: allow users to limit
 scope of endpoint

Add a 'max_endpoint' parameter such that users may easily limit the size
of the intervals that are randomly generated.

Link: http://lkml.kernel.org/r/20170518174936.20265-4-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/interval_tree_test.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index 1093f0496d5e5..0fef6364a9580 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -16,6 +16,7 @@ __param(int, perf_loops, 100000, "Number of iterations modifying the tree");
 __param(int, nsearches, 100, "Number of searches to the interval tree");
 __param(int, search_loops, 10000, "Number of iterations searching the tree");
 
+__param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint");
 
 static struct rb_root root = RB_ROOT;
 static struct interval_tree_node *nodes = NULL;
@@ -40,18 +41,20 @@ static void init(void)
 	int i;
 
 	for (i = 0; i < nnodes; i++) {
-		u32 a = prandom_u32_state(&rnd);
-		u32 b = prandom_u32_state(&rnd);
-		if (a <= b) {
-			nodes[i].start = a;
-			nodes[i].last = b;
-		} else {
-			nodes[i].start = b;
-			nodes[i].last = a;
-		}
+		u32 b = (prandom_u32_state(&rnd) >> 4) % max_endpoint;
+		u32 a = (prandom_u32_state(&rnd) >> 4) % b;
+
+		nodes[i].start = a;
+		nodes[i].last = b;
 	}
+
+	/*
+	 * Limit the search scope to what the user defined.
+	 * Otherwise we are merely measuring empty walks,
+	 * which is pointless.
+	 */
 	for (i = 0; i < nsearches; i++)
-		queries[i] = prandom_u32_state(&rnd);
+		queries[i] = (prandom_u32_state(&rnd) >> 4) % max_endpoint;
 }
 
 static int interval_tree_test_init(void)
-- 
GitLab


From c46ecce431ebe6b1a9551d1f530eb432dae5c39b Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 10 Jul 2017 15:51:52 -0700
Subject: [PATCH 1110/1958] lib/interval_tree_test.c: allow full tree search

...  such that a user can specify visiting all the nodes in the tree
(intersects with the world).  This is a nice opposite from the very
basic default query which is a single point.

Link: http://lkml.kernel.org/r/20170518174936.20265-5-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/interval_tree_test.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index 0fef6364a9580..df495fe814210 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -15,6 +15,7 @@ __param(int, perf_loops, 100000, "Number of iterations modifying the tree");
 
 __param(int, nsearches, 100, "Number of searches to the interval tree");
 __param(int, search_loops, 10000, "Number of iterations searching the tree");
+__param(bool, search_all, false, "Searches will iterate all nodes in the tree");
 
 __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint");
 
@@ -25,13 +26,13 @@ static u32 *queries = NULL;
 static struct rnd_state rnd;
 
 static inline unsigned long
-search(unsigned long query, struct rb_root *root)
+search(struct rb_root *root, unsigned long start, unsigned long last)
 {
 	struct interval_tree_node *node;
 	unsigned long results = 0;
 
-	for (node = interval_tree_iter_first(root, query, query); node;
-	     node = interval_tree_iter_next(node, query, query))
+	for (node = interval_tree_iter_first(root, start, last); node;
+	     node = interval_tree_iter_next(node, start, last))
 		results++;
 	return results;
 }
@@ -102,8 +103,12 @@ static int interval_tree_test_init(void)
 
 	results = 0;
 	for (i = 0; i < search_loops; i++)
-		for (j = 0; j < nsearches; j++)
-			results += search(queries[j], &root);
+		for (j = 0; j < nsearches; j++) {
+			unsigned long start = search_all ? 0 : queries[j];
+			unsigned long last = search_all ? max_endpoint : queries[j];
+
+			results += search(&root, start, last);
+		}
 
 	time2 = get_cycles();
 	time = time2 - time1;
-- 
GitLab


From 12e8fd6fd380261fd200d2e8f7a519ade73ea05b Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 10 Jul 2017 15:51:55 -0700
Subject: [PATCH 1111/1958] lib/rhashtable.c: use kvzalloc() in
 bucket_table_alloc() when possible

bucket_table_alloc() can be currently called with GFP_KERNEL or
GFP_ATOMIC.  For the former we basically have an open coded kvzalloc()
while the later only uses kzalloc().  Let's simplify the code a bit by
the dropping the open coded path and replace it with kvzalloc().

Link: http://lkml.kernel.org/r/20170531155145.17111-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/rhashtable.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index d9e7274a04cd9..42466c167257c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -211,11 +211,10 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	int i;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
-	    gfp != GFP_KERNEL)
+	if (gfp != GFP_KERNEL)
 		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
-	if (tbl == NULL && gfp == GFP_KERNEL)
-		tbl = vzalloc(size);
+	else
+		tbl = kvzalloc(size, gfp);
 
 	size = nbuckets;
 
-- 
GitLab


From a94c33dd1f677d16c4f1a162b4b3e9eba1b07c24 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Mon, 10 Jul 2017 15:51:58 -0700
Subject: [PATCH 1112/1958] lib/extable.c: use bsearch() library function in
 search_extable()

[thomas@m3y3r.de: v3: fix arch specific implementations]
  Link: http://lkml.kernel.org/r/1497890858.12931.7.camel@m3y3r.de
Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/kernel/module.c |  3 ++-
 arch/mips/kernel/traps.c  |  3 ++-
 arch/sh/mm/extable_64.c   | 34 +++++++++++++++++---------------
 arch/sparc/mm/extable.c   | 28 +++++++++++++-------------
 include/linux/extable.h   |  5 +++--
 kernel/extable.c          |  3 ++-
 kernel/module.c           |  2 +-
 lib/extable.c             | 41 ++++++++++++++++++++-------------------
 8 files changed, 63 insertions(+), 56 deletions(-)

diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 94627a3a6a0d9..50c020c47e546 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -317,7 +317,8 @@ const struct exception_table_entry *search_module_dbetables(unsigned long addr)
 
 	spin_lock_irqsave(&dbe_lock, flags);
 	list_for_each_entry(dbe, &dbe_list, dbe_list) {
-		e = search_extable(dbe->dbe_start, dbe->dbe_end - 1, addr);
+		e = search_extable(dbe->dbe_start,
+				   dbe->dbe_end - dbe->dbe_start, addr);
 		if (e)
 			break;
 	}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 38dfa27730ff1..b68b4d0726d3e 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -429,7 +429,8 @@ static const struct exception_table_entry *search_dbe_tables(unsigned long addr)
 {
 	const struct exception_table_entry *e;
 
-	e = search_extable(__start___dbe_table, __stop___dbe_table - 1, addr);
+	e = search_extable(__start___dbe_table,
+			   __stop___dbe_table - __start___dbe_table, addr);
 	if (!e)
 		e = search_module_dbetables(addr);
 	return e;
diff --git a/arch/sh/mm/extable_64.c b/arch/sh/mm/extable_64.c
index b90cdfad2c78d..7a3b4d33d2e7d 100644
--- a/arch/sh/mm/extable_64.c
+++ b/arch/sh/mm/extable_64.c
@@ -10,6 +10,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/bsearch.h>
 #include <linux/rwsem.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
@@ -40,10 +41,23 @@ static const struct exception_table_entry *check_exception_ranges(unsigned long
 	return NULL;
 }
 
+static int cmp_ex_search(const void *key, const void *elt)
+{
+	const struct exception_table_entry *_elt = elt;
+	unsigned long _key = *(unsigned long *)key;
+
+	/* avoid overflow */
+	if (_key > _elt->insn)
+		return 1;
+	if (_key < _elt->insn)
+		return -1;
+	return 0;
+}
+
 /* Simple binary search */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-		 const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+		 const size_t num,
 		 unsigned long value)
 {
 	const struct exception_table_entry *mid;
@@ -52,20 +66,8 @@ search_extable(const struct exception_table_entry *first,
 	if (mid)
 		return mid;
 
-        while (first <= last) {
-		long diff;
-
-		mid = (last - first) / 2 + first;
-		diff = mid->insn - value;
-                if (diff == 0)
-                        return mid;
-                else if (diff < 0)
-                        first = mid+1;
-                else
-                        last = mid-1;
-        }
-
-        return NULL;
+	return bsearch(&value, base, num,
+		       sizeof(struct exception_table_entry), cmp_ex_search);
 }
 
 int fixup_exception(struct pt_regs *regs)
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
index db214e9931d92..2422511dc8c5f 100644
--- a/arch/sparc/mm/extable.c
+++ b/arch/sparc/mm/extable.c
@@ -13,11 +13,11 @@ void sort_extable(struct exception_table_entry *start,
 
 /* Caller knows they are in a range if ret->fixup == 0 */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *start,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value)
 {
-	const struct exception_table_entry *walk;
+	int i;
 
 	/* Single insn entries are encoded as:
 	 *	word 1:	insn address
@@ -37,30 +37,30 @@ search_extable(const struct exception_table_entry *start,
 	 */
 
 	/* 1. Try to find an exact match. */
-	for (walk = start; walk <= last; walk++) {
-		if (walk->fixup == 0) {
+	for (i = 0; i < num; i++) {
+		if (base[i].fixup == 0) {
 			/* A range entry, skip both parts. */
-			walk++;
+			i++;
 			continue;
 		}
 
 		/* A deleted entry; see trim_init_extable */
-		if (walk->fixup == -1)
+		if (base[i].fixup == -1)
 			continue;
 
-		if (walk->insn == value)
-			return walk;
+		if (base[i].insn == value)
+			return &base[i];
 	}
 
 	/* 2. Try to find a range match. */
-	for (walk = start; walk <= (last - 1); walk++) {
-		if (walk->fixup)
+	for (i = 0; i < (num - 1); i++) {
+		if (base[i].fixup)
 			continue;
 
-		if (walk[0].insn <= value && walk[1].insn > value)
-			return walk;
+		if (base[i].insn <= value && base[i + 1].insn > value)
+			return &base[i];
 
-		walk++;
+		i++;
 	}
 
         return NULL;
diff --git a/include/linux/extable.h b/include/linux/extable.h
index 7effea4b257d9..28addad0dda7a 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h
@@ -2,13 +2,14 @@
 #define _LINUX_EXTABLE_H
 
 #include <linux/stddef.h>	/* for NULL */
+#include <linux/types.h>
 
 struct module;
 struct exception_table_entry;
 
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value);
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish);
diff --git a/kernel/extable.c b/kernel/extable.c
index 223df4a328a49..38c2412401a1b 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -55,7 +55,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 {
 	const struct exception_table_entry *e;
 
-	e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
+	e = search_extable(__start___ex_table,
+			   __stop___ex_table - __start___ex_table, addr);
 	if (!e)
 		e = search_module_extables(addr);
 	return e;
diff --git a/kernel/module.c b/kernel/module.c
index b3dbdde82e803..b0f92a3651403 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4196,7 +4196,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 		goto out;
 
 	e = search_extable(mod->extable,
-			   mod->extable + mod->num_exentries - 1,
+			   mod->num_exentries,
 			   addr);
 out:
 	preempt_enable();
diff --git a/lib/extable.c b/lib/extable.c
index 62968daa66a94..f54996fdd0b88 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/bsearch.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sort.h>
@@ -51,7 +52,7 @@ static void swap_ex(void *a, void *b, int size)
  * This is used both for the kernel exception table and for
  * the exception tables of modules that get loaded.
  */
-static int cmp_ex(const void *a, const void *b)
+static int cmp_ex_sort(const void *a, const void *b)
 {
 	const struct exception_table_entry *x = a, *y = b;
 
@@ -67,7 +68,7 @@ void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish)
 {
 	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, swap_ex);
+	     cmp_ex_sort, swap_ex);
 }
 
 #ifdef CONFIG_MODULES
@@ -93,6 +94,20 @@ void trim_init_extable(struct module *m)
 #endif /* !ARCH_HAS_SORT_EXTABLE */
 
 #ifndef ARCH_HAS_SEARCH_EXTABLE
+
+static int cmp_ex_search(const void *key, const void *elt)
+{
+	const struct exception_table_entry *_elt = elt;
+	unsigned long _key = *(unsigned long *)key;
+
+	/* avoid overflow */
+	if (_key > ex_to_insn(_elt))
+		return 1;
+	if (_key < ex_to_insn(_elt))
+		return -1;
+	return 0;
+}
+
 /*
  * Search one exception table for an entry corresponding to the
  * given instruction address, and return the address of the entry,
@@ -101,25 +116,11 @@ void trim_init_extable(struct module *m)
  * already sorted.
  */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value)
 {
-	while (first <= last) {
-		const struct exception_table_entry *mid;
-
-		mid = ((last - first) >> 1) + first;
-		/*
-		 * careful, the distance between value and insn
-		 * can be larger than MAX_LONG:
-		 */
-		if (ex_to_insn(mid) < value)
-			first = mid + 1;
-		else if (ex_to_insn(mid) > value)
-			last = mid - 1;
-		else
-			return mid;
-	}
-	return NULL;
+	return bsearch(&value, base, num,
+		       sizeof(struct exception_table_entry), cmp_ex_search);
 }
 #endif
-- 
GitLab


From 166a0f780a8fdb67f232c85e9905ba84f7247da9 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Mon, 10 Jul 2017 15:52:01 -0700
Subject: [PATCH 1113/1958] lib/bsearch.c: micro-optimize pivot position
 calculation

There is a slightly faster way (in terms of the number of instructions
being used) to calculate the position of a middle element, preserving
integer overflow safeness.

./scripts/bloat-o-meter lib/bsearch.o.old lib/bsearch.o.new
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-24 (-24)
function                                     old     new   delta
bsearch                                      122      98     -24

TEST

INT array of size 100001, elements [0..100000]. gcc 7.1, Os, x86_64.

a) bsearch() of existing key "100001 - 2":

BASE
====

$ perf stat ./a.out

 Performance counter stats for './a.out':

        619.445196      task-clock:u (msec)       #    0.999 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               133      page-faults:u             #    0.215 K/sec
     1,949,517,279      cycles:u                  #    3.147 GHz                      (83.06%)
       181,017,938      stalled-cycles-frontend:u #    9.29% frontend cycles idle     (83.05%)
        82,959,265      stalled-cycles-backend:u  #    4.26% backend cycles idle      (67.02%)
     4,355,706,383      instructions:u            #    2.23  insn per cycle
                                                  #    0.04  stalled cycles per insn  (83.54%)
     1,051,539,242      branches:u                # 1697.550 M/sec                    (83.54%)
        15,263,381      branch-misses:u           #    1.45% of all branches          (83.43%)

       0.620082548 seconds time elapsed

PATCHED
=======

$ perf stat ./a.out

 Performance counter stats for './a.out':

        475.097316      task-clock:u (msec)       #    0.999 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               135      page-faults:u             #    0.284 K/sec
     1,487,467,717      cycles:u                  #    3.131 GHz                      (82.95%)
       186,537,162      stalled-cycles-frontend:u #   12.54% frontend cycles idle     (82.93%)
        28,797,869      stalled-cycles-backend:u  #    1.94% backend cycles idle      (67.10%)
     3,807,564,203      instructions:u            #    2.56  insn per cycle
                                                  #    0.05  stalled cycles per insn  (83.57%)
     1,049,344,291      branches:u                # 2208.693 M/sec                    (83.60%)
             5,485      branch-misses:u           #    0.00% of all branches          (83.58%)

       0.475760235 seconds time elapsed

b) bsearch() of un-existing key "100001 + 2":

BASE
====

$ perf stat ./a.out

 Performance counter stats for './a.out':

        499.244480      task-clock:u (msec)       #    0.999 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               132      page-faults:u             #    0.264 K/sec
     1,571,194,855      cycles:u                  #    3.147 GHz                      (83.18%)
        13,450,980      stalled-cycles-frontend:u #    0.86% frontend cycles idle     (83.18%)
        21,256,072      stalled-cycles-backend:u  #    1.35% backend cycles idle      (66.78%)
     4,171,197,909      instructions:u            #    2.65  insn per cycle
                                                  #    0.01  stalled cycles per insn  (83.68%)
     1,009,175,281      branches:u                # 2021.405 M/sec                    (83.79%)
             3,122      branch-misses:u           #    0.00% of all branches          (83.37%)

       0.499871144 seconds time elapsed

PATCHED
=======

$ perf stat ./a.out

 Performance counter stats for './a.out':

        399.023499      task-clock:u (msec)       #    0.998 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               134      page-faults:u             #    0.336 K/sec
     1,245,793,991      cycles:u                  #    3.122 GHz                      (83.39%)
        11,529,273      stalled-cycles-frontend:u #    0.93% frontend cycles idle     (83.46%)
        12,116,311      stalled-cycles-backend:u  #    0.97% backend cycles idle      (66.92%)
     3,679,710,005      instructions:u            #    2.95  insn per cycle
                                                  #    0.00  stalled cycles per insn  (83.47%)
     1,009,792,625      branches:u                # 2530.660 M/sec                    (83.46%)
             2,590      branch-misses:u           #    0.00% of all branches          (83.12%)

       0.399733539 seconds time elapsed

Link: http://lkml.kernel.org/r/20170607150457.5905-1-sergey.senozhatsky@gmail.com
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/bsearch.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/lib/bsearch.c b/lib/bsearch.c
index e33c179089dbf..18b445b010c35 100644
--- a/lib/bsearch.c
+++ b/lib/bsearch.c
@@ -33,19 +33,21 @@
 void *bsearch(const void *key, const void *base, size_t num, size_t size,
 	      int (*cmp)(const void *key, const void *elt))
 {
-	size_t start = 0, end = num;
+	const char *pivot;
 	int result;
 
-	while (start < end) {
-		size_t mid = start + (end - start) / 2;
+	while (num > 0) {
+		pivot = base + (num >> 1) * size;
+		result = cmp(key, pivot);
 
-		result = cmp(key, base + mid * size);
-		if (result < 0)
-			end = mid;
-		else if (result > 0)
-			start = mid + 1;
-		else
-			return (void *)base + mid * size;
+		if (result == 0)
+			return (void *)pivot;
+
+		if (result > 0) {
+			base = pivot + size;
+			num--;
+		}
+		num >>= 1;
 	}
 
 	return NULL;
-- 
GitLab


From fb0d0e088e194e7d966c9a1b3c58900664e5d7db Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 10 Jul 2017 15:52:04 -0700
Subject: [PATCH 1114/1958] checkpatch: improve the unnecessary OOM message
 test

Use the context around a patch to avoid missing some candidates.

Link: http://lkml.kernel.org/r/865e874fbae5decc331a849bd8d71c325db6bc80.1496343345.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3a225d078e75f..907e079e59fdf 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5311,7 +5311,7 @@ sub process {
 			my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
 #			print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
 
-			if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
+			if ($s =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|kmemdup|(?:dev_)?alloc_skb)/) {
 				WARN("OOM_MESSAGE",
 				     "Possible unnecessary 'out of memory' message\n" . $hereprev);
 			}
-- 
GitLab


From 628f91a28649d063a048629d9d15b3e5c4dcaa37 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 10 Jul 2017 15:52:07 -0700
Subject: [PATCH 1115/1958] checkpatch: warn when a MAINTAINERS entry isn't
 [A-Z]:\t

For consistency, MAINTAINERS entries should be an upper case letter,
then a colon, then a tab, then the value.

Warn when an entry doesn't have this form.  --fix it too.

Link: http://lkml.kernel.org/r/9aaaf03ec10adf3888b5e98dd2176b7fe9b5fad8.1496343345.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 907e079e59fdf..3cf05505e833e 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2776,6 +2776,17 @@ sub process {
 			#print "is_start<$is_start> is_end<$is_end> length<$length>\n";
 		}
 
+# check for MAINTAINERS entries that don't have the right form
+		if ($realfile =~ /^MAINTAINERS$/ &&
+		    $rawline =~ /^\+[A-Z]:/ &&
+		    $rawline !~ /^\+[A-Z]:\t\S/) {
+			if (WARN("MAINTAINERS_STYLE",
+				 "MAINTAINERS entries use one tab after TYPE:\n" . $herecurr) &&
+			    $fix) {
+				$fixed[$fixlinenr] =~ s/^(\+[A-Z]):\s*/$1:\t/;
+			}
+		}
+
 # discourage the use of boolean for type definition attributes of Kconfig options
 		if ($realfile =~ /Kconfig/ &&
 		    $line =~ /^\+\s*\bboolean\b/) {
-- 
GitLab


From fe658f94b2c911729afbffeeb1f5f03f0a26d9e6 Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.vnet.ibm.com>
Date: Mon, 10 Jul 2017 15:52:10 -0700
Subject: [PATCH 1116/1958] checkpatch: [HLP]LIST_HEAD is also declaration

Fixes the following false warning among others for LLIST_HEAD and
PLIST_HEAD:

    WARNING: Missing a blank line after declarations
    #71: FILE: drivers/s390/scsi/zfcp_fsf.c:422:
    +	struct hlist_node *tmp;
    +	HLIST_HEAD(remove_queue);

Link: http://lkml.kernel.org/r/20170614133512.89425-1-maier@linux.vnet.ibm.com
Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
Acked-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3cf05505e833e..bc2417711b6a3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -733,7 +733,7 @@ our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)};
 
 our $declaration_macros = qr{(?x:
 	(?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(|
-	(?:$Storage\s+)?LIST_HEAD\s*\(|
+	(?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(|
 	(?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
 )};
 
-- 
GitLab


From ca8198640fa9aeea71ae61b02fee6ee5e097f243 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 10 Jul 2017 15:52:13 -0700
Subject: [PATCH 1117/1958] checkpatch: fix stepping through statements with
 $stat and ctx_statement_block

Fix the off-by-one in the suppression of lines in a statement block.

This means that for multiple line statements like

	foo(bar,
	    baz,
	    qux);

$stat has been inspected first correctly for the entire statement,
and subsequently incorrectly just for

	    qux);

This fix will help make tracking appropriate indentation a little easier.

Link: http://lkml.kernel.org/r/71b25979c90412133c717084036c9851cd2b7bcb.1496862585.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index bc2417711b6a3..2f61f54b89407 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3219,7 +3219,7 @@ sub process {
 		my ($stat, $cond, $line_nr_next, $remain_next, $off_next,
 		    $realline_next);
 #print "LINE<$line>\n";
-		if ($linenr >= $suppress_statement &&
+		if ($linenr > $suppress_statement &&
 		    $realcnt && $sline =~ /.\s*\S/) {
 			($stat, $cond, $line_nr_next, $remain_next, $off_next) =
 				ctx_statement_block($linenr, $realcnt, 0);
-- 
GitLab


From 948b133a1b62441bd2ae98b866f409017191fdd3 Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt <xypron.glpk@gmx.de>
Date: Mon, 10 Jul 2017 15:52:16 -0700
Subject: [PATCH 1118/1958] checkpatch: remove false warning for commit
 reference

Checkpatch warns of an incorrect commit reference style for any
hexadecimal number of 12 digits and more.

Numbers of 12 digits are not necessarily commit ids.

For an example provoking the problem see
  https://patchwork.kernel.org/patch/9170897/

Checkpatch should only warn if the number refers to an existing commit.

Link: http://lkml.kernel.org/r/20170607184008.5869-1-xypron.glpk@gmx.de
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Acked-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 2f61f54b89407..e9618ca312513 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -867,6 +867,7 @@ sub git_commit_info {
 #		    echo "commit $(cut -c 1-12,41-)"
 #		done
 	} elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
+		$id = undef;
 	} else {
 		$id = substr($lines[0], 0, 12);
 		$desc = substr($lines[0], 41);
@@ -2606,7 +2607,8 @@ sub process {
 			($id, $description) = git_commit_info($orig_commit,
 							      $id, $orig_desc);
 
-			if ($short || $long || $space || $case || ($orig_desc ne $description) || !$hasparens) {
+			if (defined($id) &&
+			   ($short || $long || $space || $case || ($orig_desc ne $description) || !$hasparens)) {
 				ERROR("GIT_COMMIT_ID",
 				      "Please use git commit description style 'commit <12+ chars of sha1> (\"<title line>\")' - ie: '${init_char}ommit $id (\"$description\")'\n" . $herecurr);
 			}
-- 
GitLab


From a0ad75964e58cd7d9b6910e2bbb8a7e8656c0f51 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 10 Jul 2017 15:52:19 -0700
Subject: [PATCH 1119/1958] checkpatch: improve tests for multiple line
 function definitions

Add a block that identifies multiple line function definitions.

Save the function name into $context_function to improve the embedded
function name test.

Look for misplaced open brace on the function definition.
Emit an OPEN_BRACE error when the function definition is similar to

     void foo(int arg1,
              int arg2) {

Miscellanea:

o Remove the $realfile test in function declaration w/o named arguments test
o Comment the function declaration w/o named arguments test

Link: http://lkml.kernel.org/r/de620ed6ebab75fdfa323741ada2134a0f545892.1496835238.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Tested-by: David Kershner <david.kershner@unisys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e9618ca312513..a103f4fc30a2c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5899,7 +5899,8 @@ sub process {
 			     "externs should be avoided in .c files\n" .  $herecurr);
 		}
 
-		if ($realfile =~ /\.[ch]$/ && defined $stat &&
+# check for function declarations that have arguments without identifier names
+		if (defined $stat &&
 		    $stat =~ /^.\s*(?:extern\s+)?$Type\s*$Ident\s*\(\s*([^{]+)\s*\)\s*;/s &&
 		    $1 ne "void") {
 			my $args = trim($1);
@@ -5912,6 +5913,29 @@ sub process {
 			}
 		}
 
+# check for function definitions
+		if ($^V && $^V ge 5.10.0 &&
+		    defined $stat &&
+		    $stat =~ /^.\s*(?:$Storage\s+)?$Type\s*($Ident)\s*$balanced_parens\s*{/s) {
+			$context_function = $1;
+
+# check for multiline function definition with misplaced open brace
+			my $ok = 0;
+			my $cnt = statement_rawlines($stat);
+			my $herectx = $here . "\n";
+			for (my $n = 0; $n < $cnt; $n++) {
+				my $rl = raw_line($linenr, $n);
+				$herectx .=  $rl . "\n";
+				$ok = 1 if ($rl =~ /^[ \+]\{/);
+				$ok = 1 if ($rl =~ /\{/ && $n == 0);
+				last if $rl =~ /^[ \+].*\{/;
+			}
+			if (!$ok) {
+				ERROR("OPEN_BRACE",
+				      "open brace '{' following function definitions go on the next line\n" . $herectx);
+			}
+		}
+
 # checks for new __setup's
 		if ($rawline =~ /\b__setup\("([^"]*)"/) {
 			my $name = $1;
-- 
GitLab


From 8d81ae05d0176da1c54aeaed697fa34be5c5575e Mon Sep 17 00:00:00 2001
From: Cyril Bur <cyrilbur@gmail.com>
Date: Mon, 10 Jul 2017 15:52:21 -0700
Subject: [PATCH 1120/1958] checkpatch: silence perl 5.26.0 unescaped left
 brace warnings

As of perl 5, version 26, subversion 0 (v5.26.0) some new warnings have
occurred when running checkpatch.

Unescaped left brace in regex is deprecated here (and will be fatal in
Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){
<-- HERE \s*/ at scripts/checkpatch.pl line 3544.

Unescaped left brace in regex is deprecated here (and will be fatal in
Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){
<-- HERE \s*/ at scripts/checkpatch.pl line 3885.

Unescaped left brace in regex is deprecated here (and will be fatal in
Perl 5.30), passed through in regex; marked by <-- HERE in
m/^(\+.*(?:do|\))){ <-- HERE / at scripts/checkpatch.pl line 4374.

It seems perfectly reasonable to do as the warning suggests and simply
escape the left brace in these three locations.

Link: http://lkml.kernel.org/r/20170607060135.17384-1-cyrilbur@gmail.com
Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Acked-by: Joe Perches <joe@perches.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a103f4fc30a2c..ab12e3040abb3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3555,7 +3555,7 @@ sub process {
 				$fixedline =~ s/\s*=\s*$/ = {/;
 				fix_insert_line($fixlinenr, $fixedline);
 				$fixedline = $line;
-				$fixedline =~ s/^(.\s*){\s*/$1/;
+				$fixedline =~ s/^(.\s*)\{\s*/$1/;
 				fix_insert_line($fixlinenr, $fixedline);
 			}
 		}
@@ -3896,7 +3896,7 @@ sub process {
 				my $fixedline = rtrim($prevrawline) . " {";
 				fix_insert_line($fixlinenr, $fixedline);
 				$fixedline = $rawline;
-				$fixedline =~ s/^(.\s*){\s*/$1\t/;
+				$fixedline =~ s/^(.\s*)\{\s*/$1\t/;
 				if ($fixedline !~ /^\+\s*$/) {
 					fix_insert_line($fixlinenr, $fixedline);
 				}
@@ -4385,7 +4385,7 @@ sub process {
 			if (ERROR("SPACING",
 				  "space required before the open brace '{'\n" . $herecurr) &&
 			    $fix) {
-				$fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
+				$fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/;
 			}
 		}
 
-- 
GitLab


From 737c0767758bbd65cf95c44ccc09bca970e2ef8e Mon Sep 17 00:00:00 2001
From: John Brooks <john@fastquake.com>
Date: Mon, 10 Jul 2017 15:52:24 -0700
Subject: [PATCH 1121/1958] checkpatch: change format of --color argument to
 --color[=WHEN]

The boolean --color argument did not offer the ability to force
colourized output even if stdout is not a terminal.  Change the format
of the argument to the familiar --color[=WHEN] construct as seen in
common Linux utilities such as git, ls and dmesg, which allows the user
to specify whether to colourize output "always", "never", or "auto" when
the output is a terminal.  The default is "auto".

The old command-line uses of --color and --no-color are unchanged.

Link: http://lkml.kernel.org/r/efe43bdbad400f39ba691ae663044462493b0773.1496799721.git.joe@perches.com
Signed-off-by: John Brooks <john@fastquake.com>
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index ab12e3040abb3..63409dbd0de5c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -57,7 +57,7 @@ my $codespell = 0;
 my $codespellfile = "/usr/share/codespell/dictionary.txt";
 my $conststructsfile = "$D/const_structs.checkpatch";
 my $typedefsfile = "";
-my $color = 1;
+my $color = "auto";
 my $allow_c99_comments = 1;
 
 sub help {
@@ -116,7 +116,8 @@ Options:
                              (default:/usr/share/codespell/dictionary.txt)
   --codespellfile            Use this codespell dictionary
   --typedefsfile             Read additional types from this file
-  --color                    Use colors when output is STDOUT (default: on)
+  --color[=WHEN]             Use colors 'always', 'never', or only when output
+                             is a terminal ('auto'). Default is 'auto'.
   -h, --help, --version      display this help and exit
 
 When FILE is - read standard input.
@@ -182,6 +183,14 @@ if (-f $conf) {
 	unshift(@ARGV, @conf_args) if @conf_args;
 }
 
+# Perl's Getopt::Long allows options to take optional arguments after a space.
+# Prevent --color by itself from consuming other arguments
+foreach (@ARGV) {
+	if ($_ eq "--color" || $_ eq "-color") {
+		$_ = "--color=$color";
+	}
+}
+
 GetOptions(
 	'q|quiet+'	=> \$quiet,
 	'tree!'		=> \$tree,
@@ -212,7 +221,9 @@ GetOptions(
 	'codespell!'	=> \$codespell,
 	'codespellfile=s'	=> \$codespellfile,
 	'typedefsfile=s'	=> \$typedefsfile,
-	'color!'	=> \$color,
+	'color=s'	=> \$color,
+	'no-color'	=> \$color,	#keep old behaviors of -nocolor
+	'nocolor'	=> \$color,	#keep old behaviors of -nocolor
 	'h|help'	=> \$help,
 	'version'	=> \$help
 ) or help(1);
@@ -238,6 +249,18 @@ if ($#ARGV < 0) {
 	push(@ARGV, '-');
 }
 
+if ($color =~ /^[01]$/) {
+	$color = !$color;
+} elsif ($color =~ /^always$/i) {
+	$color = 1;
+} elsif ($color =~ /^never$/i) {
+	$color = 0;
+} elsif ($color =~ /^auto$/i) {
+	$color = (-t STDOUT);
+} else {
+	die "Invalid color mode: $color\n";
+}
+
 sub hash_save_array_words {
 	my ($hashRef, $arrayRef) = @_;
 
@@ -1883,7 +1906,7 @@ sub report {
 		return 0;
 	}
 	my $output = '';
-	if (-t STDOUT && $color) {
+	if ($color) {
 		if ($level eq 'ERROR') {
 			$output .= RED;
 		} elsif ($level eq 'WARNING') {
@@ -1894,10 +1917,10 @@ sub report {
 	}
 	$output .= $prefix . $level . ':';
 	if ($show_types) {
-		$output .= BLUE if (-t STDOUT && $color);
+		$output .= BLUE if ($color);
 		$output .= "$type:";
 	}
-	$output .= RESET if (-t STDOUT && $color);
+	$output .= RESET if ($color);
 	$output .= ' ' . $msg . "\n";
 
 	if ($showfile) {
-- 
GitLab


From 7fe528a27dee5fcab3bc093ee6f311080f799e29 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 10 Jul 2017 15:52:27 -0700
Subject: [PATCH 1122/1958] checkpatch: improve macro reuse test

checkpatch reports a false positive when using token pasting argument
multiple times in a macro.

Fix it.

Miscellanea:

o Make the $tmp variable name used in the macro argument tests
  a bit more descriptive

Link: http://lkml.kernel.org/r/cf434ae7602838388c7cb49d42bca93ab88527e7.1498483044.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 63409dbd0de5c..43171ed88115e 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -4940,17 +4940,17 @@ sub process {
 			foreach my $arg (@def_args) {
 			        next if ($arg =~ /\.\.\./);
 			        next if ($arg =~ /^type$/i);
-				my $tmp = $define_stmt;
-				$tmp =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
-				$tmp =~ s/\#+\s*$arg\b//g;
-				$tmp =~ s/\b$arg\s*\#\#//g;
-				my $use_cnt = $tmp =~ s/\b$arg\b//g;
+				my $tmp_stmt = $define_stmt;
+				$tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
+				$tmp_stmt =~ s/\#+\s*$arg\b//g;
+				$tmp_stmt =~ s/\b$arg\s*\#\#//g;
+				my $use_cnt = $tmp_stmt =~ s/\b$arg\b//g;
 				if ($use_cnt > 1) {
 					CHK("MACRO_ARG_REUSE",
 					    "Macro argument reuse '$arg' - possible side-effects?\n" . "$herectx");
 				    }
 # check if any macro arguments may have other precedence issues
-				if ($define_stmt =~ m/($Operators)?\s*\b$arg\b\s*($Operators)?/m &&
+				if ($tmp_stmt =~ m/($Operators)?\s*\b$arg\b\s*($Operators)?/m &&
 				    ((defined($1) && $1 ne ',') ||
 				     (defined($2) && $2 ne ','))) {
 					CHK("MACRO_ARG_PRECEDENCE",
-- 
GitLab


From fd71f6326844efac98d99c0c34e7ca7419506b15 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 10 Jul 2017 15:52:30 -0700
Subject: [PATCH 1123/1958] checkpatch: improve multi-line alignment test

The current test fails to warn about improper alignment with code like

	foo->bar = func(arg1,
				arg2);

because foo->bar is not a single identifier.

Convert the $Ident to $Lval which allows for multiple dereferences.

Link: http://lkml.kernel.org/r/01c35b9b6a12a415e57746d45d589bfaad39952a.1498841563.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 43171ed88115e..8f940c09918f0 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2993,7 +2993,7 @@ sub process {
 
 # check multi-line statement indentation matches previous line
 		if ($^V && $^V ge 5.10.0 &&
-		    $prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|$Ident\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
+		    $prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
 			$prevline =~ /^\+(\t*)(.*)$/;
 			my $oldindent = $1;
 			my $rest = $2;
-- 
GitLab


From c257a340ede0104e902807f0f001799850343ae9 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 10 Jul 2017 15:52:33 -0700
Subject: [PATCH 1124/1958] fs, epoll: short circuit fetching events if thread
 has been killed

We've encountered zombies that are waiting for a thread to exit that are
looping in ep_poll() almost endlessly although there is a pending
SIGKILL as a result of a group exit.

This happens because we always find ep_events_available() and fetch more
events and never are able to check for signal_pending() that would break
from the loop and return -EINTR.

Special case fatal signals and break immediately to guarantee that we
loop to fetch more events and delay making a timely exit.

It would also be possible to simply move the check for signal_pending()
higher than checking for ep_events_available(), but there have been no
reports of delayed signal handling other than SIGKILL preventing zombies
from exiting that would be fixed by this.

It fixes an issue for us where we have witnessed zombies sticking around
for at least O(minutes), but considering the code has been like this
forever and nobody else has complained that I have found, I would simply
queue it up for 4.12.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1705031722350.76784@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jan Kara <jack@suse.cz>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b1c8e23ddf65b..a6d194831ed86 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1748,6 +1748,16 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 			 * to TASK_INTERRUPTIBLE before doing the checks.
 			 */
 			set_current_state(TASK_INTERRUPTIBLE);
+			/*
+			 * Always short-circuit for fatal signals to allow
+			 * threads to make a timely exit without the chance of
+			 * finding more events available and fetching
+			 * repeatedly.
+			 */
+			if (fatal_signal_pending(current)) {
+				res = -EINTR;
+				break;
+			}
 			if (ep_events_available(ep) || timed_out)
 				break;
 			if (signal_pending(current)) {
-- 
GitLab


From eab09532d40090698b05a07c1c87f39fdbc5fab5 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 10 Jul 2017 15:52:37 -0700
Subject: [PATCH 1125/1958] binfmt_elf: use ELF_ET_DYN_BASE only for PIE

The ELF_ET_DYN_BASE position was originally intended to keep loaders
away from ET_EXEC binaries.  (For example, running "/lib/ld-linux.so.2
/bin/cat" might cause the subsequent load of /bin/cat into where the
loader had been loaded.)

With the advent of PIE (ET_DYN binaries with an INTERP Program Header),
ELF_ET_DYN_BASE continued to be used since the kernel was only looking
at ET_DYN.  However, since ELF_ET_DYN_BASE is traditionally set at the
top 1/3rd of the TASK_SIZE, a substantial portion of the address space
is unused.

For 32-bit tasks when RLIMIT_STACK is set to RLIM_INFINITY, programs are
loaded above the mmap region.  This means they can be made to collide
(CVE-2017-1000370) or nearly collide (CVE-2017-1000371) with
pathological stack regions.

Lowering ELF_ET_DYN_BASE solves both by moving programs below the mmap
region in all cases, and will now additionally avoid programs falling
back to the mmap region by enforcing MAP_FIXED for program loads (i.e.
if it would have collided with the stack, now it will fail to load
instead of falling back to the mmap region).

To allow for a lower ELF_ET_DYN_BASE, loaders (ET_DYN without INTERP)
are loaded into the mmap region, leaving space available for either an
ET_EXEC binary with a fixed location or PIE being loaded into mmap by
the loader.  Only PIE programs are loaded offset from ELF_ET_DYN_BASE,
which means architectures can now safely lower their values without risk
of loaders colliding with their subsequently loaded programs.

For 64-bit, ELF_ET_DYN_BASE is best set to 4GB to allow runtimes to use
the entire 32-bit address space for 32-bit pointers.

Thanks to PaX Team, Daniel Micay, and Rik van Riel for inspiration and
suggestions on how to implement this solution.

Fixes: d1fd836dcf00 ("mm: split ET_DYN ASLR from mmap ASLR")
Link: http://lkml.kernel.org/r/20170621173201.GA114489@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Qualys Security Advisory <qsa@qualys.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/elf.h | 13 +++++----
 fs/binfmt_elf.c            | 59 ++++++++++++++++++++++++++++++++------
 2 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index e8ab9a46bc689..1c18d83d3f094 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -245,12 +245,13 @@ extern int force_personality32;
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE		(mmap_is_ia32() ? 0x000400000UL : \
+						  0x100000000UL)
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5075fd5c62c86..7465c3ea5dd50 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -927,17 +927,60 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 
 		vaddr = elf_ppnt->p_vaddr;
+		/*
+		 * If we are loading ET_EXEC or we have already performed
+		 * the ET_DYN load_addr calculations, proceed normally.
+		 */
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 			elf_flags |= MAP_FIXED;
 		} else if (loc->elf_ex.e_type == ET_DYN) {
-			/* Try and get dynamic programs out of the way of the
-			 * default mmap base, as well as whatever program they
-			 * might try to exec.  This is because the brk will
-			 * follow the loader, and is not movable.  */
-			load_bias = ELF_ET_DYN_BASE - vaddr;
-			if (current->flags & PF_RANDOMIZE)
-				load_bias += arch_mmap_rnd();
-			load_bias = ELF_PAGESTART(load_bias);
+			/*
+			 * This logic is run once for the first LOAD Program
+			 * Header for ET_DYN binaries to calculate the
+			 * randomization (load_bias) for all the LOAD
+			 * Program Headers, and to calculate the entire
+			 * size of the ELF mapping (total_size). (Note that
+			 * load_addr_set is set to true later once the
+			 * initial mapping is performed.)
+			 *
+			 * There are effectively two types of ET_DYN
+			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
+			 * and loaders (ET_DYN without INTERP, since they
+			 * _are_ the ELF interpreter). The loaders must
+			 * be loaded away from programs since the program
+			 * may otherwise collide with the loader (especially
+			 * for ET_EXEC which does not have a randomized
+			 * position). For example to handle invocations of
+			 * "./ld.so someprog" to test out a new version of
+			 * the loader, the subsequent program that the
+			 * loader loads must avoid the loader itself, so
+			 * they cannot share the same load range. Sufficient
+			 * room for the brk must be allocated with the
+			 * loader as well, since brk must be available with
+			 * the loader.
+			 *
+			 * Therefore, programs are loaded offset from
+			 * ELF_ET_DYN_BASE and loaders are loaded into the
+			 * independently randomized mmap region (0 load_bias
+			 * without MAP_FIXED).
+			 */
+			if (elf_interpreter) {
+				load_bias = ELF_ET_DYN_BASE;
+				if (current->flags & PF_RANDOMIZE)
+					load_bias += arch_mmap_rnd();
+				elf_flags |= MAP_FIXED;
+			} else
+				load_bias = 0;
+
+			/*
+			 * Since load_bias is used for all subsequent loading
+			 * calculations, we must lower it by the first vaddr
+			 * so that the remaining calculations based on the
+			 * ELF vaddrs will be correctly offset. The result
+			 * is then page aligned.
+			 */
+			load_bias = ELF_PAGESTART(load_bias - vaddr);
+
 			total_size = total_mapping_size(elf_phdata,
 							loc->elf_ex.e_phnum);
 			if (!total_size) {
-- 
GitLab


From 6a9af90a3bcde217a1c053e135f5f43e5d5fafbd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 10 Jul 2017 15:52:40 -0700
Subject: [PATCH 1126/1958] arm: move ELF_ET_DYN_BASE to 4MB

Now that explicitly executed loaders are loaded in the mmap region, we
have more freedom to decide where we position PIE binaries in the
address space to avoid possible collisions with mmap or stack regions.

4MB is chosen here mainly to have parity with x86, where this is the
traditional minimum load location, likely to avoid historically
requiring a 4MB page table entry when only a portion of the first 4MB
would be used (since the NULL address is avoided).

For ARM the position could be 0x8000, the standard ET_EXEC load address,
but that is needlessly close to the NULL address, and anyone running PIE
on 32-bit ARM will have an MMU, so the tight mapping is not needed.

Link: http://lkml.kernel.org/r/1498154792-49952-2-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Qualys Security Advisory <qsa@qualys.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/elf.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index d2315ffd8f126..f13ae153fb246 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -112,12 +112,8 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE	(TASK_SIZE / 3 * 2)
+/* This is the base location for PIE (ET_DYN with INTERP) loads. */
+#define ELF_ET_DYN_BASE		0x400000UL
 
 /* When the program starts, a1 contains a pointer to a function to be 
    registered with atexit, as per the SVR4 ABI.  A value of 0 means we 
-- 
GitLab


From 02445990a96e60a67526510d8b00f7e3d14101c3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 10 Jul 2017 15:52:44 -0700
Subject: [PATCH 1127/1958] arm64: move ELF_ET_DYN_BASE to 4GB / 4MB

Now that explicitly executed loaders are loaded in the mmap region, we
have more freedom to decide where we position PIE binaries in the
address space to avoid possible collisions with mmap or stack regions.

For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit
address space for 32-bit pointers.  On 32-bit use 4MB, to match ARM.
This could be 0x8000, the standard ET_EXEC load address, but that is
needlessly close to the NULL address, and anyone running arm compat PIE
will have an MMU, so the tight mapping is not needed.

Link: http://lkml.kernel.org/r/1498251600-132458-4-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/elf.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index ac3fb7441510d..acae781f7359e 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -113,12 +113,11 @@
 #define ELF_EXEC_PAGESIZE	PAGE_SIZE
 
 /*
- * This is the location that an ET_DYN program is loaded if exec'ed.  Typical
- * use of this is to invoke "./ld.so someprog" to test out a new version of
- * the loader.  We need to make sure that it is out of the way of the program
- * that it will "exec", and that there is sufficient room for the brk.
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
  */
-#define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
+#define ELF_ET_DYN_BASE		0x100000000UL
 
 #ifndef __ASSEMBLY__
 
@@ -174,7 +173,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_COMPAT
 
-#define COMPAT_ELF_ET_DYN_BASE		(2 * TASK_SIZE_32 / 3)
+/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
+#define COMPAT_ELF_ET_DYN_BASE		0x000400000UL
 
 /* AArch32 registers. */
 #define COMPAT_ELF_NGREG		18
-- 
GitLab


From 47ebb09d54856500c5a5e14824781902b3bb738e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 10 Jul 2017 15:52:47 -0700
Subject: [PATCH 1128/1958] powerpc: move ELF_ET_DYN_BASE to 4GB / 4MB

Now that explicitly executed loaders are loaded in the mmap region, we
have more freedom to decide where we position PIE binaries in the
address space to avoid possible collisions with mmap or stack regions.

For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit
address space for 32-bit pointers.  On 32-bit use 4MB, which is the
traditional x86 minimum load location, likely to avoid historically
requiring a 4MB page table entry when only a portion of the first 4MB
would be used (since the NULL address is avoided).

Link: http://lkml.kernel.org/r/1498154792-49952-4-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/elf.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 09bde6e34f5d5..548d9a411a0d6 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -23,12 +23,13 @@
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	PAGE_SIZE
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE	0x20000000
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE		(is_32bit_task() ? 0x000400000UL : \
+						   0x100000000UL)
 
 #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
 
-- 
GitLab


From a73dc5370e153ac63718d850bddf0c9aa9d871e6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 10 Jul 2017 15:52:51 -0700
Subject: [PATCH 1129/1958] s390: reduce ELF_ET_DYN_BASE

Now that explicitly executed loaders are loaded in the mmap region, we
have more freedom to decide where we position PIE binaries in the
address space to avoid possible collisions with mmap or stack regions.

For 64-bit, align to 4GB to allow runtimes to use the entire 32-bit
address space for 32-bit pointers.  On 32-bit use 4MB, which is the
traditional x86 minimum load location, likely to avoid historically
requiring a 4MB page table entry when only a portion of the first 4MB
would be used (since the NULL address is avoided).  For s390 the
position could be 0x10000, but that is needlessly close to the NULL
address.

Link: http://lkml.kernel.org/r/1498154792-49952-5-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/elf.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index ec024c08dabe6..c92ed0170be22 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -193,14 +193,13 @@ struct arch_elf_state {
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk. 64-bit
-   tasks are aligned to 4GB. */
-#define ELF_ET_DYN_BASE (is_compat_task() ? \
-				(STACK_TOP / 3 * 2) : \
-				(STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE		(is_compat_task() ? 0x000400000UL : \
+						    0x100000000UL)
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports. */
-- 
GitLab


From 67c6777a5d331dda32a4c4a1bf0cac85bdaaaed8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 10 Jul 2017 15:52:54 -0700
Subject: [PATCH 1130/1958] binfmt_elf: safely increment argv pointers

When building the argv/envp pointers, the envp is needlessly
pre-incremented instead of just continuing after the argv pointers are
finished.  In some (likely impossible) race where the strings could be
changed from userspace between copy_strings() and here, it might be
possible to confuse the envp position.  Instead, just use sp like
everything else.

Link: http://lkml.kernel.org/r/20170622173838.GA43308@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Qualys Security Advisory <qsa@qualys.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7465c3ea5dd50..879ff9c7ffd01 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -163,8 +163,6 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	unsigned long p = bprm->p;
 	int argc = bprm->argc;
 	int envc = bprm->envc;
-	elf_addr_t __user *argv;
-	elf_addr_t __user *envp;
 	elf_addr_t __user *sp;
 	elf_addr_t __user *u_platform;
 	elf_addr_t __user *u_base_platform;
@@ -304,38 +302,38 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
 	if (__put_user(argc, sp++))
 		return -EFAULT;
-	argv = sp;
-	envp = argv + argc + 1;
 
-	/* Populate argv and envp */
+	/* Populate list of argv pointers back to argv strings. */
 	p = current->mm->arg_end = current->mm->arg_start;
 	while (argc-- > 0) {
 		size_t len;
-		if (__put_user((elf_addr_t)p, argv++))
+		if (__put_user((elf_addr_t)p, sp++))
 			return -EFAULT;
 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 		if (!len || len > MAX_ARG_STRLEN)
 			return -EINVAL;
 		p += len;
 	}
-	if (__put_user(0, argv))
+	if (__put_user(0, sp++))
 		return -EFAULT;
-	current->mm->arg_end = current->mm->env_start = p;
+	current->mm->arg_end = p;
+
+	/* Populate list of envp pointers back to envp strings. */
+	current->mm->env_end = current->mm->env_start = p;
 	while (envc-- > 0) {
 		size_t len;
-		if (__put_user((elf_addr_t)p, envp++))
+		if (__put_user((elf_addr_t)p, sp++))
 			return -EFAULT;
 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 		if (!len || len > MAX_ARG_STRLEN)
 			return -EINVAL;
 		p += len;
 	}
-	if (__put_user(0, envp))
+	if (__put_user(0, sp++))
 		return -EFAULT;
 	current->mm->env_end = p;
 
 	/* Put the elf_info on the stack in the right place.  */
-	sp = (elf_addr_t __user *)envp + 1;
 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 		return -EFAULT;
 	return 0;
-- 
GitLab


From 4ea77014af0d6205b05503d1c7aac6eace11d473 Mon Sep 17 00:00:00 2001
From: zhongjiang <zhongjiang@huawei.com>
Date: Mon, 10 Jul 2017 15:52:57 -0700
Subject: [PATCH 1131/1958] kernel/signal.c: avoid undefined behaviour in
 kill_something_info

When running kill(72057458746458112, 0) in userspace I hit the following
issue.

  UBSAN: Undefined behaviour in kernel/signal.c:1462:11
  negation of -2147483648 cannot be represented in type 'int':
  CPU: 226 PID: 9849 Comm: test Tainted: G    B          ---- -------   3.10.0-327.53.58.70.x86_64_ubsan+ #116
  Hardware name: Huawei Technologies Co., Ltd. RH8100 V3/BC61PBIA, BIOS BLHSV028 11/11/2014
  Call Trace:
    dump_stack+0x19/0x1b
    ubsan_epilogue+0xd/0x50
    __ubsan_handle_negate_overflow+0x109/0x14e
    SYSC_kill+0x43e/0x4d0
    SyS_kill+0xe/0x10
    system_call_fastpath+0x16/0x1b

Add code to avoid the UBSAN detection.

[akpm@linux-foundation.org: tweak comment]
Link: http://lkml.kernel.org/r/1496670008-59084-1-git-send-email-zhongjiang@huawei.com
Signed-off-by: zhongjiang <zhongjiang@huawei.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/signal.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/signal.c b/kernel/signal.c
index 48a59eefd8adb..caed9133ae527 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1402,6 +1402,10 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
 		return ret;
 	}
 
+	/* -INT_MIN is undefined.  Exclude this case to avoid a UBSAN warning */
+	if (pid == INT_MIN)
+		return -ESRCH;
+
 	read_lock(&tasklist_lock);
 	if (pid != -1) {
 		ret = __kill_pgrp_info(sig, info,
-- 
GitLab


From dd83c161fbcc5d8be637ab159c0de015cbff5ba4 Mon Sep 17 00:00:00 2001
From: zhongjiang <zhongjiang@huawei.com>
Date: Mon, 10 Jul 2017 15:53:01 -0700
Subject: [PATCH 1132/1958] kernel/exit.c: avoid undefined behaviour when
 calling wait4()

wait4(-2147483648, 0x20, 0, 0xdd0000) triggers:
UBSAN: Undefined behaviour in kernel/exit.c:1651:9

The related calltrace is as follows:

  negation of -2147483648 cannot be represented in type 'int':
  CPU: 9 PID: 16482 Comm: zj Tainted: G    B          ---- -------   3.10.0-327.53.58.71.x86_64+ #66
  Hardware name: Huawei Technologies Co., Ltd. Tecal RH2285          /BC11BTSA              , BIOS CTSAV036 04/27/2011
  Call Trace:
    dump_stack+0x19/0x1b
    ubsan_epilogue+0xd/0x50
    __ubsan_handle_negate_overflow+0x109/0x14e
    SyS_wait4+0x1cb/0x1e0
    system_call_fastpath+0x16/0x1b

Exclude the overflow to avoid the UBSAN warning.

Link: http://lkml.kernel.org/r/1497264618-20212-1-git-send-email-zhongjiang@huawei.com
Signed-off-by: zhongjiang <zhongjiang@huawei.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/exit.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/exit.c b/kernel/exit.c
index 608c9775a37bc..c5548faa9f377 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1639,6 +1639,10 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
 			__WNOTHREAD|__WCLONE|__WALL))
 		return -EINVAL;
 
+	/* -INT_MIN is not defined */
+	if (upid == INT_MIN)
+		return -ESRCH;
+
 	if (upid == -1)
 		type = PIDTYPE_MAX;
 	else if (upid < 0) {
-- 
GitLab


From 41d0c2ecde19cfe93071ed7b979a53ba60b12840 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 6 Jul 2017 20:51:28 +1000
Subject: [PATCH 1133/1958] powerpc/powernv: Fix local TLB flush for boot and
 MCE on POWER9

There are two cases outside the normal address space management
where a CPU's local TLB is to be flushed:

  1. Host boot; in case something has left stale entries in the
     TLB (e.g., kexec).

  2. Machine check; to clean corrupted TLB entries.

CPU state restore from deep idle states also flushes the TLB.
However this seems to be a side effect of reusing the boot code to set
CPU state, rather than a requirement itself.

The current flushing has a number of problems with ISA v3.0B:

- The current radix mode of the MMU is not taken into account. tlbiel
  is undefined if the R field does not match the current radix mode.

- ISA v3.0B hash must flush the partition and process table caches.

- ISA v3.0B radix must flush partition and process scoped translations,
  partition and process table caches, and also the page walk cache.

Add POWER9 cases to handle these, with radix vs hash determined by the
host MMU mode.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/cpu_setup_power.S | 13 +++++--
 arch/powerpc/kernel/dt_cpu_ftrs.c     | 16 +-------
 arch/powerpc/kernel/mce_power.c       | 56 ++++++++++++++++++++++++++-
 3 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 10cb2896b2ae5..610955fe8b81c 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -218,13 +218,20 @@ __init_tlb_power8:
 	ptesync
 1:	blr
 
+/*
+ * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process
+ * and one for partition scope to clear process and partition table entries.
+ */
 __init_tlb_power9:
-	li	r6,POWER9_TLB_SETS_HASH
+	li	r6,POWER9_TLB_SETS_HASH - 1
 	mtctr	r6
 	li	r7,0xc00	/* IS field = 0b11 */
+	li	r8,0
 	ptesync
-2:	tlbiel	r7
-	addi	r7,r7,0x1000
+	PPC_TLBIEL(7, 8, 2, 1, 0)
+	PPC_TLBIEL(7, 8, 2, 0, 0)
+2:	addi	r7,r7,0x1000
+	PPC_TLBIEL(7, 8, 0, 0, 0)
 	bdnz	2b
 	ptesync
 1:	blr
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 4c7656dc4e04f..1df770e8cbe03 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -94,9 +94,6 @@ static void (*init_pmu_registers)(void);
 
 static void cpufeatures_flush_tlb(void)
 {
-	unsigned long rb;
-	unsigned int i, num_sets;
-
 	/*
 	 * This is a temporary measure to keep equivalent TLB flush as the
 	 * cputable based setup code.
@@ -105,24 +102,15 @@ static void cpufeatures_flush_tlb(void)
 	case PVR_POWER8:
 	case PVR_POWER8E:
 	case PVR_POWER8NVL:
-		num_sets = POWER8_TLB_SETS;
+		__flush_tlb_power8(POWER8_TLB_SETS);
 		break;
 	case PVR_POWER9:
-		num_sets = POWER9_TLB_SETS_HASH;
+		__flush_tlb_power9(POWER9_TLB_SETS_HASH);
 		break;
 	default:
-		num_sets = 1;
 		pr_err("unknown CPU version for boot TLB flush\n");
 		break;
 	}
-
-	asm volatile("ptesync" : : : "memory");
-	rb = TLBIEL_INVAL_SET;
-	for (i = 0; i < num_sets; i++) {
-		asm volatile("tlbiel %0" : : "r" (rb));
-		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
-	}
-	asm volatile("ptesync" : : : "memory");
 }
 
 static void __restore_cpu_cpufeatures(void)
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index d24e689e893f0..b76ca198e09c1 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -53,6 +53,60 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action)
 	asm volatile("ptesync" : : : "memory");
 }
 
+static void flush_tlb_300(unsigned int num_sets, unsigned int action)
+{
+	unsigned long rb;
+	unsigned int i;
+	unsigned int r;
+
+	switch (action) {
+	case TLB_INVAL_SCOPE_GLOBAL:
+		rb = TLBIEL_INVAL_SET;
+		break;
+	case TLB_INVAL_SCOPE_LPID:
+		rb = TLBIEL_INVAL_SET_LPID;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	asm volatile("ptesync" : : : "memory");
+
+	if (early_radix_enabled())
+		r = 1;
+	else
+		r = 0;
+
+	/*
+	 * First flush table/PWC caches with set 0, then flush the
+	 * rest of the sets, partition scope. Radix must then do it
+	 * all again with process scope. Hash just has to flush
+	 * process table.
+	 */
+	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+			"r"(rb), "r"(0), "i"(2), "i"(0), "r"(r));
+	for (i = 1; i < num_sets; i++) {
+		unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
+
+		asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+				"r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r));
+	}
+
+	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+			"r"(rb), "r"(0), "i"(2), "i"(1), "r"(r));
+	if (early_radix_enabled()) {
+		for (i = 1; i < num_sets; i++) {
+			unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
+
+			asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+				"r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r));
+		}
+	}
+
+	asm volatile("ptesync" : : : "memory");
+}
+
 /*
  * Generic routines to flush TLB on POWER processors. These routines
  * are used as flush_tlb hook in the cpu_spec.
@@ -79,7 +133,7 @@ void __flush_tlb_power9(unsigned int action)
 	else
 		num_sets = POWER9_TLB_SETS_HASH;
 
-	flush_tlb_206(num_sets, action);
+	flush_tlb_300(num_sets, action);
 }
 
 
-- 
GitLab


From 2400fd822f467cb4c886c879d8ad99feac9cf319 Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran <oohall@gmail.com>
Date: Thu, 6 Jul 2017 18:46:43 +1000
Subject: [PATCH 1134/1958] powerpc/asm: Mark cr0 as clobbered in mftb()

The workaround for the CELL timebase bug does not correctly mark cr0 as
being clobbered. This means GCC doesn't know that the asm block changes cr0 and
might leave the result of an unrelated comparison in cr0 across the block, which
we then trash, leading to basically random behaviour.

Fixes: 859deea949c3 ("[POWERPC] Cell timebase bug workaround")
Cc: stable@vger.kernel.org # v2.6.19+
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
[mpe: Tweak change log and flag for stable]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7e50e47375d63..a3b6575c7842e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1303,7 +1303,7 @@ static inline void msr_check_and_clear(unsigned long bits)
 				"	.llong 0\n"			\
 				".previous"				\
 			: "=r" (rval) \
-			: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \
+			: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
 			rval;})
 #else
 #define mftb()		({unsigned long rval;	\
-- 
GitLab


From c17d5d5f51f72f24e0e17a4450ae5010bf6962d9 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Mon, 10 Jul 2017 14:53:31 -0500
Subject: [PATCH 1135/1958] target: export lio pgr/alua support as device attr

Older kernels could crash or hang if the user write/read some ALUA files
with pscsi and tcmu backends. This patch exports if LIO supports
executing PGR and ALUA scsi commands/checks for the se_device, so userspace
can easily test.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_configfs.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 9b8abd55c21cc..7e87d952bb7af 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -1085,6 +1085,24 @@ static ssize_t block_size_store(struct config_item *item,
 	return count;
 }
 
+static ssize_t alua_support_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = to_attrib(item);
+	u8 flags = da->da_dev->transport->transport_flags;
+
+	return snprintf(page, PAGE_SIZE, "%d\n",
+			flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ? 0 : 1);
+}
+
+static ssize_t pgr_support_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = to_attrib(item);
+	u8 flags = da->da_dev->transport->transport_flags;
+
+	return snprintf(page, PAGE_SIZE, "%d\n",
+			flags & TRANSPORT_FLAG_PASSTHROUGH_PGR ? 0 : 1);
+}
+
 CONFIGFS_ATTR(, emulate_model_alias);
 CONFIGFS_ATTR(, emulate_dpo);
 CONFIGFS_ATTR(, emulate_fua_write);
@@ -1116,6 +1134,8 @@ CONFIGFS_ATTR(, unmap_granularity);
 CONFIGFS_ATTR(, unmap_granularity_alignment);
 CONFIGFS_ATTR(, unmap_zeroes_data);
 CONFIGFS_ATTR(, max_write_same_len);
+CONFIGFS_ATTR_RO(, alua_support);
+CONFIGFS_ATTR_RO(, pgr_support);
 
 /*
  * dev_attrib attributes for devices using the target core SBC/SPC
@@ -1154,6 +1174,8 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
 	&attr_unmap_granularity_alignment,
 	&attr_unmap_zeroes_data,
 	&attr_max_write_same_len,
+	&attr_alua_support,
+	&attr_pgr_support,
 	NULL,
 };
 EXPORT_SYMBOL(sbc_attrib_attrs);
@@ -1168,6 +1190,8 @@ struct configfs_attribute *passthrough_attrib_attrs[] = {
 	&attr_hw_block_size,
 	&attr_hw_max_sectors,
 	&attr_hw_queue_depth,
+	&attr_alua_support,
+	&attr_pgr_support,
 	NULL,
 };
 EXPORT_SYMBOL(passthrough_attrib_attrs);
-- 
GitLab


From 0de98709896d9c02ce3121ec3afb524253a5853f Mon Sep 17 00:00:00 2001
From: "Zhou, Wenjia" <zhiyuan_zhu@htc.com>
Date: Tue, 4 Jul 2017 15:47:00 +0800
Subject: [PATCH 1136/1958] drm/i915/gvt: Fix a memory leak in
 intel_gvt_init_gtt()

It will causes memory leak, if the function setup_spt_oos() fail,
in the function intel_gvt_init_gtt(),
which allocated by get_zeroed_page() and mapped by dma_map_page().

Unmap and free the page,  after STP oos initialize fail,
it will fix this issue.

Signed-off-by: Zhou, Wenjia <zhiyuan_zhu@htc.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 66374dba3b1a2..6166e34d892b1 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2259,6 +2259,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
 		ret = setup_spt_oos(gvt);
 		if (ret) {
 			gvt_err("fail to initialize SPT oos\n");
+			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
+			__free_page(gvt->gtt.scratch_ggtt_page);
 			return ret;
 		}
 	}
-- 
GitLab


From 3364bf5fd00f0391ad090f547932a5c4b2068dbc Mon Sep 17 00:00:00 2001
From: Ping Gao <ping.a.gao@intel.com>
Date: Tue, 4 Jul 2017 16:09:58 +0800
Subject: [PATCH 1137/1958] drm/i915/gvt: Audit the command buffer address

The command buffer address in context like ring buffer base address
and wa_ctx address need to be audit to make sure they are in the
valid GGTT range.

Signed-off-by: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 51241de5e7a74..713848c363494 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2536,6 +2536,11 @@ static int scan_workload(struct intel_vgpu_workload *workload)
 		gma_head == gma_tail)
 		return 0;
 
+	if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	ret = ip_gma_set(&s, gma_head);
 	if (ret)
 		goto out;
@@ -2579,6 +2584,11 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
 	s.workload = workload;
 
+	if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	ret = ip_gma_set(&s, gma_head);
 	if (ret)
 		goto out;
-- 
GitLab


From 08673c3e27aa4407899e4fbb4738dac25370f706 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Fri, 7 Jul 2017 13:21:52 +0800
Subject: [PATCH 1138/1958] drm/i915/gvt: Revert "drm/i915/gvt: Fix possible
 recursive locking issue"

This reverts commit 62d02fd1f807bf5a259a242c483c9fb98a242630.

The rwsem recursive trace should not be fixed from kvmgt side by using
a workqueue and it is an issue should be fixed in VFIO. So this one
should be reverted.

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h   |  3 --
 drivers/gpu/drm/i915/gvt/kvmgt.c | 55 ++++++--------------------------
 2 files changed, 10 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 28d817e96e58c..3a74e79eac2f6 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -182,9 +182,6 @@ struct intel_vgpu {
 		struct kvm *kvm;
 		struct work_struct release_work;
 		atomic_t released;
-		struct work_struct unpin_work;
-		spinlock_t unpin_lock; /* To protect unpin_list */
-		struct list_head unpin_list;
 	} vdev;
 #endif
 };
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 75a6e1d8af0d3..fd0c85f9ef3c3 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -78,7 +78,6 @@ struct gvt_dma {
 	struct rb_node node;
 	gfn_t gfn;
 	unsigned long iova;
-	struct list_head list;
 };
 
 static inline bool handle_valid(unsigned long handle)
@@ -167,7 +166,6 @@ static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 
 	new->gfn = gfn;
 	new->iova = iova;
-	INIT_LIST_HEAD(&new->list);
 
 	mutex_lock(&vgpu->vdev.cache_lock);
 	while (*link) {
@@ -199,52 +197,26 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
 	kfree(entry);
 }
 
-static void intel_vgpu_unpin_work(struct work_struct *work)
+static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
 {
-	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
-					       vdev.unpin_work);
 	struct device *dev = mdev_dev(vgpu->vdev.mdev);
 	struct gvt_dma *this;
-	unsigned long gfn;
-
-	for (;;) {
-		spin_lock(&vgpu->vdev.unpin_lock);
-		if (list_empty(&vgpu->vdev.unpin_list)) {
-			spin_unlock(&vgpu->vdev.unpin_lock);
-			break;
-		}
-		this = list_first_entry(&vgpu->vdev.unpin_list,
-					struct gvt_dma, list);
-		list_del(&this->list);
-		spin_unlock(&vgpu->vdev.unpin_lock);
-
-		gfn = this->gfn;
-		vfio_unpin_pages(dev, &gfn, 1);
-		kfree(this);
-	}
-}
-
-static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
-{
-	struct gvt_dma *this;
+	unsigned long g1;
+	int rc;
 
 	mutex_lock(&vgpu->vdev.cache_lock);
 	this  = __gvt_cache_find(vgpu, gfn);
 	if (!this) {
 		mutex_unlock(&vgpu->vdev.cache_lock);
-		return false;
+		return;
 	}
+
+	g1 = gfn;
 	gvt_dma_unmap_iova(vgpu, this->iova);
-	/* remove this from rb tree */
-	rb_erase(&this->node, &vgpu->vdev.cache);
+	rc = vfio_unpin_pages(dev, &g1, 1);
+	WARN_ON(rc != 1);
+	__gvt_cache_remove_entry(vgpu, this);
 	mutex_unlock(&vgpu->vdev.cache_lock);
-
-	/* put this to the unpin_list */
-	spin_lock(&vgpu->vdev.unpin_lock);
-	list_move_tail(&this->list, &vgpu->vdev.unpin_list);
-	spin_unlock(&vgpu->vdev.unpin_lock);
-
-	return true;
 }
 
 static void gvt_cache_init(struct intel_vgpu *vgpu)
@@ -485,9 +457,6 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 	}
 
 	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
-	INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
-	spin_lock_init(&vgpu->vdev.unpin_lock);
-	INIT_LIST_HEAD(&vgpu->vdev.unpin_list);
 
 	vgpu->vdev.mdev = mdev;
 	mdev_set_drvdata(mdev, vgpu);
@@ -517,7 +486,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 	struct intel_vgpu *vgpu = container_of(nb,
 					struct intel_vgpu,
 					vdev.iommu_notifier);
-	bool sched_unmap = false;
 
 	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 		struct vfio_iommu_type1_dma_unmap *unmap = data;
@@ -527,10 +495,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 		end_gfn = gfn + unmap->size / PAGE_SIZE;
 
 		while (gfn < end_gfn)
-			sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);
-
-		if (sched_unmap)
-			schedule_work(&vgpu->vdev.unpin_work);
+			gvt_cache_remove(vgpu, gfn++);
 	}
 
 	return NOTIFY_OK;
-- 
GitLab


From 4cc74389a551dc95fce72d58c11e55a93b6ecd19 Mon Sep 17 00:00:00 2001
From: Weinan Li <weinan.z.li@intel.com>
Date: Mon, 19 Jun 2017 08:49:17 +0800
Subject: [PATCH 1139/1958] drm/i915/gvt: remove scheduler_mutex in per-engine
 workload_thread

For the vGPU workloads, now GVT-g use per vGPU scheduler, the per-ring
work_thread only pick workload belongs to the current vGPU. And with time
slice based scheduler, it waits all the engines become idle before do vGPU
switch. So we can run free dispatch in per-ring work_thread, different ring
running in different 'vGPU' won't happen.

For the workloads between vGPU and Host, this scheduler_mutex can't block
host to dispatch workload into other ring engines.

Here remove this mutex since it impacts the performance when applications
use more than 1 ring engines in 1 vgpu.

ring0 running in vGPU1, ring1 running in Host. Will happen.
ring0 running in vGPU1, ring1 running in vGPU2. Won't happen.

Signed-off-by: Weinan Li <weinan.z.li@intel.com>
Signed-off-by: Ping Gao <ping.a.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 488fdea348a97..5aeba13a5de4d 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -464,8 +464,6 @@ struct workload_thread_param {
 	int ring_id;
 };
 
-static DEFINE_MUTEX(scheduler_mutex);
-
 static int workload_thread(void *priv)
 {
 	struct workload_thread_param *p = (struct workload_thread_param *)priv;
@@ -497,8 +495,6 @@ static int workload_thread(void *priv)
 		if (!workload)
 			break;
 
-		mutex_lock(&scheduler_mutex);
-
 		gvt_dbg_sched("ring id %d next workload %p vgpu %d\n",
 				workload->ring_id, workload,
 				workload->vgpu->id);
@@ -537,9 +533,6 @@ static int workload_thread(void *priv)
 					FORCEWAKE_ALL);
 
 		intel_runtime_pm_put(gvt->dev_priv);
-
-		mutex_unlock(&scheduler_mutex);
-
 	}
 	return 0;
 }
-- 
GitLab


From 0cf5ec41839d82ee7f8fbb47f137b7afc562b9f1 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Fri, 23 Jun 2017 13:01:11 +0800
Subject: [PATCH 1140/1958] drm/i915/gvt: Use fence error from GVT request for
 workload status

The req->fence.error will be set if this request caused GPU hang so
we can use this value to workload->status to indicate whether this
GVT request caused any problem. If it caused GPU hang, we shouldn't
trigger any context switch back to the guest.

v2:
- only take -EIO from fence->error. (Zhenyu)

Fixes: 8f1117abb408 (drm/i915/gvt: handle workload lifecycle properly)
Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 5aeba13a5de4d..4f7057d62d88b 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -174,15 +174,6 @@ static int shadow_context_status_change(struct notifier_block *nb,
 		atomic_set(&workload->shadow_ctx_active, 1);
 		break;
 	case INTEL_CONTEXT_SCHEDULE_OUT:
-		/* If the status is -EINPROGRESS means this workload
-		 * doesn't meet any issue during dispatching so when
-		 * get the SCHEDULE_OUT set the status to be zero for
-		 * good. If the status is NOT -EINPROGRESS means there
-		 * is something wrong happened during dispatching and
-		 * the status should not be set to zero
-		 */
-		if (workload->status == -EINPROGRESS)
-			workload->status = 0;
 		atomic_set(&workload->shadow_ctx_active, 0);
 		break;
 	default:
@@ -427,6 +418,18 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 		wait_event(workload->shadow_ctx_status_wq,
 			   !atomic_read(&workload->shadow_ctx_active));
 
+		/* If this request caused GPU hang, req->fence.error will
+		 * be set to -EIO. Use -EIO to set workload status so
+		 * that when this request caused GPU hang, didn't trigger
+		 * context switch interrupt to guest.
+		 */
+		if (likely(workload->status == -EINPROGRESS)) {
+			if (workload->req->fence.error == -EIO)
+				workload->status = -EIO;
+			else
+				workload->status = 0;
+		}
+
 		i915_gem_request_put(fetch_and_zero(&workload->req));
 
 		if (!workload->status && !vgpu->resetting) {
-- 
GitLab


From 65553b12e8fd3730b07e486c15dd6574563ddc58 Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Tue, 11 Jul 2017 15:15:47 +0800
Subject: [PATCH 1141/1958] ALSA: hda/realtek - New codec device ID for ALC1220

Codec ID is 0x1168.
It is another ALC1220 codec.

Signed-off-by: Kailang Yang <kailang@realtek.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index e2116d2ea529c..45d58fc1df393 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -379,6 +379,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 		break;
 	case 0x10ec0899:
 	case 0x10ec0900:
+	case 0x10ec1168:
 	case 0x10ec1220:
 		alc_update_coef_idx(codec, 0x7, 1<<1, 0);
 		break;
@@ -7815,6 +7816,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
 	HDA_CODEC_ENTRY(0x10ec0892, "ALC892", patch_alc662),
 	HDA_CODEC_ENTRY(0x10ec0899, "ALC898", patch_alc882),
 	HDA_CODEC_ENTRY(0x10ec0900, "ALC1150", patch_alc882),
+	HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882),
 	HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882),
 	{} /* terminator */
 };
-- 
GitLab


From 095f6d76221dd4b99d004e4c826ac4382c6ce5e3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 5 Jul 2017 10:41:05 +0200
Subject: [PATCH 1142/1958] MAINTAINERS: Update the PTRACE entry

Document the status quo: Roland has been busy with other projects for years,
so list Oleg as the de-facto maintainer.

Also update the file patterns.

Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roland McGrath <roland@hack.frob.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 MAINTAINERS | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d357695ee4fe0..cbe90323c35aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10355,7 +10355,6 @@ F:	drivers/ptp/*
 F:	include/linux/ptp_cl*
 
 PTRACE SUPPORT
-M:	Roland McGrath <roland@hack.frob.com>
 M:	Oleg Nesterov <oleg@redhat.com>
 S:	Maintained
 F:	include/asm-generic/syscall.h
@@ -10363,7 +10362,12 @@ F:	include/linux/ptrace.h
 F:	include/linux/regset.h
 F:	include/linux/tracehook.h
 F:	include/uapi/linux/ptrace.h
+F:	include/uapi/linux/ptrace.h
+F:	include/asm-generic/ptrace.h
 F:	kernel/ptrace.c
+F:	arch/*/ptrace*.c
+F:	arch/*/*/ptrace*.c
+F:	arch/*/include/asm/ptrace*.h
 
 PULSE8-CEC DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
-- 
GitLab


From 6a8a75f3235724c5941a33e287b2f98966ad14c5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 11 Jul 2017 10:56:54 +0200
Subject: [PATCH 1143/1958] Revert "perf/core: Drop kernel samples even though
 :u is specified"

This reverts commit cc1582c231ea041fbc68861dfaf957eaf902b829.

This commit introduced a regression that broke rr-project, which uses sampling
events to receive a signal on overflow (but does not care about the contents
of the sample). These signals are critical to the correct operation of rr.

There's been some back and forth about how to fix it - but to not keep
applications in limbo queue up a revert.

Reported-by: Kyle Huey <me@kylehuey.com>
Acked-by: Kyle Huey <me@kylehuey.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170628105600.GC5981@leverpostej
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4d2c32f984824..9747e422ab204 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7308,21 +7308,6 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
-static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
-{
-	/*
-	 * Due to interrupt latency (AKA "skid"), we may enter the
-	 * kernel before taking an overflow, even if the PMU is only
-	 * counting user events.
-	 * To avoid leaking information to userspace, we must always
-	 * reject kernel samples when exclude_kernel is set.
-	 */
-	if (event->attr.exclude_kernel && !user_mode(regs))
-		return false;
-
-	return true;
-}
-
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7343,12 +7328,6 @@ static int __perf_event_overflow(struct perf_event *event,
 
 	ret = __perf_event_account_interrupt(event, throttle);
 
-	/*
-	 * For security, drop the skid kernel samples if necessary.
-	 */
-	if (!sample_is_allowed(event, regs))
-		return ret;
-
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
-- 
GitLab


From 50740024bc393b608f7e391ac35e70f33938dd24 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Mon, 26 Jun 2017 10:33:49 +0200
Subject: [PATCH 1144/1958] drm/i915: Make DP-MST connector info work

Commit 9a148a96fc3a ("drm/i915/debugfs: add dp mst info") adds support
for DP-MST to intel_connector_info, but forgot to remove the early
return for DP-MST.

Remove it, and print out MST connectors directly.

Fixes: 9a148a96fc3a ("drm/i915/debugfs: add dp mst info")
Cc: <stable@vger.kernel.org> # v4.11+
Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Cc: Libin Yang <libin.yang@intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170626083349.24389-1-maarten.lankhorst@linux.intel.com
Reviewed-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
(cherry picked from commit 77d1f615c78a73a04254fa2bff07ee9fa27145d9)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4577b0af6886f..23cd865a3da66 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3083,7 +3083,7 @@ static void intel_connector_info(struct seq_file *m,
 			   connector->display_info.cea_rev);
 	}
 
-	if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST)
+	if (!intel_encoder)
 		return;
 
 	switch (connector->connector_type) {
-- 
GitLab


From a66ca4146f3f736ff968c099926ccb1c1761b1a4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:24:56 +0100
Subject: [PATCH 1145/1958] spufs: Implement show_options

Implement the show_options superblock op for spufs as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeremy Kerr <jk@ozlabs.org>
cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spufs/inode.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index d8af9bc0489f7..9558d725a99b5 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -605,6 +605,24 @@ static const match_table_t spufs_tokens = {
 	{ Opt_err,    NULL  },
 };
 
+static int spufs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct spufs_sb_info *sbi = spufs_get_sb_info(root->d_sb);
+	struct inode *inode = root->d_inode;
+
+	if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, inode->i_uid));
+	if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, inode->i_gid));
+	if ((inode->i_mode & S_IALLUGO) != 0775)
+		seq_printf(m, ",mode=%o", inode->i_mode);
+	if (sbi->debug)
+		seq_puts(m, ",debug");
+	return 0;
+}
+
 static int
 spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
 {
@@ -724,11 +742,9 @@ spufs_fill_super(struct super_block *sb, void *data, int silent)
 		.destroy_inode = spufs_destroy_inode,
 		.statfs = simple_statfs,
 		.evict_inode = spufs_evict_inode,
-		.show_options = generic_show_options,
+		.show_options = spufs_show_options,
 	};
 
-	save_mount_options(sb, data);
-
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
-- 
GitLab


From 3ab7947ac3b5cd625c186bb608678bffd881e472 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:03 +0100
Subject: [PATCH 1146/1958] befs: Implement show_options

Implement the show_options superblock op for befs as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Luis de Bethencourt <luisbg@osg.samsung.com>
cc: Salah Triki <salah.triki@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/befs/linuxvfs.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 63e7c4760bfb4..4a4a5a3661583 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/cred.h>
 #include <linux/exportfs.h>
+#include <linux/seq_file.h>
 
 #include "befs.h"
 #include "btree.h"
@@ -53,6 +54,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
 static void befs_put_super(struct super_block *);
 static int befs_remount(struct super_block *, int *, char *);
 static int befs_statfs(struct dentry *, struct kstatfs *);
+static int befs_show_options(struct seq_file *, struct dentry *);
 static int parse_options(char *, struct befs_mount_options *);
 static struct dentry *befs_fh_to_dentry(struct super_block *sb,
 				struct fid *fid, int fh_len, int fh_type);
@@ -66,7 +68,7 @@ static const struct super_operations befs_sops = {
 	.put_super	= befs_put_super,	/* uninit super */
 	.statfs		= befs_statfs,	/* statfs */
 	.remount_fs	= befs_remount,
-	.show_options	= generic_show_options,
+	.show_options	= befs_show_options,
 };
 
 /* slab cache for befs_inode_info objects */
@@ -771,6 +773,24 @@ parse_options(char *options, struct befs_mount_options *opts)
 	return 1;
 }
 
+static int befs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct befs_sb_info *befs_sb = BEFS_SB(root->d_sb);
+	struct befs_mount_options *opts = &befs_sb->mount_opts;
+
+	if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, opts->uid));
+	if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, opts->gid));
+	if (opts->iocharset)
+		seq_printf(m, ",charset=%s", opts->iocharset);
+	if (opts->debug)
+		seq_puts(m, ",debug");
+	return 0;
+}
+
 /* This function has the responsibiltiy of getting the
  * filesystem ready for unmounting.
  * Basically, we free everything that we allocated in
@@ -804,8 +824,6 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	const off_t x86_sb_off = 512;
 	int blocksize;
 
-	save_mount_options(sb, data);
-
 	sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL);
 	if (sb->s_fs_info == NULL)
 		goto unacquire_none;
-- 
GitLab


From 26a7655e6a55768a082336dac8a4563344e890a2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:16 +0100
Subject: [PATCH 1147/1958] affs: Implement show_options

Implement the show_options superblock op for affs as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/fs/affs/super.c b/fs/affs/super.c
index c2c27a8f128ef..7bf47a41cb4f7 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -20,9 +20,11 @@
 #include <linux/slab.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/seq_file.h>
 #include "affs.h"
 
 static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
+static int affs_show_options(struct seq_file *m, struct dentry *root);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
@@ -159,7 +161,7 @@ static const struct super_operations affs_sops = {
 	.sync_fs	= affs_sync_fs,
 	.statfs		= affs_statfs,
 	.remount_fs	= affs_remount,
-	.show_options	= generic_show_options,
+	.show_options	= affs_show_options,
 };
 
 enum {
@@ -293,6 +295,40 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
 	return 1;
 }
 
+static int affs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct super_block *sb = root->d_sb;
+	struct affs_sb_info *sbi = AFFS_SB(sb);
+
+	if (sb->s_blocksize)
+		seq_printf(m, ",bs=%lu", sb->s_blocksize);
+	if (affs_test_opt(sbi->s_flags, SF_SETMODE))
+		seq_printf(m, ",mode=%o", sbi->s_mode);
+	if (affs_test_opt(sbi->s_flags, SF_MUFS))
+		seq_puts(m, ",mufs");
+	if (affs_test_opt(sbi->s_flags, SF_NO_TRUNCATE))
+		seq_puts(m, ",nofilenametruncate");
+	if (affs_test_opt(sbi->s_flags, SF_PREFIX))
+		seq_printf(m, ",prefix=%s", sbi->s_prefix);
+	if (affs_test_opt(sbi->s_flags, SF_IMMUTABLE))
+		seq_puts(m, ",protect");
+	if (sbi->s_reserved != 2)
+		seq_printf(m, ",reserved=%u", sbi->s_reserved);
+	if (sbi->s_root_block != (sbi->s_reserved + sbi->s_partition_size - 1) / 2)
+		seq_printf(m, ",root=%u", sbi->s_root_block);
+	if (affs_test_opt(sbi->s_flags, SF_SETGID))
+		seq_printf(m, ",setgid=%u",
+			   from_kgid_munged(&init_user_ns, sbi->s_gid));
+	if (affs_test_opt(sbi->s_flags, SF_SETUID))
+		seq_printf(m, ",setuid=%u",
+			   from_kuid_munged(&init_user_ns, sbi->s_uid));
+	if (affs_test_opt(sbi->s_flags, SF_VERBOSE))
+		seq_puts(m, ",verbose");
+	if (sbi->s_volume[0])
+		seq_printf(m, ",volume=%s", sbi->s_volume);
+	return 0;
+}
+
 /* This function definitely needs to be split up. Some fine day I'll
  * hopefully have the guts to do so. Until then: sorry for the mess.
  */
@@ -316,8 +352,6 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 	u8			 sig[4];
 	int			 ret;
 
-	save_mount_options(sb, data);
-
 	pr_debug("read_super(%s)\n", data ? (const char *)data : "no options");
 
 	sb->s_magic             = AFFS_SUPER_MAGIC;
@@ -548,8 +582,6 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	}
 
 	flush_delayed_work(&sbi->sb_work);
-	if (new_opts)
-		replace_mount_options(sb, new_opts);
 
 	sbi->s_flags = mount_flags;
 	sbi->s_mode  = mode;
-- 
GitLab


From 677018a6ce620c8ca85abae1a07a41d66247d420 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:23 +0100
Subject: [PATCH 1148/1958] afs: Implement show_options

Implement the show_options superblock op for afs as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Also implement the show_devname op to display the correct device name and thus
avoid the need to display the cell= and volume= options.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/super.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/fs/afs/super.c b/fs/afs/super.c
index c79633e5cfd80..35d7e550b29cb 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -37,6 +37,8 @@ static void afs_kill_super(struct super_block *sb);
 static struct inode *afs_alloc_inode(struct super_block *sb);
 static void afs_destroy_inode(struct inode *inode);
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
+static int afs_show_devname(struct seq_file *m, struct dentry *root);
+static int afs_show_options(struct seq_file *m, struct dentry *root);
 
 struct file_system_type afs_fs_type = {
 	.owner		= THIS_MODULE,
@@ -53,7 +55,8 @@ static const struct super_operations afs_super_ops = {
 	.drop_inode	= afs_drop_inode,
 	.destroy_inode	= afs_destroy_inode,
 	.evict_inode	= afs_evict_inode,
-	.show_options	= generic_show_options,
+	.show_devname	= afs_show_devname,
+	.show_options	= afs_show_options,
 };
 
 static struct kmem_cache *afs_inode_cachep;
@@ -135,6 +138,45 @@ void __exit afs_fs_exit(void)
 	_leave("");
 }
 
+/*
+ * Display the mount device name in /proc/mounts.
+ */
+static int afs_show_devname(struct seq_file *m, struct dentry *root)
+{
+	struct afs_super_info *as = root->d_sb->s_fs_info;
+	struct afs_volume *volume = as->volume;
+	struct afs_cell *cell = volume->cell;
+	const char *suf = "";
+	char pref = '%';
+
+	switch (volume->type) {
+	case AFSVL_RWVOL:
+		break;
+	case AFSVL_ROVOL:
+		pref = '#';
+		if (volume->type_force)
+			suf = ".readonly";
+		break;
+	case AFSVL_BACKVOL:
+		pref = '#';
+		suf = ".backup";
+		break;
+	}
+
+	seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->vlocation->vldb.name, suf);
+	return 0;
+}
+
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int afs_show_options(struct seq_file *m, struct dentry *root)
+{
+	if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
+		seq_puts(m, "autocell");
+	return 0;
+}
+
 /*
  * parse the mount options
  * - this function has been shamelessly adapted from the ext3 fs which
@@ -426,7 +468,6 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 			deactivate_locked_super(sb);
 			goto error;
 		}
-		save_mount_options(sb, new_opts);
 		sb->s_flags |= MS_ACTIVE;
 	} else {
 		_debug("reuse");
-- 
GitLab


From 86a1da6d30ad727c2a9cc5d6a51bff6d830036b5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:30 +0100
Subject: [PATCH 1149/1958] isofs: Implement show_options

Implement the show_options superblock op for omfs as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/isofs/inode.c | 51 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/isofs/isofs.h |  3 +++
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 020ba09361464..f80ee600d1bcd 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -23,6 +23,7 @@
 #include <linux/parser.h>
 #include <linux/mpage.h>
 #include <linux/user_namespace.h>
+#include <linux/seq_file.h>
 
 #include "isofs.h"
 #include "zisofs.h"
@@ -57,6 +58,7 @@ static void isofs_put_super(struct super_block *sb)
 
 static int isofs_read_inode(struct inode *, int relocated);
 static int isofs_statfs (struct dentry *, struct kstatfs *);
+static int isofs_show_options(struct seq_file *, struct dentry *);
 
 static struct kmem_cache *isofs_inode_cachep;
 
@@ -123,7 +125,7 @@ static const struct super_operations isofs_sops = {
 	.put_super	= isofs_put_super,
 	.statfs		= isofs_statfs,
 	.remount_fs	= isofs_remount,
-	.show_options	= generic_show_options,
+	.show_options	= isofs_show_options,
 };
 
 
@@ -472,6 +474,48 @@ static int parse_options(char *options, struct iso9660_options *popt)
 	return 1;
 }
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int isofs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct isofs_sb_info *sbi = ISOFS_SB(root->d_sb);
+
+	if (!sbi->s_rock)		seq_puts(m, ",norock");
+	else if (!sbi->s_joliet_level)	seq_puts(m, ",nojoliet");
+	if (sbi->s_cruft)		seq_puts(m, ",cruft");
+	if (sbi->s_hide)		seq_puts(m, ",hide");
+	if (sbi->s_nocompress)		seq_puts(m, ",nocompress");
+	if (sbi->s_overriderockperm)	seq_puts(m, ",overriderockperm");
+	if (sbi->s_showassoc)		seq_puts(m, ",showassoc");
+	if (sbi->s_utf8)		seq_puts(m, ",utf8");
+
+	if (sbi->s_check)		seq_printf(m, ",check=%c", sbi->s_check);
+	if (sbi->s_mapping)		seq_printf(m, ",map=%c", sbi->s_mapping);
+	if (sbi->s_session != -1)	seq_printf(m, ",session=%u", sbi->s_session);
+	if (sbi->s_sbsector != -1)	seq_printf(m, ",sbsector=%u", sbi->s_sbsector);
+
+	if (root->d_sb->s_blocksize != 1024)
+		seq_printf(m, ",blocksize=%lu", root->d_sb->s_blocksize);
+
+	if (sbi->s_uid_set)
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, sbi->s_uid));
+	if (sbi->s_gid_set)
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, sbi->s_gid));
+
+	if (sbi->s_dmode != ISOFS_INVALID_MODE)
+		seq_printf(m, ",dmode=%o", sbi->s_dmode);
+	if (sbi->s_fmode != ISOFS_INVALID_MODE)
+		seq_printf(m, ",fmode=%o", sbi->s_fmode);
+
+	if (sbi->s_nls_iocharset &&
+	    strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
+		seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+	return 0;
+}
+
 /*
  * look if the driver can tell the multi session redirection value
  *
@@ -583,8 +627,6 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	int table, error = -EINVAL;
 	unsigned int vol_desc_start;
 
-	save_mount_options(s, data);
-
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
 		return -ENOMEM;
@@ -605,6 +647,8 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	opt.blocksize = sb_min_blocksize(s, opt.blocksize);
 
 	sbi->s_high_sierra = 0; /* default is iso9660 */
+	sbi->s_session = opt.session;
+	sbi->s_sbsector = opt.sbsector;
 
 	vol_desc_start = (opt.sbsector != -1) ?
 		opt.sbsector : isofs_get_last_session(s,opt.session);
@@ -911,6 +955,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 		table += 2;
 	if (opt.check == 'r')
 		table++;
+	sbi->s_check = opt.check;
 
 	if (table)
 		s->s_d_op = &isofs_dentry_ops[table - 1];
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 0ac4c1f73fbd6..133a456b04259 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -36,8 +36,11 @@ struct isofs_sb_info {
 	unsigned long s_max_size;
 	
 	int           s_rock_offset; /* offset of SUSP fields within SU area */
+	s32           s_sbsector;
 	unsigned char s_joliet_level;
 	unsigned char s_mapping;
+	unsigned char s_check;
+	unsigned char s_session;
 	unsigned int  s_high_sierra:1;
 	unsigned int  s_rock:2;
 	unsigned int  s_utf8:1;
-- 
GitLab


From c4fac9100456995c10b65c13be84554258ed7fc8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:37 +0100
Subject: [PATCH 1150/1958] 9p: Implement show_options

Implement the show_options superblock op for 9p as part of a bid to get
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Ron Minnich <rminnich@sandia.gov>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: v9fs-developer@lists.sourceforge.net
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/v9fs.c               | 61 ++++++++++++++++++++++++++++++++++++++
 fs/9p/v9fs.h               |  3 ++
 fs/9p/vfs_super.c          |  6 ++--
 include/net/9p/client.h    | 13 ++++++++
 include/net/9p/transport.h |  1 +
 net/9p/client.c            | 25 ++++++++++++++++
 net/9p/trans_fd.c          | 31 +++++++++++++++++--
 net/9p/trans_rdma.c        | 31 +++++++++++++++++--
 8 files changed, 161 insertions(+), 10 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c202930086edb..8fb89ddc6cc7e 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -33,6 +33,7 @@
 #include <linux/parser.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/seq_file.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
@@ -82,6 +83,13 @@ static const match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
+static const char *const v9fs_cache_modes[nr__p9_cache_modes] = {
+	[CACHE_NONE]	= "none",
+	[CACHE_MMAP]	= "mmap",
+	[CACHE_LOOSE]	= "loose",
+	[CACHE_FSCACHE]	= "fscache",
+};
+
 /* Interpret mount options for cache mode */
 static int get_cache_mode(char *s)
 {
@@ -104,6 +112,58 @@ static int get_cache_mode(char *s)
 	return version;
 }
 
+/*
+ * Display the mount options in /proc/mounts.
+ */
+int v9fs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct v9fs_session_info *v9ses = root->d_sb->s_fs_info;
+
+	if (v9ses->debug)
+		seq_printf(m, ",debug=%x", v9ses->debug);
+	if (!uid_eq(v9ses->dfltuid, V9FS_DEFUID))
+		seq_printf(m, ",dfltuid=%u",
+			   from_kuid_munged(&init_user_ns, v9ses->dfltuid));
+	if (!gid_eq(v9ses->dfltgid, V9FS_DEFGID))
+		seq_printf(m, ",dfltgid=%u",
+			   from_kgid_munged(&init_user_ns, v9ses->dfltgid));
+	if (v9ses->afid != ~0)
+		seq_printf(m, ",afid=%u", v9ses->afid);
+	if (strcmp(v9ses->uname, V9FS_DEFUSER) != 0)
+		seq_printf(m, ",uname=%s", v9ses->uname);
+	if (strcmp(v9ses->aname, V9FS_DEFANAME) != 0)
+		seq_printf(m, ",aname=%s", v9ses->aname);
+	if (v9ses->nodev)
+		seq_puts(m, ",nodevmap");
+	if (v9ses->cache)
+		seq_printf(m, ",%s", v9fs_cache_modes[v9ses->cache]);
+#ifdef CONFIG_9P_FSCACHE
+	if (v9ses->cachetag && v9ses->cache == CACHE_FSCACHE)
+		seq_printf(m, ",cachetag=%s", v9ses->cachetag);
+#endif
+
+	switch (v9ses->flags & V9FS_ACCESS_MASK) {
+	case V9FS_ACCESS_USER:
+		seq_puts(m, ",access=user");
+		break;
+	case V9FS_ACCESS_ANY:
+		seq_puts(m, ",access=any");
+		break;
+	case V9FS_ACCESS_CLIENT:
+		seq_puts(m, ",access=client");
+		break;
+	case V9FS_ACCESS_SINGLE:
+		seq_printf(m, ",access=%u",
+			   from_kuid_munged(&init_user_ns, v9ses->uid));
+		break;
+	}
+
+	if (v9ses->flags & V9FS_POSIX_ACL)
+		seq_puts(m, ",posixacl");
+
+	return p9_show_client_options(m, v9ses->clnt);
+}
+
 /**
  * v9fs_parse_options - parse mount options into session structure
  * @v9ses: existing v9fs session information
@@ -230,6 +290,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 			break;
 		case Opt_cachetag:
 #ifdef CONFIG_9P_FSCACHE
+			kfree(v9ses->cachetag);
 			v9ses->cachetag = match_strdup(&args[0]);
 #endif
 			break;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 76eaf49abd3ae..982e017acadbc 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -67,6 +67,7 @@ enum p9_cache_modes {
 	CACHE_MMAP,
 	CACHE_LOOSE,
 	CACHE_FSCACHE,
+	nr__p9_cache_modes
 };
 
 /**
@@ -137,6 +138,8 @@ static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
 	return container_of(inode, struct v9fs_inode, vfs_inode);
 }
 
+extern int v9fs_show_options(struct seq_file *m, struct dentry *root);
+
 struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
 									char *);
 extern void v9fs_session_close(struct v9fs_session_info *v9ses);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index a0965fb587a5f..8b75463cb2116 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -33,7 +33,6 @@
 #include <linux/string.h>
 #include <linux/inet.h>
 #include <linux/pagemap.h>
-#include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
@@ -104,7 +103,6 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 		sb->s_flags |= MS_POSIXACL;
 #endif
 
-	save_mount_options(sb, data);
 	return 0;
 }
 
@@ -349,7 +347,7 @@ static const struct super_operations v9fs_super_ops = {
 	.destroy_inode = v9fs_destroy_inode,
 	.statfs = simple_statfs,
 	.evict_inode = v9fs_evict_inode,
-	.show_options = generic_show_options,
+	.show_options = v9fs_show_options,
 	.umount_begin = v9fs_umount_begin,
 	.write_inode = v9fs_write_inode,
 };
@@ -360,7 +358,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
 	.statfs = v9fs_statfs,
 	.drop_inode = v9fs_drop_inode,
 	.evict_inode = v9fs_evict_inode,
-	.show_options = generic_show_options,
+	.show_options = v9fs_show_options,
 	.umount_begin = v9fs_umount_begin,
 	.write_inode = v9fs_write_inode_dotl,
 };
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index b582339ccef5c..7af9d769b97d8 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -157,6 +157,18 @@ struct p9_client {
 	enum p9_trans_status status;
 	void *trans;
 
+	union {
+		struct {
+			int rfd;
+			int wfd;
+		} fd;
+		struct {
+			u16 port;
+			bool privport;
+
+		} tcp;
+	} trans_opts;
+
 	struct p9_idpool *fidpool;
 	struct list_head fidlist;
 
@@ -213,6 +225,7 @@ struct p9_dirent {
 
 struct iov_iter;
 
+int p9_show_client_options(struct seq_file *m, struct p9_client *clnt);
 int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
 int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid,
 		     const char *name);
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 5122b5e40f78f..1625fb842ac47 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -62,6 +62,7 @@ struct p9_trans_module {
 	int (*cancelled)(struct p9_client *, struct p9_req_t *req);
 	int (*zc_request)(struct p9_client *, struct p9_req_t *,
 			  struct iov_iter *, struct iov_iter *, int , int, int);
+	int (*show_options)(struct seq_file *, struct p9_client *);
 };
 
 void v9fs_register_trans(struct p9_trans_module *m);
diff --git a/net/9p/client.c b/net/9p/client.c
index 1218fb3b52dad..4674235b0d9b1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -37,6 +37,7 @@
 #include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
+#include <linux/seq_file.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include "protocol.h"
@@ -77,6 +78,30 @@ inline int p9_is_proto_dotu(struct p9_client *clnt)
 }
 EXPORT_SYMBOL(p9_is_proto_dotu);
 
+int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
+{
+	if (clnt->msize != 8192)
+		seq_printf(m, ",msize=%u", clnt->msize);
+	seq_printf(m, "trans=%s", clnt->trans_mod->name);
+
+	switch (clnt->proto_version) {
+	case p9_proto_legacy:
+		seq_puts(m, ",noextend");
+		break;
+	case p9_proto_2000u:
+		seq_puts(m, ",version=9p2000.u");
+		break;
+	case p9_proto_2000L:
+		/* Default */
+		break;
+	}
+
+	if (clnt->trans_mod->show_options)
+		return clnt->trans_mod->show_options(m, clnt);
+	return 0;
+}
+EXPORT_SYMBOL(p9_show_client_options);
+
 /*
  * Some error codes are taken directly from the server replies,
  * make sure they are valid.
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 7bc2208b6cc4c..f2e0eaf58018e 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -41,6 +41,7 @@
 #include <linux/file.h>
 #include <linux/parser.h>
 #include <linux/slab.h>
+#include <linux/seq_file.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
@@ -51,6 +52,9 @@
 #define MAX_SOCK_BUF (64*1024)
 #define MAXPOLLWADDR	2
 
+static struct p9_trans_module p9_tcp_trans;
+static struct p9_trans_module p9_fd_trans;
+
 /**
  * struct p9_fd_opts - per-transport options
  * @rfd: file descriptor for reading (trans=fd)
@@ -63,7 +67,7 @@ struct p9_fd_opts {
 	int rfd;
 	int wfd;
 	u16 port;
-	int privport;
+	bool privport;
 };
 
 /*
@@ -720,6 +724,20 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
 	return 0;
 }
 
+static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt)
+{
+	if (clnt->trans_mod == &p9_tcp_trans) {
+		if (clnt->trans_opts.tcp.port != P9_PORT)
+			seq_printf(m, "port=%u", clnt->trans_opts.tcp.port);
+	} else if (clnt->trans_mod == &p9_fd_trans) {
+		if (clnt->trans_opts.fd.rfd != ~0)
+			seq_printf(m, "rfd=%u", clnt->trans_opts.fd.rfd);
+		if (clnt->trans_opts.fd.wfd != ~0)
+			seq_printf(m, "wfd=%u", clnt->trans_opts.fd.wfd);
+	}
+	return 0;
+}
+
 /**
  * parse_opts - parse mount options into p9_fd_opts structure
  * @params: options string passed from mount
@@ -738,7 +756,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 	opts->port = P9_PORT;
 	opts->rfd = ~0;
 	opts->wfd = ~0;
-	opts->privport = 0;
+	opts->privport = false;
 
 	if (!params)
 		return 0;
@@ -776,7 +794,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 			opts->wfd = option;
 			break;
 		case Opt_privport:
-			opts->privport = 1;
+			opts->privport = true;
 			break;
 		default:
 			continue;
@@ -942,6 +960,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 
 	csocket = NULL;
 
+	client->trans_opts.tcp.port = opts.port;
+	client->trans_opts.tcp.privport = opts.privport;
 	sin_server.sin_family = AF_INET;
 	sin_server.sin_addr.s_addr = in_aton(addr);
 	sin_server.sin_port = htons(opts.port);
@@ -1020,6 +1040,8 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
 	struct p9_fd_opts opts;
 
 	parse_opts(args, &opts);
+	client->trans_opts.fd.rfd = opts.rfd;
+	client->trans_opts.fd.wfd = opts.wfd;
 
 	if (opts.rfd == ~0 || opts.wfd == ~0) {
 		pr_err("Insufficient options for proto=fd\n");
@@ -1044,6 +1066,7 @@ static struct p9_trans_module p9_tcp_trans = {
 	.request = p9_fd_request,
 	.cancel = p9_fd_cancel,
 	.cancelled = p9_fd_cancelled,
+	.show_options = p9_fd_show_options,
 	.owner = THIS_MODULE,
 };
 
@@ -1056,6 +1079,7 @@ static struct p9_trans_module p9_unix_trans = {
 	.request = p9_fd_request,
 	.cancel = p9_fd_cancel,
 	.cancelled = p9_fd_cancelled,
+	.show_options = p9_fd_show_options,
 	.owner = THIS_MODULE,
 };
 
@@ -1068,6 +1092,7 @@ static struct p9_trans_module p9_fd_trans = {
 	.request = p9_fd_request,
 	.cancel = p9_fd_cancel,
 	.cancelled = p9_fd_cancelled,
+	.show_options = p9_fd_show_options,
 	.owner = THIS_MODULE,
 };
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 553ed4ecb6a0e..6d8e3031978f3 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -43,6 +43,7 @@
 #include <linux/parser.h>
 #include <linux/semaphore.h>
 #include <linux/slab.h>
+#include <linux/seq_file.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
@@ -70,6 +71,8 @@
  * @dm_mr: DMA Memory Region pointer
  * @lkey: The local access only memory region key
  * @timeout: Number of uSecs to wait for connection management events
+ * @privport: Whether a privileged port may be used
+ * @port: The port to use
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
@@ -95,6 +98,8 @@ struct p9_trans_rdma {
 	struct ib_qp *qp;
 	struct ib_cq *cq;
 	long timeout;
+	bool privport;
+	u16 port;
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
@@ -133,10 +138,10 @@ struct p9_rdma_context {
  */
 struct p9_rdma_opts {
 	short port;
+	bool privport;
 	int sq_depth;
 	int rq_depth;
 	long timeout;
-	int privport;
 };
 
 /*
@@ -159,6 +164,23 @@ static match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
+static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt)
+{
+	struct p9_trans_rdma *rdma = clnt->trans;
+
+	if (rdma->port != P9_PORT)
+		seq_printf(m, ",port=%u", rdma->port);
+	if (rdma->sq_depth != P9_RDMA_SQ_DEPTH)
+		seq_printf(m, ",sq=%u", rdma->sq_depth);
+	if (rdma->rq_depth != P9_RDMA_RQ_DEPTH)
+		seq_printf(m, ",rq=%u", rdma->rq_depth);
+	if (rdma->timeout != P9_RDMA_TIMEOUT)
+		seq_printf(m, ",timeout=%lu", rdma->timeout);
+	if (rdma->privport)
+		seq_puts(m, ",privport");
+	return 0;
+}
+
 /**
  * parse_opts - parse mount options into rdma options structure
  * @params: options string passed from mount
@@ -177,7 +199,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 	opts->sq_depth = P9_RDMA_SQ_DEPTH;
 	opts->rq_depth = P9_RDMA_RQ_DEPTH;
 	opts->timeout = P9_RDMA_TIMEOUT;
-	opts->privport = 0;
+	opts->privport = false;
 
 	if (!params)
 		return 0;
@@ -218,7 +240,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 			opts->timeout = option;
 			break;
 		case Opt_privport:
-			opts->privport = 1;
+			opts->privport = true;
 			break;
 		default:
 			continue;
@@ -560,6 +582,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	if (!rdma)
 		return NULL;
 
+	rdma->port = opts->port;
+	rdma->privport = opts->privport;
 	rdma->sq_depth = opts->sq_depth;
 	rdma->rq_depth = opts->rq_depth;
 	rdma->timeout = opts->timeout;
@@ -733,6 +757,7 @@ static struct p9_trans_module p9_rdma_trans = {
 	.request = rdma_request,
 	.cancel = rdma_cancel,
 	.cancelled = rdma_cancelled,
+	.show_options = p9_rdma_show_options,
 };
 
 /**
-- 
GitLab


From 4dfdb71307675b19a54723a556371dad5e3b0083 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:45 +0100
Subject: [PATCH 1151/1958] orangefs: Implement show_options

Implement the show_options superblock op for orangefs as part of a bid to
rid of s_options and generic_show_options() to make it easier to implement
a context-based mount where the mount options can be passed individually
over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Mike Marshall <hubcap@omnibond.com>
cc: pvfs2-developers@beowulf-underground.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/orangefs/super.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 5c7c273e17ec3..5a1bed6c8c6ae 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -35,6 +35,19 @@ static const match_table_t tokens = {
 
 uint64_t orangefs_features;
 
+static int orangefs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(root->d_sb);
+
+	if (root->d_sb->s_flags & MS_POSIXACL)
+		seq_puts(m, ",acl");
+	if (orangefs_sb->flags & ORANGEFS_OPT_INTR)
+		seq_puts(m, ",intr");
+	if (orangefs_sb->flags & ORANGEFS_OPT_LOCAL_LOCK)
+		seq_puts(m, ",local_lock");
+	return 0;
+}
+
 static int parse_mount_options(struct super_block *sb, char *options,
 		int silent)
 {
@@ -305,7 +318,7 @@ static const struct super_operations orangefs_s_ops = {
 	.drop_inode = generic_delete_inode,
 	.statfs = orangefs_statfs,
 	.remount_fs = orangefs_remount_fs,
-	.show_options = generic_show_options,
+	.show_options = orangefs_show_options,
 };
 
 static struct dentry *orangefs_fh_to_dentry(struct super_block *sb,
-- 
GitLab


From 1d278a879081ddc40286500e58868aaee47de257 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Jul 2017 16:25:53 +0100
Subject: [PATCH 1152/1958] VFS: Kill off s_options and helpers

Kill off s_options, save/replace_mount_options() and generic_show_options()
as all filesystems now implement ->show_options() for themselves.  This
should make it easier to implement a context-based mount where the mount
options can be passed individually over a file descriptor.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/vfs.txt |  6 ----
 fs/efivarfs/super.c               |  1 -
 fs/namespace.c                    | 59 -------------------------------
 fs/super.c                        |  1 -
 include/linux/fs.h                |  9 -----
 5 files changed, 76 deletions(-)

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f42b90687d406..ee56a7d10da90 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1187,12 +1187,6 @@ The underlying reason for the above rules is to make sure, that a
 mount can be accurately replicated (e.g. umounting and mounting again)
 based on the information found in /proc/mounts.
 
-A simple method of saving options at mount/remount time and showing
-them is provided with the save_mount_options() and
-generic_show_options() helper functions.  Please note, that using
-these may have drawbacks.  For more info see header comments for these
-functions in fs/namespace.c.
-
 Resources
 =========
 
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index d7a7c53803c1b..5b68e4294faa5 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -29,7 +29,6 @@ static const struct super_operations efivarfs_ops = {
 	.statfs = simple_statfs,
 	.drop_inode = generic_delete_inode,
 	.evict_inode = efivarfs_evict_inode,
-	.show_options = generic_show_options,
 };
 
 static struct super_block *efivarfs_sb;
diff --git a/fs/namespace.c b/fs/namespace.c
index 544ab84642ebd..0e1fdb3061336 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1237,65 +1237,6 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
 	return &p->mnt;
 }
 
-static inline void mangle(struct seq_file *m, const char *s)
-{
-	seq_escape(m, s, " \t\n\\");
-}
-
-/*
- * Simple .show_options callback for filesystems which don't want to
- * implement more complex mount option showing.
- *
- * See also save_mount_options().
- */
-int generic_show_options(struct seq_file *m, struct dentry *root)
-{
-	const char *options;
-
-	rcu_read_lock();
-	options = rcu_dereference(root->d_sb->s_options);
-
-	if (options != NULL && options[0]) {
-		seq_putc(m, ',');
-		mangle(m, options);
-	}
-	rcu_read_unlock();
-
-	return 0;
-}
-EXPORT_SYMBOL(generic_show_options);
-
-/*
- * If filesystem uses generic_show_options(), this function should be
- * called from the fill_super() callback.
- *
- * The .remount_fs callback usually needs to be handled in a special
- * way, to make sure, that previous options are not overwritten if the
- * remount fails.
- *
- * Also note, that if the filesystem's .remount_fs function doesn't
- * reset all options to their default value, but changes only newly
- * given options, then the displayed options will not reflect reality
- * any more.
- */
-void save_mount_options(struct super_block *sb, char *options)
-{
-	BUG_ON(sb->s_options);
-	rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
-}
-EXPORT_SYMBOL(save_mount_options);
-
-void replace_mount_options(struct super_block *sb, char *options)
-{
-	char *old = sb->s_options;
-	rcu_assign_pointer(sb->s_options, options);
-	if (old) {
-		synchronize_rcu();
-		kfree(old);
-	}
-}
-EXPORT_SYMBOL(replace_mount_options);
-
 #ifdef CONFIG_PROC_FS
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
diff --git a/fs/super.c b/fs/super.c
index dfb56a9665d85..6bc3352adcf39 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -168,7 +168,6 @@ static void destroy_super(struct super_block *s)
 	WARN_ON(!list_empty(&s->s_mounts));
 	put_user_ns(s->s_user_ns);
 	kfree(s->s_subtype);
-	kfree(s->s_options);
 	call_rcu(&s->rcu, destroy_super_rcu);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bc0c054894b9e..e265b2ea72c63 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1351,11 +1351,6 @@ struct super_block {
 	 */
 	char *s_subtype;
 
-	/*
-	 * Saved mount options for lazy filesystems using
-	 * generic_show_options()
-	 */
-	char __rcu *s_options;
 	const struct dentry_operations *s_d_op; /* default d_op for dentries */
 
 	/*
@@ -3033,10 +3028,6 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr);
 
 extern int file_update_time(struct file *file);
 
-extern int generic_show_options(struct seq_file *m, struct dentry *root);
-extern void save_mount_options(struct super_block *sb, char *options);
-extern void replace_mount_options(struct super_block *sb, char *options);
-
 static inline bool io_is_direct(struct file *filp)
 {
 	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
-- 
GitLab


From 92ecd19a7e1dc1c928707835c28cb65f0740be2d Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Sat, 8 Jul 2017 16:00:09 +0100
Subject: [PATCH 1153/1958] MIPS: MIPS16e2: Report ASE presence in
 /proc/cpuinfo

Only now that both feature determination and unaligned emulation is in
place add reporting to /proc/cpuinfo, so that the presence of "mips16e2"
there not only indicates our recognition of the hardware feature, but
correct unaligned emulation as well.

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16757/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/proc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 4eff2aed73601..bbd83b810e519 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -109,6 +109,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 	seq_printf(m, "ASEs implemented\t:");
 	if (cpu_has_mips16)	seq_printf(m, "%s", " mips16");
+	if (cpu_has_mips16e2)	seq_printf(m, "%s", " mips16e2");
 	if (cpu_has_mdmx)	seq_printf(m, "%s", " mdmx");
 	if (cpu_has_mips3d)	seq_printf(m, "%s", " mips3d");
 	if (cpu_has_smartmips)	seq_printf(m, "%s", " smartmips");
-- 
GitLab


From 17753d16251837125014caa6b49406f52aef8916 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 21 Jun 2017 15:08:39 +0300
Subject: [PATCH 1154/1958] mmc: sdhci-acpi: Workaround conflict with PCI wifi
 on GPD Win handheld

GPDwin uses PCI wifi which conflicts with SDIO's use of
acpi_device_fix_up_power() on child device nodes. Specifically
acpi_device_fix_up_power() causes the wifi module to get turned off.
Identifying GPDwin is problematic, but since SDIO is only used for wifi,
the presence of the PCI wifi card in the expected slot with an ACPI
companion node, is used to indicate that acpi_device_fix_up_power() should
be avoided.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-acpi.c | 70 ++++++++++++++++++++++++++++++++---
 1 file changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index cf66a3db71b81..ac678e9fb19ab 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -45,6 +45,7 @@
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/iosf_mbi.h>
+#include <linux/pci.h>
 #endif
 
 #include "sdhci.h"
@@ -134,6 +135,16 @@ static bool sdhci_acpi_byt(void)
 	return x86_match_cpu(byt);
 }
 
+static bool sdhci_acpi_cht(void)
+{
+	static const struct x86_cpu_id cht[] = {
+		{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
+		{}
+	};
+
+	return x86_match_cpu(cht);
+}
+
 #define BYT_IOSF_SCCEP			0x63
 #define BYT_IOSF_OCP_NETCTRL0		0x1078
 #define BYT_IOSF_OCP_TIMEOUT_BASE	GENMASK(10, 8)
@@ -178,6 +189,45 @@ static bool sdhci_acpi_byt_defer(struct device *dev)
 	return false;
 }
 
+static bool sdhci_acpi_cht_pci_wifi(unsigned int vendor, unsigned int device,
+				    unsigned int slot, unsigned int parent_slot)
+{
+	struct pci_dev *dev, *parent, *from = NULL;
+
+	while (1) {
+		dev = pci_get_device(vendor, device, from);
+		pci_dev_put(from);
+		if (!dev)
+			break;
+		parent = pci_upstream_bridge(dev);
+		if (ACPI_COMPANION(&dev->dev) && PCI_SLOT(dev->devfn) == slot &&
+		    parent && PCI_SLOT(parent->devfn) == parent_slot &&
+		    !pci_upstream_bridge(parent)) {
+			pci_dev_put(dev);
+			return true;
+		}
+		from = dev;
+	}
+
+	return false;
+}
+
+/*
+ * GPDwin uses PCI wifi which conflicts with SDIO's use of
+ * acpi_device_fix_up_power() on child device nodes. Identifying GPDwin is
+ * problematic, but since SDIO is only used for wifi, the presence of the PCI
+ * wifi card in the expected slot with an ACPI companion node, is used to
+ * indicate that acpi_device_fix_up_power() should be avoided.
+ */
+static inline bool sdhci_acpi_no_fixup_child_power(const char *hid,
+						   const char *uid)
+{
+	return sdhci_acpi_cht() &&
+	       !strcmp(hid, "80860F14") &&
+	       !strcmp(uid, "2") &&
+	       sdhci_acpi_cht_pci_wifi(0x14e4, 0x43ec, 0, 28);
+}
+
 #else
 
 static inline void sdhci_acpi_byt_setting(struct device *dev)
@@ -189,6 +239,12 @@ static inline bool sdhci_acpi_byt_defer(struct device *dev)
 	return false;
 }
 
+static inline bool sdhci_acpi_no_fixup_child_power(const char *hid,
+						   const char *uid)
+{
+	return false;
+}
+
 #endif
 
 static int bxt_get_cd(struct mmc_host *mmc)
@@ -389,18 +445,20 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
 	if (acpi_bus_get_device(handle, &device))
 		return -ENODEV;
 
+	hid = acpi_device_hid(device);
+	uid = device->pnp.unique_id;
+
 	/* Power on the SDHCI controller and its children */
 	acpi_device_fix_up_power(device);
-	list_for_each_entry(child, &device->children, node)
-		if (child->status.present && child->status.enabled)
-			acpi_device_fix_up_power(child);
+	if (!sdhci_acpi_no_fixup_child_power(hid, uid)) {
+		list_for_each_entry(child, &device->children, node)
+			if (child->status.present && child->status.enabled)
+				acpi_device_fix_up_power(child);
+	}
 
 	if (sdhci_acpi_byt_defer(dev))
 		return -EPROBE_DEFER;
 
-	hid = acpi_device_hid(device);
-	uid = device->pnp.unique_id;
-
 	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!iomem)
 		return -ENOMEM;
-- 
GitLab


From 65ae8d2621a9fb20b53348311984ea79fee42a55 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@imgtec.com>
Date: Tue, 23 May 2017 13:40:23 +0100
Subject: [PATCH 1155/1958] MIPS16e2: Provide feature overrides for non-MIPS16
 systems

Hardcode the absence of the MIPS16e2 ASE for all the systems that do so
for the MIPS16 ASE already, providing for code to be optimized away.

Signed-off-by: Maciej W. Rozycki <macro@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16097/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h      | 1 +
 arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h     | 1 +
 arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h    | 1 +
 arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h     | 1 +
 arch/mips/include/asm/mach-dec/cpu-feature-overrides.h        | 1 +
 arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h       | 1 +
 arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h       | 1 +
 arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h       | 1 +
 arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h       | 1 +
 arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h     | 1 +
 arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h | 1 +
 arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h   | 1 +
 arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h    | 1 +
 arch/mips/include/asm/mach-rm/cpu-feature-overrides.h         | 1 +
 arch/mips/include/asm/mach-sibyte/cpu-feature-overrides.h     | 1 +
 arch/mips/include/asm/mach-tx49xx/cpu-feature-overrides.h     | 1 +
 16 files changed, 16 insertions(+)

diff --git a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h
index ade0356df2570..e6a8108cde4e5 100644
--- a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h
@@ -40,6 +40,7 @@
 #endif
 
 #define cpu_has_mips16			0
+#define cpu_has_mips16e2		0
 #define cpu_has_mdmx			0
 #define cpu_has_mips3d			0
 #define cpu_has_smartmips		0
diff --git a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h
index c5b6eef0efa7b..bace5b9ae4df2 100644
--- a/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-au1x00/cpu-feature-overrides.h
@@ -31,6 +31,7 @@
 #define cpu_has_ejtag			1
 #define cpu_has_llsc			1
 #define cpu_has_mips16			0
+#define cpu_has_mips16e2		0
 #define cpu_has_mdmx			0
 #define cpu_has_mips3d			0
 #define cpu_has_smartmips		0
diff --git a/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h
index bc1167dbd4e39..b56cf10b91d33 100644
--- a/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h
@@ -19,6 +19,7 @@
 #define cpu_has_ejtag			1
 #define cpu_has_llsc			1
 #define cpu_has_mips16			0
+#define cpu_has_mips16e2		0
 #define cpu_has_mdmx			0
 #define cpu_has_mips3d			0
 #define cpu_has_smartmips		0
diff --git a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h
index 30c5cd9fd9733..291fe90aafa5d 100644
--- a/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cobalt/cpu-feature-overrides.h
@@ -37,6 +37,7 @@
 #endif
 
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_mdmx		0
 #define cpu_has_mips3d		0
 #define cpu_has_smartmips	0
diff --git a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h
index 21eae03d752aa..2ec10237688c6 100644
--- a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h
@@ -27,6 +27,7 @@
 #define cpu_has_mcheck			0
 #define cpu_has_ejtag			0
 #define cpu_has_mips16			0
+#define cpu_has_mips16e2		0
 #define cpu_has_mdmx			0
 #define cpu_has_mips3d			0
 #define cpu_has_smartmips		0
diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
index 9b19b72dba56f..b80d5eafc9dbc 100644
--- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h
@@ -19,6 +19,7 @@
 #define cpu_has_32fpr		1
 #define cpu_has_counter		1
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_divec		0
 #define cpu_has_cache_cdex_p	1
 #define cpu_has_prefetch	0
diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
index 7449794eade6c..136d6d464e320 100644
--- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
@@ -43,6 +43,7 @@
 #define cpu_has_ejtag			0
 #define cpu_has_llsc			1
 #define cpu_has_mips16			0
+#define cpu_has_mips16e2		0
 #define cpu_has_mdmx			0
 #define cpu_has_mips3d			0
 #define cpu_has_smartmips		0
diff --git a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
index 4cec06d133db2..ba8b4e30b3e27 100644
--- a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h
@@ -16,6 +16,7 @@
  */
 #define cpu_has_watch		1
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_divec		0
 #define cpu_has_vce		0
 #define cpu_has_cache_cdex_p	0
diff --git a/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h
index 241409b78ff1c..63b4c889094b1 100644
--- a/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-ip32/cpu-feature-overrides.h
@@ -29,6 +29,7 @@
 #define cpu_has_32fpr		1
 #define cpu_has_counter		1
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_vce		0
 #define cpu_has_cache_cdex_s	0
 #define cpu_has_mcheck		0
diff --git a/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h b/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h
index 0933f94a1e697..7c5e576f9d964 100644
--- a/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h
@@ -23,6 +23,7 @@
 #define cpu_has_ejtag 1
 #define cpu_has_llsc		1
 #define cpu_has_mips16 0
+#define cpu_has_mips16e2	0
 #define cpu_has_mdmx 0
 #define cpu_has_mips3d 0
 #define cpu_has_smartmips 0
diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
index 89328a3d44d85..581915ce231c7 100644
--- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h
@@ -32,6 +32,7 @@
 #define cpu_has_mcheck		0
 #define cpu_has_mdmx		0
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_mips3d		0
 #define cpu_has_mipsmt		0
 #define cpu_has_smartmips	0
diff --git a/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
index 091deb1700e5e..0c29ff820bb9a 100644
--- a/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
@@ -13,6 +13,7 @@
 #define cpu_has_4k_cache	1
 #define cpu_has_watch		1
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_counter		1
 #define cpu_has_divec		1
 #define cpu_has_vce		0
diff --git a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h
index b15307597ee39..6a1087ee8c6ed 100644
--- a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h
@@ -48,6 +48,7 @@
 #define cpu_has_llsc			1
 
 #define cpu_has_mips16			0
+#define cpu_has_mips16e2		0
 #define cpu_has_mdmx			0
 #define cpu_has_mips3d			0
 #define cpu_has_smartmips		0
diff --git a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
index d38be668e3381..e1e182300feaa 100644
--- a/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-rm/cpu-feature-overrides.h
@@ -17,6 +17,7 @@
 #define cpu_has_counter		1
 #define cpu_has_watch		0
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_divec		0
 #define cpu_has_cache_cdex_p	1
 #define cpu_has_prefetch	0
diff --git a/arch/mips/include/asm/mach-sibyte/cpu-feature-overrides.h b/arch/mips/include/asm/mach-sibyte/cpu-feature-overrides.h
index 92927b62b5a07..7022358057fd8 100644
--- a/arch/mips/include/asm/mach-sibyte/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-sibyte/cpu-feature-overrides.h
@@ -13,6 +13,7 @@
  */
 #define cpu_has_watch		1
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_divec		1
 #define cpu_has_vce		0
 #define cpu_has_cache_cdex_p	0
diff --git a/arch/mips/include/asm/mach-tx49xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-tx49xx/cpu-feature-overrides.h
index 7f5144c6ce2da..b9d39dc45420b 100644
--- a/arch/mips/include/asm/mach-tx49xx/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-tx49xx/cpu-feature-overrides.h
@@ -6,6 +6,7 @@
 #define cpu_has_inclusive_pcaches	0
 
 #define cpu_has_mips16		0
+#define cpu_has_mips16e2	0
 #define cpu_has_mdmx		0
 #define cpu_has_mips3d		0
 #define cpu_has_smartmips	0
-- 
GitLab


From 54eca7eccc5e149825f831859123b692a565bca9 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 29 Jun 2017 10:12:33 +0100
Subject: [PATCH 1156/1958] MIPS: Drop duplicate HAVE_SYSCALL_TRACEPOINTS
 select

MIPS selects HAVE_SYSCALL_TRACEPOINTS twice. The first was added back in
v3.13 by commit 2d7bf993e073 ("MIPS: ftrace: Add support for syscall
tracepoints."), but then a second redundant one was added in v4.2 by
commit fb59e394c30c ("MIPS: ftrace: Enable support for syscall
tracepoints.").

Drop the duplicate select.

Fixes: fb59e394c30c ("MIPS: ftrace: Enable support for syscall tracepoints.")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16654/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 609986e8b21e0..55e48a3e7f058 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -64,7 +64,6 @@ config MIPS
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA if MODULES && 64BIT
-- 
GitLab


From 4f32a39d49b25eaa66d2420f1f03d371ea4cd906 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 29 Jun 2017 10:12:34 +0100
Subject: [PATCH 1157/1958] MIPS: Negate error syscall return in trace

The sys_exit trace event takes a single return value for the system
call, which MIPS passes the value of the $v0 (result) register, however
MIPS returns positive error codes in $v0 with $a3 specifying that $v0
contains an error code. As a result erroring system calls are traced
returning positive error numbers that can't always be distinguished from
success.

Use regs_return_value() to negate the error code if $a3 is set.

Fixes: 1d7bf993e073 ("MIPS: ftrace: Add support for syscall tracepoints.")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 3.13+
Patchwork: https://patchwork.linux-mips.org/patch/16651/
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index ba3b1f771256f..8e2ea86dc23e8 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -913,7 +913,7 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_exit(regs, regs->regs[2]);
+		trace_sys_exit(regs, regs_return_value(regs));
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, 0);
-- 
GitLab


From becddba9f80f26a2b9ebe9bad2806304ed5e00e1 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 29 Jun 2017 10:12:35 +0100
Subject: [PATCH 1158/1958] MIPS: Correct forced syscall errors

When the system call return value is forced to be an error (for example
due to SECCOMP_RET_ERRNO), syscall_set_return_value() puts the error
code in the return register $v0 and -1 in the error register $a3.

However normally executed system calls put 1 in the error register
rather than -1, so fix syscall_set_return_value() to be consistent with
that.

I don't anticipate that anything would have been broken by this, since
the most natural way to check the error register on MIPS would be a
conditional branch if error register is [not] equal to zero (bnez or
beqz).

Fixes: 1d7bf993e073 ("MIPS: ftrace: Add support for syscall tracepoints.")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16652/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/syscall.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index d87882513ee34..7c713025b23f6 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -85,7 +85,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
 {
 	if (error) {
 		regs->regs[2] = -error;
-		regs->regs[7] = -1;
+		regs->regs[7] = 1;
 	} else {
 		regs->regs[2] = val;
 		regs->regs[7] = 0;
-- 
GitLab


From 828db212bf4b63c68c51f6519435c48e8d79bd00 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 29 Jun 2017 10:12:36 +0100
Subject: [PATCH 1159/1958] MIPS: Traced negative syscalls should return
 -ENOSYS

If a negative system call number is used when system call tracing is
enabled, syscall_trace_enter() will return that negative system call
number without having written the return value and error flag into the
pt_regs.

The caller then treats it as a cancelled system call and assumes that
the return value and error flag are already written, leaving the
negative system call number in the return register ($v0), and the 4th
system call argument in the error register ($a3).

Add a special case to detect this at the end of syscall_trace_enter(),
to set the return value to error -ENOSYS when this happens.

Fixes: d218af78492a ("MIPS: scall: Always run the seccomp syscall filters")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16653/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/ptrace.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 8e2ea86dc23e8..6dd13641a4188 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -894,6 +894,13 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
 
 	audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
 			    regs->regs[6], regs->regs[7]);
+
+	/*
+	 * Negative syscall numbers are mistaken for rejected syscalls, but
+	 * won't have had the return value set appropriately, so we do so now.
+	 */
+	if (syscall < 0)
+		syscall_set_return_value(current, regs, -ENOSYS, 0);
 	return syscall;
 }
 
-- 
GitLab


From 7461279bba4a62d192eda6dd10de68ac0543bfcd Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Sat, 17 Jun 2017 13:52:46 -0700
Subject: [PATCH 1160/1958] dt-bindings: Document img,boston-clock binding

Add device tree binding documentation for the clocks provided by the
MIPS Boston development board from Imagination Technologies, and a
header file describing the available clocks for use by device trees &
driver.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Acked-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: devicetree@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16482/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../bindings/clock/img,boston-clock.txt       | 31 +++++++++++++++++++
 MAINTAINERS                                   |  7 +++++
 include/dt-bindings/clock/boston-clock.h      | 14 +++++++++
 3 files changed, 52 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/img,boston-clock.txt
 create mode 100644 include/dt-bindings/clock/boston-clock.h

diff --git a/Documentation/devicetree/bindings/clock/img,boston-clock.txt b/Documentation/devicetree/bindings/clock/img,boston-clock.txt
new file mode 100644
index 0000000000000..7bc5e9ffb6244
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/img,boston-clock.txt
@@ -0,0 +1,31 @@
+Binding for Imagination Technologies MIPS Boston clock sources.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+The device node must be a child node of the syscon node corresponding to the
+Boston system's platform registers.
+
+Required properties:
+- compatible : Should be "img,boston-clock".
+- #clock-cells : Should be set to 1.
+  Values available for clock consumers can be found in the header file:
+    <dt-bindings/clock/boston-clock.h>
+
+Example:
+
+	system-controller@17ffd000 {
+		compatible = "img,boston-platform-regs", "syscon";
+		reg = <0x17ffd000 0x1000>;
+
+		clk_boston: clock {
+			compatible = "img,boston-clock";
+			#clock-cells = <1>;
+		};
+	};
+
+	uart0: uart@17ffe000 {
+		/* ... */
+		clocks = <&clk_boston BOSTON_CLK_SYS>;
+	};
diff --git a/MAINTAINERS b/MAINTAINERS
index bcf2d258c0dd6..0b977a6bf8492 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8504,6 +8504,13 @@ F:	arch/mips/include/asm/mach-loongson32/
 F:	drivers/*/*loongson1*
 F:	drivers/*/*/*loongson1*
 
+MIPS BOSTON DEVELOPMENT BOARD
+M:	Paul Burton <paul.burton@imgtec.com>
+L:	linux-mips@linux-mips.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/clock/img,boston-clock.txt
+F:	include/dt-bindings/clock/boston-clock.h
+
 MIROSOUND PCM20 FM RADIO RECEIVER DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
diff --git a/include/dt-bindings/clock/boston-clock.h b/include/dt-bindings/clock/boston-clock.h
new file mode 100644
index 0000000000000..a6f0098211378
--- /dev/null
+++ b/include/dt-bindings/clock/boston-clock.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ *
+ * SPDX-License-Identifier:	GPL-2.0
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_BOSTON_CLOCK_H__
+#define __DT_BINDINGS_CLOCK_BOSTON_CLOCK_H__
+
+#define BOSTON_CLK_INPUT 0
+#define BOSTON_CLK_SYS 1
+#define BOSTON_CLK_CPU 2
+
+#endif /* __DT_BINDINGS_CLOCK_BOSTON_CLOCK_H__ */
-- 
GitLab


From 6b0fd6c1a22da18a00f8ce12014d55ce0a316651 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Sat, 17 Jun 2017 13:52:47 -0700
Subject: [PATCH 1161/1958] clk: boston: Add a driver for MIPS Boston board
 clocks

Add a driver for the clocks provided by the MIPS Boston board from
Imagination Technologies. 2 clocks are provided - the system clock & the
CPU clock - and each is a simple fixed rate clock whose frequency can be
determined by reading a register provided by the board.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Acked-by: Stephen Boyd <sboyd@codeaurora.org>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: linux-clk@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16483/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 MAINTAINERS                     |   1 +
 drivers/clk/Kconfig             |   1 +
 drivers/clk/Makefile            |   1 +
 drivers/clk/imgtec/Kconfig      |   9 +++
 drivers/clk/imgtec/Makefile     |   1 +
 drivers/clk/imgtec/clk-boston.c | 103 ++++++++++++++++++++++++++++++++
 6 files changed, 116 insertions(+)
 create mode 100644 drivers/clk/imgtec/Kconfig
 create mode 100644 drivers/clk/imgtec/Makefile
 create mode 100644 drivers/clk/imgtec/clk-boston.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 0b977a6bf8492..5513b85dd60ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8509,6 +8509,7 @@ M:	Paul Burton <paul.burton@imgtec.com>
 L:	linux-mips@linux-mips.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/clock/img,boston-clock.txt
+F:	drivers/clk/imgtec/clk-boston.c
 F:	include/dt-bindings/clock/boston-clock.h
 
 MIROSOUND PCM20 FM RADIO RECEIVER DRIVER
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 36cfea38135f6..251a22139e732 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -219,6 +219,7 @@ config COMMON_CLK_VC5
 
 source "drivers/clk/bcm/Kconfig"
 source "drivers/clk/hisilicon/Kconfig"
+source "drivers/clk/imgtec/Kconfig"
 source "drivers/clk/mediatek/Kconfig"
 source "drivers/clk/meson/Kconfig"
 source "drivers/clk/mvebu/Kconfig"
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index c19983afcb81c..a4a7c5df8b93f 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -59,6 +59,7 @@ obj-y					+= bcm/
 obj-$(CONFIG_ARCH_BERLIN)		+= berlin/
 obj-$(CONFIG_H8300)			+= h8300/
 obj-$(CONFIG_ARCH_HISI)			+= hisilicon/
+obj-y					+= imgtec/
 obj-$(CONFIG_ARCH_MXC)			+= imx/
 obj-$(CONFIG_MACH_INGENIC)		+= ingenic/
 obj-$(CONFIG_COMMON_CLK_KEYSTONE)	+= keystone/
diff --git a/drivers/clk/imgtec/Kconfig b/drivers/clk/imgtec/Kconfig
new file mode 100644
index 0000000000000..f6dcb748e9c4f
--- /dev/null
+++ b/drivers/clk/imgtec/Kconfig
@@ -0,0 +1,9 @@
+config COMMON_CLK_BOSTON
+	bool "Clock driver for MIPS Boston boards"
+	depends on MIPS || COMPILE_TEST
+	select MFD_SYSCON
+	---help---
+	  Enable this to support the system & CPU clocks on the MIPS Boston
+	  development board from Imagination Technologies. These are simple
+	  fixed rate clocks whose rate is determined by reading a platform
+	  provided register.
diff --git a/drivers/clk/imgtec/Makefile b/drivers/clk/imgtec/Makefile
new file mode 100644
index 0000000000000..ac779b8c22f26
--- /dev/null
+++ b/drivers/clk/imgtec/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_COMMON_CLK_BOSTON)		+= clk-boston.o
diff --git a/drivers/clk/imgtec/clk-boston.c b/drivers/clk/imgtec/clk-boston.c
new file mode 100644
index 0000000000000..f18f103517850
--- /dev/null
+++ b/drivers/clk/imgtec/clk-boston.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016-2017 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#define pr_fmt(fmt) "clk-boston: " fmt
+
+#include <linux/clk-provider.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/mfd/syscon.h>
+
+#include <dt-bindings/clock/boston-clock.h>
+
+#define BOSTON_PLAT_MMCMDIV		0x30
+# define BOSTON_PLAT_MMCMDIV_CLK0DIV	(0xff << 0)
+# define BOSTON_PLAT_MMCMDIV_INPUT	(0xff << 8)
+# define BOSTON_PLAT_MMCMDIV_MUL	(0xff << 16)
+# define BOSTON_PLAT_MMCMDIV_CLK1DIV	(0xff << 24)
+
+#define BOSTON_CLK_COUNT 3
+
+static u32 ext_field(u32 val, u32 mask)
+{
+	return (val & mask) >> (ffs(mask) - 1);
+}
+
+static void __init clk_boston_setup(struct device_node *np)
+{
+	unsigned long in_freq, cpu_freq, sys_freq;
+	uint mmcmdiv, mul, cpu_div, sys_div;
+	struct clk_hw_onecell_data *onecell;
+	struct regmap *regmap;
+	struct clk_hw *hw;
+	int err;
+
+	regmap = syscon_node_to_regmap(np->parent);
+	if (IS_ERR(regmap)) {
+		pr_err("failed to find regmap\n");
+		return;
+	}
+
+	err = regmap_read(regmap, BOSTON_PLAT_MMCMDIV, &mmcmdiv);
+	if (err) {
+		pr_err("failed to read mmcm_div register: %d\n", err);
+		return;
+	}
+
+	in_freq = ext_field(mmcmdiv, BOSTON_PLAT_MMCMDIV_INPUT) * 1000000;
+	mul = ext_field(mmcmdiv, BOSTON_PLAT_MMCMDIV_MUL);
+
+	sys_div = ext_field(mmcmdiv, BOSTON_PLAT_MMCMDIV_CLK0DIV);
+	sys_freq = mult_frac(in_freq, mul, sys_div);
+
+	cpu_div = ext_field(mmcmdiv, BOSTON_PLAT_MMCMDIV_CLK1DIV);
+	cpu_freq = mult_frac(in_freq, mul, cpu_div);
+
+	onecell = kzalloc(sizeof(*onecell) +
+			  (BOSTON_CLK_COUNT * sizeof(struct clk_hw *)),
+			  GFP_KERNEL);
+	if (!onecell)
+		return;
+
+	onecell->num = BOSTON_CLK_COUNT;
+
+	hw = clk_hw_register_fixed_rate(NULL, "input", NULL, 0, in_freq);
+	if (IS_ERR(hw)) {
+		pr_err("failed to register input clock: %ld\n", PTR_ERR(hw));
+		return;
+	}
+	onecell->hws[BOSTON_CLK_INPUT] = hw;
+
+	hw = clk_hw_register_fixed_rate(NULL, "sys", "input", 0, sys_freq);
+	if (IS_ERR(hw)) {
+		pr_err("failed to register sys clock: %ld\n", PTR_ERR(hw));
+		return;
+	}
+	onecell->hws[BOSTON_CLK_SYS] = hw;
+
+	hw = clk_hw_register_fixed_rate(NULL, "cpu", "input", 0, cpu_freq);
+	if (IS_ERR(hw)) {
+		pr_err("failed to register cpu clock: %ld\n", PTR_ERR(hw));
+		return;
+	}
+	onecell->hws[BOSTON_CLK_CPU] = hw;
+
+	err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, onecell);
+	if (err)
+		pr_err("failed to add DT provider: %d\n", err);
+}
+
+/*
+ * Use CLK_OF_DECLARE so that this driver is probed early enough to provide the
+ * CPU frequency for use with the GIC or cop0 counters/timers.
+ */
+CLK_OF_DECLARE(clk_boston, "img,boston-clock", clk_boston_setup);
-- 
GitLab


From 4d2804b7d76ef6e6cd85be74999548276d23067f Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Sat, 17 Jun 2017 13:52:48 -0700
Subject: [PATCH 1162/1958] MIPS: DTS: img: Don't attempt to build-in all .dtb
 files

When building a FIT image we may want the kernel to build multiple .dtb
files, but we don't want to build them all into the kernel binary as
object files since they'll instead be included in the FIT image.

Commit daa10170da27 ("MIPS: DTS: img: add device tree for Marduk board")
however created arch/mips/boot/dts/img/Makefile with a line that builds
any enabled .dtb files into the kernel. Remove this & build the
pistachio object specifically, in preparation for adding .dtb targets
which we don't want to build into the kernel.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Rahul Bedarkar <rahulbedarkar89@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16484/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/boot/dts/img/Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile
index 69a65f0f82d28..c178cf56f5b8b 100644
--- a/arch/mips/boot/dts/img/Makefile
+++ b/arch/mips/boot/dts/img/Makefile
@@ -1,6 +1,5 @@
 dtb-$(CONFIG_MACH_PISTACHIO)	+= pistachio_marduk.dtb
-
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_MACH_PISTACHIO)	+= pistachio_marduk.dtb.o
 
 # Force kbuild to make empty built-in.o if necessary
 obj-				+= dummy.o
-- 
GitLab


From 6e62a888029b4adbbc55894eaa8b929221113948 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Sat, 17 Jun 2017 13:52:49 -0700
Subject: [PATCH 1163/1958] MIPS: generic: Support MIPS Boston development
 boards

Add support for the MIPS Boston development board to generic kernels,
which essentially amounts to:

  - Adding the device tree source for the MIPS Boston board.

  - Adding a Kconfig fragment which enables the appropriate drivers for
    the MIPS Boston board.

With these changes in place generic kernels will support the board by
default, and kernels with only the drivers needed for Boston enabled can
be configured by setting BOARDS=boston during configuration. For
example:

  $ make ARCH=mips 64r6el_defconfig BOARDS=boston

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16485/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 MAINTAINERS                                   |   2 +
 arch/mips/boot/dts/img/Makefile               |   2 +
 arch/mips/boot/dts/img/boston.dts             | 224 ++++++++++++++++++
 arch/mips/configs/generic/board-boston.config |  48 ++++
 arch/mips/generic/Kconfig                     |  12 +
 arch/mips/generic/vmlinux.its.S               |  25 ++
 6 files changed, 313 insertions(+)
 create mode 100644 arch/mips/boot/dts/img/boston.dts
 create mode 100644 arch/mips/configs/generic/board-boston.config

diff --git a/MAINTAINERS b/MAINTAINERS
index 5513b85dd60ea..d034eb8031fa9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8509,6 +8509,8 @@ M:	Paul Burton <paul.burton@imgtec.com>
 L:	linux-mips@linux-mips.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/clock/img,boston-clock.txt
+F:	arch/mips/boot/dts/img/boston.dts
+F:	arch/mips/configs/generic/board-boston.config
 F:	drivers/clk/imgtec/clk-boston.c
 F:	include/dt-bindings/clock/boston-clock.h
 
diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile
index c178cf56f5b8b..3d70958d0f5a2 100644
--- a/arch/mips/boot/dts/img/Makefile
+++ b/arch/mips/boot/dts/img/Makefile
@@ -1,3 +1,5 @@
+dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON)	+= boston.dtb
+
 dtb-$(CONFIG_MACH_PISTACHIO)	+= pistachio_marduk.dtb
 obj-$(CONFIG_MACH_PISTACHIO)	+= pistachio_marduk.dtb.o
 
diff --git a/arch/mips/boot/dts/img/boston.dts b/arch/mips/boot/dts/img/boston.dts
new file mode 100644
index 0000000000000..53bfa29a7093f
--- /dev/null
+++ b/arch/mips/boot/dts/img/boston.dts
@@ -0,0 +1,224 @@
+/dts-v1/;
+
+#include <dt-bindings/clock/boston-clock.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/mips-gic.h>
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "img,boston";
+
+	chosen {
+		stdout-path = "uart0:115200";
+	};
+
+	aliases {
+		uart0 = &uart0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "img,mips";
+			reg = <0>;
+			clocks = <&clk_boston BOSTON_CLK_CPU>;
+		};
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	pci0: pci@10000000 {
+		compatible = "xlnx,axi-pcie-host-1.00.a";
+		device_type = "pci";
+		reg = <0x10000000 0x2000000>;
+
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+
+		interrupt-parent = <&gic>;
+		interrupts = <GIC_SHARED 2 IRQ_TYPE_LEVEL_HIGH>;
+
+		ranges = <0x02000000 0 0x40000000
+			  0x40000000 0 0x40000000>;
+
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pci0_intc 1>,
+				<0 0 0 2 &pci0_intc 2>,
+				<0 0 0 3 &pci0_intc 3>,
+				<0 0 0 4 &pci0_intc 4>;
+
+		pci0_intc: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <1>;
+		};
+	};
+
+	pci1: pci@12000000 {
+		compatible = "xlnx,axi-pcie-host-1.00.a";
+		device_type = "pci";
+		reg = <0x12000000 0x2000000>;
+
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+
+		interrupt-parent = <&gic>;
+		interrupts = <GIC_SHARED 1 IRQ_TYPE_LEVEL_HIGH>;
+
+		ranges = <0x02000000 0 0x20000000
+			  0x20000000 0 0x20000000>;
+
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pci1_intc 1>,
+				<0 0 0 2 &pci1_intc 2>,
+				<0 0 0 3 &pci1_intc 3>,
+				<0 0 0 4 &pci1_intc 4>;
+
+		pci1_intc: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <1>;
+		};
+	};
+
+	pci2: pci@14000000 {
+		compatible = "xlnx,axi-pcie-host-1.00.a";
+		device_type = "pci";
+		reg = <0x14000000 0x2000000>;
+
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+
+		interrupt-parent = <&gic>;
+		interrupts = <GIC_SHARED 0 IRQ_TYPE_LEVEL_HIGH>;
+
+		ranges = <0x02000000 0 0x16000000
+			  0x16000000 0 0x100000>;
+
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pci2_intc 1>,
+				<0 0 0 2 &pci2_intc 2>,
+				<0 0 0 3 &pci2_intc 3>,
+				<0 0 0 4 &pci2_intc 4>;
+
+		pci2_intc: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <1>;
+		};
+
+		pci2_root@0,0,0 {
+			compatible = "pci10ee,7021";
+			reg = <0x00000000 0 0 0 0>;
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+
+			eg20t_bridge@1,0,0 {
+				compatible = "pci8086,8800";
+				reg = <0x00010000 0 0 0 0>;
+
+				#address-cells = <3>;
+				#size-cells = <2>;
+				#interrupt-cells = <1>;
+
+				eg20t_mac@2,0,1 {
+					compatible = "pci8086,8802";
+					reg = <0x00020100 0 0 0 0>;
+					phy-reset-gpios = <&eg20t_gpio 6
+							   GPIO_ACTIVE_LOW>;
+				};
+
+				eg20t_gpio: eg20t_gpio@2,0,2 {
+					compatible = "pci8086,8803";
+					reg = <0x00020200 0 0 0 0>;
+
+					gpio-controller;
+					#gpio-cells = <2>;
+				};
+
+				eg20t_i2c@2,12,2 {
+					compatible = "pci8086,8817";
+					reg = <0x00026200 0 0 0 0>;
+
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					rtc@0x68 {
+						compatible = "st,m41t81s";
+						reg = <0x68>;
+					};
+				};
+			};
+		};
+	};
+
+	gic: interrupt-controller@16120000 {
+		compatible = "mti,gic";
+		reg = <0x16120000 0x20000>;
+
+		interrupt-controller;
+		#interrupt-cells = <3>;
+
+		timer {
+			compatible = "mti,gic-timer";
+			interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>;
+			clocks = <&clk_boston BOSTON_CLK_CPU>;
+		};
+	};
+
+	cdmm@16140000 {
+		compatible = "mti,mips-cdmm";
+		reg = <0x16140000 0x8000>;
+	};
+
+	cpc@16200000 {
+		compatible = "mti,mips-cpc";
+		reg = <0x16200000 0x8000>;
+	};
+
+	plat_regs: system-controller@17ffd000 {
+		compatible = "img,boston-platform-regs", "syscon";
+		reg = <0x17ffd000 0x1000>;
+
+		clk_boston: clock {
+			compatible = "img,boston-clock";
+			#clock-cells = <1>;
+		};
+	};
+
+	reboot: syscon-reboot {
+		compatible = "syscon-reboot";
+		regmap = <&plat_regs>;
+		offset = <0x10>;
+		mask = <0x10>;
+	};
+
+	uart0: uart@17ffe000 {
+		compatible = "ns16550a";
+		reg = <0x17ffe000 0x1000>;
+		reg-shift = <2>;
+
+		interrupt-parent = <&gic>;
+		interrupts = <GIC_SHARED 3 IRQ_TYPE_LEVEL_HIGH>;
+
+		clocks = <&clk_boston BOSTON_CLK_SYS>;
+	};
+
+	lcd: lcd@17fff000 {
+		compatible = "img,boston-lcd";
+		reg = <0x17fff000 0x8>;
+	};
+};
diff --git a/arch/mips/configs/generic/board-boston.config b/arch/mips/configs/generic/board-boston.config
new file mode 100644
index 0000000000000..19560a45b683a
--- /dev/null
+++ b/arch/mips/configs/generic/board-boston.config
@@ -0,0 +1,48 @@
+CONFIG_FIT_IMAGE_FDT_BOSTON=y
+
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+
+CONFIG_AUXDISPLAY=y
+CONFIG_IMG_ASCII_LCD=y
+
+CONFIG_COMMON_CLK_BOSTON=y
+
+CONFIG_DMADEVICES=y
+CONFIG_PCH_DMA=y
+
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_PCH=y
+
+CONFIG_I2C=y
+CONFIG_I2C_EG20T=y
+
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PCI=y
+
+CONFIG_NETDEVICES=y
+CONFIG_PCH_GBE=y
+
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_PCIE_XILINX=y
+
+CONFIG_PCH_PHUB=y
+
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+
+CONFIG_SPI=y
+CONFIG_SPI_TOPCLIFF_PCH=y
+
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index 446b7c68133d7..51ffbbaddee2e 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -16,6 +16,8 @@ config YAMON_DT_SHIM
 	  and you wish to include code which helps translate various
 	  YAMON-provided environment variables into a device tree properties.
 
+comment "Legacy (non-UHI/non-FIT) Boards"
+
 config LEGACY_BOARD_SEAD3
 	bool "Support MIPS SEAD-3 boards"
 	select LEGACY_BOARDS
@@ -24,4 +26,14 @@ config LEGACY_BOARD_SEAD3
 	  Enable this to include support for booting on MIPS SEAD-3 FPGA-based
 	  development boards, which boot using a legacy boot protocol.
 
+comment "FIT/UHI Boards"
+
+config FIT_IMAGE_FDT_BOSTON
+	bool "Include FDT for MIPS Boston boards"
+	help
+	  Enable this to include the FDT for the MIPS Boston development board
+	  from Imagination Technologies in the FIT kernel image. You should
+	  enable this if you wish to boot on a MIPS Boston board, as it is
+	  expected by the bootloader.
+
 endif
diff --git a/arch/mips/generic/vmlinux.its.S b/arch/mips/generic/vmlinux.its.S
index f67fbf1c85417..3390e2f80b801 100644
--- a/arch/mips/generic/vmlinux.its.S
+++ b/arch/mips/generic/vmlinux.its.S
@@ -29,3 +29,28 @@
 		};
 	};
 };
+
+#ifdef CONFIG_FIT_IMAGE_FDT_BOSTON
+/ {
+	images {
+		fdt@boston {
+			description = "img,boston Device Tree";
+			data = /incbin/("boot/dts/img/boston.dtb");
+			type = "flat_dt";
+			arch = "mips";
+			compression = "none";
+			hash@0 {
+				algo = "sha1";
+			};
+		};
+	};
+
+	configurations {
+		conf@boston {
+			description = "Boston Linux kernel";
+			kernel = "kernel@0";
+			fdt = "fdt@boston";
+		};
+	};
+};
+#endif /* CONFIG_FIT_IMAGE_FDT_BOSTON */
-- 
GitLab


From 5fdc66e046206306bf61ff2d626bfa52ca087f7b Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Mon, 10 Jul 2017 09:43:31 +0100
Subject: [PATCH 1164/1958] MIPS: Fix minimum alignment requirement of IRQ
 stack

Commit db8466c581cc ("MIPS: IRQ Stack: Unwind IRQ stack onto task
stack") erroneously set the initial stack pointer of the IRQ stack to a
value with a 4 byte alignment. The MIPS32 ABI requires that the minimum
stack alignment is 8 byte, and the MIPS64 ABIs(n32/n64) require 16 byte
minimum alignment. Fix IRQ_STACK_START such that it leaves space for the
dummy stack frame (containing interrupted task kernel stack pointer)
while also meeting minimum alignment requirements.

Fixes: db8466c581cc ("MIPS: IRQ Stack: Unwind IRQ stack onto task stack")
Reported-by: Darius Ivanauskas <dasilt@yahoo.com>
Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16760/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index ddd1c918103bc..c5d3517864162 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -18,7 +18,7 @@
 #include <irq.h>
 
 #define IRQ_STACK_SIZE			THREAD_SIZE
-#define IRQ_STACK_START			(IRQ_STACK_SIZE - sizeof(unsigned long))
+#define IRQ_STACK_START			(IRQ_STACK_SIZE - 16)
 
 extern void *irq_stack[NR_CPUS];
 
-- 
GitLab


From e5f5a5b06e51a36f6ddf31a4a485358263953a3d Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Sat, 8 Jul 2017 23:24:44 +0100
Subject: [PATCH 1165/1958] MIPS: Fix MIPS I ISA /proc/cpuinfo reporting

Correct a commit 515a6393dbac ("MIPS: kernel: proc: Add MIPS R6 support
to /proc/cpuinfo") regression that caused MIPS I systems to show no ISA
levels supported in /proc/cpuinfo, e.g.:

system type		: Digital DECstation 2100/3100
machine			: Unknown
processor		: 0
cpu model		: R3000 V2.0  FPU V2.0
BogoMIPS		: 10.69
wait instruction	: no
microsecond timers	: no
tlb_entries		: 64
extra interrupt vector	: no
hardware watchpoint	: no
isa			:
ASEs implemented	:
shadow register sets	: 1
kscratch registers	: 0
package			: 0
core			: 0
VCED exceptions		: not available
VCEI exceptions		: not available

and similarly exclude `mips1' from the ISA list for any processors below
MIPSr1.  This is because the condition to show `mips1' on has been made
`cpu_has_mips_r1' rather than newly-introduced `cpu_has_mips_1'.  Use
the correct condition then.

Fixes: 515a6393dbac ("MIPS: kernel: proc: Add MIPS R6 support to /proc/cpuinfo")
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 3.19+
Patchwork: https://patchwork.linux-mips.org/patch/16758/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index bbd83b810e519..70604c753aa4e 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -83,7 +83,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	}
 
 	seq_printf(m, "isa\t\t\t:"); 
-	if (cpu_has_mips_r1)
+	if (cpu_has_mips_1)
 		seq_printf(m, " mips1");
 	if (cpu_has_mips_2)
 		seq_printf(m, "%s", " mips2");
-- 
GitLab


From 20dd4c624d25156d5ec3345bbb690b98175ef879 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Tue, 11 Jul 2017 16:27:49 +0530
Subject: [PATCH 1166/1958] powerpc/perf: Fix SDAR_MODE value for continous
 sampling on Power9

In case of continous sampling (non-marked), the code currently
sets MMCRA[SDAR_MODE] to 0b01 (Update on TLB miss) for Power9 DD1.

On DD2 and later it copies the sdar_mode value from the event code,
which for most events is 0b00 (No updates).

However we must set a non-zero value for SDAR_MODE when doing
continuous sampling, so honor the event code, unless it's zero, in
which case we use use 0b01 (Update on TLB miss).

Fixes: 78b4416aa249 ("powerpc/perf: Handle sdar_mode for marked event in power9")
Cc: stable@vger.kernel.org # v4.11+
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/isa207-common.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 8125160be7bc3..3f3aa9a7063ab 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -90,13 +90,15 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 	 *	MMCRA[SDAR_MODE] will be set to 0b01
 	 * For rest
 	 *	MMCRA[SDAR_MODE] will be set from event code.
+	 *      If sdar_mode from event is zero, default to 0b01. Hardware
+	 *      requires that we set a non-zero value.
 	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
 			*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
-		else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+		else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event))
 			*mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
-		else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+		else
 			*mmcra |= MMCRA_SDAR_MODE_TLB;
 	} else
 		*mmcra |= MMCRA_SDAR_MODE_TLB;
-- 
GitLab


From 770c0d86013e22647b15af8df2707f81ee1d5cd0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:49 +0900
Subject: [PATCH 1167/1958] nios2: remove unneeded
 arch/nios2/include/(generated/)asm/signal.h

Currently, NIOS2 has three signal.h files under arch/nios2/include:

[1] arch/nios2/include/asm/signal.h
[2] arch/nios2/include/uapi/asm/signal.h
[3] arch/nios2/include/generated/asm/signal.h

[3] is build-time generated by scripts/Makefile.asm-generic.
However, -I$(srctree)/arch/$(hdr-arch)/include search path is listed
before -I$(objtree)/arch/$(hdr-arch)/include/generated in LINUXINCLUDE.
Therefore [1] is always included instead of [3].  Remove [3] which
is never included.

If we look at [1], it just includes [2].  So, [1] can be removed
as well.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
---
 arch/nios2/include/asm/Kbuild   |  1 -
 arch/nios2/include/asm/signal.h | 22 ----------------------
 2 files changed, 23 deletions(-)
 delete mode 100644 arch/nios2/include/asm/signal.h

diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index e1a843def56fa..c1c7f6736117c 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -47,7 +47,6 @@ generic-y += segment.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
-generic-y += signal.h
 generic-y += socket.h
 generic-y += sockios.h
 generic-y += spinlock.h
diff --git a/arch/nios2/include/asm/signal.h b/arch/nios2/include/asm/signal.h
deleted file mode 100644
index bbcf11eecb019..0000000000000
--- a/arch/nios2/include/asm/signal.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright Altera Corporation (C) 2013. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-#ifndef _NIOS2_SIGNAL_H
-#define _NIOS2_SIGNAL_H
-
-#include <uapi/asm/signal.h>
-
-#endif	/* _NIOS2_SIGNAL_H */
-- 
GitLab


From 3beb4256f922733330938eac70c9218ec3f0b5c5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:50 +0900
Subject: [PATCH 1168/1958] nios2: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
---
 arch/nios2/include/asm/Kbuild      | 25 +------------------------
 arch/nios2/include/uapi/asm/Kbuild | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index c1c7f6736117c..896c26ae0da93 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -1,8 +1,6 @@
 generic-y += atomic.h
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bitops.h
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
@@ -12,54 +10,33 @@ generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += socket.h
-generic-y += sockios.h
 generic-y += spinlock.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 51eff5bc2eb4f..ffca24da7647b 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,6 +1,29 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
 generic-y += setup.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
 generic-y += ucontext.h
-- 
GitLab


From c23504c60ebbc27f9418e01a45e5d4111c4d5f8b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:51 +0900
Subject: [PATCH 1169/1958] openrisc: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/Kbuild      | 29 +--------------------------
 arch/openrisc/include/uapi/asm/Kbuild | 27 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 091585addb911..5bea416a77927 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -1,6 +1,4 @@
-generic-y += auxvec.h
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += cacheflush.h
@@ -11,57 +9,32 @@ generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
 generic-y += string.h
-generic-y += swab.h
 generic-y += switch_to.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index b55fc2ae1e8c1..62286dbeb9043 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,4 +1,31 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From bd78acad8e866e1167f9de97c9b090ee6c2c96a7 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:52 +0900
Subject: [PATCH 1170/1958] parisc: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/parisc/include/asm/Kbuild      | 7 +------
 arch/parisc/include/uapi/asm/Kbuild | 4 ++++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index a9909c2d04c5c..a41139575ab42 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -1,5 +1,3 @@
-
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += current.h
@@ -11,14 +9,12 @@ generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += param.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
 generic-y += seccomp.h
 generic-y += segment.h
@@ -28,4 +24,3 @@ generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 3971c60a7e7ff..196d2a4efb312 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -1,4 +1,8 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += kvm_para.h
+generic-y += param.h
+generic-y += poll.h
 generic-y += resource.h
-- 
GitLab


From 895931232d9358e0016f580f26b336c29c9528cc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:53 +0900
Subject: [PATCH 1171/1958] sh: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/sh/include/asm/Kbuild      | 19 -------------------
 arch/sh/include/uapi/asm/Kbuild | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 590c91ae75417..1a6f9c39feef5 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -1,39 +1,20 @@
-
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
-generic-y += fcntl.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
-generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += trace_clock.h
-generic-y += ucontext.h
 generic-y += xor.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index b55fc2ae1e8c1..e28531333efa9 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,4 +1,22 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += ucontext.h
-- 
GitLab


From b6744e04143dfe99f00b98367bc7e9e3449d7c07 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:54 +0900
Subject: [PATCH 1172/1958] sparc: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/sparc/include/asm/Kbuild      | 1 -
 arch/sparc/include/uapi/asm/Kbuild | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index e9e837bc3158c..80ddc01f57ac3 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -18,5 +18,4 @@ generic-y += preempt.h
 generic-y += rwsem.h
 generic-y += serial.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index b15bf6bc0e94f..2178c78c7c1a6 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += types.h
-- 
GitLab


From c44e27181f8217bdab6f8ad25d34c68c26fbabb4 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:55 +0900
Subject: [PATCH 1173/1958] tile: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/tile/include/asm/Kbuild      | 19 -------------------
 arch/tile/include/uapi/asm/Kbuild | 19 +++++++++++++++++++
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 16f0b08c8ce9a..d28d2b8932c7e 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -2,37 +2,18 @@ generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += seccomp.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += xor.h
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index 0c74c3c5ebfa4..5711de0a1b5ef 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -1,4 +1,23 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
 generic-y += ucontext.h
-- 
GitLab


From ee73056bee8ed47ba12de56ab83bcbe1a0618926 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:56 +0900
Subject: [PATCH 1174/1958] unicore32: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/unicore32/include/asm/Kbuild      | 30 +-------------------------
 arch/unicore32/include/uapi/asm/Kbuild | 27 +++++++++++++++++++++++
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 7a53a55341de9..fda7e21530863 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -1,66 +1,38 @@
-
 generic-y += atomic.h
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += swab.h
 generic-y += syscalls.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 1c44d3b3eba03..759a71411169f 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,5 +1,32 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
 generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
-- 
GitLab


From 22c92aee6f95d9c82d33f04c90c6903a6a94df80 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:57 +0900
Subject: [PATCH 1175/1958] xtensa: move generic-y of exported headers to
 uapi/asm/Kbuild

Since commit fcc8487d477a ("uapi: export all headers under uapi
directories"), all (and only) headers under uapi directories are
exported, but asm-generic wrappers are still exceptions.

To complete de-coupling the uapi from kernel headers, move generic-y
of exported headers to uapi/asm/Kbuild.

With this change, "make headers_install" will just need to parse
uapi/asm/Kbuild to build up exported headers.

Also, move "generic-y += kprobes.h" up in order to keep the entries
sorted.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/xtensa/include/asm/Kbuild      | 10 +---------
 arch/xtensa/include/uapi/asm/Kbuild |  9 +++++++++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 30f6290109d43..c04efde775a59 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1,20 +1,16 @@
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += div64.h
 generic-y += dma-contiguous.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
-generic-y += fcntl.h
 generic-y += hardirq.h
-generic-y += ioctl.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
@@ -22,13 +18,9 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += sections.h
-generic-y += statfs.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index 4cb0d2f8868ca..a5bcdfb890f1b 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -1,3 +1,12 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += kvm_para.h
+generic-y += resource.h
 generic-y += siginfo.h
+generic-y += statfs.h
+generic-y += termios.h
-- 
GitLab


From d1b32bacffe189a0cdc2c36905a753535638cb1c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:58 +0900
Subject: [PATCH 1176/1958] kbuild: do not include old-kbuild-file from
 Makefile.headersinst

Now asm-generic wrappers to be exported are all listed in
arch/*/include/uapi/asm/Kbuild.  "make headers_install" no longer
depends on any Kbuild files outside uapi directories.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.headersinst | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index e9147f05ea772..e6343b34182ad 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -40,11 +40,6 @@ ifeq ($(skip-inst),)
 kbuild-file := $(srctree)/$(obj)/Kbuild
 -include $(kbuild-file)
 
-old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild
-ifneq ($(wildcard $(old-kbuild-file)),)
-include $(old-kbuild-file)
-endif
-
 installdir    := $(INSTALL_HDR_PATH)/$(dst)
 gendir        := $(objtree)/$(subst include/,include/generated/,$(obj))
 header-files  := $(notdir $(wildcard $(srcdir)/*.h))
-- 
GitLab


From a9d9a400e0753491e3eacc9f4dbc60547ea1de45 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:32:59 +0900
Subject: [PATCH 1177/1958] kbuild: split exported generic header creation into
 uapi-asm-generic

When we install headers, we are interested only in headers under uapi
directories.  Split out uapi-asm-generic target and make headers_install
depend on it.  It will avoid generating unneeded asm-generic wrappers.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 70c414b158590..f75a93fa9975e 100644
--- a/Makefile
+++ b/Makefile
@@ -456,10 +456,11 @@ ifneq ($(KBUILD_SRC),)
 endif
 
 # Support for using generic headers in asm-generic
-PHONY += asm-generic
-asm-generic:
+PHONY += asm-generic uapi-asm-generic
+asm-generic: uapi-asm-generic
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-generic \
 	            src=asm obj=arch/$(SRCARCH)/include/generated/asm
+uapi-asm-generic:
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-generic \
 	            src=uapi/asm obj=arch/$(SRCARCH)/include/generated/uapi/asm
 
@@ -1143,7 +1144,7 @@ PHONY += archscripts
 archscripts:
 
 PHONY += __headers
-__headers: $(version_h) scripts_basic asm-generic archheaders archscripts
+__headers: $(version_h) scripts_basic uapi-asm-generic archheaders archscripts
 	$(Q)$(MAKE) $(build)=scripts build_unifdef
 
 PHONY += headers_install_all
-- 
GitLab


From 09c3776c5472f2bc73b29b13d5947cec6103a99c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 10 Jul 2017 03:33:00 +0900
Subject: [PATCH 1178/1958] kbuild: remove wrapper files handling from
 Makefile.headersinst

scripts/Makefike.headersinst creates asm-generic wrappers by itself
because scripts/Makefile.asm-generic created some of exported wrappers
outside uapi directories.

Now this distortion has been fixed.  scripts/Makefile.headersinst can
simply copy wrappers created by scripts/Makefile.asm-generic.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.headersinst | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index e6343b34182ad..343d586e566e5 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -52,14 +52,8 @@ genhdr-files  := $(filter-out $(header-files), $(genhdr-files))
 install-file  := $(installdir)/.install
 check-file    := $(installdir)/.check
 
-# generic-y list all files an architecture uses from asm-generic
-# Use this to build a list of headers which require a wrapper
-generic-files := $(notdir $(wildcard $(srctree)/include/uapi/asm-generic/*.h))
-wrapper-files := $(filter $(generic-files), $(generic-y))
-wrapper-files := $(filter-out $(header-files), $(wrapper-files))
-
 # all headers files for this dir
-all-files     := $(header-files) $(genhdr-files) $(wrapper-files)
+all-files     := $(header-files) $(genhdr-files)
 output-files  := $(addprefix $(installdir)/, $(all-files))
 
 ifneq ($(mandatory-y),)
@@ -83,9 +77,6 @@ quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\
       cmd_install = \
         $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(header-files); \
         $(CONFIG_SHELL) $< $(installdir) $(gendir) $(genhdr-files); \
-        for F in $(wrapper-files); do                                   \
-                echo "\#include <asm-generic/$$F>" > $(installdir)/$$F;    \
-        done;                                                           \
         touch $@
 
 quiet_cmd_remove = REMOVE  $(unwanted)
-- 
GitLab


From d7f14c66c273b00aaa626f419d3155773a88d460 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 9 Jul 2017 20:02:36 +0200
Subject: [PATCH 1179/1958] kbuild: Enable Large File Support for hostprogs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the following build error for me when building on an 32 bit
machine using an XFS file system:

	$ make scripts/basic/fixdep
	  HOSTCC  scripts/basic/fixdep
	fixdep: error fstat'ing depfile: scripts/basic/.fixdep.d: Value too large for defined data type

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index f75a93fa9975e..1d6695ec31387 100644
--- a/Makefile
+++ b/Makefile
@@ -294,10 +294,17 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
 	  else if [ -x /bin/bash ]; then echo /bin/bash; \
 	  else echo sh; fi ; fi)
 
+HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS)
+HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS)
+HOST_LFS_LIBS := $(shell getconf LFS_LIBS)
+
 HOSTCC       = gcc
 HOSTCXX      = g++
-HOSTCFLAGS   := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89
-HOSTCXXFLAGS = -O2
+HOSTCFLAGS   := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 \
+		-fomit-frame-pointer -std=gnu89 $(HOST_LFS_CFLAGS)
+HOSTCXXFLAGS := -O2 $(HOST_LFS_CFLAGS)
+HOSTLDFLAGS  := $(HOST_LFS_LDFLAGS)
+HOST_LOADLIBES := $(HOST_LFS_LIBS)
 
 ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1)
 HOSTCFLAGS  += -Wno-unused-value -Wno-unused-parameter \
@@ -408,7 +415,7 @@ KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(S
 
 export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
 export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP
+export CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES
 export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
-- 
GitLab


From d40e0d4fb5613099a58c95a9403f51b03e40e861 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 5 Jul 2017 09:59:05 -0700
Subject: [PATCH 1180/1958] locking/qspinlock: Include linux/prefetch.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kernel/locking/qspinlock.c makes use of prefetchw() but hasn't included
linux/prefetch.h up until now, instead relying upon indirect inclusion
of some header that defines prefetchw().

In the case of MIPS we generally obtain a definition of prefetchw() from
asm/processor.h, included by way of linux/mutex.h, but only for
configurations which select CONFIG_CPU_HAS_PREFETCH. For configurations
which don't this leaves prefetchw() undefined leading to the following
build failure:

  CC      kernel/locking/qspinlock.o
  kernel/locking/qspinlock.c: In function ‘queued_spin_lock_slowpath’:
  kernel/locking/qspinlock.c:549:4: error: implicit declaration of
      function ‘prefetchw’ [-Werror=implicit-function-declaration]
    prefetchw(next);
    ^~~~~~~~~

Fix this by including linux/prefetch.h in order to ensure that we get a
definition of prefetchw().

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 kernel/locking/qspinlock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b2caec7315af5..fd24153e8a48d 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -28,6 +28,7 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/mutex.h>
+#include <linux/prefetch.h>
 #include <asm/byteorder.h>
 #include <asm/qspinlock.h>
 
-- 
GitLab


From 7432b49b54cd931743f0b6e9f652bc329e4a242a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 5 Jul 2017 17:09:41 +0200
Subject: [PATCH 1181/1958] mmc: block: Initialize ret in
 mmc_blk_issue_drv_op() for MMC_DRV_OP_IOCTL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With gcc 4.1.2:

    drivers/mmc/core/block.c: In function ‘mmc_blk_issue_drv_op’:
    drivers/mmc/core/block.c:1178: warning: ‘ret’ may be used uninitialized in this function

Indeed, for MMC_DRV_OP_IOCTL, if mq_rq->ioc_count is zero, an
uninitialized value will be stored in mq_rq->drv_op_result and passed to
blk_end_request_all().

Can mq_rq->ioc_count be zero?
  - mmc_blk_ioctl_cmd() sets ioc_count to 1, so this is safe,
  - mmc_blk_ioctl_multi_cmd() obtains ioc_count from user space in
    response to the MMC_IOC_MULTI_CMD ioctl, and does allow zero.

Initialize ret to zero to fix this for current and future callers.

Fixes: 0493f6fe5bdee8ac ("mmc: block: Move boot partition locking into a driver op")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 0cfac2d391073..4920ea1ece38a 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1182,7 +1182,7 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req)
 
 	switch (mq_rq->drv_op) {
 	case MMC_DRV_OP_IOCTL:
-		for (i = 0; i < mq_rq->ioc_count; i++) {
+		for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) {
 			ret = __mmc_blk_ioctl_cmd(card, md, mq_rq->idata[i]);
 			if (ret)
 				break;
-- 
GitLab


From aab2ee03912be6e12bb5f4810be0b80a82168d3e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 5 Jul 2017 17:09:42 +0200
Subject: [PATCH 1182/1958] mmc: block: Let MMC_IOC_MULTI_CMD return zero again
 for zero entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With gcc 4.1.2:

    drivers/mmc/core/block.c: In function ‘mmc_blk_ioctl_cmd_issue’:
    drivers/mmc/core/block.c:630: warning: ‘ioc_err’ may be used uninitialized in this function

Indeed, if mq_rq->ioc_count is zero, an uninitialized value will be
stored in mq_rq->drv_op_result and passed to blk_end_request_all().

Can mq_rq->ioc_count be zero?
  - mmc_blk_ioctl_cmd() sets ioc_count to 1, so this is safe,
  - mmc_blk_ioctl_multi_cmd() obtains ioc_count from user space in
    response to the MMC_IOC_MULTI_CMD ioctl, and does allow zero.

To avoid returning an uninitialized value, and as it is pointless to do
all this work when the MMC_IOC_MULTI_CMD ioctl is used with zero
entries, check for this early in mmc_blk_ioctl_multi_cmd(), and return
zero, like was returned before.

Fixes: 3ecd8cf23f88d5df ("mmc: block: move multi-ioctl() to use block layer")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 4920ea1ece38a..e0363223996e6 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -637,6 +637,9 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
 			   sizeof(num_of_cmds)))
 		return -EFAULT;
 
+	if (!num_of_cmds)
+		return 0;
+
 	if (num_of_cmds > MMC_IOC_MAX_CMDS)
 		return -EINVAL;
 
-- 
GitLab


From e7d80c830489f67b1d0257e6919840100085dea9 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Thu, 22 Jun 2017 15:01:10 -0400
Subject: [PATCH 1183/1958] IB/iser: Handle lack of memory management
 extentions correctly

max_fast_reg_page_list_len is only valid when the
memory management extentions are signaled by the underlying
driver.

Fix by adjusting iser_calc_scsi_params() to use
ISCSI_ISER_MAX_SG_TABLESIZE when the extentions are not indicated.

Reported-by: Thomas Rosenstein <thomas.rosenstein@creamfinance.com>
Fixes: Commit df749cdc45d9 ("IB/iser: Support up to 8MB data transfer in a single command")
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Thomas Rosenstein <thomas.rosenstein@creamfinance.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index c538a38c91ce9..26a004e97ae0f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -708,8 +708,14 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 	unsigned short sg_tablesize, sup_sg_tablesize;
 
 	sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
-	sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
-				 device->ib_device->attrs.max_fast_reg_page_list_len);
+	if (device->ib_device->attrs.device_cap_flags &
+			IB_DEVICE_MEM_MGT_EXTENSIONS)
+		sup_sg_tablesize =
+			min_t(
+			 uint, ISCSI_ISER_MAX_SG_TABLESIZE,
+			 device->ib_device->attrs.max_fast_reg_page_list_len);
+	else
+		sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
 
 	iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize);
 }
-- 
GitLab


From b6ea01ba3fe9dcb5e08bbd9ed482845582a1e80b Mon Sep 17 00:00:00 2001
From: "Amrani, Ram" <Ram.Amrani@cavium.com>
Date: Mon, 29 May 2017 11:18:54 +0300
Subject: [PATCH 1184/1958] RDMA/qedr: Add qedr to MAINTAINERS file

Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 053c3bdd1fe51..53437d46a39df 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10531,6 +10531,14 @@ L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/qedf/
 
+QLOGIC QL4xxx RDMA DRIVER
+M:	Ram Amrani <Ram.Amrani@cavium.com>
+M:	Ariel Elior <Ariel.Elior@cavium.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/hw/qedr/
+F:	include/uapi/rdma/qedr-abi.h
+
 QNX4 FILESYSTEM
 M:	Anders Larsen <al@alarsen.net>
 W:	http://www.alarsen.net/linux/qnx4fs/
-- 
GitLab


From 2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 6 Jul 2017 08:41:06 -0500
Subject: [PATCH 1185/1958] proc: Fix proc_sys_prune_dcache to hold a sb
 reference

Andrei Vagin writes:
FYI: This bug has been reproduced on 4.11.7
> BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo}  still in use (1) [unmount of proc proc]
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80
> CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1
> Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015
> Workqueue: events proc_cleanup_work
> Call Trace:
>  dump_stack+0x63/0x86
>  __warn+0xcb/0xf0
>  warn_slowpath_null+0x1d/0x20
>  umount_check+0x6e/0x80
>  d_walk+0xc6/0x270
>  ? dentry_free+0x80/0x80
>  do_one_tree+0x26/0x40
>  shrink_dcache_for_umount+0x2d/0x90
>  generic_shutdown_super+0x1f/0xf0
>  kill_anon_super+0x12/0x20
>  proc_kill_sb+0x40/0x50
>  deactivate_locked_super+0x43/0x70
>  deactivate_super+0x5a/0x60
>  cleanup_mnt+0x3f/0x90
>  mntput_no_expire+0x13b/0x190
>  kern_unmount+0x3e/0x50
>  pid_ns_release_proc+0x15/0x20
>  proc_cleanup_work+0x15/0x20
>  process_one_work+0x197/0x450
>  worker_thread+0x4e/0x4a0
>  kthread+0x109/0x140
>  ? process_one_work+0x450/0x450
>  ? kthread_park+0x90/0x90
>  ret_from_fork+0x2c/0x40
> ---[ end trace e1c109611e5d0b41 ]---
> VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds.  Have a nice day...
> BUG: unable to handle kernel NULL pointer dereference at           (null)
> IP: _raw_spin_lock+0xc/0x30
> PGD 0

Fix this by taking a reference to the super block in proc_sys_prune_dcache.

The superblock reference is the core of the fix however the sysctl_inodes
list is converted to a hlist so that hlist_del_init_rcu may be used.  This
allows proc_sys_prune_dache to remove inodes the sysctl_inodes list, while
not causing problems for proc_sys_evict_inode when if it later choses to
remove the inode from the sysctl_inodes list.  Removing inodes from the
sysctl_inodes list allows proc_sys_prune_dcache to have a progress
guarantee, while still being able to drop all locks.  The fact that
head->unregistering is set in start_unregistering ensures that no more
inodes will be added to the the sysctl_inodes list.

Previously the code did a dance where it delayed calling iput until the
next entry in the list was being considered to ensure the inode remained on
the sysctl_inodes list until the next entry was walked to.  The structure
of the loop in this patch does not need that so is much easier to
understand and maintain.

Cc: stable@vger.kernel.org
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@openvz.org>
Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.")
Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/internal.h     |  2 +-
 fs/proc/proc_sysctl.c  | 43 +++++++++++++++++++++++++++++-------------
 include/linux/sysctl.h |  2 +-
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c5ae09b6c726a..18694598bebfb 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -67,7 +67,7 @@ struct proc_inode {
 	struct proc_dir_entry *pde;
 	struct ctl_table_header *sysctl;
 	struct ctl_table *sysctl_entry;
-	struct list_head sysctl_inodes;
+	struct hlist_node sysctl_inodes;
 	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
 };
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 67985a7233c24..9bf06e2b12846 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head,
 	head->set = set;
 	head->parent = NULL;
 	head->node = node;
-	INIT_LIST_HEAD(&head->inodes);
+	INIT_HLIST_HEAD(&head->inodes);
 	if (node) {
 		struct ctl_table *entry;
 		for (entry = table; entry->procname; entry++, node++)
@@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p)
 			complete(p->unregistering);
 }
 
-/* called under sysctl_lock */
 static void proc_sys_prune_dcache(struct ctl_table_header *head)
 {
-	struct inode *inode, *prev = NULL;
+	struct inode *inode;
 	struct proc_inode *ei;
+	struct hlist_node *node;
+	struct super_block *sb;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
-		inode = igrab(&ei->vfs_inode);
-		if (inode) {
-			rcu_read_unlock();
-			iput(prev);
-			prev = inode;
-			d_prune_aliases(inode);
+	for (;;) {
+		node = hlist_first_rcu(&head->inodes);
+		if (!node)
+			break;
+		ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
+		spin_lock(&sysctl_lock);
+		hlist_del_init_rcu(&ei->sysctl_inodes);
+		spin_unlock(&sysctl_lock);
+
+		inode = &ei->vfs_inode;
+		sb = inode->i_sb;
+		if (!atomic_inc_not_zero(&sb->s_active))
+			continue;
+		inode = igrab(inode);
+		rcu_read_unlock();
+		if (unlikely(!inode)) {
+			deactivate_super(sb);
 			rcu_read_lock();
+			continue;
 		}
+
+		d_prune_aliases(inode);
+		iput(inode);
+		deactivate_super(sb);
+
+		rcu_read_lock();
 	}
 	rcu_read_unlock();
-	iput(prev);
 }
 
 /* called under sysctl_lock, will reacquire if has to wait */
@@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	}
 	ei->sysctl = head;
 	ei->sysctl_entry = table;
-	list_add_rcu(&ei->sysctl_inodes, &head->inodes);
+	hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
 	head->count++;
 	spin_unlock(&sysctl_lock);
 
@@ -489,7 +506,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
 {
 	spin_lock(&sysctl_lock);
-	list_del_rcu(&PROC_I(inode)->sysctl_inodes);
+	hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
 	if (!--head->count)
 		kfree_rcu(head, rcu);
 	spin_unlock(&sysctl_lock);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d07816def05..1c04a26bfd2f0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -143,7 +143,7 @@ struct ctl_table_header
 	struct ctl_table_set *set;
 	struct ctl_dir *parent;
 	struct ctl_node *node;
-	struct list_head inodes; /* head for proc_inode->sysctl_inodes */
+	struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
 };
 
 struct ctl_dir {
-- 
GitLab


From c1bda752fdaead0c154c13a72a83faabc7dffc82 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 11 Jul 2017 13:41:34 +0200
Subject: [PATCH 1186/1958] platform/x86: peaq-wmi: Fix
 peaq_ignore_events_counter handling off by 1

If peaq_ignore_events_counter gets set to 1 we should skip polling 1
time, rather then ignoring it.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/peaq-wmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/peaq-wmi.c b/drivers/platform/x86/peaq-wmi.c
index ca75b4dc437e2..77d1f90b07942 100644
--- a/drivers/platform/x86/peaq-wmi.c
+++ b/drivers/platform/x86/peaq-wmi.c
@@ -51,7 +51,7 @@ static void peaq_wmi_poll(struct input_polled_dev *dev)
 		return;
 	}
 
-	if (peaq_ignore_events_counter && --peaq_ignore_events_counter > 0)
+	if (peaq_ignore_events_counter && --peaq_ignore_events_counter >= 0)
 		return;
 
 	if (obj.integer.value) {
-- 
GitLab


From ee56ff71bbc405d841415809cb5ece079e7dd785 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 11 Jul 2017 16:18:14 +0530
Subject: [PATCH 1187/1958] platform/x86: fujitsu-laptop: constify
 attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work
with const attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   7474	   1205	     24	   8703	   21ff	drivers/platform/x86/fujitsu-laptop.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   7538	   1141	     24	   8703	   21ff	drivers/platform/x86/fujitsu-laptop.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Jonathan Woithe <jwoithe@just42.net>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/fujitsu-laptop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 9a1034bec0fcb..85de30f93a9cc 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -317,7 +317,7 @@ static struct attribute *fujitsu_pf_attributes[] = {
 	NULL
 };
 
-static struct attribute_group fujitsu_pf_attribute_group = {
+static const struct attribute_group fujitsu_pf_attribute_group = {
 	.attrs = fujitsu_pf_attributes
 };
 
-- 
GitLab


From ab56246bee36b7226355d10f817ef8c27f56ef4b Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 11 Jul 2017 16:18:15 +0530
Subject: [PATCH 1188/1958] platform/x86: compal-laptop: constify
 attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work
with const attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   6781	   6144	     34	  12959	   329f	drivers/platform/x86/compal-laptop.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   6845	   6080	     34	  12959	   329f	drivers/platform/x86/compal-laptop.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/compal-laptop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index e1c2b6d4b24ab..a8e4a539e704e 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -718,7 +718,7 @@ static struct attribute *compal_platform_attrs[] = {
 	&dev_attr_wake_up_mouse.attr,
 	NULL
 };
-static struct attribute_group compal_platform_attr_group = {
+static const struct attribute_group compal_platform_attr_group = {
 	.attrs = compal_platform_attrs
 };
 
-- 
GitLab


From 8546268e1177bd84157c7dc5bad82a2433fb5fea Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 11 Jul 2017 16:18:16 +0530
Subject: [PATCH 1189/1958] platform/x86: samsung-laptop: constify
 attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work
with const attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   8710	   5452	     23	  14185	   3769	drivers/platform/x86/samsung-laptop.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   8774	   5388	     23	  14185	   3769	drivers/platform/x86/samsung-laptop.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/samsung-laptop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index 5c4dfe48f03d2..0c703feaeb88f 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -1232,7 +1232,7 @@ static umode_t samsung_sysfs_is_visible(struct kobject *kobj,
 	return ok ? attr->mode : 0;
 }
 
-static struct attribute_group platform_attribute_group = {
+static const struct attribute_group platform_attribute_group = {
 	.is_visible = samsung_sysfs_is_visible,
 	.attrs = platform_attributes
 };
-- 
GitLab


From 4b7942d8d1ced3c82495953cb0bb90e7de6dbba6 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 11 Jul 2017 16:18:17 +0530
Subject: [PATCH 1190/1958] platform/x86: alienware-wmi: constify
 attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work
with const attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   6932	   1016	     48	   7996	   1f3c	drivers/platform/x86/alienware-wmi.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   7060	    888	     48	   7996	   1f64	drivers/platform/x86/alienware-wmi.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/alienware-wmi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 9866fec78c1c9..0831b428c2175 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c
@@ -604,7 +604,7 @@ static struct attribute *hdmi_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group hdmi_attribute_group = {
+static const struct attribute_group hdmi_attribute_group = {
 	.name = "hdmi",
 	.attrs = hdmi_attrs,
 };
@@ -660,7 +660,7 @@ static struct attribute *amplifier_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group amplifier_attribute_group = {
+static const struct attribute_group amplifier_attribute_group = {
 	.name = "amplifier",
 	.attrs = amplifier_attrs,
 };
@@ -741,7 +741,7 @@ static struct attribute *deepsleep_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group deepsleep_attribute_group = {
+static const struct attribute_group deepsleep_attribute_group = {
 	.name = "deepsleep",
 	.attrs = deepsleep_attrs,
 };
-- 
GitLab


From 24f584055c360d39cb55e91a7a1c53ddeb4a6147 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 11 Jul 2017 16:18:18 +0530
Subject: [PATCH 1191/1958] platform/x86: panasonic-laptop: constify
 attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work
with const attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   2505	    600	      4	   3109	    c25	drivers/platform/x86/panasonic-laptop.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   2569	    536	      4	   3109	    c25	drivers/platform/x86/panasonic-laptop.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/panasonic-laptop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index 76b0a58e205ba..5c39b3211709b 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -437,7 +437,7 @@ static struct attribute *pcc_sysfs_entries[] = {
 	NULL,
 };
 
-static struct attribute_group pcc_attr_group = {
+static const struct attribute_group pcc_attr_group = {
 	.name	= NULL,		/* put in device directory */
 	.attrs	= pcc_sysfs_entries,
 };
-- 
GitLab


From e90d9ba837b3b779644613a3b9e4845a0cd302f2 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 11 Jul 2017 16:18:19 +0530
Subject: [PATCH 1192/1958] platform/x86: asus-wmi: constify attribute_group
 structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work
with const attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  13140	    840	      1	  13981	   369d	drivers/platform/x86/asus-wmi.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  13268	    712	      1	  13981	   368d	drivers/platform/x86/asus-wmi.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/asus-wmi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 6c7d86074b384..709e3a67391a0 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -1433,7 +1433,7 @@ static umode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
 	return ok ? attr->mode : 0;
 }
 
-static struct attribute_group hwmon_attribute_group = {
+static const struct attribute_group hwmon_attribute_group = {
 	.is_visible = asus_hwmon_sysfs_is_visible,
 	.attrs = hwmon_attributes
 };
@@ -1821,7 +1821,7 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj,
 	return ok ? attr->mode : 0;
 }
 
-static struct attribute_group platform_attribute_group = {
+static const struct attribute_group platform_attribute_group = {
 	.is_visible = asus_sysfs_is_visible,
 	.attrs = platform_attributes
 };
-- 
GitLab


From 44bd76d0a3ff1901b66a739c4a9e68e6fdc0463c Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 11 Jul 2017 16:18:20 +0530
Subject: [PATCH 1193/1958] platform/x86: toshiba_acpi: constify
 attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work
with const attribute_group. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  26360	   1072	     24	  27456	   6b40	drivers/platform/x86/toshiba_acpi.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  26424	   1008	     24	  27456	   6b40	drivers/platform/x86/toshiba_acpi.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/toshiba_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 88f9f79a7cf60..bb1dcd7fbdeb8 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -2419,7 +2419,7 @@ static umode_t toshiba_sysfs_is_visible(struct kobject *kobj,
 	return exists ? attr->mode : 0;
 }
 
-static struct attribute_group toshiba_attr_group = {
+static const struct attribute_group toshiba_attr_group = {
 	.is_visible = toshiba_sysfs_is_visible,
 	.attrs = toshiba_attributes,
 };
-- 
GitLab


From f9b76ebd49f97458857568918c305a17fa7c6567 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 11 Jul 2017 13:05:34 -0400
Subject: [PATCH 1194/1958] bnxt_en: Fix race conditions in .ndo_get_stats64().

.ndo_get_stats64() may not be protected by RTNL and can race with
.ndo_stop() or other ethtool operations that can free the statistics
memory.  Fix it by setting a new flag BNXT_STATE_READ_STATS and then
proceeding to read statistics memory only if the state is OPEN.  The
close path that frees the memory clears the OPEN state and then waits
for the BNXT_STATE_READ_STATS to clear before proceeding to free the
statistics memory.

Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 18 ++++++++++++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a19f68f5862d7..415694d37989c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6279,6 +6279,12 @@ static int bnxt_open(struct net_device *dev)
 	return __bnxt_open_nic(bp, true, true);
 }
 
+static bool bnxt_drv_busy(struct bnxt *bp)
+{
+	return (test_bit(BNXT_STATE_IN_SP_TASK, &bp->state) ||
+		test_bit(BNXT_STATE_READ_STATS, &bp->state));
+}
+
 int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 {
 	int rc = 0;
@@ -6297,7 +6303,7 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 
 	clear_bit(BNXT_STATE_OPEN, &bp->state);
 	smp_mb__after_atomic();
-	while (test_bit(BNXT_STATE_IN_SP_TASK, &bp->state))
+	while (bnxt_drv_busy(bp))
 		msleep(20);
 
 	/* Flush rings and and disable interrupts */
@@ -6358,8 +6364,15 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	u32 i;
 	struct bnxt *bp = netdev_priv(dev);
 
-	if (!bp->bnapi)
+	set_bit(BNXT_STATE_READ_STATS, &bp->state);
+	/* Make sure bnxt_close_nic() sees that we are reading stats before
+	 * we check the BNXT_STATE_OPEN flag.
+	 */
+	smp_mb__after_atomic();
+	if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
+		clear_bit(BNXT_STATE_READ_STATS, &bp->state);
 		return;
+	}
 
 	/* TODO check if we need to synchronize with bnxt_close path */
 	for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -6406,6 +6419,7 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 		stats->tx_fifo_errors = le64_to_cpu(tx->tx_fifo_underruns);
 		stats->tx_errors = le64_to_cpu(tx->tx_err);
 	}
+	clear_bit(BNXT_STATE_READ_STATS, &bp->state);
 }
 
 static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index f872a7db2ca8b..3c9d484dbd4ea 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1117,6 +1117,7 @@ struct bnxt {
 	unsigned long		state;
 #define BNXT_STATE_OPEN		0
 #define BNXT_STATE_IN_SP_TASK	1
+#define BNXT_STATE_READ_STATS	2
 
 	struct bnxt_irq	*irq_tbl;
 	int			total_irqs;
-- 
GitLab


From 3b6b34df342553a7522561e34288f5bb803aa9aa Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 11 Jul 2017 13:05:35 -0400
Subject: [PATCH 1195/1958] bnxt_en: Fix bug in ethtool -L.

When changing channels from combined to rx/tx or vice versa, the code
uses the wrong "sh" parameter to determine if we are reserving rings
for shared or non-shared mode.  It should be using the ethtool requested
"sh" parameter instead of the current "sh" parameter.

Fix it by passing the "sh" parameter to bnxt_reserve_rings().  For
ethtool, we will pass in the requested "sh" parameter.

Fixes: 391be5c27364 ("bnxt_en: Implement new scheme to reserve tx rings.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         | 9 +++------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         | 3 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c     | 2 +-
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 415694d37989c..d9830d09e6c31 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6918,16 +6918,13 @@ static void bnxt_sp_task(struct work_struct *work)
 }
 
 /* Under rtnl_lock */
-int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp)
+int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
+		       int tx_xdp)
 {
 	int max_rx, max_tx, tx_sets = 1;
 	int tx_rings_needed;
-	bool sh = true;
 	int rc;
 
-	if (!(bp->flags & BNXT_FLAG_SHARED_RINGS))
-		sh = false;
-
 	if (tcs)
 		tx_sets = tcs;
 
@@ -7135,7 +7132,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 		sh = true;
 
 	rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
-				tc, bp->tx_nr_rings_xdp);
+				sh, tc, bp->tx_nr_rings_xdp);
 	if (rc)
 		return rc;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 3c9d484dbd4ea..f34691f856028 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1301,7 +1301,8 @@ int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_half_open_nic(struct bnxt *bp);
 void bnxt_half_close_nic(struct bnxt *bp);
 int bnxt_close_nic(struct bnxt *, bool, bool);
-int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp);
+int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
+		       int tx_xdp);
 int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
 void bnxt_restore_pf_fw_resources(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index fd1181510b65d..be6acadcb202b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -432,7 +432,8 @@ static int bnxt_set_channels(struct net_device *dev,
 		}
 		tx_xdp = req_rx_rings;
 	}
-	rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, tcs, tx_xdp);
+	rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, sh, tcs,
+				tx_xdp);
 	if (rc) {
 		netdev_warn(dev, "Unable to allocate the requested rings\n");
 		return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 7d67552e70d7a..3961a68074548 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -170,7 +170,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
 	if (!tc)
 		tc = 1;
 	rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
-				tc, tx_xdp);
+				true, tc, tx_xdp);
 	if (rc) {
 		netdev_warn(dev, "Unable to reserve enough TX rings to support XDP.\n");
 		return rc;
-- 
GitLab


From 9b0436c3f29483ca91d890b0072c0c02e2e535ed Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 11 Jul 2017 13:05:36 -0400
Subject: [PATCH 1196/1958] bnxt_en: Fix SRIOV on big-endian architecture.

The PF driver sets up a list of firmware commands from the VF driver that
needs to be forwarded to the PF for approval.  This list is a 256-bit
bitmap.  The code that sets up the bitmap falls apart on big-endian
architecture.  __set_bit() does not work because it operates on long types
whereas the firmware interface is defined in u32 types, causing bits in
the wrong 32-bit word to be set.

Fix it by setting the proper bits on an array of u32.

Fixes: de68f5de5651 ("bnxt_en: Fix bitmap declaration to work on 32-bit arches.")
Reported-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d9830d09e6c31..e7c8539cbddf6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3458,13 +3458,18 @@ static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
 	req.ver_upd = DRV_VER_UPD;
 
 	if (BNXT_PF(bp)) {
-		DECLARE_BITMAP(vf_req_snif_bmap, 256);
-		u32 *data = (u32 *)vf_req_snif_bmap;
+		u32 data[8];
 		int i;
 
-		memset(vf_req_snif_bmap, 0, sizeof(vf_req_snif_bmap));
-		for (i = 0; i < ARRAY_SIZE(bnxt_vf_req_snif); i++)
-			__set_bit(bnxt_vf_req_snif[i], vf_req_snif_bmap);
+		memset(data, 0, sizeof(data));
+		for (i = 0; i < ARRAY_SIZE(bnxt_vf_req_snif); i++) {
+			u16 cmd = bnxt_vf_req_snif[i];
+			unsigned int bit, idx;
+
+			idx = cmd / 32;
+			bit = cmd % 32;
+			data[idx] |= 1 << bit;
+		}
 
 		for (i = 0; i < 8; i++)
 			req.vf_req_fwd[i] = cpu_to_le32(data[i]);
-- 
GitLab


From b3743c71b7c33a126d6d8942bb268775987400ec Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Tue, 11 Jul 2017 17:59:43 +0800
Subject: [PATCH 1197/1958] tcmu: Fix possbile memory leak / OOPs when
 recalculating cmd base size

For all the entries allocated from the ring cmd area, the memory is
something like the stack memory, which will always reserve the old
data, so the entry->req.iov_bidi_cnt maybe none zero.

On some environments, the crash could be reproduce very easy and some
not. The following is the crash core trace as reported by Damien:

[  240.143969] CPU: 0 PID: 1285 Comm: iscsi_trx Not tainted 4.12.0-rc1+ #3
[  240.150607] Hardware name: ASUS All Series/H87-PRO, BIOS 2104 10/28/2014
[  240.157331] task: ffff8807de4f5800 task.stack: ffffc900047dc000
[  240.163270] RIP: 0010:memcpy_erms+0x6/0x10
[  240.167377] RSP: 0018:ffffc900047dfc68 EFLAGS: 00010202
[  240.172621] RAX: ffffc9065db85540 RBX: ffff8807f7980000 RCX: 0000000000000010
[  240.179771] RDX: 0000000000000010 RSI: ffff8807de574fe0 RDI: ffffc9065db85540
[  240.186930] RBP: ffffc900047dfd30 R08: ffff8807de41b000 R09: 0000000000000000
[  240.194088] R10: 0000000000000040 R11: ffff8807e9b726f0 R12: 00000006565726b0
[  240.201246] R13: ffffc90007612ea0 R14: 000000065657d540 R15: 0000000000000000
[  240.208397] FS:  0000000000000000(0000) GS:ffff88081fa00000(0000) knlGS:0000000000000000
[  240.216510] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  240.222280] CR2: ffffc9065db85540 CR3: 0000000001c0f000 CR4: 00000000001406f0
[  240.229430] Call Trace:
[  240.231887]  ? tcmu_queue_cmd+0x83c/0xa80
[  240.235916]  ? target_check_reservation+0xcd/0x6f0
[  240.240725]  __target_execute_cmd+0x27/0xa0
[  240.244918]  target_execute_cmd+0x232/0x2c0
[  240.249124]  ? __local_bh_enable_ip+0x64/0xa0
[  240.253499]  iscsit_execute_cmd+0x20d/0x270
[  240.257693]  iscsit_sequence_cmd+0x110/0x190
[  240.261985]  iscsit_get_rx_pdu+0x360/0xc80
[  240.267565]  ? iscsi_target_rx_thread+0x54/0xd0
[  240.273571]  iscsi_target_rx_thread+0x9a/0xd0
[  240.279413]  kthread+0x113/0x150
[  240.284120]  ? iscsi_target_tx_thread+0x1e0/0x1e0
[  240.290297]  ? kthread_create_on_node+0x40/0x40
[  240.296297]  ret_from_fork+0x2e/0x40
[  240.301332] Code: 90 90 90 90 90 eb 1e 0f 1f 00 48 89 f8 48 89 d1 48
c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 c3 66 0f 1f 44 00 00 48 89 f8 48
89 d1 <f3> a4 c3 0f 1f 80 00 00 00 00 48 89 f8 48 83 fa 20 72 7e 40 38
[  240.321751] RIP: memcpy_erms+0x6/0x10 RSP: ffffc900047dfc68
[  240.328838] CR2: ffffc9065db85540
[  240.333667] ---[ end trace b7e5354cfb54d08b ]---

To fix this, just memset all the entry memory before using it, and
also to be more readable we adjust the bidi code.

Fixed: fe25cc34795(tcmu: Recalculate the tcmu_cmd size to save cmd area
		memories)
Reported-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Reported-by: Damien Le Moal <damien.lemoal@wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Cc: <stable@vger.kernel.org> # 4.12+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 2f1fa927682e3..3b25ef3d55967 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -563,7 +563,7 @@ static int scatter_data_area(struct tcmu_dev *udev,
 			to_offset = get_block_offset_user(udev, dbi,
 					block_remaining);
 			offset = DATA_BLOCK_SIZE - block_remaining;
-			to = (void *)(unsigned long)to + offset;
+			to += offset;
 
 			if (*iov_cnt != 0 &&
 			    to_offset == iov_tail(udev, *iov)) {
@@ -636,7 +636,7 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
 			offset = DATA_BLOCK_SIZE - block_remaining;
-			from = (void *)(unsigned long)from + offset;
+			from += offset;
 			tcmu_flush_dcache_range(from, copy_bytes);
 			memcpy(to + sg->length - sg_remaining, from,
 					copy_bytes);
@@ -840,10 +840,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	}
 
 	entry = (void *) mb + CMDR_OFF + cmd_head;
+	memset(entry, 0, command_size);
 	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
 	entry->hdr.cmd_id = tcmu_cmd->cmd_id;
-	entry->hdr.kflags = 0;
-	entry->hdr.uflags = 0;
 
 	/* Handle allocating space from the data area */
 	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
@@ -862,11 +861,10 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 	}
 	entry->req.iov_cnt = iov_cnt;
-	entry->req.iov_dif_cnt = 0;
 
 	/* Handle BIDI commands */
+	iov_cnt = 0;
 	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		iov_cnt = 0;
 		iov++;
 		ret = scatter_data_area(udev, tcmu_cmd,
 					se_cmd->t_bidi_data_sg,
@@ -879,8 +877,8 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 			pr_err("tcmu: alloc and scatter bidi data failed\n");
 			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		}
-		entry->req.iov_bidi_cnt = iov_cnt;
 	}
+	entry->req.iov_bidi_cnt = iov_cnt;
 
 	/*
 	 * Recalaulate the command's base size and size according
-- 
GitLab


From daf78c305148c5a52f75a7fd88461ffa7066aec6 Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Tue, 11 Jul 2017 18:06:41 +0800
Subject: [PATCH 1198/1958] tcmu: clean up the code and with one small fix

Remove useless blank line and code and at the same time add one error
path to catch the errors.

Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 3b25ef3d55967..80ee130f8253e 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -342,7 +342,6 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
 
 	page = radix_tree_lookup(&udev->data_blocks, dbi);
 	if (!page) {
-
 		if (atomic_add_return(1, &global_db_count) >
 					TCMU_GLOBAL_MAX_BLOCKS) {
 			atomic_dec(&global_db_count);
@@ -352,14 +351,11 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
 		/* try to get new page from the mm */
 		page = alloc_page(GFP_KERNEL);
 		if (!page)
-			return false;
+			goto err_alloc;
 
 		ret = radix_tree_insert(&udev->data_blocks, dbi, page);
-		if (ret) {
-			__free_page(page);
-			return false;
-		}
-
+		if (ret)
+			goto err_insert;
 	}
 
 	if (dbi > udev->dbi_max)
@@ -369,6 +365,11 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
 	tcmu_cmd_set_dbi(tcmu_cmd, dbi);
 
 	return true;
+err_insert:
+	__free_page(page);
+err_alloc:
+	atomic_dec(&global_db_count);
+	return false;
 }
 
 static bool tcmu_get_empty_blocks(struct tcmu_dev *udev,
@@ -527,7 +528,7 @@ static inline size_t get_block_offset_user(struct tcmu_dev *dev,
 		DATA_BLOCK_SIZE - remaining;
 }
 
-static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov)
+static inline size_t iov_tail(struct iovec *iov)
 {
 	return (size_t)iov->iov_base + iov->iov_len;
 }
@@ -566,7 +567,7 @@ static int scatter_data_area(struct tcmu_dev *udev,
 			to += offset;
 
 			if (*iov_cnt != 0 &&
-			    to_offset == iov_tail(udev, *iov)) {
+			    to_offset == iov_tail(*iov)) {
 				(*iov)->iov_len += copy_bytes;
 			} else {
 				new_iov(iov, iov_cnt, udev);
@@ -722,10 +723,7 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 		}
 	}
 
-	if (!tcmu_get_empty_blocks(udev, cmd))
-		return false;
-
-	return true;
+	return tcmu_get_empty_blocks(udev, cmd);
 }
 
 static inline size_t tcmu_cmd_get_base_cmd_size(size_t iov_cnt)
-- 
GitLab


From fdf99b3ffcdd8471ee3104512198a178b7351a02 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 Jul 2017 06:51:35 +0200
Subject: [PATCH 1199/1958] cisco: enic: Fic an error handling path in
 'vnic_dev_init_devcmd2()'

if 'ioread32()' returns 0xFFFFFFF, we have to go through the error
handling path as done everywhere else in this function.

Move the 'err_free_wq' label to better match its name and its location
and add a new label 'err_disable_wq'.
Update the code accordingly.

Fixes: 373fb0873d43 ("enic: add devcmd2")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/vnic_dev.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 1841ad45d2157..39bad67422dd4 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -402,8 +402,8 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
 	fetch_index = ioread32(&vdev->devcmd2->wq.ctrl->fetch_index);
 	if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone  */
 		vdev_err(vdev, "Fatal error in devcmd2 init - hardware surprise removal\n");
-
-		return -ENODEV;
+		err = -ENODEV;
+		goto err_free_wq;
 	}
 
 	enic_wq_init_start(&vdev->devcmd2->wq, 0, fetch_index, fetch_index, 0,
@@ -414,7 +414,7 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
 	err = vnic_dev_alloc_desc_ring(vdev, &vdev->devcmd2->results_ring,
 				       DEVCMD2_RING_SIZE, DEVCMD2_DESC_SIZE);
 	if (err)
-		goto err_free_wq;
+		goto err_disable_wq;
 
 	vdev->devcmd2->result = vdev->devcmd2->results_ring.descs;
 	vdev->devcmd2->cmd_ring = vdev->devcmd2->wq.ring.descs;
@@ -433,8 +433,9 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
 
 err_free_desc_ring:
 	vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring);
-err_free_wq:
+err_disable_wq:
 	vnic_wq_disable(&vdev->devcmd2->wq);
+err_free_wq:
 	vnic_wq_free(&vdev->devcmd2->wq);
 err_free_devcmd2:
 	kfree(vdev->devcmd2);
-- 
GitLab


From 55dd8cf2163e1866d9497c4f361d6ed42b3a192b Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 11 Jul 2017 10:50:00 -0700
Subject: [PATCH 1200/1958] Revert "qla2xxx: Fix incorrect tcm_qla2xxx_free_cmd
 use during TMR ABORT"

This reverts commit 5f572526a18418258bfa137e3353656c25439500.

As reported by Pascal here:

http://www.spinics.net/lists/target-devel/msg15808.html

there still appears to be another issue related to this change
to drop the original bogus tcm_qla2xxx_free_cmd() usage from
tcm_qla2xxx_handle_data_work() and tcm_qla2xxx_aborted_task().

So revert this for now, until Pascal can verify with further
debug in place to understand what's going on.

Reported-by: Pascal de Bruijn <p.debruijn@unilogic.nl>
Cc: Pascal de Bruijn <p.debruijn@unilogic.nl>
Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Cc: Quinn Tran <quinn.tran@cavium.com>
Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/qla_target.c  |  4 ----
 drivers/scsi/qla2xxx/qla_target.h  |  1 -
 drivers/scsi/qla2xxx/tcm_qla2xxx.c | 36 +++++++++++++++++++++++++++---
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 8f8ece9008011..401e245477d4c 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -4079,8 +4079,6 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
 	cmd = &((struct qla_tgt_cmd *)se_sess->sess_cmd_map)[tag];
 	memset(cmd, 0, sizeof(struct qla_tgt_cmd));
 
-	init_completion(&cmd->write_pending_abort_comp);
-
 	memcpy(&cmd->atio, atio, sizeof(*atio));
 	cmd->state = QLA_TGT_STATE_NEW;
 	cmd->tgt = vha->vha_tgt.qla_tgt;
@@ -5085,8 +5083,6 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
 
 	qlt_incr_num_pend_cmds(vha);
 	INIT_LIST_HEAD(&cmd->cmd_list);
-	init_completion(&cmd->write_pending_abort_comp);
-
 	memcpy(&cmd->atio, atio, sizeof(*atio));
 
 	cmd->tgt = vha->vha_tgt.qla_tgt;
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 939e93c5d3aef..d64420251194e 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -868,7 +868,6 @@ struct qla_tgt_cmd {
 	unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER];
 
 	spinlock_t cmd_lock;
-	struct completion write_pending_abort_comp;
 	/* to save extra sess dereferences */
 	unsigned int conf_compl_supported:1;
 	unsigned int sg_mapped:1;
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 8c1bf9b14bb2f..75aeb9fdfd06e 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -415,7 +415,6 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd)
 
 static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd)
 {
-	struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd);
 	unsigned long flags;
 	/*
 	 * Check for WRITE_PENDING status to determine if we need to wait for
@@ -425,7 +424,8 @@ static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd)
 	if (se_cmd->t_state == TRANSPORT_WRITE_PENDING ||
 	    se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) {
 		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
-		wait_for_completion(&cmd->write_pending_abort_comp);
+		wait_for_completion_timeout(&se_cmd->t_transport_stop_comp,
+						50);
 		return 0;
 	}
 	spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
@@ -501,6 +501,7 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
 static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 {
 	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
+	unsigned long flags;
 
 	/*
 	 * Ensure that the complete FCP WRITE payload has been received.
@@ -508,6 +509,17 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 	 */
 	cmd->cmd_in_wq = 0;
 
+	spin_lock_irqsave(&cmd->cmd_lock, flags);
+	cmd->data_work = 1;
+	if (cmd->aborted) {
+		cmd->data_work_free = 1;
+		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+
+		tcm_qla2xxx_free_cmd(cmd);
+		return;
+	}
+	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+
 	cmd->vha->tgt_counters.qla_core_ret_ctio++;
 	if (!cmd->write_data_transferred) {
 		/*
@@ -515,7 +527,7 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 		 * waiting upon completion in tcm_qla2xxx_write_pending_status()
 		 */
 		if (cmd->se_cmd.transport_state & CMD_T_ABORTED) {
-			complete(&cmd->write_pending_abort_comp);
+			complete(&cmd->se_cmd.t_transport_stop_comp);
 			return;
 		}
 
@@ -741,13 +753,31 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd)
 	qlt_xmit_tm_rsp(mcmd);
 }
 
+#define DATA_WORK_NOT_FREE(_cmd) (_cmd->data_work && !_cmd->data_work_free)
 static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd)
 {
 	struct qla_tgt_cmd *cmd = container_of(se_cmd,
 				struct qla_tgt_cmd, se_cmd);
+	unsigned long flags;
 
 	if (qlt_abort_cmd(cmd))
 		return;
+
+	spin_lock_irqsave(&cmd->cmd_lock, flags);
+	if ((cmd->state == QLA_TGT_STATE_NEW)||
+	    ((cmd->state == QLA_TGT_STATE_DATA_IN) &&
+		DATA_WORK_NOT_FREE(cmd))) {
+		cmd->data_work_free = 1;
+		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+		/*
+		 * cmd has not reached fw, Use this trigger to free it.
+		 */
+		tcm_qla2xxx_free_cmd(cmd);
+		return;
+	}
+	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+	return;
+
 }
 
 static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *,
-- 
GitLab


From 138d351eefb727ab9e41a3dc5f112ceb4f6e59f2 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 7 Jul 2017 14:45:49 -0700
Subject: [PATCH 1201/1958] iscsi-target: Add login_keys_workaround attribute
 for non RFC initiators

This patch re-introduces part of a long standing login workaround that
was recently dropped by:

  commit 1c99de981f30b3e7868b8d20ce5479fa1c0fea46
  Author: Nicholas Bellinger <nab@linux-iscsi.org>
  Date:   Sun Apr 2 13:36:44 2017 -0700

      iscsi-target: Drop work-around for legacy GlobalSAN initiator

Namely, the workaround for FirstBurstLength ended up being required by
Mellanox Flexboot PXE boot ROMs as reported by Robert.

So this patch re-adds the work-around for FirstBurstLength within
iscsi_check_proposer_for_optional_reply(), and makes the key optional
to respond when the initiator does not propose, nor respond to it.

Also as requested by Arun, this patch introduces a new TPG attribute
named 'login_keys_workaround' that controls the use of both the
FirstBurstLength workaround, as well as the two other existing
workarounds for gPXE iSCSI boot client.

By default, the workaround is enabled with login_keys_workaround=1,
since Mellanox FlexBoot requires it, and Arun has verified the Qlogic
MSFT initiator already proposes FirstBurstLength, so it's uneffected
by this re-adding this part of the original work-around.

Reported-by: Robert LeBlanc <robert@leblancnet.us>
Cc: Robert LeBlanc <robert@leblancnet.us>
Reviewed-by: Arun Easi <arun.easi@cavium.com>
Cc: <stable@vger.kernel.org> # 3.1+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_configfs.c  |  2 +
 drivers/target/iscsi/iscsi_target_nego.c      |  6 ++-
 .../target/iscsi/iscsi_target_parameters.c    | 41 +++++++++++++------
 .../target/iscsi/iscsi_target_parameters.h    |  2 +-
 drivers/target/iscsi/iscsi_target_tpg.c       | 19 +++++++++
 drivers/target/iscsi/iscsi_target_tpg.h       |  1 +
 include/target/iscsi/iscsi_target_core.h      |  9 ++++
 7 files changed, 64 insertions(+), 16 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 535a8e06a4010..0dd4c45f7575a 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -781,6 +781,7 @@ DEF_TPG_ATTRIB(default_erl);
 DEF_TPG_ATTRIB(t10_pi);
 DEF_TPG_ATTRIB(fabric_prot_type);
 DEF_TPG_ATTRIB(tpg_enabled_sendtargets);
+DEF_TPG_ATTRIB(login_keys_workaround);
 
 static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
 	&iscsi_tpg_attrib_attr_authentication,
@@ -796,6 +797,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
 	&iscsi_tpg_attrib_attr_t10_pi,
 	&iscsi_tpg_attrib_attr_fabric_prot_type,
 	&iscsi_tpg_attrib_attr_tpg_enabled_sendtargets,
+	&iscsi_tpg_attrib_attr_login_keys_workaround,
 	NULL,
 };
 
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 96df63f1f795e..7a6751fecd323 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -864,7 +864,8 @@ static int iscsi_target_handle_csg_zero(
 			SENDER_TARGET,
 			login->rsp_buf,
 			&login->rsp_length,
-			conn->param_list);
+			conn->param_list,
+			conn->tpg->tpg_attrib.login_keys_workaround);
 	if (ret < 0)
 		return -1;
 
@@ -934,7 +935,8 @@ static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_log
 			SENDER_TARGET,
 			login->rsp_buf,
 			&login->rsp_length,
-			conn->param_list);
+			conn->param_list,
+			conn->tpg->tpg_attrib.login_keys_workaround);
 	if (ret < 0) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
 				ISCSI_LOGIN_STATUS_INIT_ERR);
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index fce627628200c..caab1045742df 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -765,7 +765,8 @@ static int iscsi_check_for_auth_key(char *key)
 	return 0;
 }
 
-static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
+static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param,
+						    bool keys_workaround)
 {
 	if (IS_TYPE_BOOL_AND(param)) {
 		if (!strcmp(param->value, NO))
@@ -773,19 +774,31 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
 	} else if (IS_TYPE_BOOL_OR(param)) {
 		if (!strcmp(param->value, YES))
 			SET_PSTATE_REPLY_OPTIONAL(param);
-		 /*
-		  * Required for gPXE iSCSI boot client
-		  */
-		if (!strcmp(param->name, IMMEDIATEDATA))
-			SET_PSTATE_REPLY_OPTIONAL(param);
+
+		if (keys_workaround) {
+			/*
+			 * Required for gPXE iSCSI boot client
+			 */
+			if (!strcmp(param->name, IMMEDIATEDATA))
+				SET_PSTATE_REPLY_OPTIONAL(param);
+		}
 	} else if (IS_TYPE_NUMBER(param)) {
 		if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
 			SET_PSTATE_REPLY_OPTIONAL(param);
-		/*
-		 * Required for gPXE iSCSI boot client
-		 */
-		if (!strcmp(param->name, MAXCONNECTIONS))
-			SET_PSTATE_REPLY_OPTIONAL(param);
+
+		if (keys_workaround) {
+			/*
+			 * Required for Mellanox Flexboot PXE boot ROM
+			 */
+			if (!strcmp(param->name, FIRSTBURSTLENGTH))
+				SET_PSTATE_REPLY_OPTIONAL(param);
+
+			/*
+			 * Required for gPXE iSCSI boot client
+			 */
+			if (!strcmp(param->name, MAXCONNECTIONS))
+				SET_PSTATE_REPLY_OPTIONAL(param);
+		}
 	} else if (IS_PHASE_DECLARATIVE(param))
 		SET_PSTATE_REPLY_OPTIONAL(param);
 }
@@ -1422,7 +1435,8 @@ int iscsi_encode_text_output(
 	u8 sender,
 	char *textbuf,
 	u32 *length,
-	struct iscsi_param_list *param_list)
+	struct iscsi_param_list *param_list,
+	bool keys_workaround)
 {
 	char *output_buf = NULL;
 	struct iscsi_extra_response *er;
@@ -1458,7 +1472,8 @@ int iscsi_encode_text_output(
 			*length += 1;
 			output_buf = textbuf + *length;
 			SET_PSTATE_PROPOSER(param);
-			iscsi_check_proposer_for_optional_reply(param);
+			iscsi_check_proposer_for_optional_reply(param,
+							        keys_workaround);
 			pr_debug("Sending key: %s=%s\n",
 				param->name, param->value);
 		}
diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h
index 9962ccf0ccd7d..c47b73f575283 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.h
+++ b/drivers/target/iscsi/iscsi_target_parameters.h
@@ -46,7 +46,7 @@ extern int iscsi_extract_key_value(char *, char **, char **);
 extern int iscsi_update_param_value(struct iscsi_param *, char *);
 extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_conn *);
 extern int iscsi_encode_text_output(u8, u8, char *, u32 *,
-			struct iscsi_param_list *);
+			struct iscsi_param_list *, bool);
 extern int iscsi_check_negotiated_keys(struct iscsi_param_list *);
 extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *,
 			struct iscsi_param_list *);
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index abaabbaebd881..594d07a1e995e 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -227,6 +227,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg)
 	a->t10_pi = TA_DEFAULT_T10_PI;
 	a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE;
 	a->tpg_enabled_sendtargets = TA_DEFAULT_TPG_ENABLED_SENDTARGETS;
+	a->login_keys_workaround = TA_DEFAULT_LOGIN_KEYS_WORKAROUND;
 }
 
 int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
@@ -895,3 +896,21 @@ int iscsit_ta_tpg_enabled_sendtargets(
 
 	return 0;
 }
+
+int iscsit_ta_login_keys_workaround(
+	struct iscsi_portal_group *tpg,
+	u32 flag)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if ((flag != 0) && (flag != 1)) {
+		pr_err("Illegal value %d\n", flag);
+		return -EINVAL;
+	}
+
+	a->login_keys_workaround = flag;
+	pr_debug("iSCSI_TPG[%hu] - TPG enabled bit for login keys workaround: %s ",
+		tpg->tpgt, (a->login_keys_workaround) ? "ON" : "OFF");
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
index ceba298511677..59fd3cabe89d0 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.h
+++ b/drivers/target/iscsi/iscsi_target_tpg.h
@@ -48,5 +48,6 @@ extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_tpg_enabled_sendtargets(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_login_keys_workaround(struct iscsi_portal_group *, u32);
 
 #endif /* ISCSI_TARGET_TPG_H */
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 7948fc68286b3..0ca1fb08805b2 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -66,6 +66,14 @@ struct sock;
 #define TA_DEFAULT_FABRIC_PROT_TYPE	0
 /* TPG status needs to be enabled to return sendtargets discovery endpoint info */
 #define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1
+/*
+ * Used to control the sending of keys with optional to respond state bit,
+ * as a workaround for non RFC compliant initiators,that do not propose,
+ * nor respond to specific keys required for login to complete.
+ *
+ * See iscsi_check_proposer_for_optional_reply() for more details.
+ */
+#define TA_DEFAULT_LOGIN_KEYS_WORKAROUND 1
 
 #define ISCSI_IOV_DATA_BUFFER		5
 
@@ -768,6 +776,7 @@ struct iscsi_tpg_attrib {
 	u8			t10_pi;
 	u32			fabric_prot_type;
 	u32			tpg_enabled_sendtargets;
+	u32			login_keys_workaround;
 	struct iscsi_portal_group *tpg;
 };
 
-- 
GitLab


From dea1d0f5f1284e3defee4b8484d9fc230686cd42 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 11 Jul 2017 22:06:24 +0200
Subject: [PATCH 1202/1958] smp/hotplug: Replace BUG_ON and react useful

The move of the unpark functions to the control thread moved the BUG_ON()
there as well. While it made some sense in the idle thread of the upcoming
CPU, it's bogus to crash the control thread on the already online CPU,
especially as the function has a return value and the callsite is prepared
to handle an error return.

Replace it with a WARN_ON_ONCE() and return a proper error code.

Fixes: 9cd4f1a4e7a8 ("smp/hotplug: Move unparking of percpu threads to the control CPU")
Rightfully-ranted-at-by: Linux Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/cpu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index ab860453841df..eee0331342628 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -279,7 +279,8 @@ static int bringup_wait_for_ap(unsigned int cpu)
 
 	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
 	wait_for_completion(&st->done);
-	BUG_ON(!cpu_online(cpu));
+	if (WARN_ON_ONCE((!cpu_online(cpu))))
+		return -ECANCELED;
 
 	/* Unpark the stopper thread and the hotplug thread of the target cpu */
 	stop_machine_unpark(cpu);
-- 
GitLab


From 63c3aa6b68782ee5ddb6588d95aacf1ab33e528b Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 Jul 2017 09:46:33 +0200
Subject: [PATCH 1203/1958] net: stmmac: Fix error handling path in
 'alloc_dma_rx_desc_resources()'

If the first 'kmalloc_array' within the loop fails, we should free what
as already been allocated, as done in all other error handling path.

Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 19bba6281dab7..4322fa4a13e8e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1498,7 +1498,7 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 						    sizeof(dma_addr_t),
 						    GFP_KERNEL);
 		if (!rx_q->rx_skbuff_dma)
-			return -ENOMEM;
+			goto err_dma;
 
 		rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE,
 						sizeof(struct sk_buff *),
-- 
GitLab


From 6941f7cc71a719089f7add4afc82bb80fc5a281d Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 Jul 2017 09:46:43 +0200
Subject: [PATCH 1204/1958] net: stmmac: Fix error handling path in
 'alloc_dma_tx_desc_resources()'

If the first 'kmalloc_array' within the loop fails, we should free what
as already been allocated, as done in all other error handling path.

Fixes: ce736788e8a9 ("net: stmmac: adding multiple buffers for TX")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4322fa4a13e8e..07d486a70118d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1561,7 +1561,7 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
 						    sizeof(*tx_q->tx_skbuff_dma),
 						    GFP_KERNEL);
 		if (!tx_q->tx_skbuff_dma)
-			return -ENOMEM;
+			goto err_dma_buffers;
 
 		tx_q->tx_skbuff = kmalloc_array(DMA_TX_SIZE,
 						sizeof(struct sk_buff *),
-- 
GitLab


From 6224226030da93c062621527fd0070be3f718a88 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 Jul 2017 09:46:54 +0200
Subject: [PATCH 1205/1958] net: stmmac: Make
 'alloc_dma_[rt]x_desc_resources()' look even closer

'alloc_dma_[rt]x_desc_resources()' functions look very close.
Remove a useless initialization and use the same label name for error
handling path in order to get them even closer.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 07d486a70118d..1853f7ff66575 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1449,7 +1449,7 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
 static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
 {
 	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 queue = 0;
+	u32 queue;
 
 	/* Free TX queue resources */
 	for (queue = 0; queue < tx_count; queue++) {
@@ -1561,13 +1561,13 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
 						    sizeof(*tx_q->tx_skbuff_dma),
 						    GFP_KERNEL);
 		if (!tx_q->tx_skbuff_dma)
-			goto err_dma_buffers;
+			goto err_dma;
 
 		tx_q->tx_skbuff = kmalloc_array(DMA_TX_SIZE,
 						sizeof(struct sk_buff *),
 						GFP_KERNEL);
 		if (!tx_q->tx_skbuff)
-			goto err_dma_buffers;
+			goto err_dma;
 
 		if (priv->extend_desc) {
 			tx_q->dma_etx = dma_zalloc_coherent(priv->device,
@@ -1577,7 +1577,7 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
 							    &tx_q->dma_tx_phy,
 							    GFP_KERNEL);
 			if (!tx_q->dma_etx)
-				goto err_dma_buffers;
+				goto err_dma;
 		} else {
 			tx_q->dma_tx = dma_zalloc_coherent(priv->device,
 							   DMA_TX_SIZE *
@@ -1586,13 +1586,13 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
 							   &tx_q->dma_tx_phy,
 							   GFP_KERNEL);
 			if (!tx_q->dma_tx)
-				goto err_dma_buffers;
+				goto err_dma;
 		}
 	}
 
 	return 0;
 
-err_dma_buffers:
+err_dma:
 	free_dma_tx_desc_resources(priv);
 
 	return ret;
-- 
GitLab


From 5e34fa23ced16f03fe163d3f31fddcae91c3f2fc Mon Sep 17 00:00:00 2001
From: Ahmad Fatoum <ahmad@a3f.at>
Date: Sat, 8 Jul 2017 21:28:44 +0200
Subject: [PATCH 1206/1958] net: Fix minor code bug in timestamping.txt

Passing (void*)val instead of &val would make a pointer out of an integer
and cause sock_setsockopt to -EFAULT.

See tools/testing/selftests/networking/timestamping/timestamping.c
for a working example.

Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Signed-off-by: Ahmad Fatoum <ahmad@a3f.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/timestamping.txt | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index 196ba17cc344f..1be0b6f9e0cb2 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -44,8 +44,7 @@ timeval of SO_TIMESTAMP (ms).
 Supports multiple types of timestamp requests. As a result, this
 socket option takes a bitmap of flags, not a boolean. In
 
-  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val,
-                   sizeof(val));
+  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
 
 val is an integer with any of the following bits set. Setting other
 bit returns EINVAL and does not change the current state.
@@ -249,8 +248,7 @@ setsockopt to receive timestamps:
 
   __u32 val = SOF_TIMESTAMPING_SOFTWARE |
 	      SOF_TIMESTAMPING_OPT_ID /* or any other flag */;
-  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val,
-                   sizeof(val));
+  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
 
 
 1.4 Bytestream Timestamps
-- 
GitLab


From 91d1ae475b9833097e078c2581c9265d033cdbe4 Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Sun, 9 Jul 2017 13:00:16 +0300
Subject: [PATCH 1207/1958] qed: Fix printk option passed when printing ipv6
 addresses

The option "h" (host order ) exists for ipv4 only.
Remove the h when printing ipv6 addresses.

Lead to the following smatch warning:

drivers/net/ethernet/qlogic/qed/qed_iwarp.c:585 qed_iwarp_print_tcp_ramrod()
warn: '%pI6' can only be followed by c
drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1521 qed_iwarp_print_cm_info()
warn: '%pI6' can only be followed by c

Fixes commit 456a584947d5 ("qed: iWARP CM add passive side connect")

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index b251ebaec4db3..9d989c96278c0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -575,7 +575,7 @@ qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
 
 	if (p_tcp_ramrod->tcp.ip_version == TCP_IPV4) {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
-			   "local_ip=%pI4h:%x, remote_ip=%pI4h%x, vlan=%x\n",
+			   "local_ip=%pI4h:%x, remote_ip=%pI4h:%x, vlan=%x\n",
 			   p_tcp_ramrod->tcp.local_ip,
 			   p_tcp_ramrod->tcp.local_port,
 			   p_tcp_ramrod->tcp.remote_ip,
@@ -583,7 +583,7 @@ qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
 			   p_tcp_ramrod->tcp.vlan_id);
 	} else {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
-			   "local_ip=%pI6h:%x, remote_ip=%pI6h:%x, vlan=%x\n",
+			   "local_ip=%pI6:%x, remote_ip=%pI6:%x, vlan=%x\n",
 			   p_tcp_ramrod->tcp.local_ip,
 			   p_tcp_ramrod->tcp.local_port,
 			   p_tcp_ramrod->tcp.remote_ip,
@@ -1519,7 +1519,7 @@ qed_iwarp_print_cm_info(struct qed_hwfn *p_hwfn,
 			   cm_info->vlan);
 	else
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
-			   "remote_ip %pI6h:%x, local_ip %pI6h:%x vlan=%x\n",
+			   "remote_ip %pI6:%x, local_ip %pI6:%x vlan=%x\n",
 			   cm_info->remote_ip, cm_info->remote_port,
 			   cm_info->local_ip, cm_info->local_port,
 			   cm_info->vlan);
-- 
GitLab


From 6a146f3a5894b751cef16feb3d7903e45e3c445c Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Mon, 10 Jul 2017 10:55:46 -0300
Subject: [PATCH 1208/1958] cxgb4: fix BUG() on interrupt deallocating path of
 ULD

Since the introduction of ULD (Upper-Layer Drivers), the MSI-X
deallocating path changed in cxgb4: the driver frees the interrupts
of ULD when unregistering it or on shutdown PCI handler.

Problem is that if a MSI-X is not freed before deallocated in the PCI
layer, it will trigger a BUG() due to still "alive" interrupt being
tentatively quiesced.

The below trace was observed when doing a simple unbind of Chelsio's
adapter PCI function, like:
  "echo 001e:80:00.4 > /sys/bus/pci/drivers/cxgb4/unbind"

Trace:

  kernel BUG at drivers/pci/msi.c:352!
  Oops: Exception in kernel mode, sig: 5 [#1]
  ...
  NIP [c0000000005a5e60] free_msi_irqs+0xa0/0x250
  LR [c0000000005a5e50] free_msi_irqs+0x90/0x250
  Call Trace:
  [c0000000005a5e50] free_msi_irqs+0x90/0x250 (unreliable)
  [c0000000005a72c4] pci_disable_msix+0x124/0x180
  [d000000011e06708] disable_msi+0x88/0xb0 [cxgb4]
  [d000000011e06948] free_some_resources+0xa8/0x160 [cxgb4]
  [d000000011e06d60] remove_one+0x170/0x3c0 [cxgb4]
  [c00000000058a910] pci_device_remove+0x70/0x110
  [c00000000064ef04] device_release_driver_internal+0x1f4/0x2c0
  ...

This patch fixes the issue by refactoring the shutdown path of ULD on
cxgb4 driver, by properly freeing and disabling interrupts on PCI
remove handler too.

Fixes: 0fbc81b3ad51 ("Allocate resources dynamically for all cxgb4 ULD's")
Reported-by: Harsha Thyagaraja <hathyaga@in.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   | 16 ++++---
 .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c    | 42 +++++++++++--------
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 86f92e31e8aa6..e403fa18f1b15 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2083,12 +2083,12 @@ static void detach_ulds(struct adapter *adap)
 
 	mutex_lock(&uld_mutex);
 	list_del(&adap->list_node);
+
 	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (adap->uld && adap->uld[i].handle) {
+		if (adap->uld && adap->uld[i].handle)
 			adap->uld[i].state_change(adap->uld[i].handle,
 					     CXGB4_STATE_DETACH);
-			adap->uld[i].handle = NULL;
-		}
+
 	if (netevent_registered && list_empty(&adapter_list)) {
 		unregister_netevent_notifier(&cxgb4_netevent_nb);
 		netevent_registered = false;
@@ -5303,8 +5303,10 @@ static void remove_one(struct pci_dev *pdev)
 		 */
 		destroy_workqueue(adapter->workq);
 
-		if (is_uld(adapter))
+		if (is_uld(adapter)) {
 			detach_ulds(adapter);
+			t4_uld_clean_up(adapter);
+		}
 
 		disable_interrupts(adapter);
 
@@ -5385,7 +5387,11 @@ static void shutdown_one(struct pci_dev *pdev)
 			if (adapter->port[i]->reg_state == NETREG_REGISTERED)
 				cxgb_close(adapter->port[i]);
 
-		t4_uld_clean_up(adapter);
+		if (is_uld(adapter)) {
+			detach_ulds(adapter);
+			t4_uld_clean_up(adapter);
+		}
+
 		disable_interrupts(adapter);
 		disable_msi(adapter);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index ec53fe9dec688..71a315bc14097 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -589,22 +589,37 @@ void t4_uld_mem_free(struct adapter *adap)
 	kfree(adap->uld);
 }
 
+/* This function should be called with uld_mutex taken. */
+static void cxgb4_shutdown_uld_adapter(struct adapter *adap, enum cxgb4_uld type)
+{
+	if (adap->uld[type].handle) {
+		adap->uld[type].handle = NULL;
+		adap->uld[type].add = NULL;
+		release_sge_txq_uld(adap, type);
+
+		if (adap->flags & FULL_INIT_DONE)
+			quiesce_rx_uld(adap, type);
+
+		if (adap->flags & USING_MSIX)
+			free_msix_queue_irqs_uld(adap, type);
+
+		free_sge_queues_uld(adap, type);
+		free_queues_uld(adap, type);
+	}
+}
+
 void t4_uld_clean_up(struct adapter *adap)
 {
 	unsigned int i;
 
-	if (!adap->uld)
-		return;
+	mutex_lock(&uld_mutex);
 	for (i = 0; i < CXGB4_ULD_MAX; i++) {
 		if (!adap->uld[i].handle)
 			continue;
-		if (adap->flags & FULL_INIT_DONE)
-			quiesce_rx_uld(adap, i);
-		if (adap->flags & USING_MSIX)
-			free_msix_queue_irqs_uld(adap, i);
-		free_sge_queues_uld(adap, i);
-		free_queues_uld(adap, i);
+
+		cxgb4_shutdown_uld_adapter(adap, i);
 	}
+	mutex_unlock(&uld_mutex);
 }
 
 static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
@@ -783,15 +798,8 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
 			continue;
 		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
 			continue;
-		adap->uld[type].handle = NULL;
-		adap->uld[type].add = NULL;
-		release_sge_txq_uld(adap, type);
-		if (adap->flags & FULL_INIT_DONE)
-			quiesce_rx_uld(adap, type);
-		if (adap->flags & USING_MSIX)
-			free_msix_queue_irqs_uld(adap, type);
-		free_sge_queues_uld(adap, type);
-		free_queues_uld(adap, type);
+
+		cxgb4_shutdown_uld_adapter(adap, type);
 	}
 	mutex_unlock(&uld_mutex);
 
-- 
GitLab


From ffa423fb3251f8737303ffc3b0659e86e501808e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 10 Jul 2017 10:05:50 -0700
Subject: [PATCH 1209/1958] tap: convert a mutex to a spinlock

We are not allowed to block on the RCU reader side, so can't
just hold the mutex as before. As a quick fix, convert it to
a spinlock.

Fixes: d9f1f61c0801 ("tap: Extending tap device create/destroy APIs")
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tap.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 9af3239d6ad52..3570c75769931 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -106,7 +106,7 @@ struct major_info {
 	struct rcu_head rcu;
 	dev_t major;
 	struct idr minor_idr;
-	struct mutex minor_lock;
+	spinlock_t minor_lock;
 	const char *device_name;
 	struct list_head next;
 };
@@ -416,15 +416,15 @@ int tap_get_minor(dev_t major, struct tap_dev *tap)
 		goto unlock;
 	}
 
-	mutex_lock(&tap_major->minor_lock);
-	retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL);
+	spin_lock(&tap_major->minor_lock);
+	retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_ATOMIC);
 	if (retval >= 0) {
 		tap->minor = retval;
 	} else if (retval == -ENOSPC) {
 		netdev_err(tap->dev, "Too many tap devices\n");
 		retval = -EINVAL;
 	}
-	mutex_unlock(&tap_major->minor_lock);
+	spin_unlock(&tap_major->minor_lock);
 
 unlock:
 	rcu_read_unlock();
@@ -442,12 +442,12 @@ void tap_free_minor(dev_t major, struct tap_dev *tap)
 		goto unlock;
 	}
 
-	mutex_lock(&tap_major->minor_lock);
+	spin_lock(&tap_major->minor_lock);
 	if (tap->minor) {
 		idr_remove(&tap_major->minor_idr, tap->minor);
 		tap->minor = 0;
 	}
-	mutex_unlock(&tap_major->minor_lock);
+	spin_unlock(&tap_major->minor_lock);
 
 unlock:
 	rcu_read_unlock();
@@ -467,13 +467,13 @@ static struct tap_dev *dev_get_by_tap_file(int major, int minor)
 		goto unlock;
 	}
 
-	mutex_lock(&tap_major->minor_lock);
+	spin_lock(&tap_major->minor_lock);
 	tap = idr_find(&tap_major->minor_idr, minor);
 	if (tap) {
 		dev = tap->dev;
 		dev_hold(dev);
 	}
-	mutex_unlock(&tap_major->minor_lock);
+	spin_unlock(&tap_major->minor_lock);
 
 unlock:
 	rcu_read_unlock();
@@ -1244,7 +1244,7 @@ static int tap_list_add(dev_t major, const char *device_name)
 	tap_major->major = MAJOR(major);
 
 	idr_init(&tap_major->minor_idr);
-	mutex_init(&tap_major->minor_lock);
+	spin_lock_init(&tap_major->minor_lock);
 
 	tap_major->device_name = device_name;
 
-- 
GitLab


From b11fb73743fc406204e0749ead18560aeda8b136 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 11 Jul 2017 15:43:24 -0400
Subject: [PATCH 1210/1958] tracing: Fixup trace file header alignment

The addition of TGID to the tracing header added a check to see if TGID
shoudl be displayed or not, and updated the header accordingly.
Unfortunately, it broke the default header.

Also add constant strings to use for spacing. This does remove the
visibility of the header a bit, but cuts it down from the extended lines
much greater than 80 characters.

Before this change:

 # tracer: function
 #
 #                            _-----=> irqs-off
 #                           / _----=> need-resched
 #                          | / _---=> hardirq/softirq
 #                          || / _--=> preempt-depth
 #                          ||| /     delay
 #           TASK-PID   CPU#||||    TIMESTAMP  FUNCTION
 #              | |       | ||||       |         |
        swapper/0-1     [000] ....     0.277830: migration_init <-do_one_initcall
        swapper/0-1     [002] d...    13.861967: Unknown type 1201
        swapper/0-1     [002] d..1    13.861970: Unknown type 1202

After this change:

 # tracer: function
 #
 #                              _-----=> irqs-off
 #                             / _----=> need-resched
 #                            | / _---=> hardirq/softirq
 #                            || / _--=> preempt-depth
 #                            ||| /     delay
 #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
 #              | |       |   ||||       |         |
        swapper/0-1     [000] ....     0.278245: migration_init <-do_one_initcall
        swapper/0-1     [003] d...    13.861189: Unknown type 1201
        swapper/0-1     [003] d..1    13.861192: Unknown type 1202

Cc: Joel Fernandes <joelaf@google.com>
Fixes: 441dae8f2f29 ("tracing: Add support for display of tgid in trace output")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 92af8fd1429b5..dabd810a10cd5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3358,14 +3358,23 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
 				       unsigned int flags)
 {
 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
-
-	seq_printf(m, "#                          %s  _-----=> irqs-off\n",	    tgid ? "          " : "");
-	seq_printf(m, "#                          %s / _----=> need-resched\n",	    tgid ? "          " : "");
-	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",  tgid ? "          " : "");
-	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",    tgid ? "          " : "");
-	seq_printf(m, "#                          %s||| /     delay\n",		    tgid ? "          " : "");
-	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
-	seq_printf(m, "#              | |       | %s||||       |         |\n",	    tgid ? "     |    " : "");
+	const char tgid_space[] = "          ";
+	const char space[] = "  ";
+
+	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s / _----=> need-resched\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s||| /     delay\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
+		   tgid ? "   TGID   " : space);
+	seq_printf(m, "#              | |       | %s||||       |         |\n",
+		   tgid ? "     |    " : space);
 }
 
 void
-- 
GitLab


From dc8d387210e3e2ab294031e8f6542329bc9141c4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 11 Jul 2017 18:41:12 -0400
Subject: [PATCH 1211/1958] tracing: Update Documentation/trace/ftrace.txt

The documentation of ftrace.txt has become rather outdated. Bring it closer
to reality of todays kernel.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/ftrace.txt | 508 +++++++++++++++++++++++++--------
 1 file changed, 396 insertions(+), 112 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 94a987bd2bc56..a0bbf5fabe98f 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -5,10 +5,11 @@ Copyright 2008 Red Hat Inc.
    Author:   Steven Rostedt <srostedt@redhat.com>
   License:   The GNU Free Documentation License, Version 1.2
                (dual licensed under the GPL v2)
-Reviewers:   Elias Oltmanns, Randy Dunlap, Andrew Morton,
-	     John Kacur, and David Teigland.
+Original Reviewers:   Elias Oltmanns, Randy Dunlap, Andrew Morton,
+		      John Kacur, and David Teigland.
 Written for: 2.6.28-rc2
 Updated for: 3.10
+Updated for: 4.13 - Copyright 2017 VMware Inc. Steven Rostedt
 
 Introduction
 ------------
@@ -26,9 +27,11 @@ a task is woken to the task is actually scheduled in.
 
 One of the most common uses of ftrace is the event tracing.
 Through out the kernel is hundreds of static event points that
-can be enabled via the debugfs file system to see what is
+can be enabled via the tracefs file system to see what is
 going on in certain parts of the kernel.
 
+See events.txt for more information.
+
 
 Implementation Details
 ----------------------
@@ -39,34 +42,47 @@ See ftrace-design.txt for details for arch porters and such.
 The File System
 ---------------
 
-Ftrace uses the debugfs file system to hold the control files as
+Ftrace uses the tracefs file system to hold the control files as
 well as the files to display output.
 
-When debugfs is configured into the kernel (which selecting any ftrace
-option will do) the directory /sys/kernel/debug will be created. To mount
+When tracefs is configured into the kernel (which selecting any ftrace
+option will do) the directory /sys/kernel/tracing will be created. To mount
 this directory, you can add to your /etc/fstab file:
 
- debugfs       /sys/kernel/debug          debugfs defaults        0       0
+ tracefs       /sys/kernel/tracing       tracefs defaults        0       0
 
 Or you can mount it at run time with:
 
- mount -t debugfs nodev /sys/kernel/debug
+ mount -t tracefs nodev /sys/kernel/tracing
 
 For quicker access to that directory you may want to make a soft link to
 it:
 
- ln -s /sys/kernel/debug /debug
+ ln -s /sys/kernel/tracing /tracing
+
+     *** NOTICE ***
+
+Before 4.1, all ftrace tracing control files were within the debugfs
+file system, which is typically located at /sys/kernel/debug/tracing.
+For backward compatibility, when mounting the debugfs file system,
+the tracefs file system will be automatically mounted at:
+
+ /sys/kernel/debug/tracing
 
-Any selected ftrace option will also create a directory called tracing
-within the debugfs. The rest of the document will assume that you are in
-the ftrace directory (cd /sys/kernel/debug/tracing) and will only concentrate
-on the files within that directory and not distract from the content with
-the extended "/sys/kernel/debug/tracing" path name.
+All files located in the tracefs file system will be located in that
+debugfs file system directory as well.
+
+     *** NOTICE ***
+
+Any selected ftrace option will also create the tracefs file system.
+The rest of the document will assume that you are in the ftrace directory
+(cd /sys/kernel/tracing) and will only concentrate on the files within that
+directory and not distract from the content with the extended
+"/sys/kernel/tracing" path name.
 
 That's it! (assuming that you have ftrace configured into your kernel)
 
-After mounting debugfs, you can see a directory called
-"tracing".  This directory contains the control and output files
+After mounting tracefs you will have access to the control and output files
 of ftrace. Here is a list of some of the key files:
 
 
@@ -92,10 +108,20 @@ of ftrace. Here is a list of some of the key files:
 	writing to the ring buffer, the tracing overhead may
 	still be occurring.
 
+	The kernel function tracing_off() can be used within the
+	kernel to disable writing to the ring buffer, which will
+	set this file to "0". User space can re-enable tracing by
+	echoing "1" into the file.
+
+	Note, the function and event trigger "traceoff" will also
+	set this file to zero and stop tracing. Which can also
+	be re-enabled by user space using this file.
+
   trace:
 
 	This file holds the output of the trace in a human
-	readable format (described below).
+	readable format (described below). Note, tracing is temporarily
+	disabled while this file is being read (opened).
 
   trace_pipe:
 
@@ -109,7 +135,8 @@ of ftrace. Here is a list of some of the key files:
 	will not be read again with a sequential read. The
 	"trace" file is static, and if the tracer is not
 	adding more data, it will display the same
-	information every time it is read.
+	information every time it is read. This file will not
+	disable tracing while being read.
 
   trace_options:
 
@@ -128,12 +155,14 @@ of ftrace. Here is a list of some of the key files:
   tracing_max_latency:
 
 	Some of the tracers record the max latency.
-	For example, the time interrupts are disabled.
-	This time is saved in this file. The max trace
-	will also be stored, and displayed by "trace".
-	A new max trace will only be recorded if the
-	latency is greater than the value in this
-	file. (in microseconds)
+	For example, the maximum time that interrupts are disabled.
+	The maximum time is saved in this file. The max trace will also be
+	stored,	and displayed by "trace". A new max trace will only be
+	recorded if the latency is greater than the value in this file
+	(in microseconds).
+
+	By echoing in a time into this file, no latency will be recorded
+	unless it is greater than the time in this file.
 
   tracing_thresh:
 
@@ -152,32 +181,34 @@ of ftrace. Here is a list of some of the key files:
 	that the kernel uses for allocation, usually 4 KB in size).
 	If the last page allocated has room for more bytes
 	than requested, the rest of the page will be used,
-	making the actual allocation bigger than requested.
+	making the actual allocation bigger than requested or shown.
 	( Note, the size may not be a multiple of the page size
 	  due to buffer management meta-data. )
 
+	Buffer sizes for individual CPUs may vary
+	(see "per_cpu/cpu0/buffer_size_kb" below), and if they do
+	this file will show "X".
+
   buffer_total_size_kb:
 
 	This displays the total combined size of all the trace buffers.
 
   free_buffer:
 
-	If a process is performing the tracing, and the ring buffer
-	should be shrunk "freed" when the process is finished, even
-	if it were to be killed by a signal, this file can be used
-	for that purpose. On close of this file, the ring buffer will
-	be resized to its minimum size. Having a process that is tracing
-	also open this file, when the process exits its file descriptor
-	for this file will be closed, and in doing so, the ring buffer
-	will be "freed".
+	If a process is performing tracing, and the ring buffer	should be
+	shrunk "freed" when the process is finished, even if it were to be
+	killed by a signal, this file can be used for that purpose. On close
+	of this file, the ring buffer will be resized to its minimum size.
+	Having a process that is tracing also open this file, when the process
+	exits its file descriptor for this file will be closed, and in doing so,
+	the ring buffer will be "freed".
 
 	It may also stop tracing if disable_on_free option is set.
 
   tracing_cpumask:
 
-	This is a mask that lets the user only trace
-	on specified CPUs. The format is a hex string
-	representing the CPUs.
+	This is a mask that lets the user only trace on specified CPUs.
+	The format is a hex string representing the CPUs.
 
   set_ftrace_filter:
 
@@ -190,6 +221,9 @@ of ftrace. Here is a list of some of the key files:
 	to be traced. Echoing names of functions into this file
 	will limit the trace to only those functions.
 
+	The functions listed in "available_filter_functions" are what
+	can be written into this file.
+
 	This interface also allows for commands to be used. See the
 	"Filter commands" section for more details.
 
@@ -202,7 +236,14 @@ of ftrace. Here is a list of some of the key files:
 
   set_ftrace_pid:
 
-	Have the function tracer only trace a single thread.
+	Have the function tracer only trace the threads whose PID are
+	listed in this file.
+
+	If the "function-fork" option is set, then when a task whose
+	PID is listed in this file forks, the child's PID will
+	automatically be added to this file, and the child will be
+	traced by the function tracer as well. This option will also
+	cause PIDs of tasks that exit to be removed from the file.
 
   set_event_pid:
 
@@ -217,17 +258,28 @@ of ftrace. Here is a list of some of the key files:
 
   set_graph_function:
 
-	Set a "trigger" function where tracing should start
-	with the function graph tracer (See the section
-	"dynamic ftrace" for more details).
+	Functions listed in this file will cause the function graph
+	tracer to only trace these functions and the functions that
+	they call. (See the section "dynamic ftrace" for more details).
+
+  set_graph_notrace:
+
+	Similar to set_graph_function, but will disable function graph
+	tracing when the function is hit until it exits the function.
+	This makes it possible to ignore tracing functions that are called
+	by a specific function.
 
   available_filter_functions:
 
-	This lists the functions that ftrace
-	has processed and can trace. These are the function
-	names that you can pass to "set_ftrace_filter" or
-	"set_ftrace_notrace". (See the section "dynamic ftrace"
-	below for more details.)
+	This lists the functions that ftrace has processed and can trace.
+	These are the function names that you can pass to
+	"set_ftrace_filter" or "set_ftrace_notrace".
+	(See the section "dynamic ftrace" below for more details.)
+
+  dyn_ftrace_total_info:
+
+	This file is for debugging purposes. The number of functions that
+	have been converted to nops and are available to be traced.
 
   enabled_functions:
 
@@ -250,12 +302,21 @@ of ftrace. Here is a list of some of the key files:
 	an 'I' will be displayed on the same line as the function that
 	can be overridden.
 
+	If the architecture supports it, it will also show what callback
+	is being directly called by the function. If the count is greater
+	than 1 it most likely will be ftrace_ops_list_func().
+
+	If the callback of the function jumps to a trampoline that is
+	specific to a the callback and not the standard trampoline,
+	its address will be printed as well as the function that the
+	trampoline calls.
+
   function_profile_enabled:
 
 	When set it will enable all functions with either the function
-	tracer, or if enabled, the function graph tracer. It will
+	tracer, or if configured, the function graph tracer. It will
 	keep a histogram of the number of functions that were called
-	and if run with the function graph tracer, it will also keep
+	and if the function graph tracer was configured, it will also keep
 	track of the time spent in those functions. The histogram
 	content can be displayed in the files:
 
@@ -283,12 +344,11 @@ of ftrace. Here is a list of some of the key files:
   printk_formats:
 
 	This is for tools that read the raw format files. If an event in
-	the ring buffer references a string (currently only trace_printk()
-	does this), only a pointer to the string is recorded into the buffer
-	and not the string itself. This prevents tools from knowing what
-	that string was. This file displays the string and address for
-	the string allowing tools to map the pointers to what the
-	strings were.
+	the ring buffer references a string, only a pointer to the string
+	is recorded into the buffer and not the string itself. This prevents
+	tools from knowing what that string was. This file displays the string
+	and address for	the string allowing tools to map the pointers to what
+	the strings were.
 
   saved_cmdlines:
 
@@ -298,6 +358,22 @@ of ftrace. Here is a list of some of the key files:
 	comms for events. If a pid for a comm is not listed, then
 	"<...>" is displayed in the output.
 
+	If the option "record-cmd" is set to "0", then comms of tasks
+	will not be saved during recording. By default, it is enabled.
+
+  saved_cmdlines_size:
+
+	By default, 128 comms are saved (see "saved_cmdlines" above). To
+	increase or decrease the amount of comms that are cached, echo
+	in a the number of comms to cache, into this file.
+
+  saved_tgids:
+
+	If the option "record-tgid" is set, on each scheduling context switch
+	the Task Group ID of a task is saved in a table mapping the PID of
+	the thread to its TGID. By default, the "record-tgid" option is
+	disabled.
+
   snapshot:
 
 	This displays the "snapshot" buffer and also lets the user
@@ -336,6 +412,9 @@ of ftrace. Here is a list of some of the key files:
 	  # cat trace_clock
 	  [local] global counter x86-tsc
 
+	  The clock with the square brackets around it is the one
+	  in effect.
+
 	  local: Default clock, but may not be in sync across CPUs
 
 	  global: This clock is in sync with all CPUs but may
@@ -448,6 +527,23 @@ of ftrace. Here is a list of some of the key files:
 
 	See events.txt for more information.
 
+  set_event:
+
+	By echoing in the event into this file, will enable that event.
+
+	See events.txt for more information.
+
+  available_events:
+
+	A list of events that can be enabled in tracing.
+
+	See events.txt for more information.
+
+  hwlat_detector:
+
+	Directory for the Hardware Latency Detector.
+	See "Hardware Latency Detector" section below.
+
   per_cpu:
 
 	This is a directory that contains the trace per_cpu information.
@@ -539,13 +635,25 @@ Here is the list of current tracers that may be configured.
 	to draw a graph of function calls similar to C code
 	source.
 
+  "blk"
+
+	The block tracer. The tracer used by the blktrace user
+	application.
+
+  "hwlat"
+
+	The Hardware Latency tracer is used to detect if the hardware
+	produces any latency. See "Hardware Latency Detector" section
+	below.
+
   "irqsoff"
 
 	Traces the areas that disable interrupts and saves
 	the trace with the longest max latency.
 	See tracing_max_latency. When a new max is recorded,
 	it replaces the old trace. It is best to view this
-	trace with the latency-format option enabled.
+	trace with the latency-format option enabled, which
+	happens automatically when the tracer is selected.
 
   "preemptoff"
 
@@ -571,6 +679,26 @@ Here is the list of current tracers that may be configured.
         RT tasks (as the current "wakeup" does). This is useful
         for those interested in wake up timings of RT tasks.
 
+  "wakeup_dl"
+
+	Traces and records the max latency that it takes for
+	a SCHED_DEADLINE task to be woken (as the "wakeup" and
+	"wakeup_rt" does).
+
+  "mmiotrace"
+
+	A special tracer that is used to trace binary module.
+	It will trace all the calls that a module makes to the
+	hardware. Everything it writes and reads from the I/O
+	as well.
+
+  "branch"
+
+	This tracer can be configured when tracing likely/unlikely
+	calls within the kernel. It will trace when a likely and
+	unlikely branch is hit and if it was correct in its prediction
+	of being correct.
+
   "nop"
 
 	This is the "trace nothing" tracer. To remove all
@@ -582,7 +710,7 @@ Examples of using the tracer
 ----------------------------
 
 Here are typical examples of using the tracers when controlling
-them only with the debugfs interface (without using any
+them only with the tracefs interface (without using any
 user-land utilities).
 
 Output format:
@@ -674,7 +802,7 @@ why a latency happened. Here is a typical trace.
 This shows that the current tracer is "irqsoff" tracing the time
 for which interrupts were disabled. It gives the trace version (which
 never changes) and the version of the kernel upon which this was executed on
-(3.10). Then it displays the max latency in microseconds (259 us). The number
+(3.8). Then it displays the max latency in microseconds (259 us). The number
 of trace entries displayed and the total number (both are four: #4/4).
 VP, KP, SP, and HP are always zero and are reserved for later use.
 #P is the number of online CPUs (#P:4).
@@ -709,6 +837,8 @@ explains which is which.
 	'.' otherwise.
 
   hardirq/softirq:
+	'Z' - NMI occurred inside a hardirq
+	'z' - NMI is running
 	'H' - hard irq occurred inside a softirq.
 	'h' - hard irq is running
 	's' - soft irq is running
@@ -757,24 +887,24 @@ nohex
 nobin
 noblock
 trace_printk
-nobranch
 annotate
 nouserstacktrace
 nosym-userobj
 noprintk-msg-only
 context-info
 nolatency-format
-sleep-time
-graph-time
 record-cmd
+norecord-tgid
 overwrite
 nodisable_on_free
 irq-info
 markers
 noevent-fork
 function-trace
+nofunction-fork
 nodisplay-graph
 nostacktrace
+nobranch
 
 To disable one of the options, echo in the option prepended with
 "no".
@@ -830,8 +960,6 @@ Here are the available options:
 
   trace_printk - Can disable trace_printk() from writing into the buffer.
 
-  branch - Enable branch tracing with the tracer.
-
   annotate - It is sometimes confusing when the CPU buffers are full
   	     and one CPU buffer had a lot of events recently, thus
 	     a shorter time frame, were another CPU may have only had
@@ -850,7 +978,8 @@ Here are the available options:
           <idle>-0     [001] .Ns3 21169.031485: sub_preempt_count <-_raw_spin_unlock
 
   userstacktrace - This option changes the trace. It records a
-		   stacktrace of the current userspace thread.
+		   stacktrace of the current user space thread after
+		   each trace event.
 
   sym-userobj - when user stacktrace are enabled, look up which
 		object the address belongs to, and print a
@@ -873,29 +1002,21 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
   context-info - Show only the event data. Hides the comm, PID,
   	         timestamp, CPU, and other useful data.
 
-  latency-format - This option changes the trace. When
-                   it is enabled, the trace displays
-                   additional information about the
-                   latencies, as described in "Latency
-                   trace format".
-
-  sleep-time - When running function graph tracer, to include
-  	       the time a task schedules out in its function.
-	       When enabled, it will account time the task has been
-	       scheduled out as part of the function call.
-
-  graph-time - When running function profiler with function graph tracer,
-	       to include the time to call nested functions. When this is
-	       not set, the time reported for the function will only
-	       include the time the function itself executed for, not the
-	       time for functions that it called.
+  latency-format - This option changes the trace output. When it is enabled,
+		   the trace displays additional information about the
+		   latency, as described in "Latency trace format".
 
   record-cmd - When any event or tracer is enabled, a hook is enabled
-  	       in the sched_switch trace point to fill comm cache
+	       in the sched_switch trace point to fill comm cache
 	       with mapped pids and comms. But this may cause some
 	       overhead, and if you only care about pids, and not the
 	       name of the task, disabling this option can lower the
-	       impact of tracing.
+	       impact of tracing. See "saved_cmdlines".
+
+  record-tgid - When any event or tracer is enabled, a hook is enabled
+	        in the sched_switch trace point to fill the cache of
+		mapped Thread Group IDs (TGID) mapping to pids. See
+		"saved_tgids".
 
   overwrite - This controls what happens when the trace buffer is
               full. If "1" (default), the oldest events are
@@ -935,19 +1056,98 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
 	    functions. This keeps the overhead of the tracer down
 	    when performing latency tests.
 
+  function-fork - When set, tasks with PIDs listed in set_ftrace_pid will
+		  have the PIDs of their children added to set_ftrace_pid
+		  when those tasks fork. Also, when tasks with PIDs in
+		  set_ftrace_pid exit, their PIDs will be removed from the
+		  file.
+
   display-graph - When set, the latency tracers (irqsoff, wakeup, etc) will
 	          use function graph tracing instead of function tracing.
 
-  stacktrace - This is one of the options that changes the trace
-	       itself. When a trace is recorded, so is the stack
-	       of functions. This allows for back traces of
-	       trace sites.
+  stacktrace - When set, a stack trace is recorded after any trace event
+	       is recorded.
+
+  branch - Enable branch tracing with the tracer. This enables branch
+	   tracer along with the currently set tracer. Enabling this
+	   with the "nop" tracer is the same as just enabling the
+	   "branch" tracer.
 
  Note: Some tracers have their own options. They only appear in this
        file when the tracer is active. They always appear in the
        options directory.
 
 
+Here are the per tracer options:
+
+Options for function tracer:
+
+  func_stack_trace - When set, a stack trace is recorded after every
+		     function that is recorded. NOTE! Limit the functions
+		     that are recorded before enabling this, with
+		     "set_ftrace_filter" otherwise the system performance
+		     will be critically degraded. Remember to disable
+		     this option before clearing the function filter.
+
+Options for function_graph tracer:
+
+ Since the function_graph tracer has a slightly different output
+ it has its own options to control what is displayed.
+
+  funcgraph-overrun - When set, the "overrun" of the graph stack is
+		      displayed after each function traced. The
+		      overrun, is when the stack depth of the calls
+		      is greater than what is reserved for each task.
+		      Each task has a fixed array of functions to
+		      trace in the call graph. If the depth of the
+		      calls exceeds that, the function is not traced.
+		      The overrun is the number of functions missed
+		      due to exceeding this array.
+
+  funcgraph-cpu - When set, the CPU number of the CPU where the trace
+		  occurred is displayed.
+
+  funcgraph-overhead - When set, if the function takes longer than
+		       A certain amount, then a delay marker is
+		       displayed. See "delay" above, under the
+		       header description.
+
+  funcgraph-proc - Unlike other tracers, the process' command line
+		   is not displayed by default, but instead only
+		   when a task is traced in and out during a context
+		   switch. Enabling this options has the command
+		   of each process displayed at every line.
+
+  funcgraph-duration - At the end of each function (the return)
+		       the duration of the amount of time in the
+		       function is displayed in microseconds.
+
+  funcgraph-abstime - When set, the timestamp is displayed at each
+		      line.
+
+  funcgraph-irqs - When disabled, functions that happen inside an
+		   interrupt will not be traced.
+
+  funcgraph-tail - When set, the return event will include the function
+		   that it represents. By default this is off, and
+		   only a closing curly bracket "}" is displayed for
+		   the return of a function.
+
+  sleep-time - When running function graph tracer, to include
+	       the time a task schedules out in its function.
+	       When enabled, it will account time the task has been
+	       scheduled out as part of the function call.
+
+  graph-time - When running function profiler with function graph tracer,
+	       to include the time to call nested functions. When this is
+	       not set, the time reported for the function will only
+	       include the time the function itself executed for, not the
+	       time for functions that it called.
+
+Options for blk tracer:
+
+  blk_classic - Shows a more minimalistic output.
+
 
 irqsoff
 -------
@@ -1711,6 +1911,85 @@ events.
   <idle>-0       2d..3    6us :      0:120:R ==> [002]  5882: 94:R sleep
 
 
+Hardware Latency Detector
+-------------------------
+
+The hardware latency detector is executed by enabling the "hwlat" tracer.
+
+NOTE, this tracer will affect the performance of the system as it will
+periodically make a CPU constantly busy with interrupts disabled.
+
+ # echo hwlat > current_tracer
+ # sleep 100
+ # cat trace
+# tracer: hwlat
+#
+#                              _-----=> irqs-off
+#                             / _----=> need-resched
+#                            | / _---=> hardirq/softirq
+#                            || / _--=> preempt-depth
+#                            ||| /     delay
+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+#              | |       |   ||||       |         |
+           <...>-3638  [001] d... 19452.055471: #1     inner/outer(us):   12/14    ts:1499801089.066141940
+           <...>-3638  [003] d... 19454.071354: #2     inner/outer(us):   11/9     ts:1499801091.082164365
+           <...>-3638  [002] dn.. 19461.126852: #3     inner/outer(us):   12/9     ts:1499801098.138150062
+           <...>-3638  [001] d... 19488.340960: #4     inner/outer(us):    8/12    ts:1499801125.354139633
+           <...>-3638  [003] d... 19494.388553: #5     inner/outer(us):    8/12    ts:1499801131.402150961
+           <...>-3638  [003] d... 19501.283419: #6     inner/outer(us):    0/12    ts:1499801138.297435289 nmi-total:4 nmi-count:1
+
+
+The above output is somewhat the same in the header. All events will have
+interrupts disabled 'd'. Under the FUNCTION title there is:
+
+ #1 - This is the count of events recorded that were greater than the
+      tracing_threshold (See below).
+
+ inner/outer(us):   12/14
+
+      This shows two numbers as "inner latency" and "outer latency". The test
+      runs in a loop checking a timestamp twice. The latency detected within
+      the two timestamps is the "inner latency" and the latency detected
+      after the previous timestamp and the next timestamp in the loop is
+      the "outer latency".
+
+ ts:1499801089.066141940
+
+      The absolute timestamp that the event happened.
+
+ nmi-total:4 nmi-count:1
+
+      On architectures that support it, if an NMI comes in during the
+      test, the time spent in NMI is reported in "nmi-total" (in
+      microseconds).
+
+      All architectures that have NMIs will show the "nmi-count" if an
+      NMI comes in during the test.
+
+hwlat files:
+
+  tracing_threshold - This gets automatically set to "10" to represent 10
+		      microseconds. This is the threshold of latency that
+		      needs to be detected before the trace will be recorded.
+
+		      Note, when hwlat tracer is finished (another tracer is
+		      written into "current_tracer"), the original value for
+		      tracing_threshold is placed back into this file.
+
+  hwlat_detector/width - The length of time the test runs with interrupts
+			 disabled.
+
+  hwlat_detector/window - The length of time of the window which the test
+			  runs. That is, the test will run for "width"
+			  microseconds per "window" microseconds
+
+  tracing_cpumask - When the test is started. A kernel thread is created that
+		    runs the test. This thread will alternate between CPUs
+		    listed in the tracing_cpumask between each period
+		    (one "window"). To limit the test to specific CPUs
+		    set the mask in this file to only the CPUs that the test
+		    should run on.
+
 function
 --------
 
@@ -1821,15 +2100,15 @@ something like this simple program:
 #define STR(x) _STR(x)
 #define MAX_PATH 256
 
-const char *find_debugfs(void)
+const char *find_tracefs(void)
 {
-       static char debugfs[MAX_PATH+1];
-       static int debugfs_found;
+       static char tracefs[MAX_PATH+1];
+       static int tracefs_found;
        char type[100];
        FILE *fp;
 
-       if (debugfs_found)
-               return debugfs;
+       if (tracefs_found)
+               return tracefs;
 
        if ((fp = fopen("/proc/mounts","r")) == NULL) {
                perror("/proc/mounts");
@@ -1839,27 +2118,27 @@ const char *find_debugfs(void)
        while (fscanf(fp, "%*s %"
                      STR(MAX_PATH)
                      "s %99s %*s %*d %*d\n",
-                     debugfs, type) == 2) {
-               if (strcmp(type, "debugfs") == 0)
+                     tracefs, type) == 2) {
+               if (strcmp(type, "tracefs") == 0)
                        break;
        }
        fclose(fp);
 
-       if (strcmp(type, "debugfs") != 0) {
-               fprintf(stderr, "debugfs not mounted");
+       if (strcmp(type, "tracefs") != 0) {
+               fprintf(stderr, "tracefs not mounted");
                return NULL;
        }
 
-       strcat(debugfs, "/tracing/");
-       debugfs_found = 1;
+       strcat(tracefs, "/tracing/");
+       tracefs_found = 1;
 
-       return debugfs;
+       return tracefs;
 }
 
 const char *tracing_file(const char *file_name)
 {
        static char trace_file[MAX_PATH+1];
-       snprintf(trace_file, MAX_PATH, "%s/%s", find_debugfs(), file_name);
+       snprintf(trace_file, MAX_PATH, "%s/%s", find_tracefs(), file_name);
        return trace_file;
 }
 
@@ -1898,12 +2177,12 @@ Or this simple script!
 ------
 #!/bin/bash
 
-debugfs=`sed -ne 's/^debugfs \(.*\) debugfs.*/\1/p' /proc/mounts`
-echo nop > $debugfs/tracing/current_tracer
-echo 0 > $debugfs/tracing/tracing_on
-echo $$ > $debugfs/tracing/set_ftrace_pid
-echo function > $debugfs/tracing/current_tracer
-echo 1 > $debugfs/tracing/tracing_on
+tracefs=`sed -ne 's/^tracefs \(.*\) tracefs.*/\1/p' /proc/mounts`
+echo nop > $tracefs/tracing/current_tracer
+echo 0 > $tracefs/tracing/tracing_on
+echo $$ > $tracefs/tracing/set_ftrace_pid
+echo function > $tracefs/tracing/current_tracer
+echo 1 > $tracefs/tracing/tracing_on
 exec "$@"
 ------
 
@@ -2145,13 +2424,18 @@ include the -pg switch in the compiling of the kernel.)
 At compile time every C file object is run through the
 recordmcount program (located in the scripts directory). This
 program will parse the ELF headers in the C object to find all
-the locations in the .text section that call mcount. (Note, only
-white listed .text sections are processed, since processing other
-sections like .init.text may cause races due to those sections
-being freed unexpectedly).
-
-A new section called "__mcount_loc" is created that holds
-references to all the mcount call sites in the .text section.
+the locations in the .text section that call mcount. Starting
+with gcc verson 4.6, the -mfentry has been added for x86, which
+calls "__fentry__" instead of "mcount". Which is called before
+the creation of the stack frame.
+
+Note, not all sections are traced. They may be prevented by either
+a notrace, or blocked another way and all inline functions are not
+traced. Check the "available_filter_functions" file to see what functions
+can be traced.
+
+A section called "__mcount_loc" is created that holds
+references to all the mcount/fentry call sites in the .text section.
 The recordmcount program re-links this section back into the
 original object. The final linking stage of the kernel will add all these
 references into a single table.
@@ -2679,7 +2963,7 @@ in time without stopping tracing. Ftrace swaps the current
 buffer with a spare buffer, and tracing continues in the new
 current (=previous spare) buffer.
 
-The following debugfs files in "tracing" are related to this
+The following tracefs files in "tracing" are related to this
 feature:
 
   snapshot:
@@ -2752,7 +3036,7 @@ cat: snapshot: Device or resource busy
 
 Instances
 ---------
-In the debugfs tracing directory is a directory called "instances".
+In the tracefs tracing directory is a directory called "instances".
 This directory can have new directories created inside of it using
 mkdir, and removing directories with rmdir. The directory created
 with mkdir in this directory will already contain files and other
-- 
GitLab


From bbd1d27d863d5c0acee65ecd0c2e34035e1df5ea Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 11 Jul 2017 19:21:04 -0400
Subject: [PATCH 1212/1958] tracing: Do note expose stack_trace_filter without
 DYNAMIC_FTRACE

The "stack_trace_filter" file only makes sense if DYNAMIC_FTRACE is
configured in. If it is not, then the user can not filter any functions.

Not only that, the open function causes warnings when DYNAMIC_FTRACE is not
set.

Link: http://lkml.kernel.org/r/20170710110521.600806-1-arnd@arndb.de

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_stack.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b4a751e8f9d69..a4df67cbc7114 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -406,6 +406,8 @@ static const struct file_operations stack_trace_fops = {
 	.release	= seq_release,
 };
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+
 static int
 stack_trace_filter_open(struct inode *inode, struct file *file)
 {
@@ -423,6 +425,8 @@ static const struct file_operations stack_trace_filter_fops = {
 	.release = ftrace_regex_release,
 };
 
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp,
@@ -477,8 +481,10 @@ static __init int stack_trace_init(void)
 	trace_create_file("stack_trace", 0444, d_tracer,
 			NULL, &stack_trace_fops);
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 	trace_create_file("stack_trace_filter", 0444, d_tracer,
 			  &trace_ops, &stack_trace_filter_fops);
+#endif
 
 	if (stack_trace_filter_buf[0])
 		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
-- 
GitLab


From 69449bbd65687e8e5fb968a5a0c46089f6af6001 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 10 Jul 2017 10:44:03 +0200
Subject: [PATCH 1213/1958] ftrace: Hide cached module code for !CONFIG_MODULES

When modules are disabled, we get a harmless build warning:

kernel/trace/ftrace.c:4051:13: error: 'process_cached_mods' defined but not used [-Werror=unused-function]

This adds the same #ifdef around the new code that exists around
its caller.

Link: http://lkml.kernel.org/r/20170710084413.1820568-1-arnd@arndb.de

Fixes: d7fbf8df7ca0 ("ftrace: Implement cached modules tracing on module load")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2953d558bbee7..4706f0ed193e2 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3978,6 +3978,7 @@ static int
 ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
 		 int reset, int enable);
 
+#ifdef CONFIG_MODULES
 static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
 			     char *mod, bool enable)
 {
@@ -4068,6 +4069,7 @@ static void process_cached_mods(const char *mod_name)
 
 	kfree(mod);
 }
+#endif
 
 /*
  * We register the module command as a template to show others how
-- 
GitLab


From 3f0bd8dad0db73f5d71b355aec5ab33b374260ba Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 19 Jun 2017 09:51:28 +1000
Subject: [PATCH 1214/1958] powerpc/perf: Add POWER9 alternate PM_RUN_CYC and
 PM_RUN_INST_CMPL events

Similar to POWER8, POWER9 can count run cycles and run instructions
completed on more than one PMU.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/power9-events-list.h | 4 ++++
 arch/powerpc/perf/power9-pmu.c         | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 80204e0643622..50689180a6c12 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -51,8 +51,12 @@ EVENT(PM_DTLB_MISS,				0x300fc)
 EVENT(PM_ITLB_MISS,				0x400fc)
 /* Run_Instructions */
 EVENT(PM_RUN_INST_CMPL,				0x500fa)
+/* Alternate event code for PM_RUN_INST_CMPL */
+EVENT(PM_RUN_INST_CMPL_ALT,			0x400fa)
 /* Run_cycles */
 EVENT(PM_RUN_CYC,				0x600f4)
+/* Alternate event code for Run_cycles */
+EVENT(PM_RUN_CYC_ALT,				0x200f4)
 /* Instruction Dispatched */
 EVENT(PM_INST_DISP,				0x200f2)
 EVENT(PM_INST_DISP_ALT,				0x300f2)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index f17435e4a489f..2280cf87ff9c9 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -107,6 +107,8 @@ extern struct attribute_group isa207_pmu_format_group;
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power9_event_alternatives[][MAX_ALT] = {
 	{ PM_INST_DISP,			PM_INST_DISP_ALT },
+	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
+	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
 };
 
 static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
-- 
GitLab


From 1bfb159673957644951ab0a8d2aec44b93ddb1ae Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <eduval@amazon.com>
Date: Tue, 11 Jul 2017 14:55:12 -0700
Subject: [PATCH 1215/1958] bridge: mdb: fix leak on complete_info ptr on fail
 path

We currently get the following kmemleak report:
unreferenced object 0xffff8800039d9820 (size 32):
  comm "softirq", pid 0, jiffies 4295212383 (age 792.416s)
  hex dump (first 32 bytes):
    00 0c e0 03 00 88 ff ff ff 02 00 00 00 00 00 00  ................
    00 00 00 01 ff 11 00 02 86 dd 00 00 ff ff ff ff  ................
  backtrace:
    [<ffffffff8152b4aa>] kmemleak_alloc+0x4a/0xa0
    [<ffffffff811d8ec8>] kmem_cache_alloc_trace+0xb8/0x1c0
    [<ffffffffa0389683>] __br_mdb_notify+0x2a3/0x300 [bridge]
    [<ffffffffa038a0ce>] br_mdb_notify+0x6e/0x70 [bridge]
    [<ffffffffa0386479>] br_multicast_add_group+0x109/0x150 [bridge]
    [<ffffffffa0386518>] br_ip6_multicast_add_group+0x58/0x60 [bridge]
    [<ffffffffa0387fb5>] br_multicast_rcv+0x1d5/0xdb0 [bridge]
    [<ffffffffa037d7cf>] br_handle_frame_finish+0xcf/0x510 [bridge]
    [<ffffffffa03a236b>] br_nf_hook_thresh.part.27+0xb/0x10 [br_netfilter]
    [<ffffffffa03a3738>] br_nf_hook_thresh+0x48/0xb0 [br_netfilter]
    [<ffffffffa03a3fb9>] br_nf_pre_routing_finish_ipv6+0x109/0x1d0 [br_netfilter]
    [<ffffffffa03a4400>] br_nf_pre_routing_ipv6+0xd0/0x14c [br_netfilter]
    [<ffffffffa03a3c27>] br_nf_pre_routing+0x197/0x3d0 [br_netfilter]
    [<ffffffff814a2952>] nf_iterate+0x52/0x60
    [<ffffffff814a29bc>] nf_hook_slow+0x5c/0xb0
    [<ffffffffa037ddf4>] br_handle_frame+0x1a4/0x2c0 [bridge]

This happens when switchdev_port_obj_add() fails. This patch
frees complete_info object in the fail path.

Reviewed-by: Vallish Vaidyeshwara <vallish@amazon.com>
Signed-off-by: Eduardo Valentin <eduval@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 09dcdb9c0f3ce..a0b11e7d67d9a 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -323,7 +323,8 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
 			__mdb_entry_to_br_ip(entry, &complete_info->ip);
 			mdb.obj.complete_priv = complete_info;
 			mdb.obj.complete = br_mdb_complete;
-			switchdev_port_obj_add(port_dev, &mdb.obj);
+			if (switchdev_port_obj_add(port_dev, &mdb.obj))
+				kfree(complete_info);
 		}
 	} else if (port_dev && type == RTM_DELMDB) {
 		switchdev_port_obj_del(port_dev, &mdb.obj);
-- 
GitLab


From 533350227d95937703aaa16414701eadd67f3ac3 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 10 Jul 2017 14:04:28 -0700
Subject: [PATCH 1216/1958] samples/bpf: fix a build issue

With latest net-next:

====
clang  -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/6.3.1/include -I./arch/x86/include -I./arch/x86/include/generated/uapi -I./arch/x86/include/generated  -I./include -I./arch/x86/include/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/kconfig.h  -Isamples/bpf \
    -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
    -Wno-compare-distinct-pointer-types \
    -Wno-gnu-variable-sized-type-not-at-end \
    -Wno-address-of-packed-member -Wno-tautological-compare \
    -Wno-unknown-warning-option \
    -O2 -emit-llvm -c samples/bpf/tcp_synrto_kern.c -o -| llc -march=bpf -filetype=obj -o samples/bpf/tcp_synrto_kern.o
samples/bpf/tcp_synrto_kern.c:20:10: fatal error: 'bpf_endian.h' file not found
          ^~~~~~~~~~~~~~
1 error generated.
====

net has the same issue.

Add support for ntohl and htonl in tools/testing/selftests/bpf/bpf_endian.h.
Also move bpf_helpers.h from samples/bpf to selftests/bpf and change
compiler include logic so that programs in samples/bpf can access the headers
in selftests/bpf, but not the other way around.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile                               |  1 +
 tools/testing/selftests/bpf/Makefile               |  1 -
 tools/testing/selftests/bpf/bpf_endian.h           | 14 ++++++++++++++
 .../testing/selftests}/bpf/bpf_helpers.h           |  0
 4 files changed, 15 insertions(+), 1 deletion(-)
 rename {samples => tools/testing/selftests}/bpf/bpf_helpers.h (100%)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 9c650589e80f3..87246be6feb85 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -207,6 +207,7 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
 # useless for BPF samples.
 $(obj)/%.o: $(src)/%.c
 	$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
+		-I$(srctree)/tools/testing/selftests/bpf/ \
 		-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
 		-Wno-compare-distinct-pointer-types \
 		-Wno-gnu-variable-sized-type-not-at-end \
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2ca51a8a588c4..153c3a181a4cd 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -37,6 +37,5 @@ CLANG ?= clang
 
 %.o: %.c
 	$(CLANG) -I. -I./include/uapi -I../../../include/uapi \
-		-I../../../../samples/bpf/ \
 		-Wno-compare-distinct-pointer-types \
 		-O2 -target bpf -c $< -o $@
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index 487cbfb89beb5..74af266aa512b 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -23,11 +23,19 @@
 # define __bpf_htons(x)			__builtin_bswap16(x)
 # define __bpf_constant_ntohs(x)	___constant_swab16(x)
 # define __bpf_constant_htons(x)	___constant_swab16(x)
+# define __bpf_ntohl(x)			__builtin_bswap32(x)
+# define __bpf_htonl(x)			__builtin_bswap32(x)
+# define __bpf_constant_ntohl(x)	___constant_swab32(x)
+# define __bpf_constant_htonl(x)	___constant_swab32(x)
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 # define __bpf_ntohs(x)			(x)
 # define __bpf_htons(x)			(x)
 # define __bpf_constant_ntohs(x)	(x)
 # define __bpf_constant_htons(x)	(x)
+# define __bpf_ntohl(x)			(x)
+# define __bpf_htonl(x)			(x)
+# define __bpf_constant_ntohl(x)	(x)
+# define __bpf_constant_htonl(x)	(x)
 #else
 # error "Fix your compiler's __BYTE_ORDER__?!"
 #endif
@@ -38,5 +46,11 @@
 #define bpf_ntohs(x)				\
 	(__builtin_constant_p(x) ?		\
 	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohl(x) : __bpf_ntohl(x))
 
 #endif /* __BPF_ENDIAN__ */
diff --git a/samples/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
similarity index 100%
rename from samples/bpf/bpf_helpers.h
rename to tools/testing/selftests/bpf/bpf_helpers.h
-- 
GitLab


From 87c4b83e0fe234a1f0eed131ab6fa232036860d5 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 15 Jun 2017 09:46:38 +1000
Subject: [PATCH 1217/1958] powerpc: Fix emulation of mcrf in emulate_step()

The mcrf emulation code was using the CR field number directly as the shift
value, without taking into account that CR fields are numbered from 0-7 starting
at the high bits. That meant it was looking at the CR fields in the reverse
order.

Fixes: cf87c3f6b647 ("powerpc: Emulate icbi, mcrf and conditional-trap instructions")
Cc: stable@vger.kernel.org # v3.18+
Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 33117f8a08820..fb84f51b1f0b7 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -683,8 +683,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 	case 19:
 		switch ((instr >> 1) & 0x3ff) {
 		case 0:		/* mcrf */
-			rd = (instr >> 21) & 0x1c;
-			ra = (instr >> 16) & 0x1c;
+			rd = 7 - ((instr >> 23) & 0x7);
+			ra = 7 - ((instr >> 18) & 0x7);
+			rd *= 4;
+			ra *= 4;
 			val = (regs->ccr >> ra) & 0xf;
 			regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
 			goto instr_done;
-- 
GitLab


From 64e756c55aa46fc18fd53e8f3598b73b528d8637 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 15 Jun 2017 09:46:39 +1000
Subject: [PATCH 1218/1958] powerpc: Fix emulation of mfocrf in emulate_step()

From POWER4 onwards, mfocrf() only places the specified CR field into
the destination GPR, and the rest of it is set to 0. The PowerPC AS
from version 3.0 now requires this behaviour.

The emulation code currently puts the entire CR into the destination GPR.
Fix it.

Fixes: 6888199f7fe5 ("[POWERPC] Emulate more instructions in software")
Cc: stable@vger.kernel.org # v2.6.22+
Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index fb84f51b1f0b7..ee33327686aec 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -966,6 +966,19 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 #endif
 
 		case 19:	/* mfcr */
+			if ((instr >> 20) & 1) {
+				imm = 0xf0000000UL;
+				for (sh = 0; sh < 8; ++sh) {
+					if (instr & (0x80000 >> sh)) {
+						regs->gpr[rd] = regs->ccr & imm;
+						break;
+					}
+					imm >>= 4;
+				}
+
+				goto instr_done;
+			}
+
 			regs->gpr[rd] = regs->ccr;
 			regs->gpr[rd] &= 0xffffffffUL;
 			goto instr_done;
-- 
GitLab


From 19d39a3810e7032f311ef83effdac40339b9d022 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 11 Jul 2017 23:41:52 +0200
Subject: [PATCH 1219/1958] genirq: Keep chip buslock across
 irq_request/release_resources()

Moving the irq_request/release_resources() callbacks out of the spinlocked,
irq disabled and bus locked region, unearthed an interesting abuse of the
irq_bus_lock/irq_bus_sync_unlock() callbacks.

The OMAP GPIO driver does merily power management inside of them. The
irq_request_resources() callback of this GPIO irqchip calls a function
which reads a GPIO register. That read aborts now because the clock of the
GPIO block is not magically enabled via the irq_bus_lock() callback.

Move the callbacks under the bus lock again to prevent this. In the
free_irq() path this requires to drop the bus_lock before calling
synchronize_irq() and reaquiring it before calling the
irq_release_resources() callback.

The bus lock can't be held because:

   1) The data which has been changed between bus_lock/un_lock is cached in
      the irq chip driver private data and needs to go out to the irq chip
      via the slow bus (usually SPI or I2C) before calling
      synchronize_irq().

      That's the reason why this bus_lock/unlock magic exists in the first
      place, as you cannot do SPI/I2C transactions while holding desc->lock
      with interrupts disabled.

   2) synchronize_irq() will actually deadlock, if there is a handler on
      flight. These chips use threaded handlers for obvious reasons, as
      they allow to do SPI/I2C communication. When the threaded handler
      returns then bus_lock needs to be taken in irq_finalize_oneshot() as
      we need to talk to the actual irq chip once more. After that the
      threaded handler is marked done, which makes synchronize_irq() return.

      So if we hold bus_lock accross the synchronize_irq() call, the
      handler cannot mark itself done because it blocks on the bus
      lock. That in turn makes synchronize_irq() wait forever on the
      threaded handler to complete....

Add the missing unlock of desc->request_mutex in the error path of
__free_irq() and add a bunch of comments to explain the locking and
protection rules.

Fixes: 46e48e257360 ("genirq: Move irq resource handling out of spinlocked region")
Reported-and-tested-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Reported-and-tested-by: Tony Lindgren <tony@atomide.com>
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Not-longer-ranted-at-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
---
 kernel/irq/manage.c | 63 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5624b2dd6b580..1d1a5b945ab42 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1090,6 +1090,16 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
 /*
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
+ *
+ * Locking rules:
+ *
+ * desc->request_mutex	Provides serialization against a concurrent free_irq()
+ *   chip_bus_lock	Provides serialization for slow bus operations
+ *     desc->lock	Provides serialization against hard interrupts
+ *
+ * chip_bus_lock and desc->lock are sufficient for all other management and
+ * interrupt related functions. desc->request_mutex solely serializes
+ * request/free_irq().
  */
 static int
 __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
@@ -1167,20 +1177,35 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
 		new->flags &= ~IRQF_ONESHOT;
 
+	/*
+	 * Protects against a concurrent __free_irq() call which might wait
+	 * for synchronize_irq() to complete without holding the optional
+	 * chip bus lock and desc->lock.
+	 */
 	mutex_lock(&desc->request_mutex);
+
+	/*
+	 * Acquire bus lock as the irq_request_resources() callback below
+	 * might rely on the serialization or the magic power management
+	 * functions which are abusing the irq_bus_lock() callback,
+	 */
+	chip_bus_lock(desc);
+
+	/* First installed action requests resources. */
 	if (!desc->action) {
 		ret = irq_request_resources(desc);
 		if (ret) {
 			pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n",
 			       new->name, irq, desc->irq_data.chip->name);
-			goto out_mutex;
+			goto out_bus_unlock;
 		}
 	}
 
-	chip_bus_lock(desc);
-
 	/*
 	 * The following block of code has to be executed atomically
+	 * protected against a concurrent interrupt and any of the other
+	 * management calls which are not serialized via
+	 * desc->request_mutex or the optional bus lock.
 	 */
 	raw_spin_lock_irqsave(&desc->lock, flags);
 	old_ptr = &desc->action;
@@ -1286,10 +1311,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 			ret = __irq_set_trigger(desc,
 						new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret) {
-				irq_release_resources(desc);
+			if (ret)
 				goto out_unlock;
-			}
 		}
 
 		desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
@@ -1385,12 +1408,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 out_unlock:
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
-	chip_bus_sync_unlock(desc);
-
 	if (!desc->action)
 		irq_release_resources(desc);
-
-out_mutex:
+out_bus_unlock:
+	chip_bus_sync_unlock(desc);
 	mutex_unlock(&desc->request_mutex);
 
 out_thread:
@@ -1472,6 +1493,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 			WARN(1, "Trying to free already-free IRQ %d\n", irq);
 			raw_spin_unlock_irqrestore(&desc->lock, flags);
 			chip_bus_sync_unlock(desc);
+			mutex_unlock(&desc->request_mutex);
 			return NULL;
 		}
 
@@ -1498,6 +1520,20 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 #endif
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
+	/*
+	 * Drop bus_lock here so the changes which were done in the chip
+	 * callbacks above are synced out to the irq chips which hang
+	 * behind a slow bus (I2C, SPI) before calling synchronize_irq().
+	 *
+	 * Aside of that the bus_lock can also be taken from the threaded
+	 * handler in irq_finalize_oneshot() which results in a deadlock
+	 * because synchronize_irq() would wait forever for the thread to
+	 * complete, which is blocked on the bus lock.
+	 *
+	 * The still held desc->request_mutex() protects against a
+	 * concurrent request_irq() of this irq so the release of resources
+	 * and timing data is properly serialized.
+	 */
 	chip_bus_sync_unlock(desc);
 
 	unregister_handler_proc(irq, action);
@@ -1530,8 +1566,15 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		}
 	}
 
+	/* Last action releases resources */
 	if (!desc->action) {
+		/*
+		 * Reaquire bus lock as irq_release_resources() might
+		 * require it to deallocate resources over the slow bus.
+		 */
+		chip_bus_lock(desc);
 		irq_release_resources(desc);
+		chip_bus_sync_unlock(desc);
 		irq_remove_timings(desc);
 	}
 
-- 
GitLab


From 7459e1d25ffefa2b1be799477fcc1f6c62f6cec7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Fri, 7 Jul 2017 16:57:06 +0300
Subject: [PATCH 1220/1958] crypto: caam - fix signals handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Driver does not properly handle the case when signals interrupt
wait_for_completion_interruptible():
-it does not check for return value
-completion structure is allocated on stack; in case a signal interrupts
the sleep, it will go out of scope, causing the worker thread
(caam_jr_dequeue) to fail when it accesses it

wait_for_completion_interruptible() is replaced with uninterruptable
wait_for_completion().
We choose to block all signals while waiting for I/O (device executing
the split key generation job descriptor) since the alternative - in
order to have a deterministic device state - would be to flush the job
ring (aborting *all* in-progress jobs).

Cc: <stable@vger.kernel.org>
Fixes: 045e36780f115 ("crypto: caam - ahash hmac support")
Fixes: 4c1ec1f930154 ("crypto: caam - refactor key_gen, sg")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamhash.c | 2 +-
 drivers/crypto/caam/key_gen.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 7c44c90ad5936..910ec61cae098 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -396,7 +396,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
 	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
 	if (!ret) {
 		/* in progress */
-		wait_for_completion_interruptible(&result.completion);
+		wait_for_completion(&result.completion);
 		ret = result.err;
 #ifdef DEBUG
 		print_hex_dump(KERN_ERR,
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
index 1bb2816a9b4d9..c425d4adaf2af 100644
--- a/drivers/crypto/caam/key_gen.c
+++ b/drivers/crypto/caam/key_gen.c
@@ -149,7 +149,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out,
 	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
 	if (!ret) {
 		/* in progress */
-		wait_for_completion_interruptible(&result.completion);
+		wait_for_completion(&result.completion);
 		ret = result.err;
 #ifdef DEBUG
 		print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
-- 
GitLab


From 2acce6aa9f6569d4e135b2c4cfb56acce95efaeb Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 10 Jul 2017 22:00:48 +0800
Subject: [PATCH 1221/1958] Networking

crypto: af_alg - Avoid sock_graft call warning

The newly added sock_graft warning triggers in af_alg_accept.
It's harmless as we're essentially doing sock->sk = sock->sk.

The sock_graft call is actually redundant because all the work
it does is subsumed by sock_init_data.  However, it was added
to placate SELinux as it uses it to initialise its internal state.

This patch avoisd the warning by making the SELinux call directly.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
---
 crypto/af_alg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 3556d8eb54a74..92a3d540d9201 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -287,7 +287,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
 		goto unlock;
 
 	sock_init_data(newsock, sk2);
-	sock_graft(sk2, newsock);
+	security_sock_graft(sk2, newsock);
 	security_sk_clone(sk, sk2);
 
 	err = type->accept(ask->private, sk2);
-- 
GitLab


From 1606043f214f912a52195293614935811a6e3e53 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@benyossef.com>
Date: Wed, 28 Jun 2017 10:22:03 +0300
Subject: [PATCH 1222/1958] crypto: atmel - only treat EBUSY as transient if
 backlog

The Atmel SHA driver was treating -EBUSY as indication of queueing
to backlog without checking that backlog is enabled for the request.

Fix it by checking request flags.

Cc: <stable@vger.kernel.org>
Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-sha.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index a9482023d7d37..dad4e5bad827f 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -1204,7 +1204,9 @@ static int atmel_sha_finup(struct ahash_request *req)
 	ctx->flags |= SHA_FLAGS_FINUP;
 
 	err1 = atmel_sha_update(req);
-	if (err1 == -EINPROGRESS || err1 == -EBUSY)
+	if (err1 == -EINPROGRESS ||
+	    (err1 == -EBUSY && (ahash_request_flags(req) &
+				CRYPTO_TFM_REQ_MAY_BACKLOG)))
 		return err1;
 
 	/*
-- 
GitLab


From 854b06f768794cd664886ec3ba3a5b1c58d42167 Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Wed, 28 Jun 2017 15:27:10 +0200
Subject: [PATCH 1223/1958] crypto: caam - properly set IV after {en,de}crypt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Certain cipher modes like CTS expect the IV (req->info) of
ablkcipher_request (or equivalently req->iv of skcipher_request) to
contain the last ciphertext block when the {en,de}crypt operation is done.
This is currently not the case for the CAAM driver which in turn breaks
e.g. cts(cbc(aes)) when the CAAM driver is enabled.

This patch fixes the CAAM driver to properly set the IV after the
{en,de}crypt operation of ablkcipher finishes.

This issue was revealed by the changes in the SW CTS mode in commit
0605c41cc53ca ("crypto: cts - Convert to skcipher")

Cc: <stable@vger.kernel.org> # 4.8+
Signed-off-by: David Gstir <david@sigma-star.at>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index fde399c887798..0488b7f81dcf3 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -882,10 +882,10 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 {
 	struct ablkcipher_request *req = context;
 	struct ablkcipher_edesc *edesc;
-#ifdef DEBUG
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
+#ifdef DEBUG
 	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
@@ -904,6 +904,14 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 #endif
 
 	ablkcipher_unmap(jrdev, edesc, req);
+
+	/*
+	 * The crypto API expects us to set the IV (req->info) to the last
+	 * ciphertext block. This is used e.g. by the CTS mode.
+	 */
+	scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
+				 ivsize, 0);
+
 	kfree(edesc);
 
 	ablkcipher_request_complete(req, err);
@@ -914,10 +922,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 {
 	struct ablkcipher_request *req = context;
 	struct ablkcipher_edesc *edesc;
-#ifdef DEBUG
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
+#ifdef DEBUG
 	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
@@ -935,6 +943,14 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 #endif
 
 	ablkcipher_unmap(jrdev, edesc, req);
+
+	/*
+	 * The crypto API expects us to set the IV (req->info) to the last
+	 * ciphertext block.
+	 */
+	scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
+				 ivsize, 0);
+
 	kfree(edesc);
 
 	ablkcipher_request_complete(req, err);
-- 
GitLab


From d3f1d2f7863137c5d71e64041b48968db29b149e Mon Sep 17 00:00:00 2001
From: Harsh Jain <harsh@chelsio.com>
Date: Fri, 23 Jun 2017 19:45:11 +0530
Subject: [PATCH 1224/1958] crypto: chcr - Avoid algo allocation in softirq.

Thsi patch fixes calling "crypto_alloc_cipher" call in bottom halves.
Pre allocate aes cipher required to update Tweak value for XTS.

Signed-off-by: Harsh Jain <harsh@chelsio.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chcr_algo.c   | 23 +++++++++++++++--------
 drivers/crypto/chelsio/chcr_crypto.h |  1 +
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index aa4e5b88483de..508cbc79e5088 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -899,26 +899,20 @@ static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv)
 	u8 *key;
 	unsigned int keylen;
 
-	cipher = crypto_alloc_cipher("aes-generic", 0, 0);
+	cipher = ablkctx->aes_generic;
 	memcpy(iv, req->info, AES_BLOCK_SIZE);
 
-	if (IS_ERR(cipher)) {
-		ret = -ENOMEM;
-		goto out;
-	}
 	keylen = ablkctx->enckey_len / 2;
 	key = ablkctx->key + keylen;
 	ret = crypto_cipher_setkey(cipher, key, keylen);
 	if (ret)
-		goto out1;
+		goto out;
 
 	crypto_cipher_encrypt_one(cipher, iv, iv);
 	for (i = 0; i < (reqctx->processed / AES_BLOCK_SIZE); i++)
 		gf128mul_x_ble((le128 *)iv, (le128 *)iv);
 
 	crypto_cipher_decrypt_one(cipher, iv, iv);
-out1:
-	crypto_free_cipher(cipher);
 out:
 	return ret;
 }
@@ -1262,6 +1256,17 @@ static int chcr_cra_init(struct crypto_tfm *tfm)
 		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
 		return PTR_ERR(ablkctx->sw_cipher);
 	}
+
+	if (get_cryptoalg_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_XTS) {
+		/* To update tweak*/
+		ablkctx->aes_generic = crypto_alloc_cipher("aes-generic", 0, 0);
+		if (IS_ERR(ablkctx->aes_generic)) {
+			pr_err("failed to allocate aes cipher for tweak\n");
+			return PTR_ERR(ablkctx->aes_generic);
+		}
+	} else
+		ablkctx->aes_generic = NULL;
+
 	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
 	return chcr_device_init(crypto_tfm_ctx(tfm));
 }
@@ -1292,6 +1297,8 @@ static void chcr_cra_exit(struct crypto_tfm *tfm)
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
 	crypto_free_skcipher(ablkctx->sw_cipher);
+	if (ablkctx->aes_generic)
+		crypto_free_cipher(ablkctx->aes_generic);
 }
 
 static int get_alg_config(struct algo_param *params,
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index a4f95b014b46c..30af1ee17b876 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -155,6 +155,7 @@
 
 struct ablk_ctx {
 	struct crypto_skcipher *sw_cipher;
+	struct crypto_cipher *aes_generic;
 	__be32 key_ctx_hdr;
 	unsigned int enckey_len;
 	unsigned char ciph_mode;
-- 
GitLab


From b8fc3397c9e88dc7889b56395d04c1f3d65d186f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 20 Jun 2017 11:35:50 +0100
Subject: [PATCH 1225/1958] crypto: cavium - make several functions static

The functions cvm_encrypt, cvm_decrypt, cvm_xts_setkey and
cvm_enc_dec_init does not need to be in global scope, so make
them static.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/cpt/cptvf_algs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c
index 1b220f3ed017c..df21d996db7ed 100644
--- a/drivers/crypto/cavium/cpt/cptvf_algs.c
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.c
@@ -222,17 +222,17 @@ static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc)
 		return -EINPROGRESS;
 }
 
-int cvm_encrypt(struct ablkcipher_request *req)
+static int cvm_encrypt(struct ablkcipher_request *req)
 {
 	return cvm_enc_dec(req, true);
 }
 
-int cvm_decrypt(struct ablkcipher_request *req)
+static int cvm_decrypt(struct ablkcipher_request *req)
 {
 	return cvm_enc_dec(req, false);
 }
 
-int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+static int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		   u32 keylen)
 {
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
@@ -336,7 +336,7 @@ static int cvm_ecb_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	return cvm_setkey(cipher, key, keylen, DES3_ECB);
 }
 
-int cvm_enc_dec_init(struct crypto_tfm *tfm)
+static int cvm_enc_dec_init(struct crypto_tfm *tfm)
 {
 	struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-- 
GitLab


From 1dc482b6578e2869e51c35276017178503d5ac81 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Tue, 11 Jul 2017 12:45:43 -0400
Subject: [PATCH 1226/1958] ACPI / irq: Fix return code of acpi_gsi_to_irq()

The function acpi_gsi_to_irq() must return 0 on success as the caller
ghes_probe expects an 0 for success. This change also matches x86
implementation.

This patch was submitted around 4.5 timeframe but wasn't pushed because
it didn't fix a real problem. Now that RAS/GHES patches are in kernel,
this fixes an error seen on a Mustang (arm64) platform:

    GHES: Failed to map GSI to IRQ for generic hardware error source: 2
    GHES: probe of GHES.2 failed with error 81

Signed-off-by: Tuan Phan <tphan@apm.com>
Signed-off-by: Loc Ho <lho@apm.com>
Signed-off-by: Mark Salter <msalter@redhat.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c
index 830299a74b846..7c352cba05289 100644
--- a/drivers/acpi/irq.c
+++ b/drivers/acpi/irq.c
@@ -24,7 +24,7 @@ static struct fwnode_handle *acpi_gsi_domain_id;
  *
  * irq location updated with irq value [>0 on success, 0 on failure]
  *
- * Returns: linux IRQ number on success (>0)
+ * Returns: 0 on success
  *          -EINVAL on failure
  */
 int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
@@ -37,7 +37,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 	 * *irq == 0 means no mapping, that should
 	 * be reported as a failure
 	 */
-	return (*irq > 0) ? *irq : -EINVAL;
+	return (*irq > 0) ? 0 : -EINVAL;
 }
 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
 
-- 
GitLab


From 662591461c4b9a1e3b9b159dbf37648a585ebaae Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Wed, 12 Jul 2017 11:09:09 +0800
Subject: [PATCH 1227/1958] ACPI / EC: Drop EC noirq hooks to fix a regression

According to bug reports, although the busy polling mode can make
noirq stages execute faster, it causes abnormal fan blowing up after
system resume (see the first link below for a video demonstration)
on Lenovo ThinkPad X1 Carbon - the 5th Generation.  The problem can
be fixed by upgrading the EC firmware on that machine.

However, many reporters confirm that the problem can be fixed by
stopping busy polling during suspend/resume and for some of them
upgrading the EC firmware is not an option.

For this reason, drop the noirq stage hooks from the EC driver
to fix the regression.

Fixes: c3a696b6e8f8 (ACPI / EC: Use busy polling mode when GPE is not enabled)
Link: https://youtu.be/9NQ9x-Jm99Q
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196129
Reported-by: Andreas Lindhe <andreas@lindhe.io>
Tested-by: Gjorgji Jankovski <j.gjorgji@gmail.com>
Tested-by: Damjan Georgievski <gdamjan@gmail.com>
Tested-by: Fernando Chaves <nanochaves@gmail.com>
Tested-by: Tomislav Ivek <tomislav.ivek@gmail.com>
Tested-by: Denis P. <theoriginal.skullburner@gmail.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 77c18a5547ed7..b65016f1f6244 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1870,24 +1870,6 @@ int __init acpi_ec_ecdt_probe(void)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int acpi_ec_suspend_noirq(struct device *dev)
-{
-	struct acpi_ec *ec =
-		acpi_driver_data(to_acpi_device(dev));
-
-	acpi_ec_enter_noirq(ec);
-	return 0;
-}
-
-static int acpi_ec_resume_noirq(struct device *dev)
-{
-	struct acpi_ec *ec =
-		acpi_driver_data(to_acpi_device(dev));
-
-	acpi_ec_leave_noirq(ec);
-	return 0;
-}
-
 static int acpi_ec_suspend(struct device *dev)
 {
 	struct acpi_ec *ec =
@@ -1909,7 +1891,6 @@ static int acpi_ec_resume(struct device *dev)
 #endif
 
 static const struct dev_pm_ops acpi_ec_pm = {
-	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend_noirq, acpi_ec_resume_noirq)
 	SET_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend, acpi_ec_resume)
 };
 
-- 
GitLab


From 9c40f956ce9b331493347d1b3cb7e384f7dc0581 Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Wed, 12 Jul 2017 11:09:17 +0800
Subject: [PATCH 1228/1958] Revert "ACPI / EC: Enable event freeze mode..." to
 fix a regression

On Lenovo ThinkPad X1 Carbon - the 5th Generation, enabling an earlier
EC event freezing timing causes acpitz-virtual-0 to report a stuck
48C temparature.  And with EC firmware revisioned as 1.14, without
reverting back to old EC event freezing timing, the fan still blows
up after a system resume.

This reverts the culprit change so that the regression can be fixed
without upgrading the EC firmware.

Fixes: d30283057ecd (ACPI / EC: Enable event freeze mode to improve event handling)
Link: https://bugzilla.kernel.org/show_bug.cgi?id=191181#c168
Tested-by: Damjan Georgievski <gdamjan@gmail.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Cc: 4.9+ <stable@vger.kernel.org> # 4.9+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index b65016f1f6244..20d9e9f0e2311 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -147,7 +147,7 @@ static unsigned int ec_storm_threshold  __read_mostly = 8;
 module_param(ec_storm_threshold, uint, 0644);
 MODULE_PARM_DESC(ec_storm_threshold, "Maxim false GPE numbers not considered as GPE storm");
 
-static bool ec_freeze_events __read_mostly = true;
+static bool ec_freeze_events __read_mostly = false;
 module_param(ec_freeze_events, bool, 0644);
 MODULE_PARM_DESC(ec_freeze_events, "Disabling event handling during suspend/resume");
 
-- 
GitLab


From 3b6a70be5ac81673af1ca8b4dae84743cb9fcc87 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 9 Jul 2017 21:05:12 +0200
Subject: [PATCH 1229/1958] ACPI / x86: Allow matching always_present_id array
 entries by DMI

On some x86 systems the DSDT hides APCI devices to work around Windows
driver bugs. On one such system the device is even hidden until a certain
time after _SB.PCI0.GFX0.LCD.LCD1._ON gets called has passed *and*
_STA has been called at least 3 times since. TL;DR: it is a mess.

Until now the always_present_id matching was used to force status
for a whole class of devices, e.g. always enable PWM1 on CHerry Trail
devices.

This commit extends the always_present_id matching code to optionally
also check for a DMI match so that we can also add system specific
quirks to the always_present_id array.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/utils.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index bd86b809c8486..b0e16516adfdc 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include "../internal.h"
@@ -20,6 +21,10 @@
  * Some ACPI devices are hidden (status == 0x0) in recent BIOS-es because
  * some recent Windows drivers bind to one device but poke at multiple
  * devices at the same time, so the others get hidden.
+ *
+ * Some BIOS-es (temporarily) hide specific APCI devices to work around Windows
+ * driver bugs. We use DMI matching to match known cases of this.
+ *
  * We work around this by always reporting ACPI_STA_DEFAULT for these
  * devices. Note this MUST only be done for devices where this is safe.
  *
@@ -31,14 +36,16 @@
 struct always_present_id {
 	struct acpi_device_id hid[2];
 	struct x86_cpu_id cpu_ids[2];
+	struct dmi_system_id dmi_ids[2]; /* Optional */
 	const char *uid;
 };
 
 #define ICPU(model)	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
 
-#define ENTRY(hid, uid, cpu_models) {					\
+#define ENTRY(hid, uid, cpu_models, dmi...) {				\
 	{ { hid, }, {} },						\
 	{ cpu_models, {} },						\
+	{ { .matches = dmi }, {} },					\
 	uid,								\
 }
 
@@ -47,13 +54,13 @@ static const struct always_present_id always_present_ids[] = {
 	 * Bay / Cherry Trail PWM directly poked by GPU driver in win10,
 	 * but Linux uses a separate PWM driver, harmless if not used.
 	 */
-	ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1)),
-	ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+	ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1), {}),
+	ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}),
 	/*
 	 * The INT0002 device is necessary to clear wakeup interrupt sources
 	 * on Cherry Trail devices, without it we get nobody cared IRQ msgs.
 	 */
-	ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+	ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}),
 };
 
 bool acpi_device_always_present(struct acpi_device *adev)
@@ -76,6 +83,10 @@ bool acpi_device_always_present(struct acpi_device *adev)
 		if (!x86_match_cpu(always_present_ids[i].cpu_ids))
 			continue;
 
+		if (always_present_ids[i].dmi_ids[0].matches[0].slot &&
+		    !dmi_check_system(always_present_ids[i].dmi_ids))
+			continue;
+
 		if (old_status != ACPI_STA_DEFAULT) /* Log only once */
 			dev_info(&adev->dev,
 				 "Device [%s] is in always present list\n",
-- 
GitLab


From b5cc1699191829cda2dd9447210e1e801e2082c9 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 9 Jul 2017 21:05:13 +0200
Subject: [PATCH 1230/1958] ACPI / x86: Add Dell Venue 11 Pro 7130 touchscreen
 to always_present_ids

The _STA method of the Venue 11 Pro 7130 touchscreen has this ugliness:

                Method (_STA, 0, NotSerialized)  // _STA: Status
                {
                    If ((SDS1 & One) == One)
                    {
                        If (RST1 == Zero)
                        {
                            Return (0x0F)
                        }
                        ElseIf (RST2 == Zero)
                        {
                            RST2 = One
                            TMRV = Timer
                        }
                        Else
                        {
                            Local0 = ((Timer - TMRV) / 0x2710)
                            If (Local0 > TMRI)
                            {
                                RST2 = Zero
                                RST1 = Zero
                            }
                        }
                    }
                    Else
                    {
                        Return (Zero)
                    }
                }

Whereby RST1 gets set by _SB.PCI0.GFX0.LCD.LCD1._ON, this means that
after RST1 has been set first _STA must be called to set TIMER and
then after enough time has elapsed _STA must be called twice more, once
to clear RST1 and once to finally return 0xf before the touchscreen will
show up. Which is just crazy.

This commit adds an always_present_ids entry for the SYNA7500 touchscreen
ACPI node, together with a DMI match for the Venue 11 Pro 7130, fixing the
touchscreen not working on this device.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/utils.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index b0e16516adfdc..eb6caf6f708b3 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -61,6 +61,15 @@ static const struct always_present_id always_present_ids[] = {
 	 * on Cherry Trail devices, without it we get nobody cared IRQ msgs.
 	 */
 	ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}),
+	/*
+	 * On the Dell Venue 11 Pro 7130 the DSDT hides the touchscreen ACPI
+	 * device until a certain time after _SB.PCI0.GFX0.LCD.LCD1._ON gets
+	 * called has passed *and* _STA has been called at least 3 times since.
+	 */
+	ENTRY("SYNA7500", "1", ICPU(INTEL_FAM6_HASWELL_ULT), {
+		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"),
+	      }),
 };
 
 bool acpi_device_always_present(struct acpi_device *adev)
-- 
GitLab


From 906dc284d00c8aa8e2ad3f71182cf9fa3107e35a Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 9 Jul 2017 21:05:14 +0200
Subject: [PATCH 1231/1958] ACPI / x86: Add KIOX000A accelerometer on GPD win
 to always_present_ids array

The GPD win BIOS dated 20170320 has disabled the accelerometer, the
drivers sometimes cause crashes under Windows and this is how the
manufacturer has solved this :|

I see no other way to keep the accelerometer working under Windows then
adding it to the always_present_ids array.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/utils.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index eb6caf6f708b3..b4fbb99294828 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -70,6 +70,19 @@ static const struct always_present_id always_present_ids[] = {
 		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 		DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"),
 	      }),
+	/*
+	 * The GPD win BIOS dated 20170320 has disabled the accelerometer, the
+	 * drivers sometimes cause crashes under Windows and this is how the
+	 * manufacturer has solved this :| Note that the the DMI data is less
+	 * generic then it seems, a board_vendor of "AMI Corporation" is quite
+	 * rare and a board_name of "Default String" also is rare.
+	 */
+	ENTRY("KIOX000A", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {
+		DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+		DMI_MATCH(DMI_BOARD_NAME, "Default string"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
+		DMI_MATCH(DMI_BIOS_DATE, "03/20/2017")
+	      }),
 };
 
 bool acpi_device_always_present(struct acpi_device *adev)
-- 
GitLab


From e8158b486d5f3f55cf372c5a32b42f263bf7f123 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 11 Jul 2017 18:20:20 +0300
Subject: [PATCH 1232/1958] device property: Introduce fwnode_call_bool_op()
 for ops that return bool

fwnode_call_int_op() isn't suitable for calling ops that return bool
since it effectively causes the result returned to the user to be
true when an op hasn't been defined or the fwnode is NULL.

Address this by introducing fwnode_call_bool_op() for calling ops
that return bool.

Fixes: 3708184afc77 "device property: Move FW type specific functionality to FW specific files"
Fixes: 2294b3af05e9 "device property: Introduce fwnode_device_is_available()"
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c | 6 +++---
 include/linux/fwnode.h  | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 692007e5a94b1..edf02c1b58459 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -253,10 +253,10 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
 {
 	bool ret;
 
-	ret = fwnode_call_int_op(fwnode, property_present, propname);
+	ret = fwnode_call_bool_op(fwnode, property_present, propname);
 	if (ret == false && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = fwnode_call_int_op(fwnode->secondary, property_present,
+		ret = fwnode_call_bool_op(fwnode->secondary, property_present,
 					 propname);
 	return ret;
 }
@@ -1027,7 +1027,7 @@ EXPORT_SYMBOL_GPL(fwnode_handle_put);
  */
 bool fwnode_device_is_available(struct fwnode_handle *fwnode)
 {
-	return fwnode_call_int_op(fwnode, device_is_available);
+	return fwnode_call_bool_op(fwnode, device_is_available);
 }
 EXPORT_SYMBOL_GPL(fwnode_device_is_available);
 
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 9ab3754191892..50893a1646cf3 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -99,6 +99,10 @@ struct fwnode_operations {
 	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
 		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
 	 -EINVAL)
+#define fwnode_call_bool_op(fwnode, op, ...)				\
+	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
+		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : false) : \
+	 false)
 #define fwnode_call_ptr_op(fwnode, op, ...)		\
 	(fwnode_has_op(fwnode, op) ?			\
 	 (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL)
-- 
GitLab


From 01e6a61aceb82e13bec29502a8eb70d9574f97ad Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 11 Jul 2017 22:10:54 +1000
Subject: [PATCH 1233/1958] powerpc/64: Fix atomic64_inc_not_zero() to return
 an int

Although it's not documented anywhere, there is an expectation that
atomic64_inc_not_zero() returns a result which fits in an int. This is
the behaviour implemented on all arches except powerpc.

This has caused at least one bug in practice, in the percpu-refcount
code, where the long result from our atomic64_inc_not_zero() was
truncated to an int leading to lost references and stuck systems. That
was worked around in that code in commit 966d2b04e070 ("percpu-refcount:
fix reference leak during percpu-atomic transition").

To the best of my grepping abilities there are no other callers
in-tree which truncate the value, but we should fix it anyway. Because
the breakage is subtle and potentially very harmful I'm also tagging
it for stable.

Code generation is largely unaffected because in most cases the
callers are just using the result for a test anyway. In particular the
case of fget() that was mentioned in commit a6cf7ed5119f
("powerpc/atomic: Implement atomic*_inc_not_zero") generates exactly
the same code.

Fixes: a6cf7ed5119f ("powerpc/atomic: Implement atomic*_inc_not_zero")
Cc: stable@vger.kernel.org # v3.4
Noticed-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/atomic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 2b90335194a76..a2cc8010cd72a 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -560,7 +560,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
  * Atomically increments @v by 1, so long as @v is non-zero.
  * Returns non-zero if @v was non-zero, and zero otherwise.
  */
-static __inline__ long atomic64_inc_not_zero(atomic64_t *v)
+static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
 {
 	long t1, t2;
 
@@ -579,7 +579,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer", "memory");
 
-	return t1;
+	return t1 != 0;
 }
 
 #endif /* __powerpc64__ */
-- 
GitLab


From d4436c0dba8d4d780588179a2e192a867d266a10 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 10 Jul 2017 16:23:52 -0700
Subject: [PATCH 1234/1958] cpufreq: intel_pstate: Fix ratio setting for
 min_perf_pct

When the minimum performance limit percentage is set to the power-up
default, it is possible that minimum performance ratio is off by one.

In the set_policy() callback the minimum ratio is calculated by
applying global.min_perf_pct to turbo_ratio and rounding up, but the
power-up default global.min_perf_pct is already rounded up to the
next percent in min_perf_pct_min().  That results in two round up
operations, so for the default min_perf_pct one of them is not
required.

It is better to remove rounding up in min_perf_pct_min() as this
matches the displayed min_perf_pct prior to commit c5a2ee7dde89
(cpufreq: intel_pstate: Active mode P-state limits rework) in 4.12.

For example on a platform with max turbo ratio of 37 and minimum
ratio of 10, the min_perf_pct resulted in 28 with the above commit.
Before this commit it was 27 and it will be the same after this
change.

Fixes: 1a4fe38add8b (cpufreq: intel_pstate: Remove max/min fractions to limit performance)
Reported-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 7564832518327..2386d7036e909 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -572,7 +572,7 @@ static int min_perf_pct_min(void)
 	int turbo_pstate = cpu->pstate.turbo_pstate;
 
 	return turbo_pstate ?
-		DIV_ROUND_UP(cpu->pstate.min_pstate * 100, turbo_pstate) : 0;
+		(cpu->pstate.min_pstate * 100 / turbo_pstate) : 0;
 }
 
 static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
-- 
GitLab


From 2ca30331c156ca9e97643ad05dd8930b8fe78b01 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 10 Jul 2017 10:21:40 +0300
Subject: [PATCH 1235/1958] PM / QoS: return -EINVAL for bogus strings

In the current code, if the user accidentally writes a bogus command to
this sysfs file, then we set the latency tolerance to an uninitialized
variable.

Fixes: 2d984ad132a8 (PM / QoS: Introcuce latency tolerance device PM QoS type)
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: 3.15+ <stable@vger.kernel.org> # 3.15+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/sysfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 185a52581cfae..156ab57bca771 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -272,6 +272,8 @@ static ssize_t pm_qos_latency_tolerance_store(struct device *dev,
 			value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
 		else if (!strcmp(buf, "any") || !strcmp(buf, "any\n"))
 			value = PM_QOS_LATENCY_ANY;
+		else
+			return -EINVAL;
 	}
 	ret = dev_pm_qos_update_user_latency_tolerance(dev, value);
 	return ret < 0 ? ret : n;
-- 
GitLab


From ab2f7cf141aa6734c4ca7525132d8cc236efee77 Mon Sep 17 00:00:00 2001
From: Vikram Mulukutla <markivx@codeaurora.org>
Date: Thu, 6 Jul 2017 10:53:20 -0700
Subject: [PATCH 1236/1958] cpufreq: schedutil: Fix sugov_start() versus
 sugov_update_shared() race

With a shared policy in place, when one of the CPUs in the policy is
hotplugged out and then brought back online, sugov_stop() and
sugov_start() are called in order.

sugov_stop() removes utilization hooks for each CPU in the policy and
does nothing else in the for_each_cpu() loop. sugov_start() on the
other hand iterates through the CPUs in the policy and re-initializes
the per-cpu structure _and_ adds the utilization hook.  This implies
that the scheduler is allowed to invoke a CPU's utilization update
hook when the rest of the per-cpu structures have yet to be
re-inited.

Apart from some strange values in tracepoints this doesn't cause a
problem, but if we do end up accessing a pointer from the per-cpu
sugov_cpu structure somewhere in the sugov_update_shared() path,
we will likely see crashes since the memset for another CPU in the
policy is free to race with sugov_update_shared from the CPU that is
ready to go.  So let's fix this now to first init all per-cpu
structures, and then add the per-cpu utilization update hooks all at
once.

Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 076a2e31951cc..29a397067ffa0 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -610,6 +610,11 @@ static int sugov_start(struct cpufreq_policy *policy)
 		sg_cpu->sg_policy = sg_policy;
 		sg_cpu->flags = SCHED_CPUFREQ_RT;
 		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+	}
+
+	for_each_cpu(cpu, policy->cpus) {
+		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+
 		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 					     policy_is_shared(policy) ?
 							sugov_update_shared :
-- 
GitLab


From 44925dfff05fd1a897992d278b15a6b6b55e79a7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 Jul 2017 10:33:40 +0300
Subject: [PATCH 1237/1958] ftrace: Remove an unneeded NULL check

"func" can't be NULL and it doesn't make sense to check because we've
already derefenced it.

Link: http://lkml.kernel.org/r/20170712073340.4enzeojeoupuds5a@mwanda

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4706f0ed193e2..5fb5b40b3ae8d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3950,7 +3950,7 @@ static int cache_mod(struct trace_array *tr,
 				continue;
 
 			/* no func matches all */
-			if (!func || strcmp(func, "*") == 0 ||
+			if (strcmp(func, "*") == 0 ||
 			    (ftrace_mod->func &&
 			     strcmp(ftrace_mod->func, func) == 0)) {
 				ret = 0;
-- 
GitLab


From 2e028c4fe12907f226b8221815f16c2486ad3aa7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 Jul 2017 10:35:57 +0300
Subject: [PATCH 1238/1958] ftrace: Fix uninitialized variable in
 match_records()

My static checker complains that if "func" is NULL then "clear_filter"
is uninitialized.  This seems like it could be true, although it's
possible something subtle is happening that I haven't seen.

    kernel/trace/ftrace.c:3844 match_records()
    error: uninitialized symbol 'clear_filter'.

Link: http://lkml.kernel.org/r/20170712073556.h6tkpjcdzjaozozs@mwanda

Cc: stable@vger.kernel.org
Fixes: f0a3b154bd7 ("ftrace: Clarify code for mod command")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5fb5b40b3ae8d..53f6b6401cf05 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3816,7 +3816,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
 	int exclude_mod = 0;
 	int found = 0;
 	int ret;
-	int clear_filter;
+	int clear_filter = 0;
 
 	if (func) {
 		func_g.type = filter_parse_regex(func, len, &func_g.search,
-- 
GitLab


From 38c9140740eb0993924b676a111c164903163b1e Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Wed, 12 Jul 2017 15:25:01 +0800
Subject: [PATCH 1239/1958] bfq: fix typos in comments about B-WF2Q+ algorithm

The start time of eligible entity should be less than or equal to
the current virtual time, and the entity in idle tree has a finish
time being greater than the current virtual time.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Reviewed-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.h | 2 +-
 block/bfq-wf2q.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 8fd83b8857743..63e771ab56d80 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -52,7 +52,7 @@ struct bfq_entity;
 struct bfq_service_tree {
 	/* tree for active entities (i.e., those backlogged) */
 	struct rb_root active;
-	/* tree for idle entities (i.e., not backlogged, with V <= F_i)*/
+	/* tree for idle entities (i.e., not backlogged, with V < F_i)*/
 	struct rb_root idle;
 
 	/* idle entity with minimum F_i */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 5ec05cd42b807..979f8f21b7e2b 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1297,7 +1297,7 @@ static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
  *
  * This function searches the first schedulable entity, starting from the
  * root of the tree and going on the left every time on this side there is
- * a subtree with at least one eligible (start >= vtime) entity. The path on
+ * a subtree with at least one eligible (start <= vtime) entity. The path on
  * the right is followed only if a) the left subtree contains no eligible
  * entities and b) no eligible entity has been found yet.
  */
-- 
GitLab


From 3f7cb4f4130ca5693f0698211bf1e42945efbb8c Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Tue, 11 Jul 2017 21:58:15 +0800
Subject: [PATCH 1240/1958] bfq: dispatch request to prevent queue stalling
 after the request completion

There are mq devices (eg., virtio-blk, nbd and loopback) which don't
invoke blk_mq_run_hw_queues() after the completion of a request.
If bfq is enabled on these devices and the slice_idle attribute or
strict_guarantees attribute is set as zero, it is possible that
after a request completion the remaining requests of busy bfq queue
will stalled in the bfq schedule until a new request arrives.

To fix the scheduler latency problem, we need to check whether or not
all issued requests have completed and dispatch more requests to driver
if there is no request in driver.

The problem can be reproduced by running the following script
on a virtio-blk device with nr_hw_queues as 1:

#!/bin/sh

dev=vdb
# mount point for dev
mp=/tmp/mnt
cd $mp

job=strict.job
cat <<EOF > $job
[global]
direct=1
bs=4k
size=256M
rw=write
ioengine=libaio
iodepth=128
runtime=5
time_based

[1]
filename=1.data

[2]
new_group
filename=2.data
EOF

echo bfq > /sys/block/$dev/queue/scheduler
echo 1 > /sys/block/$dev/queue/iosched/strict_guarantees
fio $job

Signed-off-by: Hou Tao <houtao1@huawei.com>
Reviewed-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 60a6835265fc3..436b6ca6b1759 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4299,6 +4299,9 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 			bfq_bfqq_expire(bfqd, bfqq, false,
 					BFQQE_NO_MORE_REQUESTS);
 	}
+
+	if (!bfqd->rq_in_driver)
+		bfq_schedule_dispatch(bfqd);
 }
 
 static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
-- 
GitLab


From 4b1303d0b01440f224cf81493b7e8e43d9b4965e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 11 Jul 2017 16:21:40 -0300
Subject: [PATCH 1241/1958] perf symbols: Accept zero as the kernel base
 address

Which is the case in S/390, where symbols were not being resolved
because machine__get_kernel_start was only setting machine->kernel_start
when the just successfully loaded kernel symtab had its map->start set
to !0, when it was left at (1ULL << 63) assuming a partitioning of the
address space for user/kernel, which is not the case in S/390 nor in
Sparc.

So just check if map__load() was successfull and set
machine->kernel_start to zero, fixing kernel symbol resolution on S/390.

Test performed by Thomas:

 ----

  I like this patch. I have done a new build and removed all my debug output to start
  from scratch. Without your patch I get this:

  # Samples: 4  of event 'cpu-clock'
  # Event count (approx.): 1000000
  #
  # Children      Self  Command  Shared Object     Symbol
  # ........  ........  .......  ................  ........................
      75.00%     0.00%  true     [unknown]         [k] 0x00000000004bedda
              |
              ---0x4bedda
                 |
                 |--50.00%--0x42693a
                 |          |
                 |           --25.00%--0x2a72e0
                 |                     0x2af0ca
                 |                     0x3d1003fe4c0
                 |
                  --25.00%--0x4272bc
                            0x26fa84

  and with your patch (I just rebuilt the perf tool, nothing else and used the same
  perf.data file as input):

  # Samples: 4  of event 'cpu-clock'
  # Event count (approx.): 1000000
  #
  # Children      Self  Command  Shared Object               Symbol
  # ........  ........  .......  ..........................  ..................................
      75.00%     0.00%  true     [kernel.vmlinux]            [k] pgm_check_handler
              |
              ---pgm_check_handler
                 do_dat_exception
                 handle_mm_fault
                 __handle_mm_fault
                 filemap_map_pages
                 |
                 |--25.00%--rcu_read_lock_held
                 |          rcu_lockdep_current_cpu_online
                 |          0x3d1003ff4c0
                 |
                  --25.00%--lock_release

  Looks good to me....
 ----

Reported-and-Tested-by: Thomas-Mich Richter <tmricht@linux.vnet.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zvonko Kosic <zvonko.kosic@de.ibm.com>
Link: http://lkml.kernel.org/n/tip-dk0n1uzmbe0tbthrpfqlx6bz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5de2b86b9880c..2e9eb6aa3ce2e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2209,7 +2209,7 @@ int machine__get_kernel_start(struct machine *machine)
 	machine->kernel_start = 1ULL << 63;
 	if (map) {
 		err = map__load(map);
-		if (map->start)
+		if (!err)
 			machine->kernel_start = map->start;
 	}
 	return err;
-- 
GitLab


From 2ad67141f1e47dc063b202993835361a06239aaa Mon Sep 17 00:00:00 2001
From: Tushar Dave <tushar.n.dave@oracle.com>
Date: Tue, 11 Jul 2017 14:34:47 -0700
Subject: [PATCH 1242/1958] SPARC64: Fix sun4v DMA panic

64bit DMA only supported on sun4v equipped with ATU IOMMU HW.
'Commit b02c2b0bfd7ae ("sparc: remove arch specific dma_supported
implementations")' introduced a code that incorrectly allow
dma_supported() to succeed for 64bit dma mask even if system doesn't
have ATU IOMMU. This results into panic.

Fix it.

Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/pci_sun4v.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 24f21c726dfad..f10e2f7123949 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -673,12 +673,14 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 static int dma_4v_supported(struct device *dev, u64 device_mask)
 {
 	struct iommu *iommu = dev->archdata.iommu;
-	u64 dma_addr_mask;
+	u64 dma_addr_mask = iommu->dma_addr_mask;
 
-	if (device_mask > DMA_BIT_MASK(32) && iommu->atu)
-		dma_addr_mask = iommu->atu->dma_addr_mask;
-	else
-		dma_addr_mask = iommu->dma_addr_mask;
+	if (device_mask > DMA_BIT_MASK(32)) {
+		if (iommu->atu)
+			dma_addr_mask = iommu->atu->dma_addr_mask;
+		else
+			return 0;
+	}
 
 	if ((device_mask & dma_addr_mask) == dma_addr_mask)
 		return 1;
-- 
GitLab


From a4e75b76b2ce47e3d964c87689962abb83cc4958 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 12 Jul 2017 09:12:52 +0200
Subject: [PATCH 1243/1958] mlxsw: spectrum_router: Add missing rollback

With this patch the error path of mlxsw_sp_nexthop_init() is symmetric
with mlxsw_sp_nexthop_fini(). Noticed during code review.

Fixes: a8c970142798 ("mlxsw: spectrum_router: Refactor nexthop init routine")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 192cb93e7669b..129afc168fd9d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1790,6 +1790,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 
 err_nexthop_neigh_init:
+	mlxsw_sp_nexthop_rif_fini(nh);
 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
 	return err;
 }
-- 
GitLab


From 7387dbbcdb2404e189591435527dcc65463819d7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 12 Jul 2017 09:12:53 +0200
Subject: [PATCH 1244/1958] mlxsw: spectrum_router: Fix use-after-free in route
 replace

While working on IPv6 route replace I realized we can have a
use-after-free in IPv4 in case the replaced route is offloaded and the
only one using its FIB info.

The problem is that fib_table_insert() drops the reference on the FIB
info of the replaced routes which is eventually freed via call_rcu().
Since the driver doesn't hold a reference on this FIB info it can cause
a use-after-free when it tries to clear the RTNH_F_OFFLOAD flag stored
in fi->fib_flags.

After running the following commands in a loop for enough time with a
KASAN enabled kernel I finally got the below trace.

$ ip route add 192.168.50.0/24 via 192.168.200.1 dev enp3s0np3
$ ip route replace 192.168.50.0/24 dev enp3s0np5
$ ip route del 192.168.50.0/24 dev enp3s0np5

BUG: KASAN: use-after-free in mlxsw_sp_fib_entry_offload_unset+0xa7/0x120 [mlxsw_spectrum]
Read of size 4 at addr ffff8803717d9820 by task kworker/u4:2/55
[...]
? mlxsw_sp_fib_entry_offload_unset+0xa7/0x120 [mlxsw_spectrum]
? mlxsw_sp_fib_entry_offload_unset+0xa7/0x120 [mlxsw_spectrum]
? mlxsw_sp_router_neighs_update_work+0x1cd0/0x1ce0 [mlxsw_spectrum]
? mlxsw_sp_fib_entry_offload_unset+0xa7/0x120 [mlxsw_spectrum]
__asan_load4+0x61/0x80
mlxsw_sp_fib_entry_offload_unset+0xa7/0x120 [mlxsw_spectrum]
mlxsw_sp_fib_entry_offload_refresh+0xb6/0x370 [mlxsw_spectrum]
mlxsw_sp_router_fib_event_work+0xd1c/0x2780 [mlxsw_spectrum]
[...]
Freed by task 5131:
 save_stack_trace+0x16/0x20
 save_stack+0x46/0xd0
 kasan_slab_free+0x70/0xc0
 kfree+0x144/0x570
 free_fib_info_rcu+0x2e7/0x410
 rcu_process_callbacks+0x4f8/0xe30
 __do_softirq+0x1d3/0x9e2

Fix this by taking a reference on the FIB info when creating the nexthop
group it represents and drop it when the group is destroyed.

Fixes: 599cf8f95f22 ("mlxsw: spectrum_router: Add support for route replace")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 129afc168fd9d..383fef5a8e242 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1867,6 +1867,7 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
 	nh_grp->count = fi->fib_nhs;
 	nh_grp->key.fi = fi;
+	fib_info_hold(fi);
 	for (i = 0; i < nh_grp->count; i++) {
 		nh = &nh_grp->nexthops[i];
 		fib_nh = &fi->fib_nh[i];
@@ -1886,6 +1887,7 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 		nh = &nh_grp->nexthops[i];
 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
 	}
+	fib_info_put(nh_grp->key.fi);
 	kfree(nh_grp);
 	return ERR_PTR(err);
 }
@@ -1904,6 +1906,7 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
 	}
 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
 	WARN_ON_ONCE(nh_grp->adj_index_valid);
+	fib_info_put(nh_grp->key.fi);
 	kfree(nh_grp);
 }
 
-- 
GitLab


From a9265b804dfdfca4e2ce3730fe0528da9c01dbf7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 12 Jul 2017 09:12:54 +0200
Subject: [PATCH 1245/1958] mlxsw: spectrum_switchdev: Remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 10e23eb299fa ("mlxsw: spectrum: Remove support for bypass bridge
port attributes/vlan set") removed statements that used 'bridge_vlan',
but didn't remove the variable itself resulting in the following warning
with W=1:

warning: variable ‘bridge_vlan’ set but not used
[-Wunused-but-set-variable]

Remove the variable and suppress the warning.

Fixes: 10e23eb299fa ("mlxsw: spectrum: Remove support for bypass bridge port attributes/vlan set")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index cd89a3e6cd818..4733fe918315f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -979,7 +979,6 @@ mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid);
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-	struct mlxsw_sp_bridge_vlan *bridge_vlan;
 	u16 old_pvid = mlxsw_sp_port->pvid;
 	int err;
 
@@ -1000,8 +999,6 @@ mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (err)
 		goto err_port_vlan_bridge_join;
 
-	bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid);
-
 	return 0;
 
 err_port_vlan_bridge_join:
-- 
GitLab


From 6f497930af73863b32e8c73947a52d1003af31e2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 12 Jul 2017 09:12:55 +0200
Subject: [PATCH 1246/1958] mlxsw: spectrum_switchdev: Check status of memory
 allocation

We can't rely on kzalloc() always succeeding, so check its return value.

Suppresses the following smatch error:

mlxsw_sp_switchdev_event() error: potential null dereference
'switchdev_work->fdb_info.addr'.  (kzalloc returns
 null)

Fixes: af061378924f ("mlxsw: spectrum_switchdev: Add support for learning FDB through notification")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 4733fe918315f..656b2d3f1bee0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1916,6 +1916,8 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 		memcpy(&switchdev_work->fdb_info, ptr,
 		       sizeof(switchdev_work->fdb_info));
 		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+		if (!switchdev_work->fdb_info.addr)
+			goto err_addr_alloc;
 		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
 				fdb_info->addr);
 		/* Take a reference on the device. This can be either
@@ -1932,6 +1934,10 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 	mlxsw_core_schedule_work(&switchdev_work->work);
 
 	return NOTIFY_DONE;
+
+err_addr_alloc:
+	kfree(switchdev_work);
+	return NOTIFY_BAD;
 }
 
 static struct notifier_block mlxsw_sp_switchdev_notifier = {
-- 
GitLab


From 88f0f09b3ab7d817b897cedbd7a05a565ff4f75a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 Jul 2017 10:42:06 +0300
Subject: [PATCH 1247/1958] nfp: freeing the wrong variable

We accidentally free a NULL pointer and leak the pointer we want to
free.  Also you can tell from the label name what was intended.  :)

Fixes: abfcdc1de9bf ("nfp: add a stats handler for flower offloads")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/metadata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index fec0ff2ca94f6..3226ddc55f99b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -419,7 +419,7 @@ int nfp_flower_metadata_init(struct nfp_app *app)
 	return 0;
 
 err_free_last_used:
-	kfree(priv->stats_ids.free_list.buf);
+	kfree(priv->mask_ids.last_used);
 err_free_mask_id:
 	kfree(priv->mask_ids.mask_id_free_list.buf);
 	return -ENOMEM;
-- 
GitLab


From 2e3d232e139a7128e2494a840a8a00fb9ab65f45 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 Jul 2017 10:56:47 +0300
Subject: [PATCH 1248/1958] net: ipmr: ipmr_get_table() returns NULL

The ipmr_get_table() function doesn't return error pointers it returns
NULL on error.

Fixes: 4f75ba6982bc ("net: ipmr: Add ipmr_rtm_getroute")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index bb909f1d7537a..06863ea3fc5b8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2431,8 +2431,8 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
 
 	mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
-	if (IS_ERR(mrt)) {
-		err = PTR_ERR(mrt);
+	if (!mrt) {
+		err = -ENOENT;
 		goto errout_free;
 	}
 
-- 
GitLab


From 76b825ab870be3281edac4ae8a414da6e54b0d3a Mon Sep 17 00:00:00 2001
From: Lin Yun Sheng <linyunsheng@huawei.com>
Date: Wed, 12 Jul 2017 19:09:59 +0800
Subject: [PATCH 1249/1958] net: hns: Bugfix for Tx timeout handling in hns
 driver

When hns port type is not debug mode, netif_tx_disable is called
when there is a tx timeout, which requires system reboot to return
to normal state. This patch fix this problem by resetting the net
dev.

Fixes: b5996f11ea54 ("net: add Hisilicon Network Subsystem basic ethernet support")
Signed-off-by: Lin Yun Sheng <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index fe166e0f67810..3987699f8fe6a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1378,13 +1378,20 @@ void hns_nic_net_reset(struct net_device *ndev)
 void hns_nic_net_reinit(struct net_device *netdev)
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
+	enum hnae_port_type type = priv->ae_handle->port_type;
 
 	netif_trans_update(priv->netdev);
 	while (test_and_set_bit(NIC_STATE_REINITING, &priv->state))
 		usleep_range(1000, 2000);
 
 	hns_nic_net_down(netdev);
-	hns_nic_net_reset(netdev);
+
+	/* Only do hns_nic_net_reset in debug mode
+	 * because of hardware limitation.
+	 */
+	if (type == HNAE_PORT_DEBUG)
+		hns_nic_net_reset(netdev);
+
 	(void)hns_nic_net_up(netdev);
 	clear_bit(NIC_STATE_REINITING, &priv->state);
 }
@@ -1997,13 +2004,8 @@ static void hns_nic_reset_subtask(struct hns_nic_priv *priv)
 	rtnl_lock();
 	/* put off any impending NetWatchDogTimeout */
 	netif_trans_update(priv->netdev);
+	hns_nic_net_reinit(priv->netdev);
 
-	if (type == HNAE_PORT_DEBUG) {
-		hns_nic_net_reinit(priv->netdev);
-	} else {
-		netif_carrier_off(priv->netdev);
-		netif_tx_disable(priv->netdev);
-	}
 	rtnl_unlock();
 }
 
-- 
GitLab


From 8f44c9a41386729fea410e688959ddaa9d51be7c Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend.vanspriel@broadcom.com>
Date: Fri, 7 Jul 2017 21:09:06 +0100
Subject: [PATCH 1250/1958] brcmfmac: fix possible buffer overflow in
 brcmf_cfg80211_mgmt_tx()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lower level nl80211 code in cfg80211 ensures that "len" is between
25 and NL80211_ATTR_FRAME (2304).  We subtract DOT11_MGMT_HDR_LEN (24) from
"len" so thats's max of 2280.  However, the action_frame->data[] buffer is
only BRCMF_FIL_ACTION_FRAME_SIZE (1800) bytes long so this memcpy() can
overflow.

	memcpy(action_frame->data, &buf[DOT11_MGMT_HDR_LEN],
	       le16_to_cpu(action_frame->len));

Cc: stable@vger.kernel.org # 3.9.x
Fixes: 18e2f61db3b70 ("brcmfmac: P2P action frame tx.")
Reported-by: "freenerguo(郭大兴)" <freenerguo@tencent.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index dcde596c9eb9c..7e689c86d5657 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -4934,6 +4934,11 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 		cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, true,
 					GFP_KERNEL);
 	} else if (ieee80211_is_action(mgmt->frame_control)) {
+		if (len > BRCMF_FIL_ACTION_FRAME_SIZE + DOT11_MGMT_HDR_LEN) {
+			brcmf_err("invalid action frame length\n");
+			err = -EINVAL;
+			goto exit;
+		}
 		af_params = kzalloc(sizeof(*af_params), GFP_KERNEL);
 		if (af_params == NULL) {
 			brcmf_err("unable to allocate frame\n");
-- 
GitLab


From 58c7ffc0747a3a9145629d4966291f0586703767 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 12 Jul 2017 04:59:45 +0100
Subject: [PATCH 1251/1958] fix a braino in compat_sys_getrlimit()

Reported-and-tested-by: Meelis Roos <mroos@linux.ee>
Fixes: commit d9e968cb9f84 "getrlimit()/setrlimit(): move compat to native"
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 73fc0af147d02..2855ee73acd0e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1362,7 +1362,7 @@ COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
 
 	ret = do_prlimit(current, resource, NULL, &r);
 	if (!ret) {
-		struct rlimit r32;
+		struct compat_rlimit r32;
 		if (r.rlim_cur > COMPAT_RLIM_INFINITY)
 			r32.rlim_cur = COMPAT_RLIM_INFINITY;
 		else
-- 
GitLab


From 6409e84ec58fc4c0085d8921f8e01815dc871971 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Tue, 11 Jul 2017 16:16:24 +0200
Subject: [PATCH 1252/1958] raid5-ppl: use BIOSET_NEED_BVECS when creating
 bioset

This bioset is used for allocating bios with nr_iovecs > 0 so this flag
must be set.

Fixes: 011067b05668 ("blk: replace bioset_create_nobvec() with a flags arg to bioset_create()")
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-ppl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 77cce3573aa85..44ad5baf32068 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
 		goto err;
 	}
 
-	ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
+	ppl_conf->bs = bioset_create(conf->raid_disks, 0, BIOSET_NEED_BVECS);
 	if (!ppl_conf->bs) {
 		ret = -ENOMEM;
 		goto err;
-- 
GitLab


From fa43bc2a5a041c8935c1f6f5f8ab2d57efdd9644 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 12 Jul 2017 18:28:10 +0200
Subject: [PATCH 1253/1958] atyfb: hide unused variable

The vdisplay variable is now only accessed inside of an #ifdef, producing
a harmless warning:

drivers/video/fbdev/aty/atyfb_base.c: In function 'aty_var_to_crtc':
drivers/video/fbdev/aty/atyfb_base.c:805:19: error: unused variable 'vdisplay' [-Werror=unused-variable]

This moves the declaration into the ifdef as well.

Fixes: dd7d958ae912 ("video: fbdev: aty: remove useless variable assignments in aty_var_to_crtc()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/aty/atyfb_base.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index 3142d7f7020f0..b55fdac9c9f51 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c
@@ -802,7 +802,7 @@ static int aty_var_to_crtc(const struct fb_info *info,
 {
 	struct atyfb_par *par = (struct atyfb_par *) info->par;
 	u32 xres, yres, vxres, vyres, xoffset, yoffset, bpp;
-	u32 sync, vmode, vdisplay;
+	u32 sync, vmode;
 	u32 h_total, h_disp, h_sync_strt, h_sync_end, h_sync_dly, h_sync_wid, h_sync_pol;
 	u32 v_total, v_disp, v_sync_strt, v_sync_end, v_sync_wid, v_sync_pol, c_sync;
 	u32 pix_width, dp_pix_width, dp_chain_mask;
@@ -1030,7 +1030,7 @@ static int aty_var_to_crtc(const struct fb_info *info,
 		crtc->gen_cntl |= CRTC_INTERLACE_EN;
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table != 0) {
-		vdisplay = yres;
+		u32 vdisplay = yres;
 		if (vmode & FB_VMODE_DOUBLE)
 			vdisplay <<= 1;
 		crtc->gen_cntl &= ~(CRTC2_EN | CRTC2_PIX_WIDTH);
-- 
GitLab


From 4c99ceda0d067c06f228a27b6870f548d3218cc6 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Date: Wed, 12 Jul 2017 18:28:11 +0200
Subject: [PATCH 1254/1958] fbdev: make get_fb_unmapped_area depends of !MMU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even if CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA flag is selected
do not compile and use get_fb_unmapped_area() if CONFIG_MMU is
also set. This will avoid mmap errors when compiling multi
architectures at same time.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Noralf Trønnes <noralf@tronnes.org>
Cc: Emil Velikov <emil.l.velikov@gmail.com>
Cc: Yannick Fertre <yannick.fertre@st.com>
Cc: Eric Engestrom <eric.engestrom@imgtec.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/core/fbmem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 069fe7960df1c..f20c5d23483c9 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1492,7 +1492,7 @@ __releases(&info->lock)
 	return 0;
 }
 
-#ifdef CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA
+#if defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA) && !defined(CONFIG_MMU)
 unsigned long get_fb_unmapped_area(struct file *filp,
 				   unsigned long addr, unsigned long len,
 				   unsigned long pgoff, unsigned long flags)
@@ -1519,7 +1519,8 @@ static const struct file_operations fb_fops = {
 	.open =		fb_open,
 	.release =	fb_release,
 #if defined(HAVE_ARCH_FB_UNMAPPED_AREA) || \
-    defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA)
+	(defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA) && \
+	 !defined(CONFIG_MMU))
 	.get_unmapped_area = get_fb_unmapped_area,
 #endif
 #ifdef CONFIG_FB_DEFERRED_IO
-- 
GitLab


From 7cdc2d62f9cce8a02eb2a5fbfca6813f04189487 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Thu, 6 Jul 2017 16:33:05 -0700
Subject: [PATCH 1255/1958] kvm: nVMX: Don't set vmcs12 to "launched" when
 VMLAUNCH fails

The VMCS launch state is not set to "launched" unless the VMLAUNCH
actually succeeds. VMLAUNCH failure includes VM-exits with bit 31 set.

Note that this change does not address the general problem that a
failure to launch/resume vmcs02 (i.e. vmx->fail) is not handled
correctly.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7592a18ecc1ca..ef978d5983aee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10435,8 +10435,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 		return 1;
 	}
 
-	vmcs12->launch_state = 1;
-
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
 	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -10810,6 +10808,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
 	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+		vmcs12->launch_state = 1;
+
 		/* vm_entry_intr_info_field is cleared on exit. Emulate this
 		 * instead of reading the real value. */
 		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
-- 
GitLab


From 56a205100d3933f785ca970c58aecedd94ff90b2 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Thu, 6 Jul 2017 16:33:06 -0700
Subject: [PATCH 1256/1958] kvm: nVMX: Validate the I/O bitmaps on nested
 VM-entry

According to the SDM, if the "use I/O bitmaps" VM-execution control is
1, bits 11:0 of each I/O-bitmap address must be 0. Neither address
should set any bits beyond the processor's physical-address width.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ef978d5983aee..22034ac4b5f2c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9595,6 +9595,19 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
 		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }
 
+static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
+					       struct vmcs12 *vmcs12)
+{
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+		return 0;
+
+	if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
+	    !page_address_valid(vcpu, vmcs12->io_bitmap_b))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
 						struct vmcs12 *vmcs12)
 {
@@ -10299,6 +10312,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	    vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
-- 
GitLab


From 5fa99cbe7b666dce6dd8ac55b253778893b9c5df Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Thu, 6 Jul 2017 16:33:07 -0700
Subject: [PATCH 1257/1958] kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls

Allow the L1 guest to specify the last page of addressable guest
physical memory for an L2 MSR permission bitmap. Also remove the
vmcs12_read_any() check that should never fail.

Fixes: 3af18d9c5fe95 ("KVM: nVMX: Prepare for using hardware MSR bitmap")
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 22034ac4b5f2c..e02c7004b64b5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4634,6 +4634,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
 	return true;
 }
 
+static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
 static int init_rmode_tss(struct kvm *kvm)
 {
 	gfn_t fn;
@@ -9611,20 +9616,10 @@ static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
 						struct vmcs12 *vmcs12)
 {
-	int maxphyaddr;
-	u64 addr;
-
 	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
 		return 0;
 
-	if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
-		WARN_ON(1);
-		return -EINVAL;
-	}
-	maxphyaddr = cpuid_maxphyaddr(vcpu);
-
-	if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
-	   ((addr + PAGE_SIZE) >> maxphyaddr))
+	if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
 		return -EINVAL;
 
 	return 0;
-- 
GitLab


From 85fd514e24238a633c971332aa96a2e5c4ddd502 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 7 Jul 2017 12:51:41 -0700
Subject: [PATCH 1258/1958] kvm: nVMX: Shadow "high" parts of shadowed 64-bit
 VMCS fields

Inconsistencies result from shadowing only accesses to the full
64-bits of a 64-bit VMCS field, but not shadowing accesses to the high
32-bits of the field. The "high" part of a 64-bit field should be
shadowed whenever the full 64-bit field is shadowed.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 60 ++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e02c7004b64b5..32db3f5dce7fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3764,6 +3764,25 @@ static void free_kvm_area(void)
 	}
 }
 
+enum vmcs_field_type {
+	VMCS_FIELD_TYPE_U16 = 0,
+	VMCS_FIELD_TYPE_U64 = 1,
+	VMCS_FIELD_TYPE_U32 = 2,
+	VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+};
+
+static inline int vmcs_field_type(unsigned long field)
+{
+	if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
+		return VMCS_FIELD_TYPE_U32;
+	return (field >> 13) & 0x3 ;
+}
+
+static inline int vmcs_field_readonly(unsigned long field)
+{
+	return (((field >> 10) & 0x3) == 1);
+}
+
 static void init_vmcs_shadow_fields(void)
 {
 	int i, j;
@@ -3789,14 +3808,22 @@ static void init_vmcs_shadow_fields(void)
 
 	/* shadowed fields guest access without vmexit */
 	for (i = 0; i < max_shadow_read_write_fields; i++) {
-		clear_bit(shadow_read_write_fields[i],
-			  vmx_vmwrite_bitmap);
-		clear_bit(shadow_read_write_fields[i],
-			  vmx_vmread_bitmap);
+		unsigned long field = shadow_read_write_fields[i];
+
+		clear_bit(field, vmx_vmwrite_bitmap);
+		clear_bit(field, vmx_vmread_bitmap);
+		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
+			clear_bit(field + 1, vmx_vmwrite_bitmap);
+			clear_bit(field + 1, vmx_vmread_bitmap);
+		}
+	}
+	for (i = 0; i < max_shadow_read_only_fields; i++) {
+		unsigned long field = shadow_read_only_fields[i];
+
+		clear_bit(field, vmx_vmread_bitmap);
+		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
+			clear_bit(field + 1, vmx_vmread_bitmap);
 	}
-	for (i = 0; i < max_shadow_read_only_fields; i++)
-		clear_bit(shadow_read_only_fields[i],
-			  vmx_vmread_bitmap);
 }
 
 static __init int alloc_kvm_area(void)
@@ -7219,25 +7246,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
 	return nested_vmx_run(vcpu, false);
 }
 
-enum vmcs_field_type {
-	VMCS_FIELD_TYPE_U16 = 0,
-	VMCS_FIELD_TYPE_U64 = 1,
-	VMCS_FIELD_TYPE_U32 = 2,
-	VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
-};
-
-static inline int vmcs_field_type(unsigned long field)
-{
-	if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
-		return VMCS_FIELD_TYPE_U32;
-	return (field >> 13) & 0x3 ;
-}
-
-static inline int vmcs_field_readonly(unsigned long field)
-{
-	return (((field >> 10) & 0x3) == 1);
-}
-
 /*
  * Read a vmcs12 field. Since these can have varying lengths and we return
  * one type, we chose the biggest type (u64) and zero-extend the return value
-- 
GitLab


From b9091b1c654953cc9615f282704a6df8e31549cf Mon Sep 17 00:00:00 2001
From: "Subhransu S. Prusty" <subhransu.s.prusty@intel.com>
Date: Wed, 12 Jul 2017 20:12:04 +0530
Subject: [PATCH 1259/1958] ALSA: hda - Add hdmi id for a Geminilake variant

Few GLK platform variants report a different vendor id. Add it.
Also add the missing check for GLK in is_haswell_plus().

Signed-off-by: Subhransu S. Prusty <subhransu.s.prusty@intel.com>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_hdmi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 76c85f08bea67..d549f35f39d3e 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -53,9 +53,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809)
 #define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a)
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
+#define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
+				((codec)->core.vendor_id == 0x80862800))
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
 				|| is_skylake(codec) || is_broxton(codec) \
-				|| is_kabylake(codec))
+				|| is_kabylake(codec)) || is_geminilake(codec)
 
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
@@ -3790,6 +3792,7 @@ HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI",	patch_i915_glk_hdmi),
+HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI",	patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",	patch_i915_byt_hdmi),
 HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",	patch_i915_byt_hdmi),
-- 
GitLab


From 34d5ac2af644a10958e144b7bb937d289dfd158d Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Tue, 11 Jul 2017 18:15:08 +0800
Subject: [PATCH 1260/1958] PCI: rockchip: Check for pci_scan_root_bus_bridge()
 failure correctly

pci_scan_root_bus_bridge() returns zero for success, or a negative errno.
A typo in ae13cb9b1926 ("PCI: rockchip: Convert PCI scan API to
pci_scan_root_bus_bridge()") treated zero as a failure.

Fix the typo.

Fixes: ae13cb9b1926 ("PCI: rockchip: Convert PCI scan API to pci_scan_root_bus_bridge()")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
[bhelgaas: changelog]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
---
 drivers/pci/host/pcie-rockchip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index 5acf8694fb23b..7bb9870f6d8ce 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c
@@ -1483,7 +1483,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 	bridge->swizzle_irq = pci_common_swizzle;
 
 	err = pci_scan_root_bus_bridge(bridge);
-	if (!err)
+	if (err < 0)
 		goto err_free_res;
 
 	bus = bridge->bus;
-- 
GitLab


From 340d394a789518018f834ff70f7534fc463d3226 Mon Sep 17 00:00:00 2001
From: Chen Hong <chenhong3@huawei.com>
Date: Sun, 2 Jul 2017 15:11:10 -0700
Subject: [PATCH 1261/1958] Input: i8042 - fix crash at boot time

The driver checks port->exists twice in i8042_interrupt(), first when
trying to assign temporary "serio" variable, and second time when deciding
whether it should call serio_interrupt(). The value of port->exists may
change between the 2 checks, and we may end up calling serio_interrupt()
with a NULL pointer:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000050
IP: [<ffffffff8150feaf>] _spin_lock_irqsave+0x1f/0x40
PGD 0
Oops: 0002 [#1] SMP
last sysfs file:
CPU 0
Modules linked in:

Pid: 1, comm: swapper Not tainted 2.6.32-358.el6.x86_64 #1 QEMU Standard PC (i440FX + PIIX, 1996)
RIP: 0010:[<ffffffff8150feaf>]  [<ffffffff8150feaf>] _spin_lock_irqsave+0x1f/0x40
RSP: 0018:ffff880028203cc0  EFLAGS: 00010082
RAX: 0000000000010000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 0000000000000282 RSI: 0000000000000098 RDI: 0000000000000050
RBP: ffff880028203cc0 R08: ffff88013e79c000 R09: ffff880028203ee0
R10: 0000000000000298 R11: 0000000000000282 R12: 0000000000000050
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000098
FS:  0000000000000000(0000) GS:ffff880028200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000050 CR3: 0000000001a85000 CR4: 00000000001407f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 1, threadinfo ffff88013e79c000, task ffff88013e79b500)
Stack:
ffff880028203d00 ffffffff813de186 ffffffffffffff02 0000000000000000
<d> 0000000000000000 0000000000000000 0000000000000000 0000000000000098
<d> ffff880028203d70 ffffffff813e0162 ffff880028203d20 ffffffff8103b8ac
Call Trace:
<IRQ>
 [<ffffffff813de186>] serio_interrupt+0x36/0xa0
[<ffffffff813e0162>] i8042_interrupt+0x132/0x3a0
[<ffffffff8103b8ac>] ? kvm_clock_read+0x1c/0x20
[<ffffffff8103b8b9>] ? kvm_clock_get_cycles+0x9/0x10
[<ffffffff810e1640>] handle_IRQ_event+0x60/0x170
[<ffffffff8103b154>] ? kvm_guest_apic_eoi_write+0x44/0x50
[<ffffffff810e3d8e>] handle_edge_irq+0xde/0x180
[<ffffffff8100de89>] handle_irq+0x49/0xa0
[<ffffffff81516c8c>] do_IRQ+0x6c/0xf0
[<ffffffff8100b9d3>] ret_from_intr+0x0/0x11
[<ffffffff81076f63>] ? __do_softirq+0x73/0x1e0
[<ffffffff8109b75b>] ? hrtimer_interrupt+0x14b/0x260
[<ffffffff8100c1cc>] ? call_softirq+0x1c/0x30
[<ffffffff8100de05>] ? do_softirq+0x65/0xa0
[<ffffffff81076d95>] ? irq_exit+0x85/0x90
[<ffffffff81516d80>] ? smp_apic_timer_interrupt+0x70/0x9b
[<ffffffff8100bb93>] ? apic_timer_interrupt+0x13/0x20

To avoid the issue let's change the second check to test whether serio is
NULL or not.

Also, let's take i8042_lock in i8042_start() and i8042_stop() instead of
trying to be overly smart and using memory barriers.

Signed-off-by: Chen Hong <chenhong3@huawei.com>
[dtor: take lock in i8042_start()/i8042_stop()]
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index c52da651269b0..824f4c1c1f310 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -436,8 +436,10 @@ static int i8042_start(struct serio *serio)
 {
 	struct i8042_port *port = serio->port_data;
 
+	spin_lock_irq(&i8042_lock);
 	port->exists = true;
-	mb();
+	spin_unlock_irq(&i8042_lock);
+
 	return 0;
 }
 
@@ -450,16 +452,20 @@ static void i8042_stop(struct serio *serio)
 {
 	struct i8042_port *port = serio->port_data;
 
+	spin_lock_irq(&i8042_lock);
 	port->exists = false;
+	port->serio = NULL;
+	spin_unlock_irq(&i8042_lock);
 
 	/*
+	 * We need to make sure that interrupt handler finishes using
+	 * our serio port before we return from this function.
 	 * We synchronize with both AUX and KBD IRQs because there is
 	 * a (very unlikely) chance that AUX IRQ is raised for KBD port
 	 * and vice versa.
 	 */
 	synchronize_irq(I8042_AUX_IRQ);
 	synchronize_irq(I8042_KBD_IRQ);
-	port->serio = NULL;
 }
 
 /*
@@ -576,7 +582,7 @@ static irqreturn_t i8042_interrupt(int irq, void *dev_id)
 
 	spin_unlock_irqrestore(&i8042_lock, flags);
 
-	if (likely(port->exists && !filtered))
+	if (likely(serio && !filtered))
 		serio_interrupt(serio, data, dfl);
 
  out:
-- 
GitLab


From 8c6ae4980e70395cbdfdf605c29673c5a6a89d9a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 30 Jun 2017 12:03:54 -0400
Subject: [PATCH 1262/1958] sunrpc: Allocate up to RPCSVC_MAXPAGES per svc_rqst

svcrdma needs 259 pages allocated to receive 1MB NFSv4.0 WRITE requests:

 - 1 page for the transport header and head iovec
 - 256 pages for the data payload
 - 1 page for the trailing GETATTR request (since NFSD XDR decoding
   does not look for a tail iovec, the GETATTR is stuck at the end
   of the rqstp->rq_arg.pages list)
 - 1 page for building the reply xdr_buf

But RPCSVC_MAXPAGES is already 259 (on x86_64). The problem is that
svc_alloc_arg never allocates that many pages. To address this:

1. The final element of rq_pages always points to NULL. To
   accommodate up to 259 pages in rq_pages, add an extra element
   to rq_pages for the array termination sentinel.

2. Adjust the calculation of "pages" to match how RPCSVC_MAXPAGES
   is calculated, so it can go up to 259. Bruce noted that the
   calculation assumes sv_max_mesg is a multiple of PAGE_SIZE,
   which might not always be true. I didn't change this assumption.

3. Change the loop boundaries to allow 259 pages to be allocated.

Additional clean-up: WARN_ON_ONCE adds an extra conditional branch,
which is basically never taken. And there's no need to dump the
stack here because svc_alloc_arg has only one caller.

Keeping that NULL "array termination sentinel"; there doesn't appear to
be any code that depends on it, only code in nfsd_splice_actor() which
needs the 259th element to be initialized to *something*.  So it's
possible we could just keep the array at 259 elements and drop that
final NULL, but we're being conservative for now.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/svc_xprt.c      | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index eec04982a7ea3..a3f8af9bd543e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -246,7 +246,7 @@ struct svc_rqst {
 	size_t			rq_xprt_hlen;	/* xprt header len */
 	struct xdr_buf		rq_arg;
 	struct xdr_buf		rq_res;
-	struct page *		rq_pages[RPCSVC_MAXPAGES];
+	struct page		*rq_pages[RPCSVC_MAXPAGES + 1];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
 	struct page *		*rq_page_end;  /* one past the last page */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7bfe1fb42addc..d16a8b423c20b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -659,11 +659,13 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	int i;
 
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
-	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
-	WARN_ON_ONCE(pages >= RPCSVC_MAXPAGES);
-	if (pages >= RPCSVC_MAXPAGES)
+	pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
+	if (pages > RPCSVC_MAXPAGES) {
+		pr_warn_once("svc: warning: pages=%u > RPCSVC_MAXPAGES=%lu\n",
+			     pages, RPCSVC_MAXPAGES);
 		/* use as many pages as possible */
-		pages = RPCSVC_MAXPAGES - 1;
+		pages = RPCSVC_MAXPAGES;
+	}
 	for (i = 0; i < pages ; i++)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);
-- 
GitLab


From 026d958b38c628a1b4ced534808945365e2747a5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:24 -0400
Subject: [PATCH 1263/1958] svcrdma: Add recvfrom helpers to svc_rdma_rw.c

svc_rdma_rw.c already contains helpers for the sendto path.
Introduce helpers for the recvfrom path.

The plan is to replace the local NFSD bespoke code that constructs
and posts RDMA Read Work Requests with calls to the rdma_rw API.
This shares code with other RDMA-enabled ULPs that manages the gory
details of buffer registration and posting Work Requests.

This new code also puts all RDMA_NOMSG-specific logic in one place.

Lastly, the use of rqstp->rq_arg.pages is deprecated in favor of
using rqstp->rq_pages directly, for clarity.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h   |   3 +
 net/sunrpc/xprtrdma/svc_rdma_rw.c | 425 +++++++++++++++++++++++++++++-
 2 files changed, 427 insertions(+), 1 deletion(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 3ca991657889f..cf5d5412298b4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -196,6 +196,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 
 /* svc_rdma_rw.c */
 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
+extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
+				    struct svc_rqst *rqstp,
+				    struct svc_rdma_op_ctxt *head, __be32 *p);
 extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
 				     __be32 *wr_ch, struct xdr_buf *xdr);
 extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 3b35364d9a6ba..77d38a4b8d991 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -12,6 +12,9 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
+static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
+
 /* Each R/W context contains state for one chain of RDMA Read or
  * Write Work Requests.
  *
@@ -177,6 +180,7 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
 	info->wi_nsegs = be32_to_cpup(++chunk);
 	info->wi_segs = ++chunk;
 	svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE);
+	info->wi_cc.cc_cqe.done = svc_rdma_write_done;
 	return info;
 }
 
@@ -216,6 +220,76 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 	svc_rdma_write_info_free(info);
 }
 
+/* State for pulling a Read chunk.
+ */
+struct svc_rdma_read_info {
+	struct svc_rdma_op_ctxt		*ri_readctxt;
+	unsigned int			ri_position;
+	unsigned int			ri_pageno;
+	unsigned int			ri_pageoff;
+	unsigned int			ri_chunklen;
+
+	struct svc_rdma_chunk_ctxt	ri_cc;
+};
+
+static struct svc_rdma_read_info *
+svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_read_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return info;
+
+	svc_rdma_cc_init(rdma, &info->ri_cc, DMA_FROM_DEVICE);
+	info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done;
+	return info;
+}
+
+static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
+{
+	svc_rdma_cc_release(&info->ri_cc);
+	kfree(info);
+}
+
+/**
+ * svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion
+ *
+ */
+static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_chunk_ctxt *cc =
+			container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_read_info *info =
+			container_of(cc, struct svc_rdma_read_info, ri_cc);
+
+	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+	wake_up(&rdma->sc_send_wait);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
+		svc_rdma_put_context(info->ri_readctxt, 1);
+	} else {
+		spin_lock(&rdma->sc_rq_dto_lock);
+		list_add_tail(&info->ri_readctxt->list,
+			      &rdma->sc_read_complete_q);
+		spin_unlock(&rdma->sc_rq_dto_lock);
+
+		set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+		svc_xprt_enqueue(&rdma->sc_xprt);
+	}
+
+	svc_rdma_read_info_free(info);
+}
+
 /* This function sleeps when the transport's Send Queue is congested.
  *
  * Assumptions:
@@ -335,7 +409,6 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
 	__be32 *seg;
 	int ret;
 
-	cc->cc_cqe.done = svc_rdma_write_done;
 	seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
 	do {
 		unsigned int write_len;
@@ -515,3 +588,353 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
 	svc_rdma_write_info_free(info);
 	return ret;
 }
+
+static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
+				       struct svc_rqst *rqstp,
+				       u32 rkey, u32 len, u64 offset)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
+	struct svc_rdma_rw_ctxt *ctxt;
+	unsigned int sge_no, seg_len;
+	struct scatterlist *sg;
+	int ret;
+
+	sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
+	ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
+	if (!ctxt)
+		goto out_noctx;
+	ctxt->rw_nents = sge_no;
+
+	dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n",
+		len, offset, rkey, sge_no);
+
+	sg = ctxt->rw_sg_table.sgl;
+	for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
+		seg_len = min_t(unsigned int, len,
+				PAGE_SIZE - info->ri_pageoff);
+
+		head->arg.pages[info->ri_pageno] =
+			rqstp->rq_pages[info->ri_pageno];
+		if (!info->ri_pageoff)
+			head->count++;
+
+		sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
+			    seg_len, info->ri_pageoff);
+		sg = sg_next(sg);
+
+		info->ri_pageoff += seg_len;
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+		len -= seg_len;
+
+		/* Safety check */
+		if (len &&
+		    &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end)
+			goto out_overrun;
+	}
+
+	ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp,
+			       cc->cc_rdma->sc_port_num,
+			       ctxt->rw_sg_table.sgl, ctxt->rw_nents,
+			       0, offset, rkey, DMA_FROM_DEVICE);
+	if (ret < 0)
+		goto out_initerr;
+
+	list_add(&ctxt->rw_list, &cc->cc_rwctxts);
+	cc->cc_sqecount += ret;
+	return 0;
+
+out_noctx:
+	dprintk("svcrdma: no R/W ctxs available\n");
+	return -ENOMEM;
+
+out_overrun:
+	dprintk("svcrdma: request overruns rq_pages\n");
+	return -EINVAL;
+
+out_initerr:
+	svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
+	pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+	return -EIO;
+}
+
+static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
+				     struct svc_rdma_read_info *info,
+				     __be32 *p)
+{
+	int ret;
+
+	info->ri_chunklen = 0;
+	while (*p++ != xdr_zero) {
+		u32 rs_handle, rs_length;
+		u64 rs_offset;
+
+		if (be32_to_cpup(p++) != info->ri_position)
+			break;
+		rs_handle = be32_to_cpup(p++);
+		rs_length = be32_to_cpup(p++);
+		p = xdr_decode_hyper(p, &rs_offset);
+
+		ret = svc_rdma_build_read_segment(info, rqstp,
+						  rs_handle, rs_length,
+						  rs_offset);
+		if (ret < 0)
+			break;
+
+		info->ri_chunklen += rs_length;
+	}
+
+	return ret;
+}
+
+/* If there is inline content following the Read chunk, append it to
+ * the page list immediately following the data payload. This has to
+ * be done after the reader function has determined how many pages
+ * were consumed for RDMA Read.
+ *
+ * On entry, ri_pageno and ri_pageoff point directly to the end of the
+ * page list. On exit, both have been updated to the new "next byte".
+ *
+ * Assumptions:
+ *	- Inline content fits entirely in rq_pages[0]
+ *	- Trailing content is only a handful of bytes
+ */
+static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
+			      struct svc_rdma_read_info *info)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	unsigned int tail_length, remaining;
+	u8 *srcp, *destp;
+
+	/* Assert that all inline content fits in page 0. This is an
+	 * implementation limit, not a protocol limit.
+	 */
+	if (head->arg.head[0].iov_len > PAGE_SIZE) {
+		pr_warn_once("svcrdma: too much trailing inline content\n");
+		return -EINVAL;
+	}
+
+	srcp = head->arg.head[0].iov_base;
+	srcp += info->ri_position;
+	tail_length = head->arg.head[0].iov_len - info->ri_position;
+	remaining = tail_length;
+
+	/* If there is room on the last page in the page list, try to
+	 * fit the trailing content there.
+	 */
+	if (info->ri_pageoff > 0) {
+		unsigned int len;
+
+		len = min_t(unsigned int, remaining,
+			    PAGE_SIZE - info->ri_pageoff);
+		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
+		destp += info->ri_pageoff;
+
+		memcpy(destp, srcp, len);
+		srcp += len;
+		destp += len;
+		info->ri_pageoff += len;
+		remaining -= len;
+
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+	}
+
+	/* Otherwise, a fresh page is needed. */
+	if (remaining) {
+		head->arg.pages[info->ri_pageno] =
+				rqstp->rq_pages[info->ri_pageno];
+		head->count++;
+
+		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
+		memcpy(destp, srcp, remaining);
+		info->ri_pageoff += remaining;
+	}
+
+	head->arg.page_len += tail_length;
+	head->arg.len += tail_length;
+	head->arg.buflen += tail_length;
+	return 0;
+}
+
+/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
+ * data lands in the page list of head->arg.pages.
+ *
+ * Currently NFSD does not look at the head->arg.tail[0] iovec.
+ * Therefore, XDR round-up of the Read chunk and trailing
+ * inline content must both be added at the end of the pagelist.
+ */
+static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
+					    struct svc_rdma_read_info *info,
+					    __be32 *p)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	int ret;
+
+	dprintk("svcrdma: Reading Read chunk at position %u\n",
+		info->ri_position);
+
+	info->ri_pageno = head->hdr_count;
+	info->ri_pageoff = 0;
+
+	ret = svc_rdma_build_read_chunk(rqstp, info, p);
+	if (ret < 0)
+		goto out;
+
+	/* Read chunk may need XDR round-up (see RFC 5666, s. 3.7).
+	 */
+	if (info->ri_chunklen & 3) {
+		u32 padlen = 4 - (info->ri_chunklen & 3);
+
+		info->ri_chunklen += padlen;
+
+		/* NB: data payload always starts on XDR alignment,
+		 * thus the pad can never contain a page boundary.
+		 */
+		info->ri_pageoff += padlen;
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+	}
+
+	head->arg.page_len = info->ri_chunklen;
+	head->arg.len += info->ri_chunklen;
+	head->arg.buflen += info->ri_chunklen;
+
+	if (info->ri_position < head->arg.head[0].iov_len) {
+		ret = svc_rdma_copy_tail(rqstp, info);
+		if (ret < 0)
+			goto out;
+	}
+	head->arg.head[0].iov_len = info->ri_position;
+
+out:
+	return ret;
+}
+
+/* Construct RDMA Reads to pull over a Position Zero Read chunk.
+ * The start of the data lands in the first page just after
+ * the Transport header, and the rest lands in the page list of
+ * head->arg.pages.
+ *
+ * Assumptions:
+ *	- A PZRC has an XDR-aligned length (no implicit round-up).
+ *	- There can be no trailing inline content (IOW, we assume
+ *	  a PZRC is never sent in an RDMA_MSG message, though it's
+ *	  allowed by spec).
+ */
+static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
+					struct svc_rdma_read_info *info,
+					__be32 *p)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	int ret;
+
+	dprintk("svcrdma: Reading Position Zero Read chunk\n");
+
+	info->ri_pageno = head->hdr_count - 1;
+	info->ri_pageoff = offset_in_page(head->byte_len);
+
+	ret = svc_rdma_build_read_chunk(rqstp, info, p);
+	if (ret < 0)
+		goto out;
+
+	head->arg.len += info->ri_chunklen;
+	head->arg.buflen += info->ri_chunklen;
+
+	if (head->arg.len <= head->sge[0].length) {
+		/* Transport header and RPC message fit entirely
+		 * in page where head iovec resides.
+		 */
+		head->arg.head[0].iov_len = info->ri_chunklen;
+	} else {
+		/* Transport header and part of RPC message reside
+		 * in the head iovec's page.
+		 */
+		head->arg.head[0].iov_len =
+				head->sge[0].length - head->byte_len;
+		head->arg.page_len =
+				info->ri_chunklen - head->arg.head[0].iov_len;
+	}
+
+out:
+	return ret;
+}
+
+/**
+ * svc_rdma_recv_read_chunk - Pull a Read chunk from the client
+ * @rdma: controlling RDMA transport
+ * @rqstp: set of pages to use as Read sink buffers
+ * @head: pages under I/O collect here
+ * @p: pointer to start of Read chunk
+ *
+ * Returns:
+ *	%0 if all needed RDMA Reads were posted successfully,
+ *	%-EINVAL if client provided too many segments,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ *
+ * Assumptions:
+ * - All Read segments in @p have the same Position value.
+ */
+int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
+			     struct svc_rdma_op_ctxt *head, __be32 *p)
+{
+	struct svc_rdma_read_info *info;
+	struct page **page;
+	int ret;
+
+	/* The request (with page list) is constructed in
+	 * head->arg. Pages involved with RDMA Read I/O are
+	 * transferred there.
+	 */
+	head->hdr_count = head->count;
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = head->pages;
+	head->arg.page_base = 0;
+	head->arg.page_len = 0;
+	head->arg.len = rqstp->rq_arg.len;
+	head->arg.buflen = rqstp->rq_arg.buflen;
+
+	info = svc_rdma_read_info_alloc(rdma);
+	if (!info)
+		return -ENOMEM;
+	info->ri_readctxt = head;
+
+	info->ri_position = be32_to_cpup(p + 1);
+	if (info->ri_position)
+		ret = svc_rdma_build_normal_read_chunk(rqstp, info, p);
+	else
+		ret = svc_rdma_build_pz_read_chunk(rqstp, info, p);
+
+	/* Mark the start of the pages that can be used for the reply */
+	if (info->ri_pageoff > 0)
+		info->ri_pageno++;
+	rqstp->rq_respages = &rqstp->rq_pages[info->ri_pageno];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+	if (ret < 0)
+		goto out;
+
+	ret = svc_rdma_post_chunk_ctxt(&info->ri_cc);
+
+out:
+	/* Read sink pages have been moved from rqstp->rq_pages to
+	 * head->arg.pages. Force svc_recv to refill those slots
+	 * in rq_pages.
+	 */
+	for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)
+		*page = NULL;
+
+	if (ret < 0)
+		svc_rdma_read_info_free(info);
+	return ret;
+}
-- 
GitLab


From cafc739892f34b9090413179ca259409fc43bfae Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:33 -0400
Subject: [PATCH 1264/1958] svcrdma: Use generic RDMA R/W API in RPC Call path

The current svcrdma recvfrom code path has a lot of detail about
registration mode and the type of port (iWARP, IB, etc).

Instead, use the RDMA core's generic R/W API. This shares code with
other RDMA-enabled ULPs that manages the gory details of buffer
registration and the posting of RDMA Read Work Requests.

Since the Read list marshaling code is being replaced, I took the
opportunity to replace C structure-based XDR encoding code with more
portable code that uses pointer arithmetic.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  14 -
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 547 +++++------------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  13 -
 3 files changed, 106 insertions(+), 468 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index cf5d5412298b4..b1ba19ba10719 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -82,10 +82,7 @@ struct svc_rdma_op_ctxt {
 	int hdr_count;
 	struct xdr_buf arg;
 	struct ib_cqe cqe;
-	struct ib_cqe reg_cqe;
-	struct ib_cqe inv_cqe;
 	u32 byte_len;
-	u32 position;
 	struct svcxprt_rdma *xprt;
 	unsigned long flags;
 	enum dma_data_direction direction;
@@ -116,7 +113,6 @@ struct svcxprt_rdma {
 	struct list_head     sc_accept_q;	/* Conn. waiting accept */
 	int		     sc_ord;		/* RDMA read limit */
 	int                  sc_max_sge;
-	int                  sc_max_sge_rd;	/* max sge for read target */
 	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
 
 	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
@@ -141,10 +137,6 @@ struct svcxprt_rdma {
 	struct ib_qp         *sc_qp;
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
-	int		     (*sc_reader)(struct svcxprt_rdma *,
-					  struct svc_rqst *,
-					  struct svc_rdma_op_ctxt *,
-					  int *, u32 *, u32, u32, u64, bool);
 	u32		     sc_dev_caps;	/* distilled device caps */
 	unsigned int	     sc_frmr_pg_list_len;
 	struct list_head     sc_frmr_q;
@@ -187,12 +179,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
 
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
-extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
-			       struct svc_rdma_op_ctxt *, int *, u32 *,
-			       u32, u32, u64, bool);
-extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
-				struct svc_rdma_op_ctxt *, int *, u32 *,
-				u32, u32, u64, bool);
 
 /* svc_rdma_rw.c */
 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 1452bd02d857f..5fbcb73dd68d8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -41,13 +41,66 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
-#include <linux/spinlock.h>
+/* Operation
+ *
+ * The main entry point is svc_rdma_recvfrom. This is called from
+ * svc_recv when the transport indicates there is incoming data to
+ * be read. "Data Ready" is signaled when an RDMA Receive completes,
+ * or when a set of RDMA Reads complete.
+ *
+ * An svc_rqst is passed in. This structure contains an array of
+ * free pages (rq_pages) that will contain the incoming RPC message.
+ *
+ * Short messages are moved directly into svc_rqst::rq_arg, and
+ * the RPC Call is ready to be processed by the Upper Layer.
+ * svc_rdma_recvfrom returns the length of the RPC Call message,
+ * completing the reception of the RPC Call.
+ *
+ * However, when an incoming message has Read chunks,
+ * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
+ * data payload from the client. svc_rdma_recvfrom sets up the
+ * RDMA Reads using pages in svc_rqst::rq_pages, which are
+ * transferred to an svc_rdma_op_ctxt for the duration of the
+ * I/O. svc_rdma_recvfrom then returns zero, since the RPC message
+ * is still not yet ready.
+ *
+ * When the Read chunk payloads have become available on the
+ * server, "Data Ready" is raised again, and svc_recv calls
+ * svc_rdma_recvfrom again. This second call may use a different
+ * svc_rqst than the first one, thus any information that needs
+ * to be preserved across these two calls is kept in an
+ * svc_rdma_op_ctxt.
+ *
+ * The second call to svc_rdma_recvfrom performs final assembly
+ * of the RPC Call message, using the RDMA Read sink pages kept in
+ * the svc_rdma_op_ctxt. The xdr_buf is copied from the
+ * svc_rdma_op_ctxt to the second svc_rqst. The second call returns
+ * the length of the completed RPC Call message.
+ *
+ * Page Management
+ *
+ * Pages under I/O must be transferred from the first svc_rqst to an
+ * svc_rdma_op_ctxt before the first svc_rdma_recvfrom call returns.
+ *
+ * The first svc_rqst supplies pages for RDMA Reads. These are moved
+ * from rqstp::rq_pages into ctxt::pages. The consumed elements of
+ * the rq_pages array are set to NULL and refilled with the first
+ * svc_rdma_recvfrom call returns.
+ *
+ * During the second svc_rdma_recvfrom call, RDMA Read sink pages
+ * are transferred from the svc_rdma_op_ctxt to the second svc_rqst
+ * (see rdma_read_complete() below).
+ */
+
 #include <asm/unaligned.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+
+#include <linux/spinlock.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_rdma.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
@@ -61,7 +114,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 			       struct svc_rdma_op_ctxt *ctxt,
 			       u32 byte_count)
 {
-	struct rpcrdma_msg *rmsgp;
 	struct page *page;
 	u32 bc;
 	int sge_no;
@@ -85,13 +137,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.page_len = bc;
 	rqstp->rq_arg.page_base = 0;
 
-	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
-	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-	if (rmsgp->rm_type == rdma_nomsg)
-		rqstp->rq_arg.pages = &rqstp->rq_pages[0];
-	else
-		rqstp->rq_arg.pages = &rqstp->rq_pages[1];
-
 	sge_no = 1;
 	while (bc && sge_no < ctxt->count) {
 		page = ctxt->pages[sge_no];
@@ -320,395 +365,6 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
 	return -EINVAL;
 }
 
-/* Issue an RDMA_READ using the local lkey to map the data sink */
-int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
-			struct svc_rqst *rqstp,
-			struct svc_rdma_op_ctxt *head,
-			int *page_no,
-			u32 *page_offset,
-			u32 rs_handle,
-			u32 rs_length,
-			u64 rs_offset,
-			bool last)
-{
-	struct ib_rdma_wr read_wr;
-	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
-	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
-	int ret, read, pno;
-	u32 pg_off = *page_offset;
-	u32 pg_no = *page_no;
-
-	ctxt->direction = DMA_FROM_DEVICE;
-	ctxt->read_hdr = head;
-	pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd);
-	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
-		     rs_length);
-
-	for (pno = 0; pno < pages_needed; pno++) {
-		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
-
-		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
-		head->arg.page_len += len;
-
-		head->arg.len += len;
-		if (!pg_off)
-			head->count++;
-		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-		ctxt->sge[pno].addr =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					head->arg.pages[pg_no], pg_off,
-					PAGE_SIZE - pg_off,
-					DMA_FROM_DEVICE);
-		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
-					   ctxt->sge[pno].addr);
-		if (ret)
-			goto err;
-		svc_rdma_count_mappings(xprt, ctxt);
-
-		ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
-		ctxt->sge[pno].length = len;
-		ctxt->count++;
-
-		/* adjust offset and wrap to next page if needed */
-		pg_off += len;
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			pg_no++;
-		}
-		rs_length -= len;
-	}
-
-	if (last && rs_length == 0)
-		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-
-	memset(&read_wr, 0, sizeof(read_wr));
-	ctxt->cqe.done = svc_rdma_wc_read;
-	read_wr.wr.wr_cqe = &ctxt->cqe;
-	read_wr.wr.opcode = IB_WR_RDMA_READ;
-	read_wr.wr.send_flags = IB_SEND_SIGNALED;
-	read_wr.rkey = rs_handle;
-	read_wr.remote_addr = rs_offset;
-	read_wr.wr.sg_list = ctxt->sge;
-	read_wr.wr.num_sge = pages_needed;
-
-	ret = svc_rdma_send(xprt, &read_wr.wr);
-	if (ret) {
-		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		goto err;
-	}
-
-	/* return current location in page array */
-	*page_no = pg_no;
-	*page_offset = pg_off;
-	ret = read;
-	atomic_inc(&rdma_stat_read);
-	return ret;
- err:
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 0);
-	return ret;
-}
-
-/* Issue an RDMA_READ using an FRMR to map the data sink */
-int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
-			 struct svc_rqst *rqstp,
-			 struct svc_rdma_op_ctxt *head,
-			 int *page_no,
-			 u32 *page_offset,
-			 u32 rs_handle,
-			 u32 rs_length,
-			 u64 rs_offset,
-			 bool last)
-{
-	struct ib_rdma_wr read_wr;
-	struct ib_send_wr inv_wr;
-	struct ib_reg_wr reg_wr;
-	u8 key;
-	int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
-	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
-	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
-	int ret, read, pno, dma_nents, n;
-	u32 pg_off = *page_offset;
-	u32 pg_no = *page_no;
-
-	if (IS_ERR(frmr))
-		return -ENOMEM;
-
-	ctxt->direction = DMA_FROM_DEVICE;
-	ctxt->frmr = frmr;
-	nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
-	read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
-
-	frmr->direction = DMA_FROM_DEVICE;
-	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-	frmr->sg_nents = nents;
-
-	for (pno = 0; pno < nents; pno++) {
-		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
-
-		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
-		head->arg.page_len += len;
-		head->arg.len += len;
-		if (!pg_off)
-			head->count++;
-
-		sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
-			    len, pg_off);
-
-		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-		/* adjust offset and wrap to next page if needed */
-		pg_off += len;
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			pg_no++;
-		}
-		rs_length -= len;
-	}
-
-	if (last && rs_length == 0)
-		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-
-	dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
-				  frmr->sg, frmr->sg_nents,
-				  frmr->direction);
-	if (!dma_nents) {
-		pr_err("svcrdma: failed to dma map sg %p\n",
-		       frmr->sg);
-		return -ENOMEM;
-	}
-
-	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
-	if (unlikely(n != frmr->sg_nents)) {
-		pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
-		       frmr->mr, n, frmr->sg_nents);
-		return n < 0 ? n : -EINVAL;
-	}
-
-	/* Bump the key */
-	key = (u8)(frmr->mr->lkey & 0x000000FF);
-	ib_update_fast_reg_key(frmr->mr, ++key);
-
-	ctxt->sge[0].addr = frmr->mr->iova;
-	ctxt->sge[0].lkey = frmr->mr->lkey;
-	ctxt->sge[0].length = frmr->mr->length;
-	ctxt->count = 1;
-	ctxt->read_hdr = head;
-
-	/* Prepare REG WR */
-	ctxt->reg_cqe.done = svc_rdma_wc_reg;
-	reg_wr.wr.wr_cqe = &ctxt->reg_cqe;
-	reg_wr.wr.opcode = IB_WR_REG_MR;
-	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
-	reg_wr.wr.num_sge = 0;
-	reg_wr.mr = frmr->mr;
-	reg_wr.key = frmr->mr->lkey;
-	reg_wr.access = frmr->access_flags;
-	reg_wr.wr.next = &read_wr.wr;
-
-	/* Prepare RDMA_READ */
-	memset(&read_wr, 0, sizeof(read_wr));
-	ctxt->cqe.done = svc_rdma_wc_read;
-	read_wr.wr.wr_cqe = &ctxt->cqe;
-	read_wr.wr.send_flags = IB_SEND_SIGNALED;
-	read_wr.rkey = rs_handle;
-	read_wr.remote_addr = rs_offset;
-	read_wr.wr.sg_list = ctxt->sge;
-	read_wr.wr.num_sge = 1;
-	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
-		read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
-		read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
-	} else {
-		read_wr.wr.opcode = IB_WR_RDMA_READ;
-		read_wr.wr.next = &inv_wr;
-		/* Prepare invalidate */
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		ctxt->inv_cqe.done = svc_rdma_wc_inv;
-		inv_wr.wr_cqe = &ctxt->inv_cqe;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
-		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
-	}
-
-	/* Post the chain */
-	ret = svc_rdma_send(xprt, &reg_wr.wr);
-	if (ret) {
-		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		goto err;
-	}
-
-	/* return current location in page array */
-	*page_no = pg_no;
-	*page_offset = pg_off;
-	ret = read;
-	atomic_inc(&rdma_stat_read);
-	return ret;
- err:
-	svc_rdma_put_context(ctxt, 0);
-	svc_rdma_put_frmr(xprt, frmr);
-	return ret;
-}
-
-/* If there was additional inline content, append it to the end of arg.pages.
- * Tail copy has to be done after the reader function has determined how many
- * pages are needed for RDMA READ.
- */
-static int
-rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
-	       u32 position, u32 byte_count, u32 page_offset, int page_no)
-{
-	char *srcp, *destp;
-
-	srcp = head->arg.head[0].iov_base + position;
-	byte_count = head->arg.head[0].iov_len - position;
-	if (byte_count > PAGE_SIZE) {
-		dprintk("svcrdma: large tail unsupported\n");
-		return 0;
-	}
-
-	/* Fit as much of the tail on the current page as possible */
-	if (page_offset != PAGE_SIZE) {
-		destp = page_address(rqstp->rq_arg.pages[page_no]);
-		destp += page_offset;
-		while (byte_count--) {
-			*destp++ = *srcp++;
-			page_offset++;
-			if (page_offset == PAGE_SIZE && byte_count)
-				goto more;
-		}
-		goto done;
-	}
-
-more:
-	/* Fit the rest on the next page */
-	page_no++;
-	destp = page_address(rqstp->rq_arg.pages[page_no]);
-	while (byte_count--)
-		*destp++ = *srcp++;
-
-	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-done:
-	byte_count = head->arg.head[0].iov_len - position;
-	head->arg.page_len += byte_count;
-	head->arg.len += byte_count;
-	head->arg.buflen += byte_count;
-	return 1;
-}
-
-/* Returns the address of the first read chunk or <nul> if no read chunk
- * is present
- */
-static struct rpcrdma_read_chunk *
-svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
-{
-	struct rpcrdma_read_chunk *ch =
-		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-
-	if (ch->rc_discrim == xdr_zero)
-		return NULL;
-	return ch;
-}
-
-static int rdma_read_chunks(struct svcxprt_rdma *xprt,
-			    struct rpcrdma_msg *rmsgp,
-			    struct svc_rqst *rqstp,
-			    struct svc_rdma_op_ctxt *head)
-{
-	int page_no, ret;
-	struct rpcrdma_read_chunk *ch;
-	u32 handle, page_offset, byte_count;
-	u32 position;
-	u64 rs_offset;
-	bool last;
-
-	/* If no read list is present, return 0 */
-	ch = svc_rdma_get_read_chunk(rmsgp);
-	if (!ch)
-		return 0;
-
-	/* The request is completed when the RDMA_READs complete. The
-	 * head context keeps all the pages that comprise the
-	 * request.
-	 */
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->hdr_count = head->count;
-	head->arg.page_base = 0;
-	head->arg.page_len = 0;
-	head->arg.len = rqstp->rq_arg.len;
-	head->arg.buflen = rqstp->rq_arg.buflen;
-
-	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
-	position = be32_to_cpu(ch->rc_position);
-	if (position == 0) {
-		head->arg.pages = &head->pages[0];
-		page_offset = head->byte_len;
-	} else {
-		head->arg.pages = &head->pages[head->count];
-		page_offset = 0;
-	}
-
-	ret = 0;
-	page_no = 0;
-	for (; ch->rc_discrim != xdr_zero; ch++) {
-		if (be32_to_cpu(ch->rc_position) != position)
-			goto err;
-
-		handle = be32_to_cpu(ch->rc_target.rs_handle),
-		byte_count = be32_to_cpu(ch->rc_target.rs_length);
-		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
-				 &rs_offset);
-
-		while (byte_count > 0) {
-			last = (ch + 1)->rc_discrim == xdr_zero;
-			ret = xprt->sc_reader(xprt, rqstp, head,
-					      &page_no, &page_offset,
-					      handle, byte_count,
-					      rs_offset, last);
-			if (ret < 0)
-				goto err;
-			byte_count -= ret;
-			rs_offset += ret;
-			head->arg.buflen += ret;
-		}
-	}
-
-	/* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
-	if (page_offset & 3) {
-		u32 pad = 4 - (page_offset & 3);
-
-		head->arg.tail[0].iov_len += pad;
-		head->arg.len += pad;
-		head->arg.buflen += pad;
-		page_offset += pad;
-	}
-
-	ret = 1;
-	if (position && position < head->arg.head[0].iov_len)
-		ret = rdma_copy_tail(rqstp, head, position,
-				     byte_count, page_offset, page_no);
-	head->arg.head[0].iov_len = position;
-	head->position = position;
-
- err:
-	/* Detach arg pages. svc_recv will replenish them */
-	for (page_no = 0;
-	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
-		rqstp->rq_pages[page_no] = NULL;
-
-	return ret;
-}
-
 static void rdma_read_complete(struct svc_rqst *rqstp,
 			       struct svc_rdma_op_ctxt *head)
 {
@@ -720,24 +376,9 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
 		rqstp->rq_pages[page_no] = head->pages[page_no];
 	}
 
-	/* Adjustments made for RDMA_NOMSG type requests */
-	if (head->position == 0) {
-		if (head->arg.len <= head->sge[0].length) {
-			head->arg.head[0].iov_len = head->arg.len -
-							head->byte_len;
-			head->arg.page_len = 0;
-		} else {
-			head->arg.head[0].iov_len = head->sge[0].length -
-								head->byte_len;
-			head->arg.page_len = head->arg.len -
-						head->sge[0].length;
-		}
-	}
-
 	/* Point rq_arg.pages past header */
 	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
 	rqstp->rq_arg.page_len = head->arg.page_len;
-	rqstp->rq_arg.page_base = head->arg.page_base;
 
 	/* rq_respages starts after the last arg page */
 	rqstp->rq_respages = &rqstp->rq_pages[page_no];
@@ -834,10 +475,35 @@ static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
 	return true;
 }
 
-/*
- * Set up the rqstp thread context to point to the RQ buffer. If
- * necessary, pull additional data from the client with an RDMA_READ
- * request.
+/**
+ * svc_rdma_recvfrom - Receive an RPC call
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Returns:
+ *	The positive number of bytes in the RPC Call message,
+ *	%0 if there were no Calls ready to return,
+ *	%-EINVAL if the Read chunk data is too large,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ *
+ * Called in a loop when XPT_DATA is set. XPT_DATA is cleared only
+ * when there are no remaining ctxt's to process.
+ *
+ * The next ctxt is removed from the "receive" lists.
+ *
+ * - If the ctxt completes a Read, then finish assembling the Call
+ *   message and return the number of bytes in the message.
+ *
+ * - If the ctxt completes a Receive, then construct the Call
+ *   message from the contents of the Receive buffer.
+ *
+ *   - If there are no Read chunks in this message, then finish
+ *     assembling the Call message and return the number of bytes
+ *     in the message.
+ *
+ *   - If there are Read chunks in this message, post Read WRs to
+ *     pull that payload and return 0.
  */
 int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 {
@@ -845,11 +511,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	struct svcxprt_rdma *rdma_xprt =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 	struct svc_rdma_op_ctxt *ctxt;
-	struct rpcrdma_msg *rmsgp;
+	__be32 *p;
 	int ret;
 
-	dprintk("svcrdma: rqstp=%p\n", rqstp);
-
 	spin_lock(&rdma_xprt->sc_rq_dto_lock);
 	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
 		ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
@@ -870,7 +534,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	}
 	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
 
-	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
+	dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n",
 		ctxt, rdma_xprt, rqstp);
 	atomic_inc(&rdma_stat_recv);
 
@@ -878,7 +542,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
 
 	/* Decode the RDMA header. */
-	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
 	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
 	if (ret < 0)
 		goto out_err;
@@ -886,9 +550,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		goto out_drop;
 	rqstp->rq_xprt_hlen = ret;
 
-	if (svc_rdma_is_backchannel_reply(xprt, &rmsgp->rm_xid)) {
-		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt,
-					       &rmsgp->rm_xid,
+	if (svc_rdma_is_backchannel_reply(xprt, p)) {
+		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
 					       &rqstp->rq_arg);
 		svc_rdma_put_context(ctxt, 0);
 		if (ret)
@@ -896,16 +559,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		return ret;
 	}
 
-	/* Read read-list data. */
-	ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
-	if (ret > 0) {
-		/* read-list posted, defer until data received from client. */
-		goto defer;
-	} else if (ret < 0) {
-		/* Post of read-list failed, free context. */
-		svc_rdma_put_context(ctxt, 1);
-		return 0;
-	}
+	p += rpcrdma_fixed_maxsz;
+	if (*p != xdr_zero)
+		goto out_readchunk;
 
 complete:
 	ret = rqstp->rq_arg.head[0].iov_len
@@ -921,13 +577,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	svc_xprt_copy_addrs(rqstp, xprt);
 	return ret;
 
+out_readchunk:
+	ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p);
+	if (ret < 0)
+		goto out_postfail;
+	return 0;
+
 out_err:
-	svc_rdma_send_error(rdma_xprt, &rmsgp->rm_xid, ret);
+	svc_rdma_send_error(rdma_xprt, p, ret);
 	svc_rdma_put_context(ctxt, 0);
 	return 0;
 
-defer:
-	return 0;
+out_postfail:
+	if (ret == -EINVAL)
+		svc_rdma_send_error(rdma_xprt, p, ret);
+	svc_rdma_put_context(ctxt, 1);
+	return ret;
 
 out_drop:
 	svc_rdma_put_context(ctxt, 1);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index a9d9cb1ba4c60..72d2dcdca2e12 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -908,8 +908,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 * capabilities of this particular device */
 	newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
 				  (size_t)RPCSVC_MAXPAGES);
-	newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
-				       RPCSVC_MAXPAGES);
 	newxprt->sc_max_req_size = svcrdma_max_req_size;
 	newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
 					 svcrdma_max_requests);
@@ -998,12 +996,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 * NB:	iWARP requires remote write access for the data sink
 	 *	of an RDMA_READ. IB does not.
 	 */
-	newxprt->sc_reader = rdma_read_chunk_lcl;
 	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
 		newxprt->sc_frmr_pg_list_len =
 			dev->attrs.max_fast_reg_page_list_len;
 		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
-		newxprt->sc_reader = rdma_read_chunk_frmr;
 	} else
 		newxprt->sc_snd_w_inv = false;
 
@@ -1056,7 +1052,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
 	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
 	dprintk("    max_sge         : %d\n", newxprt->sc_max_sge);
-	dprintk("    max_sge_rd      : %d\n", newxprt->sc_max_sge_rd);
 	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
 	dprintk("    ord             : %d\n", newxprt->sc_ord);
@@ -1117,12 +1112,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		pr_err("svcrdma: sc_xprt still in use? (%d)\n",
 		       kref_read(&xprt->xpt_ref));
 
-	/*
-	 * Destroy queued, but not processed read completions. Note
-	 * that this cleanup has to be done before destroying the
-	 * cm_id because the device ptr is needed to unmap the dma in
-	 * svc_rdma_put_context.
-	 */
 	while (!list_empty(&rdma->sc_read_complete_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_first_entry(&rdma->sc_read_complete_q,
@@ -1130,8 +1119,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		list_del(&ctxt->list);
 		svc_rdma_put_context(ctxt, 1);
 	}
-
-	/* Destroy queued, but not processed recv completions */
 	while (!list_empty(&rdma->sc_rq_dto_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_first_entry(&rdma->sc_rq_dto_q,
-- 
GitLab


From 71641d99ce037ea226f94d5e08f2a8f71eba08f4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:41 -0400
Subject: [PATCH 1265/1958] svcrdma: Properly compute .len and .buflen for
 received RPC Calls

When an RPC-over-RDMA request is received, the Receive buffer
contains a Transport Header possibly followed by an RPC message.

Even though rq_arg.head[0] (as passed to NFSD) does not contain the
Transport Header header, currently rq_arg.len includes the size of
the Transport Header.

That violates the intent of the xdr_buf API contract. .buflen should
include everything, but .len should be exactly the length of the RPC
message in the buffer.

The rq_arg fields are summed together at the end of
svc_rdma_recvfrom to obtain the correct return value. rq_arg.len
really ought to contain the correct number of bytes already, but it
currently doesn't due to the above misbehavior.

Let's instead ensure that .buflen includes the length of the
transport header, and that .len is always equal to head.iov_len +
.page_len + tail.iov_len .

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 14 ++++----------
 net/sunrpc/xprtrdma/svc_rdma_rw.c       |  2 +-
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 5fbcb73dd68d8..ad4bd62eebf1a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -143,7 +143,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 		put_page(rqstp->rq_pages[sge_no]);
 		rqstp->rq_pages[sge_no] = page;
 		bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
-		rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
 		sge_no++;
 	}
 	rqstp->rq_respages = &rqstp->rq_pages[sge_no];
@@ -338,6 +337,7 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
 	rq_arg->head[0].iov_base = p;
 	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
 	rq_arg->head[0].iov_len -= hdr_len;
+	rq_arg->len -= hdr_len;
 	dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n",
 		proc, be32_to_cpup(rdma_argp), hdr_len);
 	return hdr_len;
@@ -564,18 +564,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		goto out_readchunk;
 
 complete:
-	ret = rqstp->rq_arg.head[0].iov_len
-		+ rqstp->rq_arg.page_len
-		+ rqstp->rq_arg.tail[0].iov_len;
 	svc_rdma_put_context(ctxt, 0);
-	dprintk("svcrdma: ret=%d, rq_arg.len=%u, "
-		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n",
-		ret, rqstp->rq_arg.len,
-		rqstp->rq_arg.head[0].iov_base,
-		rqstp->rq_arg.head[0].iov_len);
+	dprintk("svcrdma: recvfrom: xprt=%p, rqstp=%p, rq_arg.len=%u\n",
+		rdma_xprt, rqstp, rqstp->rq_arg.len);
 	rqstp->rq_prot = IPPROTO_MAX;
 	svc_xprt_copy_addrs(rqstp, xprt);
-	return ret;
+	return rqstp->rq_arg.len;
 
 out_readchunk:
 	ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 77d38a4b8d991..9d7a151b48856 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -848,7 +848,7 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
 	head->arg.len += info->ri_chunklen;
 	head->arg.buflen += info->ri_chunklen;
 
-	if (head->arg.len <= head->sge[0].length) {
+	if (head->arg.buflen <= head->sge[0].length) {
 		/* Transport header and RPC message fit entirely
 		 * in page where head iovec resides.
 		 */
-- 
GitLab


From c84dc900d737a8d8f08768622226980ee863403b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:49 -0400
Subject: [PATCH 1266/1958] svcrdma: Remove unused Read completion handlers

Clean up:

The generic RDMA R/W API conversion of svc_rdma_recvfrom replaced
the Register, Read, and Invalidate completion handlers. Remove the
old ones, which are no longer used.

These handlers shared some helper code with svc_rdma_wc_send. Fold
the wc_common helper back into the one remaining completion handler.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  4 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 93 +++---------------------
 2 files changed, 10 insertions(+), 87 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index b1ba19ba10719..06d58a3f74bc6 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -77,17 +77,15 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct list_head list;
-	struct svc_rdma_op_ctxt *read_hdr;
 	struct svc_rdma_fastreg_mr *frmr;
-	int hdr_count;
 	struct xdr_buf arg;
 	struct ib_cqe cqe;
 	u32 byte_len;
 	struct svcxprt_rdma *xprt;
-	unsigned long flags;
 	enum dma_data_direction direction;
 	int count;
 	unsigned int mapped_sges;
+	int hdr_count;
 	struct ib_send_wr send_wr;
 	struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
 	struct page *pages[RPCSVC_MAXPAGES];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 72d2dcdca2e12..c915cba0f8e60 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -346,36 +346,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	svc_xprt_put(&xprt->sc_xprt);
 }
 
-static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
-				    struct ib_wc *wc,
-				    const char *opname)
-{
-	if (wc->status != IB_WC_SUCCESS)
-		goto err;
-
-out:
-	atomic_inc(&xprt->sc_sq_avail);
-	wake_up(&xprt->sc_send_wait);
-	return;
-
-err:
-	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-	if (wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("svcrdma: %s: %s (%u/0x%x)\n",
-		       opname, ib_wc_status_msg(wc->status),
-		       wc->status, wc->vendor_err);
-	goto out;
-}
-
-static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
-					const char *opname)
-{
-	struct svcxprt_rdma *xprt = cq->cq_context;
-
-	svc_rdma_send_wc_common(xprt, wc, opname);
-	svc_xprt_put(&xprt->sc_xprt);
-}
-
 /**
  * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
  * @cq:        completion queue
@@ -383,74 +353,29 @@ static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
  *
  */
 void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct svc_rdma_op_ctxt *ctxt;
-
-	svc_rdma_send_wc_common_put(cq, wc, "send");
-
-	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 1);
-}
-
-/**
- * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc)
-{
-	svc_rdma_send_wc_common_put(cq, wc, "fastreg");
-}
-
-/**
- * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct svcxprt_rdma *xprt = cq->cq_context;
 	struct ib_cqe *cqe = wc->wr_cqe;
 	struct svc_rdma_op_ctxt *ctxt;
 
-	svc_rdma_send_wc_common(xprt, wc, "read");
+	atomic_inc(&xprt->sc_sq_avail);
+	wake_up(&xprt->sc_send_wait);
 
 	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(xprt, ctxt->frmr);
-
-	if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-		struct svc_rdma_op_ctxt *read_hdr;
-
-		read_hdr = ctxt->read_hdr;
-		spin_lock(&xprt->sc_rq_dto_lock);
-		list_add_tail(&read_hdr->list,
-			      &xprt->sc_read_complete_q);
-		spin_unlock(&xprt->sc_rq_dto_lock);
+	svc_rdma_put_context(ctxt, 1);
 
-		set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-		svc_xprt_enqueue(&xprt->sc_xprt);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
 	}
 
-	svc_rdma_put_context(ctxt, 0);
 	svc_xprt_put(&xprt->sc_xprt);
 }
 
-/**
- * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc)
-{
-	svc_rdma_send_wc_common_put(cq, wc, "localInv");
-}
-
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 					     int listener)
 {
-- 
GitLab


From 463e63d7014442002399903af027b63ae38f6e77 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:18:57 -0400
Subject: [PATCH 1267/1958] svcrdma: Remove frmr cache

Clean up: Now that the svc_rdma_recvfrom path uses the rdma_rw API,
the details of Read sink buffer registration are dealt with by the
kernel's RDMA core. This cache is no longer used, and can be
removed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          | 18 -----
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 86 ------------------------
 2 files changed, 104 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 06d58a3f74bc6..fd7775f70bb5e 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -77,7 +77,6 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct list_head list;
-	struct svc_rdma_fastreg_mr *frmr;
 	struct xdr_buf arg;
 	struct ib_cqe cqe;
 	u32 byte_len;
@@ -91,17 +90,6 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-struct svc_rdma_fastreg_mr {
-	struct ib_mr *mr;
-	struct scatterlist *sg;
-	int sg_nents;
-	unsigned long access_flags;
-	enum dma_data_direction direction;
-	struct list_head frmr_list;
-};
-
-#define RDMACTXT_F_LAST_CTXT	2
-
 #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
 #define	SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
 
@@ -136,9 +124,6 @@ struct svcxprt_rdma {
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
 	u32		     sc_dev_caps;	/* distilled device caps */
-	unsigned int	     sc_frmr_pg_list_len;
-	struct list_head     sc_frmr_q;
-	spinlock_t	     sc_frmr_q_lock;
 
 	spinlock_t	     sc_lock;		/* transport lock */
 
@@ -210,9 +195,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
-extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
-			      struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c915cba0f8e60..8864105d86ec3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -202,7 +202,6 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 out:
 	ctxt->count = 0;
 	ctxt->mapped_sges = 0;
-	ctxt->frmr = NULL;
 	return ctxt;
 
 out_empty:
@@ -387,14 +386,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
-	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
 	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
-	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 	spin_lock_init(&cma_xprt->sc_ctxt_lock);
 	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
@@ -705,86 +702,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	return ERR_PTR(ret);
 }
 
-static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
-{
-	struct ib_mr *mr;
-	struct scatterlist *sg;
-	struct svc_rdma_fastreg_mr *frmr;
-	u32 num_sg;
-
-	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
-	if (!frmr)
-		goto err;
-
-	num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len);
-	mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg);
-	if (IS_ERR(mr))
-		goto err_free_frmr;
-
-	sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
-	if (!sg)
-		goto err_free_mr;
-
-	sg_init_table(sg, RPCSVC_MAXPAGES);
-
-	frmr->mr = mr;
-	frmr->sg = sg;
-	INIT_LIST_HEAD(&frmr->frmr_list);
-	return frmr;
-
- err_free_mr:
-	ib_dereg_mr(mr);
- err_free_frmr:
-	kfree(frmr);
- err:
-	return ERR_PTR(-ENOMEM);
-}
-
-static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
-{
-	struct svc_rdma_fastreg_mr *frmr;
-
-	while (!list_empty(&xprt->sc_frmr_q)) {
-		frmr = list_entry(xprt->sc_frmr_q.next,
-				  struct svc_rdma_fastreg_mr, frmr_list);
-		list_del_init(&frmr->frmr_list);
-		kfree(frmr->sg);
-		ib_dereg_mr(frmr->mr);
-		kfree(frmr);
-	}
-}
-
-struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
-{
-	struct svc_rdma_fastreg_mr *frmr = NULL;
-
-	spin_lock(&rdma->sc_frmr_q_lock);
-	if (!list_empty(&rdma->sc_frmr_q)) {
-		frmr = list_entry(rdma->sc_frmr_q.next,
-				  struct svc_rdma_fastreg_mr, frmr_list);
-		list_del_init(&frmr->frmr_list);
-		frmr->sg_nents = 0;
-	}
-	spin_unlock(&rdma->sc_frmr_q_lock);
-	if (frmr)
-		return frmr;
-
-	return rdma_alloc_frmr(rdma);
-}
-
-void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
-		       struct svc_rdma_fastreg_mr *frmr)
-{
-	if (frmr) {
-		ib_dma_unmap_sg(rdma->sc_cm_id->device,
-				frmr->sg, frmr->sg_nents, frmr->direction);
-		spin_lock(&rdma->sc_frmr_q_lock);
-		WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
-		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
-		spin_unlock(&rdma->sc_frmr_q_lock);
-	}
-}
-
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -922,8 +839,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 *	of an RDMA_READ. IB does not.
 	 */
 	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-		newxprt->sc_frmr_pg_list_len =
-			dev->attrs.max_fast_reg_page_list_len;
 		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
 	} else
 		newxprt->sc_snd_w_inv = false;
@@ -1063,7 +978,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		xprt->xpt_bc_xprt = NULL;
 	}
 
-	rdma_dealloc_frmr_q(rdma);
 	svc_rdma_destroy_rw_ctxts(rdma);
 	svc_rdma_destroy_ctxts(rdma);
 
-- 
GitLab


From 0d956e694ad2d4b8fb3940dc19e738e1fc158353 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:19:05 -0400
Subject: [PATCH 1268/1958] svcrdma: Clean-up svc_rdma_unmap_dma

There's no longer a need to compare each SGE's lkey with the PD's
local_dma_lkey. Now that FRWR is gone, all DMA mappings are for
pages that were registered with this key.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 8864105d86ec3..75bd11f2b69ee 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -225,22 +225,13 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	struct ib_device *device = xprt->sc_cm_id->device;
-	u32 lkey = xprt->sc_pd->local_dma_lkey;
 	unsigned int i;
 
-	for (i = 0; i < ctxt->mapped_sges; i++) {
-		/*
-		 * Unmap the DMA addr in the SGE if the lkey matches
-		 * the local_dma_lkey, otherwise, ignore it since it is
-		 * an FRMR lkey and will be unmapped later when the
-		 * last WR that uses it completes.
-		 */
-		if (ctxt->sge[i].lkey == lkey)
-			ib_dma_unmap_page(device,
-					    ctxt->sge[i].addr,
-					    ctxt->sge[i].length,
-					    ctxt->direction);
-	}
+	for (i = 0; i < ctxt->mapped_sges; i++)
+		ib_dma_unmap_page(device,
+				  ctxt->sge[i].addr,
+				  ctxt->sge[i].length,
+				  ctxt->direction);
 	ctxt->mapped_sges = 0;
 }
 
-- 
GitLab


From 9450ca8e2febb0000a5efd4f5870915d59ae62bc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:19:13 -0400
Subject: [PATCH 1269/1958] svcrdma: Clean up after converting
 svc_rdma_recvfrom to rdma_rw API

Clean up: Registration mode details are now handled by the rdma_rw
API, and thus can be removed from svcrdma.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  4 ---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 39 +++---------------------
 2 files changed, 4 insertions(+), 39 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index fd7775f70bb5e..995c6fe9ee907 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -90,9 +90,6 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-#define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
-#define	SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
-
 struct svcxprt_rdma {
 	struct svc_xprt      sc_xprt;		/* SVC transport structure */
 	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
@@ -123,7 +120,6 @@ struct svcxprt_rdma {
 	struct ib_qp         *sc_qp;
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
-	u32		     sc_dev_caps;	/* distilled device caps */
 
 	spinlock_t	     sc_lock;		/* transport lock */
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 75bd11f2b69ee..e660d4965b18a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -783,7 +783,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	memset(&qp_attr, 0, sizeof qp_attr);
 	qp_attr.event_handler = qp_event_handler;
 	qp_attr.qp_context = &newxprt->sc_xprt;
-	qp_attr.port_num = newxprt->sc_cm_id->port_num;
+	qp_attr.port_num = newxprt->sc_port_num;
 	qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
 	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
 	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
@@ -807,43 +807,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	}
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
-	/*
-	 * Use the most secure set of MR resources based on the
-	 * transport type and available memory management features in
-	 * the device. Here's the table implemented below:
-	 *
-	 *		Fast	Global	DMA	Remote WR
-	 *		Reg	LKEY	MR	Access
-	 *		Sup'd	Sup'd	Needed	Needed
-	 *
-	 * IWARP	N	N	Y	Y
-	 *		N	Y	Y	Y
-	 *		Y	N	Y	N
-	 *		Y	Y	N	-
-	 *
-	 * IB		N	N	Y	N
-	 *		N	Y	N	-
-	 *		Y	N	Y	N
-	 *		Y	Y	N	-
-	 *
-	 * NB:	iWARP requires remote write access for the data sink
-	 *	of an RDMA_READ. IB does not.
-	 */
-	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
-	} else
+	if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
 		newxprt->sc_snd_w_inv = false;
-
-	/*
-	 * Determine if a DMA MR is required and if so, what privs are required
-	 */
-	if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
-	    !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
+	if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
+	    !rdma_ib_or_roce(dev, newxprt->sc_port_num))
 		goto errout;
 
-	if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
-		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
-
 	/* Post receive buffers */
 	for (i = 0; i < newxprt->sc_max_requests; i++) {
 		ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
-- 
GitLab


From 91b022ec8b1214d0fdfc503be305a19e6f7c5164 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:19:21 -0400
Subject: [PATCH 1270/1958] svcrdma: use offset_in_page() macro

Clean up: Use offset_in_page() macro instead of open-coding.

Reported-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_rw.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 9d7a151b48856..9859736b331bc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -372,8 +372,9 @@ static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
 	struct scatterlist *sg;
 	struct page **page;
 
-	page_off = (info->wi_next_off + xdr->page_base) & ~PAGE_MASK;
-	page_no = (info->wi_next_off + xdr->page_base) >> PAGE_SHIFT;
+	page_off = info->wi_next_off + xdr->page_base;
+	page_no = page_off >> PAGE_SHIFT;
+	page_off = offset_in_page(page_off);
 	page = xdr->pages + page_no;
 	info->wi_next_off += remaining;
 	sg = ctxt->rw_sg_table.sgl;
-- 
GitLab


From 35a30fc389b5961b42bb73ef751f3bc85190a118 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 23 Jun 2017 17:19:29 -0400
Subject: [PATCH 1271/1958] svcrdma: Remove svc_rdma_chunk_ctxt::cc_dir field

Clean up: No need to save the I/O direction. The functions that
release svc_rdma_chunk_ctxt already know what direction to use.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_rw.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 9859736b331bc..933f79bed2708 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -116,22 +116,20 @@ struct svc_rdma_chunk_ctxt {
 	struct svcxprt_rdma	*cc_rdma;
 	struct list_head	cc_rwctxts;
 	int			cc_sqecount;
-	enum dma_data_direction cc_dir;
 };
 
 static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
-			     struct svc_rdma_chunk_ctxt *cc,
-			     enum dma_data_direction dir)
+			     struct svc_rdma_chunk_ctxt *cc)
 {
 	cc->cc_rdma = rdma;
 	svc_xprt_get(&rdma->sc_xprt);
 
 	INIT_LIST_HEAD(&cc->cc_rwctxts);
 	cc->cc_sqecount = 0;
-	cc->cc_dir = dir;
 }
 
-static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc)
+static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
+				enum dma_data_direction dir)
 {
 	struct svcxprt_rdma *rdma = cc->cc_rdma;
 	struct svc_rdma_rw_ctxt *ctxt;
@@ -141,7 +139,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc)
 
 		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
 				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
-				    ctxt->rw_nents, cc->cc_dir);
+				    ctxt->rw_nents, dir);
 		svc_rdma_put_rw_ctxt(rdma, ctxt);
 	}
 	svc_xprt_put(&rdma->sc_xprt);
@@ -179,14 +177,14 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
 	info->wi_seg_no = 0;
 	info->wi_nsegs = be32_to_cpup(++chunk);
 	info->wi_segs = ++chunk;
-	svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE);
+	svc_rdma_cc_init(rdma, &info->wi_cc);
 	info->wi_cc.cc_cqe.done = svc_rdma_write_done;
 	return info;
 }
 
 static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
 {
-	svc_rdma_cc_release(&info->wi_cc);
+	svc_rdma_cc_release(&info->wi_cc, DMA_TO_DEVICE);
 	kfree(info);
 }
 
@@ -241,14 +239,14 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
 	if (!info)
 		return info;
 
-	svc_rdma_cc_init(rdma, &info->ri_cc, DMA_FROM_DEVICE);
+	svc_rdma_cc_init(rdma, &info->ri_cc);
 	info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done;
 	return info;
 }
 
 static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
 {
-	svc_rdma_cc_release(&info->ri_cc);
+	svc_rdma_cc_release(&info->ri_cc, DMA_FROM_DEVICE);
 	kfree(info);
 }
 
-- 
GitLab


From 630458e730b82efe1f6eb90e6bcabad02fe76e20 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 11 May 2017 14:45:06 -0400
Subject: [PATCH 1272/1958] nfsd4: factor ctime into change attribute

Factoring ctime into the nfsv4 change attribute gives us better
properties than just i_version alone.

Eventually we'll likely also expose this (as opposed to raw i_version)
to userspace, at which point we'll want to move it to a common helper,
called from either userspace or individual filesystems.  For now, nfsd
is the only user.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3xdr.c |  2 +-
 fs/nfsd/nfs4xdr.c |  2 +-
 fs/nfsd/nfsfh.h   | 24 +++++++++++++++++++++++-
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index b8838d3023ff2..bf444b664011a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -260,7 +260,7 @@ void fill_post_wcc(struct svc_fh *fhp)
 		printk("nfsd: inode locked twice during operation.\n");
 
 	err = fh_getattr(fhp, &fhp->fh_post_attr);
-	fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version;
+	fhp->fh_post_change = nfsd4_change_attribute(d_inode(fhp->fh_dentry));
 	if (err) {
 		fhp->fh_post_saved = false;
 		/* Grab the ctime anyway - set_change_info might use it */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 54e212e3541eb..20fbcab977531 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1973,7 +1973,7 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
 		*p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
 		*p++ = 0;
 	} else if (IS_I_VERSION(inode)) {
-		p = xdr_encode_hyper(p, inode->i_version);
+		p = xdr_encode_hyper(p, nfsd4_change_attribute(inode));
 	} else {
 		*p++ = cpu_to_be32(stat->ctime.tv_sec);
 		*p++ = cpu_to_be32(stat->ctime.tv_nsec);
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index f84fe6bf9aee1..e47cf6c2ac28b 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -240,6 +240,28 @@ fh_clear_wcc(struct svc_fh *fhp)
 	fhp->fh_pre_saved = false;
 }
 
+/*
+ * We could use i_version alone as the change attribute.  However,
+ * i_version can go backwards after a reboot.  On its own that doesn't
+ * necessarily cause a problem, but if i_version goes backwards and then
+ * is incremented again it could reuse a value that was previously used
+ * before boot, and a client who queried the two values might
+ * incorrectly assume nothing changed.
+ *
+ * By using both ctime and the i_version counter we guarantee that as
+ * long as time doesn't go backwards we never reuse an old value.
+ */
+static inline u64 nfsd4_change_attribute(struct inode *inode)
+{
+	u64 chattr;
+
+	chattr =  inode->i_ctime.tv_sec;
+	chattr <<= 30;
+	chattr += inode->i_ctime.tv_nsec;
+	chattr += inode->i_version;
+	return chattr;
+}
+
 /*
  * Fill in the pre_op attr for the wcc data
  */
@@ -253,7 +275,7 @@ fill_pre_wcc(struct svc_fh *fhp)
 		fhp->fh_pre_mtime = inode->i_mtime;
 		fhp->fh_pre_ctime = inode->i_ctime;
 		fhp->fh_pre_size  = inode->i_size;
-		fhp->fh_pre_change = inode->i_version;
+		fhp->fh_pre_change = nfsd4_change_attribute(inode);
 		fhp->fh_pre_saved = true;
 	}
 }
-- 
GitLab


From 0bc48bea36d178aea9d7f83f66a1b397cec9db5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Tue, 16 May 2017 22:50:00 +0200
Subject: [PATCH 1273/1958] KVM: x86: update master clock before computing
 kvmclock_offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kvm master clock usually has a different frequency than the kernel boot
clock.  This is not a problem until the master clock is updated;
update uses the current kernel boot clock to compute new kvm clock,
which erases any kvm clock cycles that might have built up due to
frequency difference over a long period.

KVM_SET_CLOCK is one of places where we can safely update master clock
as the guest-visible clock is going to be shifted anyway.

The problem with current code is that it updates the kvm master clock
after updating the offset.  If the master clock was enabled before
calling KVM_SET_CLOCK, then it might have built up a significant delta
from kernel boot clock.
In the worst case, the time set by userspace would be shifted by so much
that it couldn't have been set at any point during KVM_SET_CLOCK.

To fix this, move kvm_gen_update_masterclock() before computing
kvmclock_offset, which means that the master clock and kernel boot clock
will be sufficiently close together.
Another solution would be to replace get_kvmclock_ns() with
"ktime_get_boot_ns() + ka->kvmclock_offset", which is marginally more
accurate, but would break symmetry with KVM_GET_CLOCK.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c7266f7766dc..ca128a9c9cc4e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4188,9 +4188,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 
 		r = 0;
+		/*
+		 * TODO: userspace has to take care of races with VCPU_RUN, so
+		 * kvm_gen_update_masterclock() can be cut down to locked
+		 * pvclock_update_vm_gtod_copy().
+		 */
+		kvm_gen_update_masterclock(kvm);
 		now_ns = get_kvmclock_ns(kvm);
 		kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-		kvm_gen_update_masterclock(kvm);
+		kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
 		break;
 	}
 	case KVM_GET_CLOCK: {
-- 
GitLab


From 48ae0fb49b2e8380e60c5072f18005c8a98c6520 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Mon, 22 May 2017 09:48:33 -0700
Subject: [PATCH 1274/1958] kvm: vmx: Properly handle machine check during
 VM-entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vmx_complete_atomic_exit should call kvm_machine_check for any
VM-entry failure due to a machine-check event. Such an exit should be
recognized solely by its basic exit reason (i.e. the low 16 bits of
the VMCS exit reason field). None of the other VMCS exit information
fields contain valid information when the VM-exit is due to "VM-entry
failure due to machine-check event".

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@tencent.com>
[Changed VM_EXIT_INTR_INFO condition to better describe its reason.]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32db3f5dce7fd..25f2fdccf6251 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8630,17 +8630,20 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
-	u32 exit_intr_info;
+	u32 exit_intr_info = 0;
+	u16 basic_exit_reason = (u16)vmx->exit_reason;
 
-	if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
-	      || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+	if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+	      || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
 		return;
 
-	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	exit_intr_info = vmx->exit_intr_info;
+	if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+	vmx->exit_intr_info = exit_intr_info;
 
 	/* Handle machine checks before interrupts are enabled */
-	if (is_machine_check(exit_intr_info))
+	if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
+	    is_machine_check(exit_intr_info))
 		kvm_machine_check();
 
 	/* We need to handle NMIs before interrupts are enabled */
-- 
GitLab


From 949c033694864082db9b3f5304723a6d7407f8e2 Mon Sep 17 00:00:00 2001
From: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Date: Tue, 11 Jul 2017 00:22:33 +0300
Subject: [PATCH 1275/1958] KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl
 definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case of KVM_S390_GET_CMMA_BITS, the kernel does not only read struct
kvm_s390_cmma_log passed from userspace (which constitutes _IOC_WRITE),
it also writes back a return value (which constitutes _IOC_READ) making
this an _IOWR ioctl instead of _IOW.

Fixes: 4036e387 ("KVM: s390: ioctls to get and set guest storage attributes")
Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 include/uapi/linux/kvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c0b6dfec5f872..ebd604c222d89 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1351,7 +1351,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_X86_SMM */
 #define KVM_SMI                   _IO(KVMIO,   0xb7)
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
-#define KVM_S390_GET_CMMA_BITS      _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log)
+#define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
-- 
GitLab


From fb5307298e49ec1668c3a9ec888c1b9da4347395 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Mon, 10 Jul 2017 20:53:28 +0200
Subject: [PATCH 1276/1958] KVM: x86: take slots_lock in kvm_free_pit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kvm_vm_release() did not have slots_lock when calling
kvm_io_bus_unregister_dev() and this went unnoticed until 4a12f9517728
("KVM: mark kvm->busses as rcu protected") added dynamic checks.
Luckily, there should be no race at that point:

  =============================
  WARNING: suspicious RCU usage
  4.12.0.kvm+ #0 Not tainted
  -----------------------------
  ./include/linux/kvm_host.h:479 suspicious rcu_dereference_check() usage!

   lockdep_rcu_suspicious+0xc5/0x100
   kvm_io_bus_unregister_dev+0x173/0x190 [kvm]
   kvm_free_pit+0x28/0x80 [kvm]
   kvm_arch_sync_events+0x2d/0x30 [kvm]
   kvm_put_kvm+0xa7/0x2a0 [kvm]
   kvm_vm_release+0x21/0x30 [kvm]

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/i8254.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a78b445ce4116..af192895b1fc6 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
 	struct kvm_pit *pit = kvm->arch.vpit;
 
 	if (pit) {
+		mutex_lock(&kvm->slots_lock);
 		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
 		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
+		mutex_unlock(&kvm->slots_lock);
 		kvm_pit_set_reinject(pit, false);
 		hrtimer_cancel(&pit->pit_state.timer);
 		kthread_destroy_worker(pit->worker);
-- 
GitLab


From b742c1e6e79ddf4192d76336da2407c65ca7242f Mon Sep 17 00:00:00 2001
From: Ladi Prosek <lprosek@redhat.com>
Date: Thu, 22 Jun 2017 09:05:26 +0200
Subject: [PATCH 1277/1958] KVM: SVM: handle singlestep exception when skipping
 emulated instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kvm_skip_emulated_instruction handles the singlestep debug exception
which is something we almost always want. This commit (specifically
the change in rdmsr_interception) makes the debug.flat KVM unit test
pass on AMD.

Two call sites still call skip_emulated_instruction directly:

* In svm_queue_exception where it's used only for moving the rip forward

* In task_switch_interception which is analogous to handle_task_switch
  in VMX

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/svm.c | 59 ++++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 905ea6052517f..3da42d7c629e1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2267,7 +2267,7 @@ static int io_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-	int size, in, string;
+	int size, in, string, ret;
 	unsigned port;
 
 	++svm->vcpu.stat.io_exits;
@@ -2279,10 +2279,16 @@ static int io_interception(struct vcpu_svm *svm)
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 	svm->next_rip = svm->vmcb->control.exit_info_2;
-	skip_emulated_instruction(&svm->vcpu);
+	ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
-	return in ? kvm_fast_pio_in(vcpu, size, port)
-		  : kvm_fast_pio_out(vcpu, size, port);
+	/*
+	 * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+	 * KVM_EXIT_DEBUG here.
+	 */
+	if (in)
+		return kvm_fast_pio_in(vcpu, size, port) && ret;
+	else
+		return kvm_fast_pio_out(vcpu, size, port) && ret;
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
@@ -3055,6 +3061,7 @@ static int vmload_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 	struct page *page;
+	int ret;
 
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -3064,18 +3071,19 @@ static int vmload_interception(struct vcpu_svm *svm)
 		return 1;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
+	ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
 	nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
 	nested_svm_unmap(page);
 
-	return 1;
+	return ret;
 }
 
 static int vmsave_interception(struct vcpu_svm *svm)
 {
 	struct vmcb *nested_vmcb;
 	struct page *page;
+	int ret;
 
 	if (nested_svm_check_permissions(svm))
 		return 1;
@@ -3085,12 +3093,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
 		return 1;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
+	ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
 	nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
 	nested_svm_unmap(page);
 
-	return 1;
+	return ret;
 }
 
 static int vmrun_interception(struct vcpu_svm *svm)
@@ -3123,25 +3131,29 @@ static int vmrun_interception(struct vcpu_svm *svm)
 
 static int stgi_interception(struct vcpu_svm *svm)
 {
+	int ret;
+
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
+	ret = kvm_skip_emulated_instruction(&svm->vcpu);
 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 
 	enable_gif(svm);
 
-	return 1;
+	return ret;
 }
 
 static int clgi_interception(struct vcpu_svm *svm)
 {
+	int ret;
+
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
+	ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
 	disable_gif(svm);
 
@@ -3152,7 +3164,7 @@ static int clgi_interception(struct vcpu_svm *svm)
 		mark_dirty(svm->vmcb, VMCB_INTR);
 	}
 
-	return 1;
+	return ret;
 }
 
 static int invlpga_interception(struct vcpu_svm *svm)
@@ -3166,8 +3178,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
 	kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
-	return 1;
+	return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int skinit_interception(struct vcpu_svm *svm)
@@ -3190,7 +3201,7 @@ static int xsetbv_interception(struct vcpu_svm *svm)
 
 	if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-		skip_emulated_instruction(&svm->vcpu);
+		return kvm_skip_emulated_instruction(&svm->vcpu);
 	}
 
 	return 1;
@@ -3286,8 +3297,7 @@ static int invlpg_interception(struct vcpu_svm *svm)
 		return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
 	kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
-	skip_emulated_instruction(&svm->vcpu);
-	return 1;
+	return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int emulate_on_interception(struct vcpu_svm *svm)
@@ -3437,9 +3447,7 @@ static int dr_interception(struct vcpu_svm *svm)
 		kvm_register_write(&svm->vcpu, reg, val);
 	}
 
-	skip_emulated_instruction(&svm->vcpu);
-
-	return 1;
+	return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int cr8_write_interception(struct vcpu_svm *svm)
@@ -3562,6 +3570,7 @@ static int rdmsr_interception(struct vcpu_svm *svm)
 	if (svm_get_msr(&svm->vcpu, &msr_info)) {
 		trace_kvm_msr_read_ex(ecx);
 		kvm_inject_gp(&svm->vcpu, 0);
+		return 1;
 	} else {
 		trace_kvm_msr_read(ecx, msr_info.data);
 
@@ -3570,9 +3579,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
 		kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
 				   msr_info.data >> 32);
 		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-		skip_emulated_instruction(&svm->vcpu);
+		return kvm_skip_emulated_instruction(&svm->vcpu);
 	}
-	return 1;
 }
 
 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -3698,11 +3706,11 @@ static int wrmsr_interception(struct vcpu_svm *svm)
 	if (kvm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
+		return 1;
 	} else {
 		trace_kvm_msr_write(ecx, data);
-		skip_emulated_instruction(&svm->vcpu);
+		return kvm_skip_emulated_instruction(&svm->vcpu);
 	}
-	return 1;
 }
 
 static int msr_interception(struct vcpu_svm *svm)
@@ -3731,8 +3739,7 @@ static int pause_interception(struct vcpu_svm *svm)
 
 static int nop_interception(struct vcpu_svm *svm)
 {
-	skip_emulated_instruction(&(svm->vcpu));
-	return 1;
+	return kvm_skip_emulated_instruction(&(svm->vcpu));
 }
 
 static int monitor_interception(struct vcpu_svm *svm)
-- 
GitLab


From 8a77e90966e92759f94087f9845d413290be0d70 Mon Sep 17 00:00:00 2001
From: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Date: Thu, 6 Jul 2017 15:50:44 -0500
Subject: [PATCH 1278/1958] KVM: SVM: Prepare for new bit definition in lbr_ctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lbr_ctl variable in the vmcb control area is used to enable or
disable Last Branch Record (LBR) virtualization. However, this is to be
done using only bit 0 of the variable. To correct this and to prepare
for a new feature, change the current usage to work only on a particular
bit.

Signed-off-by: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/svm.h | 2 ++
 arch/x86/kvm/svm.c         | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 14824fc78f7e7..d1163f64d7326 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -119,6 +119,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_ENABLE_SHIFT 31
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+
 #define SVM_INTERRUPT_SHADOW_MASK 1
 
 #define SVM_IOIO_STR_SHIFT 2
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3da42d7c629e1..6e72127c0d0ec 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -947,7 +947,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
 {
 	u32 *msrpm = svm->msrpm;
 
-	svm->vmcb->control.lbr_ctl = 1;
+	svm->vmcb->control.lbr_ctl |= LBR_CTL_ENABLE_MASK;
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
@@ -958,7 +958,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 {
 	u32 *msrpm = svm->msrpm;
 
-	svm->vmcb->control.lbr_ctl = 0;
+	svm->vmcb->control.lbr_ctl &= ~LBR_CTL_ENABLE_MASK;
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
-- 
GitLab


From 0dc92119b50be539a5480d72a00ae8098bdba2fc Mon Sep 17 00:00:00 2001
From: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Date: Thu, 6 Jul 2017 15:50:45 -0500
Subject: [PATCH 1279/1958] KVM: SVM: Rename lbr_ctl field in the vmcb control
 area
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the lbr_ctl variable to better reflect the purpose of the field -
provide support for virtualization extensions.

Signed-off-by: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/svm.h |  2 +-
 arch/x86/kvm/svm.c         | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index d1163f64d7326..74d139352491f 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u32 event_inj;
 	u32 event_inj_err;
 	u64 nested_cr3;
-	u64 lbr_ctl;
+	u64 virt_ext;
 	u32 clean;
 	u32 reserved_5;
 	u64 next_rip;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6e72127c0d0ec..9a09f89145f23 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -947,7 +947,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
 {
 	u32 *msrpm = svm->msrpm;
 
-	svm->vmcb->control.lbr_ctl |= LBR_CTL_ENABLE_MASK;
+	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
@@ -958,7 +958,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 {
 	u32 *msrpm = svm->msrpm;
 
-	svm->vmcb->control.lbr_ctl &= ~LBR_CTL_ENABLE_MASK;
+	svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
 	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
 	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
@@ -2708,7 +2708,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
 	dst->event_inj            = from->event_inj;
 	dst->event_inj_err        = from->event_inj_err;
 	dst->nested_cr3           = from->nested_cr3;
-	dst->lbr_ctl              = from->lbr_ctl;
+	dst->virt_ext              = from->virt_ext;
 }
 
 static int nested_svm_vmexit(struct vcpu_svm *svm)
@@ -3014,7 +3014,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	/* We don't want to see VMMCALLs from a nested guest */
 	clr_intercept(svm, INTERCEPT_VMMCALL);
 
-	svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
 	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
@@ -4124,7 +4124,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
 	pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
 	pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
 	pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
-	pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
+	pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
 	pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
 	pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
 	pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
-- 
GitLab


From 76ff359249f1e80ff0d6ced3b52b1088c4e61b9b Mon Sep 17 00:00:00 2001
From: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Date: Thu, 6 Jul 2017 15:50:46 -0500
Subject: [PATCH 1280/1958] KVM: SVM: Add Virtual VMLOAD VMSAVE feature
 definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define a new cpufeature definition for Virtual VMLOAD VMSAVE.

Signed-off-by: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2701e5f8145bd..ca3c48c0872f4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -286,6 +286,7 @@
 #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-- 
GitLab


From 89c8a4984fc98e625517bfe5083342d77ee35811 Mon Sep 17 00:00:00 2001
From: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Date: Thu, 6 Jul 2017 15:50:47 -0500
Subject: [PATCH 1281/1958] KVM: SVM: Enable Virtual VMLOAD VMSAVE feature
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable the Virtual VMLOAD VMSAVE feature. This is done by setting bit 1
at position B8h in the vmcb.

The processor must have nested paging enabled, be in 64-bit mode and
have support for the Virtual VMLOAD VMSAVE feature for the bit to be set
in the vmcb.

Signed-off-by: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/svm.h |  1 +
 arch/x86/kvm/svm.c         | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 74d139352491f..58fffe79e417e 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -120,6 +120,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
 
 #define SVM_INTERRUPT_SHADOW_MASK 1
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9a09f89145f23..4c98d362e3e44 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -277,6 +277,10 @@ static int avic;
 module_param(avic, int, S_IRUGO);
 #endif
 
+/* enable/disable Virtual VMLOAD VMSAVE */
+static int vls = true;
+module_param(vls, int, 0444);
+
 /* AVIC VM ID bit masks and lock */
 static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
 static DEFINE_SPINLOCK(avic_vm_id_lock);
@@ -1093,6 +1097,16 @@ static __init int svm_hardware_setup(void)
 		}
 	}
 
+	if (vls) {
+		if (!npt_enabled ||
+		    !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
+		    !IS_ENABLED(CONFIG_X86_64)) {
+			vls = false;
+		} else {
+			pr_info("Virtual VMLOAD VMSAVE supported\n");
+		}
+	}
+
 	return 0;
 
 err:
@@ -1280,6 +1294,16 @@ static void init_vmcb(struct vcpu_svm *svm)
 	if (avic)
 		avic_init_vmcb(svm);
 
+	/*
+	 * If hardware supports Virtual VMLOAD VMSAVE then enable it
+	 * in VMCB and clear intercepts to avoid #VMEXIT.
+	 */
+	if (vls) {
+		clr_intercept(svm, INTERCEPT_VMLOAD);
+		clr_intercept(svm, INTERCEPT_VMSAVE);
+		svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+	}
+
 	mark_all_dirty(svm->vmcb);
 
 	enable_gif(svm);
-- 
GitLab


From 286de8f6ac9202f1c9012784639156c6ec386eb8 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Date: Wed, 12 Jul 2017 17:56:44 +0200
Subject: [PATCH 1282/1958] KVM: trigger uevents when creating or destroying a
 VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a few lines to the KVM common code to fire a
KOBJ_CHANGE uevent whenever a KVM VM is created or destroyed. The event
carries five environment variables:

CREATED indicates how many times a new VM has been created. It is
	useful for example to trigger specific actions when the first
	VM is started
COUNT indicates how many VMs are currently active. This can be used for
	logging or monitoring purposes
PID has the pid of the KVM process that has been started or stopped.
	This can be used to perform process-specific tuning.
STATS_PATH contains the path in debugfs to the directory with all the
	runtime statistics for this VM. This is useful for performance
	monitoring and profiling.
EVENT described the type of event, its value can be either "create" or
	"destroy"

Specific udev rules can be then set up in userspace to deal with the
creation or destruction of VMs as needed.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 virt/kvm/kvm_main.c | 69 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7766c2b527979..82987d457b8bb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+#define KVM_EVENT_CREATE_VM 0
+#define KVM_EVENT_DESTROY_VM 1
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
+static unsigned long long kvm_createvm_count;
+static unsigned long long kvm_active_vms;
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
 	if (pfn_valid(pfn))
@@ -740,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	int i;
 	struct mm_struct *mm = kvm->mm;
 
+	kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
 	kvm_destroy_vm_debugfs(kvm);
 	kvm_arch_sync_events(kvm);
 	spin_lock(&kvm_lock);
@@ -3220,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 		fput(file);
 		return -ENOMEM;
 	}
+	kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
 
 	fd_install(r, file);
 	return r;
@@ -3872,6 +3880,67 @@ static const struct file_operations *stat_fops[] = {
 	[KVM_STAT_VM]   = &vm_stat_fops,
 };
 
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
+{
+	struct kobj_uevent_env *env;
+	char *tmp, *pathbuf = NULL;
+	unsigned long long created, active;
+
+	if (!kvm_dev.this_device || !kvm)
+		return;
+
+	spin_lock(&kvm_lock);
+	if (type == KVM_EVENT_CREATE_VM) {
+		kvm_createvm_count++;
+		kvm_active_vms++;
+	} else if (type == KVM_EVENT_DESTROY_VM) {
+		kvm_active_vms--;
+	}
+	created = kvm_createvm_count;
+	active = kvm_active_vms;
+	spin_unlock(&kvm_lock);
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL);
+	if (!env)
+		return;
+
+	add_uevent_var(env, "CREATED=%llu", created);
+	add_uevent_var(env, "COUNT=%llu", active);
+
+	if (type == KVM_EVENT_CREATE_VM)
+		add_uevent_var(env, "EVENT=create");
+	else if (type == KVM_EVENT_DESTROY_VM)
+		add_uevent_var(env, "EVENT=destroy");
+
+	if (kvm->debugfs_dentry) {
+		char p[ITOA_MAX_LEN];
+
+		snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
+		tmp = strchrnul(p + 1, '-');
+		*tmp = '\0';
+		add_uevent_var(env, "PID=%s", p);
+		pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+		if (pathbuf) {
+			/* sizeof counts the final '\0' */
+			int len = sizeof("STATS_PATH=") - 1;
+			const char *pvar = "STATS_PATH=";
+
+			tmp = dentry_path_raw(kvm->debugfs_dentry,
+					      pathbuf + len,
+					      PATH_MAX - len);
+			if (!IS_ERR(tmp)) {
+				memcpy(tmp - len, pvar, len);
+				env->envp[env->envp_idx++] = tmp - len;
+			}
+		}
+	}
+	/* no need for checks, since we are adding at most only 5 keys */
+	env->envp[env->envp_idx++] = NULL;
+	kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
+	kfree(env);
+	kfree(pathbuf);
+}
+
 static int kvm_init_debug(void)
 {
 	int r = -EEXIST;
-- 
GitLab


From c57ec8fb7c025322f25a077afc94e0ef18cc3d89 Mon Sep 17 00:00:00 2001
From: Nilesh Javali <nilesh.javali@cavium.com>
Date: Tue, 27 Jun 2017 02:26:56 -0700
Subject: [PATCH 1283/1958] scsi: qedi: Add support for Boot from SAN over
 iSCSI offload

This patch adds support for Boot from SAN over iSCSI offload. The iSCSI
boot information in the NVRAM is populated under
/sys/firmware/iscsi_bootX/ using qed NVM-image reading API and further
exported to open-iscsi to perform iSCSI login enabling boot over offload
iSCSI interface in a Boot from SAN environment.

Signed-off-by: Arun Easi <arun.easi@cavium.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@cavium.com>
Signed-off-by: Manish Rangankar <manish.rangankar@cavium.com>
Signed-off-by: Nilesh Javali <nilesh.javali@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedi/qedi.h               |  17 +
 drivers/scsi/qedi/qedi_main.c          | 419 +++++++++++++++++++++++++
 drivers/scsi/qedi/qedi_nvm_iscsi_cfg.h | 210 +++++++++++++
 3 files changed, 646 insertions(+)
 create mode 100644 drivers/scsi/qedi/qedi_nvm_iscsi_cfg.h

diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index 32632c9b22766..91d2f51c351b4 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -23,11 +23,17 @@
 #include <linux/qed/qed_iscsi_if.h>
 #include <linux/qed/qed_ll2_if.h>
 #include "qedi_version.h"
+#include "qedi_nvm_iscsi_cfg.h"
 
 #define QEDI_MODULE_NAME		"qedi"
 
 struct qedi_endpoint;
 
+#ifndef GET_FIELD2
+#define GET_FIELD2(value, name) \
+	(((value) & (name ## _MASK)) >> (name ## _OFFSET))
+#endif
+
 /*
  * PCI function probe defines
  */
@@ -66,6 +72,11 @@ struct qedi_endpoint;
 #define QEDI_HW_DMA_BOUNDARY	0xfff
 #define QEDI_PATH_HANDLE	0xFE0000000UL
 
+enum qedi_nvm_tgts {
+	QEDI_NVM_TGT_PRI,
+	QEDI_NVM_TGT_SEC,
+};
+
 struct qedi_uio_ctrl {
 	/* meta data */
 	u32 uio_hsi_version;
@@ -283,6 +294,8 @@ struct qedi_ctx {
 	void *bdq_pbl_list;
 	dma_addr_t bdq_pbl_list_dma;
 	u8 bdq_pbl_list_num_entries;
+	struct nvm_iscsi_cfg *iscsi_cfg;
+	dma_addr_t nvm_buf_dma;
 	void __iomem *bdq_primary_prod;
 	void __iomem *bdq_secondary_prod;
 	u16 bdq_prod_idx;
@@ -337,6 +350,10 @@ struct qedi_ctx {
 	bool use_fast_sge;
 
 	atomic_t num_offloads;
+#define SYSFS_FLAG_FW_SEL_BOOT 2
+#define IPV6_LEN	41
+#define IPV4_LEN	17
+	struct iscsi_boot_kset *boot_kset;
 };
 
 struct qedi_work {
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 5f5a4ef2e5296..2c37836848152 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/if_vlan.h>
 #include <linux/cpu.h>
+#include <linux/iscsi_boot_sysfs.h>
 
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
@@ -1143,6 +1144,30 @@ static int qedi_setup_int(struct qedi_ctx *qedi)
 	return rc;
 }
 
+static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi)
+{
+	if (qedi->iscsi_cfg)
+		dma_free_coherent(&qedi->pdev->dev,
+				  sizeof(struct nvm_iscsi_cfg),
+				  qedi->iscsi_cfg, qedi->nvm_buf_dma);
+}
+
+static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi)
+{
+	qedi->iscsi_cfg = dma_zalloc_coherent(&qedi->pdev->dev,
+					     sizeof(struct nvm_iscsi_cfg),
+					     &qedi->nvm_buf_dma, GFP_KERNEL);
+	if (!qedi->iscsi_cfg) {
+		QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n");
+		return -ENOMEM;
+	}
+	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+		  "NVM BUF addr=0x%p dma=0x%llx.\n", qedi->iscsi_cfg,
+		  qedi->nvm_buf_dma);
+
+	return 0;
+}
+
 static void qedi_free_bdq(struct qedi_ctx *qedi)
 {
 	int i;
@@ -1183,6 +1208,7 @@ static void qedi_free_global_queues(struct qedi_ctx *qedi)
 		kfree(gl[i]);
 	}
 	qedi_free_bdq(qedi);
+	qedi_free_nvm_iscsi_cfg(qedi);
 }
 
 static int qedi_alloc_bdq(struct qedi_ctx *qedi)
@@ -1309,6 +1335,11 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi)
 	if (rc)
 		goto mem_alloc_failure;
 
+	/* Allocate DMA coherent buffers for NVM_ISCSI_CFG */
+	rc = qedi_alloc_nvm_iscsi_cfg(qedi);
+	if (rc)
+		goto mem_alloc_failure;
+
 	/* Allocate a CQ and an associated PBL for each MSI-X
 	 * vector.
 	 */
@@ -1671,6 +1702,387 @@ void qedi_reset_host_mtu(struct qedi_ctx *qedi, u16 mtu)
 	qedi_ops->ll2->start(qedi->cdev, &params);
 }
 
+/**
+ * qedi_get_nvram_block: - Scan through the iSCSI NVRAM block (while accounting
+ * for gaps) for the matching absolute-pf-id of the QEDI device.
+ */
+static struct nvm_iscsi_block *
+qedi_get_nvram_block(struct qedi_ctx *qedi)
+{
+	int i;
+	u8 pf;
+	u32 flags;
+	struct nvm_iscsi_block *block;
+
+	pf = qedi->dev_info.common.abs_pf_id;
+	block = &qedi->iscsi_cfg->block[0];
+	for (i = 0; i < NUM_OF_ISCSI_PF_SUPPORTED; i++, block++) {
+		flags = ((block->id) & NVM_ISCSI_CFG_BLK_CTRL_FLAG_MASK) >>
+			NVM_ISCSI_CFG_BLK_CTRL_FLAG_OFFSET;
+		if (flags & (NVM_ISCSI_CFG_BLK_CTRL_FLAG_IS_NOT_EMPTY |
+				NVM_ISCSI_CFG_BLK_CTRL_FLAG_PF_MAPPED) &&
+			(pf == (block->id & NVM_ISCSI_CFG_BLK_MAPPED_PF_ID_MASK)
+				>> NVM_ISCSI_CFG_BLK_MAPPED_PF_ID_OFFSET))
+			return block;
+	}
+	return NULL;
+}
+
+static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf)
+{
+	struct qedi_ctx *qedi = data;
+	struct nvm_iscsi_initiator *initiator;
+	char *str = buf;
+	int rc = 1;
+	u32 ipv6_en, dhcp_en, ip_len;
+	struct nvm_iscsi_block *block;
+	char *fmt, *ip, *sub, *gw;
+
+	block = qedi_get_nvram_block(qedi);
+	if (!block)
+		return 0;
+
+	initiator = &block->initiator;
+	ipv6_en = block->generic.ctrl_flags &
+		  NVM_ISCSI_CFG_GEN_IPV6_ENABLED;
+	dhcp_en = block->generic.ctrl_flags &
+		  NVM_ISCSI_CFG_GEN_DHCP_TCPIP_CONFIG_ENABLED;
+	/* Static IP assignments. */
+	fmt = ipv6_en ? "%pI6\n" : "%pI4\n";
+	ip = ipv6_en ? initiator->ipv6.addr.byte : initiator->ipv4.addr.byte;
+	ip_len = ipv6_en ? IPV6_LEN : IPV4_LEN;
+	sub = ipv6_en ? initiator->ipv6.subnet_mask.byte :
+	      initiator->ipv4.subnet_mask.byte;
+	gw = ipv6_en ? initiator->ipv6.gateway.byte :
+	     initiator->ipv4.gateway.byte;
+	/* DHCP IP adjustments. */
+	fmt = dhcp_en ? "%s\n" : fmt;
+	if (dhcp_en) {
+		ip = ipv6_en ? "0::0" : "0.0.0.0";
+		sub = ip;
+		gw = ip;
+		ip_len = ipv6_en ? 5 : 8;
+	}
+
+	switch (type) {
+	case ISCSI_BOOT_ETH_IP_ADDR:
+		rc = snprintf(str, ip_len, fmt, ip);
+		break;
+	case ISCSI_BOOT_ETH_SUBNET_MASK:
+		rc = snprintf(str, ip_len, fmt, sub);
+		break;
+	case ISCSI_BOOT_ETH_GATEWAY:
+		rc = snprintf(str, ip_len, fmt, gw);
+		break;
+	case ISCSI_BOOT_ETH_FLAGS:
+		rc = snprintf(str, 3, "%hhd\n",
+			      SYSFS_FLAG_FW_SEL_BOOT);
+		break;
+	case ISCSI_BOOT_ETH_INDEX:
+		rc = snprintf(str, 3, "0\n");
+		break;
+	case ISCSI_BOOT_ETH_MAC:
+		rc = sysfs_format_mac(str, qedi->mac, ETH_ALEN);
+		break;
+	case ISCSI_BOOT_ETH_VLAN:
+		rc = snprintf(str, 12, "%d\n",
+			      GET_FIELD2(initiator->generic_cont0,
+					 NVM_ISCSI_CFG_INITIATOR_VLAN));
+		break;
+	case ISCSI_BOOT_ETH_ORIGIN:
+		if (dhcp_en)
+			rc = snprintf(str, 3, "3\n");
+		break;
+	default:
+		rc = 0;
+		break;
+	}
+
+	return rc;
+}
+
+static umode_t qedi_eth_get_attr_visibility(void *data, int type)
+{
+	int rc = 1;
+
+	switch (type) {
+	case ISCSI_BOOT_ETH_FLAGS:
+	case ISCSI_BOOT_ETH_MAC:
+	case ISCSI_BOOT_ETH_INDEX:
+	case ISCSI_BOOT_ETH_IP_ADDR:
+	case ISCSI_BOOT_ETH_SUBNET_MASK:
+	case ISCSI_BOOT_ETH_GATEWAY:
+	case ISCSI_BOOT_ETH_ORIGIN:
+	case ISCSI_BOOT_ETH_VLAN:
+		rc = 0444;
+		break;
+	default:
+		rc = 0;
+		break;
+	}
+	return rc;
+}
+
+static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf)
+{
+	struct qedi_ctx *qedi = data;
+	struct nvm_iscsi_initiator *initiator;
+	char *str = buf;
+	int rc;
+	struct nvm_iscsi_block *block;
+
+	block = qedi_get_nvram_block(qedi);
+	if (!block)
+		return 0;
+
+	initiator = &block->initiator;
+
+	switch (type) {
+	case ISCSI_BOOT_INI_INITIATOR_NAME:
+		rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
+			      initiator->initiator_name.byte);
+		break;
+	default:
+		rc = 0;
+		break;
+	}
+	return rc;
+}
+
+static umode_t qedi_ini_get_attr_visibility(void *data, int type)
+{
+	int rc;
+
+	switch (type) {
+	case ISCSI_BOOT_INI_INITIATOR_NAME:
+		rc = 0444;
+		break;
+	default:
+		rc = 0;
+		break;
+	}
+	return rc;
+}
+
+static ssize_t
+qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type,
+			char *buf, enum qedi_nvm_tgts idx)
+{
+	char *str = buf;
+	int rc = 1;
+	u32 ctrl_flags, ipv6_en, chap_en, mchap_en, ip_len;
+	struct nvm_iscsi_block *block;
+	char *chap_name, *chap_secret;
+	char *mchap_name, *mchap_secret;
+
+	block = qedi_get_nvram_block(qedi);
+	if (!block)
+		goto exit_show_tgt_info;
+
+	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_EVT,
+		  "Port:%d, tgt_idx:%d\n",
+		  GET_FIELD2(block->id, NVM_ISCSI_CFG_BLK_MAPPED_PF_ID), idx);
+
+	ctrl_flags = block->target[idx].ctrl_flags &
+		     NVM_ISCSI_CFG_TARGET_ENABLED;
+
+	if (!ctrl_flags) {
+		QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_EVT,
+			  "Target disabled\n");
+		goto exit_show_tgt_info;
+	}
+
+	ipv6_en = block->generic.ctrl_flags &
+		  NVM_ISCSI_CFG_GEN_IPV6_ENABLED;
+	ip_len = ipv6_en ? IPV6_LEN : IPV4_LEN;
+	chap_en = block->generic.ctrl_flags &
+		  NVM_ISCSI_CFG_GEN_CHAP_ENABLED;
+	chap_name = chap_en ? block->initiator.chap_name.byte : NULL;
+	chap_secret = chap_en ? block->initiator.chap_password.byte : NULL;
+
+	mchap_en = block->generic.ctrl_flags &
+		  NVM_ISCSI_CFG_GEN_CHAP_MUTUAL_ENABLED;
+	mchap_name = mchap_en ? block->target[idx].chap_name.byte : NULL;
+	mchap_secret = mchap_en ? block->target[idx].chap_password.byte : NULL;
+
+	switch (type) {
+	case ISCSI_BOOT_TGT_NAME:
+		rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n",
+			      block->target[idx].target_name.byte);
+		break;
+	case ISCSI_BOOT_TGT_IP_ADDR:
+		if (ipv6_en)
+			rc = snprintf(str, ip_len, "%pI6\n",
+				      block->target[idx].ipv6_addr.byte);
+		else
+			rc = snprintf(str, ip_len, "%pI4\n",
+				      block->target[idx].ipv4_addr.byte);
+		break;
+	case ISCSI_BOOT_TGT_PORT:
+		rc = snprintf(str, 12, "%d\n",
+			      GET_FIELD2(block->target[idx].generic_cont0,
+					 NVM_ISCSI_CFG_TARGET_TCP_PORT));
+		break;
+	case ISCSI_BOOT_TGT_LUN:
+		rc = snprintf(str, 22, "%.*d\n",
+			      block->target[idx].lun.value[1],
+			      block->target[idx].lun.value[0]);
+		break;
+	case ISCSI_BOOT_TGT_CHAP_NAME:
+		rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
+			      chap_name);
+		break;
+	case ISCSI_BOOT_TGT_CHAP_SECRET:
+		rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
+			      chap_secret);
+		break;
+	case ISCSI_BOOT_TGT_REV_CHAP_NAME:
+		rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n",
+			      mchap_name);
+		break;
+	case ISCSI_BOOT_TGT_REV_CHAP_SECRET:
+		rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n",
+			      mchap_secret);
+		break;
+	case ISCSI_BOOT_TGT_FLAGS:
+		rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT);
+		break;
+	case ISCSI_BOOT_TGT_NIC_ASSOC:
+		rc = snprintf(str, 3, "0\n");
+		break;
+	default:
+		rc = 0;
+		break;
+	}
+
+exit_show_tgt_info:
+	return rc;
+}
+
+static ssize_t qedi_show_boot_tgt_pri_info(void *data, int type, char *buf)
+{
+	struct qedi_ctx *qedi = data;
+
+	return qedi_show_boot_tgt_info(qedi, type, buf, QEDI_NVM_TGT_PRI);
+}
+
+static ssize_t qedi_show_boot_tgt_sec_info(void *data, int type, char *buf)
+{
+	struct qedi_ctx *qedi = data;
+
+	return qedi_show_boot_tgt_info(qedi, type, buf, QEDI_NVM_TGT_SEC);
+}
+
+static umode_t qedi_tgt_get_attr_visibility(void *data, int type)
+{
+	int rc;
+
+	switch (type) {
+	case ISCSI_BOOT_TGT_NAME:
+	case ISCSI_BOOT_TGT_IP_ADDR:
+	case ISCSI_BOOT_TGT_PORT:
+	case ISCSI_BOOT_TGT_LUN:
+	case ISCSI_BOOT_TGT_CHAP_NAME:
+	case ISCSI_BOOT_TGT_CHAP_SECRET:
+	case ISCSI_BOOT_TGT_REV_CHAP_NAME:
+	case ISCSI_BOOT_TGT_REV_CHAP_SECRET:
+	case ISCSI_BOOT_TGT_NIC_ASSOC:
+	case ISCSI_BOOT_TGT_FLAGS:
+		rc = 0444;
+		break;
+	default:
+		rc = 0;
+		break;
+	}
+	return rc;
+}
+
+static void qedi_boot_release(void *data)
+{
+	struct qedi_ctx *qedi = data;
+
+	scsi_host_put(qedi->shost);
+}
+
+static int qedi_get_boot_info(struct qedi_ctx *qedi)
+{
+	int ret = 1;
+	u16 len;
+
+	len = sizeof(struct nvm_iscsi_cfg);
+
+	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+		  "Get NVM iSCSI CFG image\n");
+	ret = qedi_ops->common->nvm_get_image(qedi->cdev,
+					      QED_NVM_IMAGE_ISCSI_CFG,
+					      (char *)qedi->iscsi_cfg, len);
+	if (ret)
+		QEDI_ERR(&qedi->dbg_ctx,
+			 "Could not get NVM image. ret = %d\n", ret);
+
+	return ret;
+}
+
+static int qedi_setup_boot_info(struct qedi_ctx *qedi)
+{
+	struct iscsi_boot_kobj *boot_kobj;
+
+	if (qedi_get_boot_info(qedi))
+		return -EPERM;
+
+	qedi->boot_kset = iscsi_boot_create_host_kset(qedi->shost->host_no);
+	if (!qedi->boot_kset)
+		goto kset_free;
+
+	if (!scsi_host_get(qedi->shost))
+		goto kset_free;
+
+	boot_kobj = iscsi_boot_create_target(qedi->boot_kset, 0, qedi,
+					     qedi_show_boot_tgt_pri_info,
+					     qedi_tgt_get_attr_visibility,
+					     qedi_boot_release);
+	if (!boot_kobj)
+		goto put_host;
+
+	if (!scsi_host_get(qedi->shost))
+		goto kset_free;
+
+	boot_kobj = iscsi_boot_create_target(qedi->boot_kset, 1, qedi,
+					     qedi_show_boot_tgt_sec_info,
+					     qedi_tgt_get_attr_visibility,
+					     qedi_boot_release);
+	if (!boot_kobj)
+		goto put_host;
+
+	if (!scsi_host_get(qedi->shost))
+		goto kset_free;
+
+	boot_kobj = iscsi_boot_create_initiator(qedi->boot_kset, 0, qedi,
+						qedi_show_boot_ini_info,
+						qedi_ini_get_attr_visibility,
+						qedi_boot_release);
+	if (!boot_kobj)
+		goto put_host;
+
+	if (!scsi_host_get(qedi->shost))
+		goto kset_free;
+
+	boot_kobj = iscsi_boot_create_ethernet(qedi->boot_kset, 0, qedi,
+					       qedi_show_boot_eth_info,
+					       qedi_eth_get_attr_visibility,
+					       qedi_boot_release);
+	if (!boot_kobj)
+		goto put_host;
+
+	return 0;
+
+put_host:
+	scsi_host_put(qedi->shost);
+kset_free:
+	iscsi_boot_destroy_kset(qedi->boot_kset);
+	return -ENOMEM;
+}
+
 static void __qedi_remove(struct pci_dev *pdev, int mode)
 {
 	struct qedi_ctx *qedi = pci_get_drvdata(pdev);
@@ -1724,6 +2136,9 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
 			qedi->ll2_recv_thread = NULL;
 		}
 		qedi_ll2_free_skbs(qedi);
+
+		if (qedi->boot_kset)
+			iscsi_boot_destroy_kset(qedi->boot_kset);
 	}
 }
 
@@ -1967,6 +2382,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 		/* F/w needs 1st task context memory entry for performance */
 		set_bit(QEDI_RESERVE_TASK_ID, qedi->task_idx_map);
 		atomic_set(&qedi->num_offloads, 0);
+
+		if (qedi_setup_boot_info(qedi))
+			QEDI_ERR(&qedi->dbg_ctx,
+				 "No iSCSI boot target configured\n");
 	}
 
 	return 0;
diff --git a/drivers/scsi/qedi/qedi_nvm_iscsi_cfg.h b/drivers/scsi/qedi/qedi_nvm_iscsi_cfg.h
new file mode 100644
index 0000000000000..df39b69b366d3
--- /dev/null
+++ b/drivers/scsi/qedi/qedi_nvm_iscsi_cfg.h
@@ -0,0 +1,210 @@
+/*
+ * QLogic iSCSI Offload Driver
+ * Copyright (c) 2016 Cavium Inc.
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef NVM_ISCSI_CFG_H
+#define NVM_ISCSI_CFG_H
+
+#define NUM_OF_ISCSI_TARGET_PER_PF    4   /* Defined as per the
+					   * ISCSI IBFT constraint
+					   */
+#define NUM_OF_ISCSI_PF_SUPPORTED     4   /* One PF per Port -
+					   * assuming 4 port card
+					   */
+
+#define NVM_ISCSI_CFG_DHCP_NAME_MAX_LEN  256
+
+union nvm_iscsi_dhcp_vendor_id {
+	u32 value[NVM_ISCSI_CFG_DHCP_NAME_MAX_LEN / 4];
+	u8  byte[NVM_ISCSI_CFG_DHCP_NAME_MAX_LEN];
+};
+
+#define NVM_ISCSI_IPV4_ADDR_BYTE_LEN 4
+union nvm_iscsi_ipv4_addr {
+	u32 addr;
+	u8  byte[NVM_ISCSI_IPV4_ADDR_BYTE_LEN];
+};
+
+#define NVM_ISCSI_IPV6_ADDR_BYTE_LEN 16
+union nvm_iscsi_ipv6_addr {
+	u32 addr[4];
+	u8  byte[NVM_ISCSI_IPV6_ADDR_BYTE_LEN];
+};
+
+struct nvm_iscsi_initiator_ipv4 {
+	union nvm_iscsi_ipv4_addr addr;				/* 0x0 */
+	union nvm_iscsi_ipv4_addr subnet_mask;			/* 0x4 */
+	union nvm_iscsi_ipv4_addr gateway;			/* 0x8 */
+	union nvm_iscsi_ipv4_addr primary_dns;			/* 0xC */
+	union nvm_iscsi_ipv4_addr secondary_dns;		/* 0x10 */
+	union nvm_iscsi_ipv4_addr dhcp_addr;			/* 0x14 */
+
+	union nvm_iscsi_ipv4_addr isns_server;			/* 0x18 */
+	union nvm_iscsi_ipv4_addr slp_server;			/* 0x1C */
+	union nvm_iscsi_ipv4_addr primay_radius_server;		/* 0x20 */
+	union nvm_iscsi_ipv4_addr secondary_radius_server;	/* 0x24 */
+
+	union nvm_iscsi_ipv4_addr rsvd[4];			/* 0x28 */
+};
+
+struct nvm_iscsi_initiator_ipv6 {
+	union nvm_iscsi_ipv6_addr addr;				/* 0x0 */
+	union nvm_iscsi_ipv6_addr subnet_mask;			/* 0x10 */
+	union nvm_iscsi_ipv6_addr gateway;			/* 0x20 */
+	union nvm_iscsi_ipv6_addr primary_dns;			/* 0x30 */
+	union nvm_iscsi_ipv6_addr secondary_dns;		/* 0x40 */
+	union nvm_iscsi_ipv6_addr dhcp_addr;			/* 0x50 */
+
+	union nvm_iscsi_ipv6_addr isns_server;			/* 0x60 */
+	union nvm_iscsi_ipv6_addr slp_server;			/* 0x70 */
+	union nvm_iscsi_ipv6_addr primay_radius_server;		/* 0x80 */
+	union nvm_iscsi_ipv6_addr secondary_radius_server;	/* 0x90 */
+
+	union nvm_iscsi_ipv6_addr rsvd[3];			/* 0xA0 */
+
+	u32   config;						/* 0xD0 */
+#define NVM_ISCSI_CFG_INITIATOR_IPV6_SUBNET_MASK_PREFIX_MASK      0x000000FF
+#define NVM_ISCSI_CFG_INITIATOR_IPV6_SUBNET_MASK_PREFIX_OFFSET    0
+
+	u32   rsvd_1[3];
+};
+
+#define NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN  256
+union nvm_iscsi_name {
+	u32 value[NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN / 4];
+	u8  byte[NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN];
+};
+
+#define NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN  256
+union nvm_iscsi_chap_name {
+	u32 value[NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN / 4];
+	u8  byte[NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN];
+};
+
+#define NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN  16 /* md5 need per RFC1996
+					    * is 16 octets
+					    */
+union nvm_iscsi_chap_password {
+	u32 value[NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN / 4];
+	u8 byte[NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN];
+};
+
+union nvm_iscsi_lun {
+	u8  byte[8];
+	u32 value[2];
+};
+
+struct nvm_iscsi_generic {
+	u32 ctrl_flags;						/* 0x0 */
+#define NVM_ISCSI_CFG_GEN_CHAP_ENABLED                 BIT(0)
+#define NVM_ISCSI_CFG_GEN_DHCP_TCPIP_CONFIG_ENABLED    BIT(1)
+#define NVM_ISCSI_CFG_GEN_DHCP_ISCSI_CONFIG_ENABLED    BIT(2)
+#define NVM_ISCSI_CFG_GEN_IPV6_ENABLED                 BIT(3)
+#define NVM_ISCSI_CFG_GEN_IPV4_FALLBACK_ENABLED        BIT(4)
+#define NVM_ISCSI_CFG_GEN_ISNS_WORLD_LOGIN             BIT(5)
+#define NVM_ISCSI_CFG_GEN_ISNS_SELECTIVE_LOGIN         BIT(6)
+#define NVM_ISCSI_CFG_GEN_ADDR_REDIRECT_ENABLED	       BIT(7)
+#define NVM_ISCSI_CFG_GEN_CHAP_MUTUAL_ENABLED          BIT(8)
+
+	u32 timeout;						/* 0x4 */
+#define NVM_ISCSI_CFG_GEN_DHCP_REQUEST_TIMEOUT_MASK       0x0000FFFF
+#define NVM_ISCSI_CFG_GEN_DHCP_REQUEST_TIMEOUT_OFFSET     0
+#define NVM_ISCSI_CFG_GEN_PORT_LOGIN_TIMEOUT_MASK         0xFFFF0000
+#define NVM_ISCSI_CFG_GEN_PORT_LOGIN_TIMEOUT_OFFSET       16
+
+	union nvm_iscsi_dhcp_vendor_id  dhcp_vendor_id;		/* 0x8  */
+	u32 rsvd[62];						/* 0x108 */
+};
+
+struct nvm_iscsi_initiator {
+	struct nvm_iscsi_initiator_ipv4 ipv4;			/* 0x0 */
+	struct nvm_iscsi_initiator_ipv6 ipv6;			/* 0x38 */
+
+	union nvm_iscsi_name           initiator_name;		/* 0x118 */
+	union nvm_iscsi_chap_name      chap_name;		/* 0x218 */
+	union nvm_iscsi_chap_password  chap_password;		/* 0x318 */
+
+	u32 generic_cont0;					/* 0x398 */
+#define NVM_ISCSI_CFG_INITIATOR_VLAN_MASK		0x0000FFFF
+#define NVM_ISCSI_CFG_INITIATOR_VLAN_OFFSET		0
+#define NVM_ISCSI_CFG_INITIATOR_IP_VERSION_MASK		0x00030000
+#define NVM_ISCSI_CFG_INITIATOR_IP_VERSION_OFFSET	16
+#define NVM_ISCSI_CFG_INITIATOR_IP_VERSION_4		1
+#define NVM_ISCSI_CFG_INITIATOR_IP_VERSION_6		2
+#define NVM_ISCSI_CFG_INITIATOR_IP_VERSION_4_AND_6	3
+
+	u32 ctrl_flags;
+#define NVM_ISCSI_CFG_INITIATOR_IP_VERSION_PRIORITY_V6     BIT(0)
+#define NVM_ISCSI_CFG_INITIATOR_VLAN_ENABLED               BIT(1)
+
+	u32 rsvd[116];						/* 0x32C */
+};
+
+struct nvm_iscsi_target {
+	u32 ctrl_flags;						/* 0x0 */
+#define NVM_ISCSI_CFG_TARGET_ENABLED            BIT(0)
+#define NVM_ISCSI_CFG_BOOT_TIME_LOGIN_STATUS    BIT(1)
+
+	u32 generic_cont0;					/* 0x4 */
+#define NVM_ISCSI_CFG_TARGET_TCP_PORT_MASK      0x0000FFFF
+#define NVM_ISCSI_CFG_TARGET_TCP_PORT_OFFSET    0
+
+	u32 ip_ver;
+#define NVM_ISCSI_CFG_IPv4       4
+#define NVM_ISCSI_CFG_IPv6       6
+
+	u32 rsvd_1[7];						/* 0x24 */
+	union nvm_iscsi_ipv4_addr ipv4_addr;			/* 0x28 */
+	union nvm_iscsi_ipv6_addr ipv6_addr;			/* 0x2C */
+	union nvm_iscsi_lun lun;				/* 0x3C */
+
+	union nvm_iscsi_name           target_name;		/* 0x44 */
+	union nvm_iscsi_chap_name      chap_name;		/* 0x144 */
+	union nvm_iscsi_chap_password  chap_password;		/* 0x244 */
+
+	u32 rsvd_2[107];					/* 0x2C4 */
+};
+
+struct nvm_iscsi_block {
+	u32 id;							/* 0x0 */
+#define NVM_ISCSI_CFG_BLK_MAPPED_PF_ID_MASK         0x0000000F
+#define NVM_ISCSI_CFG_BLK_MAPPED_PF_ID_OFFSET       0
+#define NVM_ISCSI_CFG_BLK_CTRL_FLAG_MASK            0x00000FF0
+#define NVM_ISCSI_CFG_BLK_CTRL_FLAG_OFFSET          4
+#define NVM_ISCSI_CFG_BLK_CTRL_FLAG_IS_NOT_EMPTY    BIT(0)
+#define NVM_ISCSI_CFG_BLK_CTRL_FLAG_PF_MAPPED       BIT(1)
+
+	u32 rsvd_1[5];						/* 0x4 */
+
+	struct nvm_iscsi_generic     generic;			/* 0x18 */
+	struct nvm_iscsi_initiator   initiator;			/* 0x218 */
+	struct nvm_iscsi_target      target[NUM_OF_ISCSI_TARGET_PER_PF];
+								/* 0x718 */
+
+	u32 rsvd_2[58];						/* 0x1718 */
+	/* total size - 0x1800 - 6K block */
+};
+
+struct nvm_iscsi_cfg {
+	u32 id;							/* 0x0 */
+#define NVM_ISCSI_CFG_BLK_VERSION_MINOR_MASK     0x000000FF
+#define NVM_ISCSI_CFG_BLK_VERSION_MAJOR_MASK     0x0000FF00
+#define NVM_ISCSI_CFG_BLK_SIGNATURE_MASK         0xFFFF0000
+#define NVM_ISCSI_CFG_BLK_SIGNATURE              0x49430000 /* IC - Iscsi
+							     * Config
+							     */
+
+#define NVM_ISCSI_CFG_BLK_VERSION_MAJOR          0
+#define NVM_ISCSI_CFG_BLK_VERSION_MINOR          10
+#define NVM_ISCSI_CFG_BLK_VERSION ((NVM_ISCSI_CFG_BLK_VERSION_MAJOR << 8) | \
+				   NVM_ISCSI_CFG_BLK_VERSION_MINOR)
+
+	struct nvm_iscsi_block	block[NUM_OF_ISCSI_PF_SUPPORTED]; /* 0x4 */
+};
+
+#endif
-- 
GitLab


From c3a73ed8a82b666ac01466d3badd7824eae89c44 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 12 Jul 2017 13:15:11 +0200
Subject: [PATCH 1284/1958] platform/x86: silead_dmi: Add entry for Ployer
 Momo7w tablet touchscreen

This Ployer Momo7w revision has the same hardware as the Trekstor
ST70416-6, so we re-use the surftab_wintron70_st70416_6_data.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/silead_dmi.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/platform/x86/silead_dmi.c b/drivers/platform/x86/silead_dmi.c
index f9e840aa5592d..1157a7b646d66 100644
--- a/drivers/platform/x86/silead_dmi.c
+++ b/drivers/platform/x86/silead_dmi.c
@@ -173,6 +173,16 @@ static const struct dmi_system_id silead_ts_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_VERSION, "TREK.G.WI71C.JGBMRBA04"),
 		},
 	},
+	{
+		/* Ployer Momo7w (same hardware as the Trekstor ST70416-6) */
+		.driver_data = (void *)&surftab_wintron70_st70416_6_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Shenzhen PLOYER"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MOMO7W"),
+			/* Exact match, different versions need different fw */
+			DMI_MATCH(DMI_BIOS_VERSION, "MOMO.G.WI71C.MABMRBA02"),
+		},
+	},
 	{
 		/* GP-electronic T701 */
 		.driver_data = (void *)&gp_electronic_t701_data,
-- 
GitLab


From eeac8cda2c957e156093933b860eec09e488fe15 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 30 Jun 2017 11:01:06 +0300
Subject: [PATCH 1285/1958] scsi: cxlflash: return -EFAULT if copy_from_user()
 fails

The copy_from/to_user() functions return the number of bytes remaining
to be copied but we had intended to return -EFAULT here.

Fixes: bc88ac47d5cb ("scsi: cxlflash: Support AFU debug")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/cxlflash/main.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 077f62e208aae..6a4367cc9caab 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3401,9 +3401,10 @@ static int cxlflash_afu_debug(struct cxlflash_cfg *cfg,
 		if (is_write) {
 			req_flags |= SISL_REQ_FLAGS_HOST_WRITE;
 
-			rc = copy_from_user(kbuf, ubuf, ulen);
-			if (unlikely(rc))
+			if (copy_from_user(kbuf, ubuf, ulen)) {
+				rc = -EFAULT;
 				goto out;
+			}
 		}
 	}
 
@@ -3431,8 +3432,10 @@ static int cxlflash_afu_debug(struct cxlflash_cfg *cfg,
 		goto out;
 	}
 
-	if (ulen && !is_write)
-		rc = copy_to_user(ubuf, kbuf, ulen);
+	if (ulen && !is_write) {
+		if (copy_to_user(ubuf, kbuf, ulen))
+			rc = -EFAULT;
+	}
 out:
 	kfree(buf);
 	dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
-- 
GitLab


From 0ce3fcaff92908c370334ce3b9111aeea71159d6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 12 Jul 2017 03:05:39 +0200
Subject: [PATCH 1286/1958] PCI / PM: Restore PME Enable after config space
 restoration

Commit dc15e71eefc7 (PCI / PM: Restore PME Enable if skipping wakeup
setup) introduced a mechanism by which the PME Enable bit can be
restored by pci_enable_wake() if dev->wakeup_prepared is set in
case it has been overwritten by PCI config space restoration.

However, that commit overlooked the fact that on some systems (Dell
XPS13 9360 in particular) the AML handling wakeup events checks PME
Status and PME Enable and it won't trigger a Notify() for devices
where those bits are not set while it is running.

That happens during resume from suspend-to-idle when pci_restore_state()
invoked by pci_pm_default_resume_early() clears PME Enable before the
wakeup events are processed by AML, effectively causing those wakeup
events to be ignored.

Fix this issue by restoring the PME Enable configuration right after
pci_restore_state() has been called instead of doing that in
pci_enable_wake().

Fixes: dc15e71eefc7 (PCI / PM: Restore PME Enable if skipping wakeup setup)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-driver.c |  2 ++
 drivers/pci/pci.c        | 16 ++++++++--------
 drivers/pci/pci.h        |  1 +
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index ffe7d54d93282..1a93b48b0dd97 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -506,6 +506,7 @@ static int pci_restore_standard_config(struct pci_dev *pci_dev)
 	}
 
 	pci_restore_state(pci_dev);
+	pci_pme_restore(pci_dev);
 	return 0;
 }
 
@@ -517,6 +518,7 @@ static void pci_pm_default_resume_early(struct pci_dev *pci_dev)
 {
 	pci_power_up(pci_dev);
 	pci_restore_state(pci_dev);
+	pci_pme_restore(pci_dev);
 	pci_fixup_device(pci_fixup_resume_early, pci_dev);
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0b5302a9fdaef..d1443a161b351 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1798,7 +1798,11 @@ static void __pci_pme_active(struct pci_dev *dev, bool enable)
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr);
 }
 
-static void pci_pme_restore(struct pci_dev *dev)
+/**
+ * pci_pme_restore - Restore PME configuration after config space restore.
+ * @dev: PCI device to update.
+ */
+void pci_pme_restore(struct pci_dev *dev)
 {
 	u16 pmcsr;
 
@@ -1808,6 +1812,7 @@ static void pci_pme_restore(struct pci_dev *dev)
 	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
 	if (dev->wakeup_prepared) {
 		pmcsr |= PCI_PM_CTRL_PME_ENABLE;
+		pmcsr &= ~PCI_PM_CTRL_PME_STATUS;
 	} else {
 		pmcsr &= ~PCI_PM_CTRL_PME_ENABLE;
 		pmcsr |= PCI_PM_CTRL_PME_STATUS;
@@ -1904,14 +1909,9 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
 {
 	int ret = 0;
 
-	/*
-	 * Don't do the same thing twice in a row for one device, but restore
-	 * PME Enable in case it has been updated by config space restoration.
-	 */
-	if (!!enable == !!dev->wakeup_prepared) {
-		pci_pme_restore(dev);
+	/* Don't do the same thing twice in a row for one device. */
+	if (!!enable == !!dev->wakeup_prepared)
 		return 0;
-	}
 
 	/*
 	 * According to "PCI System Architecture" 4th ed. by Tom Shanley & Don
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 240b2c0fed4b9..0a6e737f53250 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -71,6 +71,7 @@ void pci_power_up(struct pci_dev *dev);
 void pci_disable_enabled_device(struct pci_dev *dev);
 int pci_finish_runtime_suspend(struct pci_dev *dev);
 int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
+void pci_pme_restore(struct pci_dev *dev);
 bool pci_dev_keep_suspended(struct pci_dev *dev);
 void pci_dev_complete_resume(struct pci_dev *pci_dev);
 void pci_config_pm_runtime_get(struct pci_dev *dev);
-- 
GitLab


From f5f44c6ffe0b39ee37e1d2963ee0d48cbc1ec038 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 3 Jul 2017 11:17:27 +0100
Subject: [PATCH 1287/1958] scsi: isci: fix typo in function names

There are a couple of typos in function names and spelling of request
where the letters u and e are swapped:

scu_ssp_reqeust_construct_task_context
scu_sata_reqeust_construct_task_context

Fix the spelling of request.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/isci/request.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index 47f66e9497451..ed197bc8e801a 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -213,7 +213,7 @@ static void sci_task_request_build_ssp_task_iu(struct isci_request *ireq)
  * @task_context:
  *
  */
-static void scu_ssp_reqeust_construct_task_context(
+static void scu_ssp_request_construct_task_context(
 	struct isci_request *ireq,
 	struct scu_task_context *task_context)
 {
@@ -425,7 +425,7 @@ static void scu_ssp_io_request_construct_task_context(struct isci_request *ireq,
 	u8 prot_type = scsi_get_prot_type(scmd);
 	u8 prot_op = scsi_get_prot_op(scmd);
 
-	scu_ssp_reqeust_construct_task_context(ireq, task_context);
+	scu_ssp_request_construct_task_context(ireq, task_context);
 
 	task_context->ssp_command_iu_length =
 		sizeof(struct ssp_cmd_iu) / sizeof(u32);
@@ -472,7 +472,7 @@ static void scu_ssp_task_request_construct_task_context(struct isci_request *ire
 {
 	struct scu_task_context *task_context = ireq->tc;
 
-	scu_ssp_reqeust_construct_task_context(ireq, task_context);
+	scu_ssp_request_construct_task_context(ireq, task_context);
 
 	task_context->control_frame                = 1;
 	task_context->priority                     = SCU_TASK_PRIORITY_HIGH;
@@ -495,7 +495,7 @@ static void scu_ssp_task_request_construct_task_context(struct isci_request *ire
  * the command buffer is complete. none Revisit task context construction to
  * determine what is common for SSP/SMP/STP task context structures.
  */
-static void scu_sata_reqeust_construct_task_context(
+static void scu_sata_request_construct_task_context(
 	struct isci_request *ireq,
 	struct scu_task_context *task_context)
 {
@@ -562,7 +562,7 @@ static void scu_stp_raw_request_construct_task_context(struct isci_request *ireq
 {
 	struct scu_task_context *task_context = ireq->tc;
 
-	scu_sata_reqeust_construct_task_context(ireq, task_context);
+	scu_sata_request_construct_task_context(ireq, task_context);
 
 	task_context->control_frame         = 0;
 	task_context->priority              = SCU_TASK_PRIORITY_NORMAL;
@@ -613,7 +613,7 @@ static void sci_stp_optimized_request_construct(struct isci_request *ireq,
 	struct scu_task_context *task_context = ireq->tc;
 
 	/* Build the STP task context structure */
-	scu_sata_reqeust_construct_task_context(ireq, task_context);
+	scu_sata_request_construct_task_context(ireq, task_context);
 
 	/* Copy over the SGL elements */
 	sci_request_build_sgl(ireq);
@@ -1401,7 +1401,7 @@ static enum sci_status sci_stp_request_pio_data_out_transmit_data(struct isci_re
  * @data_buffer: The buffer of data to be copied.
  * @length: The length of the data transfer.
  *
- * Copy the data from the buffer for the length specified to the IO reqeust SGL
+ * Copy the data from the buffer for the length specified to the IO request SGL
  * specified data region. enum sci_status
  */
 static enum sci_status
-- 
GitLab


From 4feca6a52b9812f53bc5c20829fd4e37e710db34 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 3 Jul 2017 11:24:02 +0100
Subject: [PATCH 1288/1958] scsi: qedi: fix another spelling mistake:
 "alloction" -> "allocation"

Trivial fix to spelling mistake in QEDF_ERR message. I should have also
included this in a previous fix, but I only just spotted this one.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedi/qedi_fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 19254bd739d9c..93d54acd4a22f 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -1411,7 +1411,7 @@ static void qedi_tmf_work(struct work_struct *work)
 
 	list_work = kzalloc(sizeof(*list_work), GFP_ATOMIC);
 	if (!list_work) {
-		QEDI_ERR(&qedi->dbg_ctx, "Memory alloction failed\n");
+		QEDI_ERR(&qedi->dbg_ctx, "Memory allocation failed\n");
 		goto abort_ret;
 	}
 
-- 
GitLab


From 40bf6a35483ee25271ce2a90d8976cf1409a033a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Wed, 12 Jul 2017 10:23:13 +0200
Subject: [PATCH 1289/1958] rtc: Remove wrong deprecation comment

rtc_time_to_tm and rtc_tm_to_time are not deprecated and make perfect sense
for RTCs that are simple 32bit counters.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/rtc.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index d53ecdc060cfd..0a0f0d14a5fba 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -33,17 +33,11 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs)
 	return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs);
 }
 
-/**
- * Deprecated. Use rtc_time64_to_tm().
- */
 static inline void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
 {
 	rtc_time64_to_tm(time, tm);
 }
 
-/**
- * Deprecated. Use rtc_tm_to_time64().
- */
 static inline int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
 {
 	*time = rtc_tm_to_time64(tm);
-- 
GitLab


From acef2690d16f38a0687552b391674201ad7c5783 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 3 Jul 2017 20:21:39 +0100
Subject: [PATCH 1290/1958] scsi: qedf: fix spelling mistake: "offlading" ->
 "offloading"

Trivial fix to spelling mistake in QEDF_INFO message and remove
duplicated "since" (thanks to Tyrel Datwyler for spotting the latter
issue).

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Reviewed-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index b58bba4604e8c..7786c97e033fd 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -1227,7 +1227,7 @@ static void qedf_rport_event_handler(struct fc_lport *lport,
 
 		if (rdata->spp_type != FC_TYPE_FCP) {
 			QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
-			    "Not offlading since since spp type isn't FCP\n");
+			    "Not offloading since spp type isn't FCP\n");
 			break;
 		}
 		if (!(rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET)) {
-- 
GitLab


From a680f1d463aeaeb00d22af257a56e111967c2f18 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 5 Jul 2017 10:30:56 +0200
Subject: [PATCH 1291/1958] scsi: virtio_scsi: always read VPD pages for
 multiqueue too

Multi-queue virtio-scsi uses a different scsi_host_template struct.  Add
the .device_alloc field there, too.

Fixes: 25d1d50e23275e141e3a3fe06c25a99f4c4bf4e0
Cc: stable@vger.kernel.org
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/virtio_scsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 8b93197daefe3..9be211d68b159 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -837,6 +837,7 @@ static struct scsi_host_template virtscsi_host_template_multi = {
 	.eh_abort_handler = virtscsi_abort,
 	.eh_device_reset_handler = virtscsi_device_reset,
 	.eh_timed_out = virtscsi_eh_timed_out,
+	.slave_alloc = virtscsi_device_alloc,
 
 	.can_queue = 1024,
 	.dma_boundary = UINT_MAX,
-- 
GitLab


From 68c59fcea1f2c6a54c62aa896cc623c1b5bc9b47 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 7 Jul 2017 10:56:38 +0200
Subject: [PATCH 1292/1958] scsi: sg: fix SG_DXFER_FROM_DEV transfers

SG_DXFER_FROM_DEV transfers do not necessarily have a dxferp as we set
it to NULL for the old sg_io read/write interface, but must have a
length bigger than 0. This fixes a regression introduced by commit
28676d869bbb ("scsi: sg: check for valid direction before starting the
request")

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Fixes: 28676d869bbb ("scsi: sg: check for valid direction before starting the request")
Reported-by: Chris Clayton <chris2553@googlemail.com>
Tested-by: Chris Clayton <chris2553@googlemail.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Tested-by: Chris Clayton <chris2553@googlemail.com>
Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sg.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 21225d62b0c1f..1e82d4128a848 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -758,8 +758,11 @@ static bool sg_is_valid_dxfer(sg_io_hdr_t *hp)
 		if (hp->dxferp || hp->dxfer_len > 0)
 			return false;
 		return true;
-	case SG_DXFER_TO_DEV:
 	case SG_DXFER_FROM_DEV:
+		if (hp->dxfer_len < 0)
+			return false;
+		return true;
+	case SG_DXFER_TO_DEV:
 	case SG_DXFER_TO_FROM_DEV:
 		if (!hp->dxferp || hp->dxfer_len == 0)
 			return false;
-- 
GitLab


From fb2028a0b24e0823bbc0a28e6f0f1bd9fbbf733c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 10 Jul 2017 11:47:40 +0300
Subject: [PATCH 1293/1958] scsi: qla2xxx: Off by one in qlt_ctio_to_cmd()

There are "req->num_outstanding_cmds" elements in the
req->outstanding_cmds[] array so the > here should be >=.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_target.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 2a0173e5d10e1..986a4270b2ffa 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -3747,7 +3747,7 @@ static struct qla_tgt_cmd *qlt_ctio_to_cmd(struct scsi_qla_host *vha,
 	h &= QLA_CMD_HANDLE_MASK;
 
 	if (h != QLA_TGT_NULL_HANDLE) {
-		if (unlikely(h > req->num_outstanding_cmds)) {
+		if (unlikely(h >= req->num_outstanding_cmds)) {
 			ql_dbg(ql_dbg_tgt, vha, 0xe052,
 			    "qla_target(%d): Wrong handle %x received\n",
 			    vha->vp_idx, handle);
-- 
GitLab


From 89b203e9d07d4079367120a6fe271ad9e9b751c0 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 11 Jul 2017 13:11:44 +0100
Subject: [PATCH 1294/1958] scsi: hisi_sas: make several const arrays static

Don't populate various tables on the stack but make them static const.
Makes the object code smaller by over 280 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
  39887	   5080	     64	  45031	   afe7	hisi_sas_v2_hw.o

After:
   text	   data	    bss	    dec	    hex	filename
  39318	   5368	     64	  44750	   aece	hisi_sas_v2_hw.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index 551d103c27f13..2bfea7082e3a0 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -1693,7 +1693,7 @@ static int prep_ssp_v2_hw(struct hisi_hba *hisi_hba,
 
 static int parse_trans_tx_err_code_v2_hw(u32 err_msk)
 {
-	const u8 trans_tx_err_code_prio[] = {
+	static const u8 trans_tx_err_code_prio[] = {
 		TRANS_TX_OPEN_FAIL_WITH_IT_NEXUS_LOSS,
 		TRANS_TX_ERR_PHY_NOT_ENABLE,
 		TRANS_TX_OPEN_CNX_ERR_WRONG_DESTINATION,
@@ -1738,7 +1738,7 @@ static int parse_trans_tx_err_code_v2_hw(u32 err_msk)
 
 static int parse_trans_rx_err_code_v2_hw(u32 err_msk)
 {
-	const u8 trans_rx_err_code_prio[] = {
+	static const u8 trans_rx_err_code_prio[] = {
 		TRANS_RX_ERR_WITH_RXFRAME_CRC_ERR,
 		TRANS_RX_ERR_WITH_RXFIS_8B10B_DISP_ERR,
 		TRANS_RX_ERR_WITH_RXFRAME_HAVE_ERRPRM,
@@ -1784,7 +1784,7 @@ static int parse_trans_rx_err_code_v2_hw(u32 err_msk)
 
 static int parse_dma_tx_err_code_v2_hw(u32 err_msk)
 {
-	const u8 dma_tx_err_code_prio[] = {
+	static const u8 dma_tx_err_code_prio[] = {
 		DMA_TX_UNEXP_XFER_ERR,
 		DMA_TX_UNEXP_RETRANS_ERR,
 		DMA_TX_XFER_LEN_OVERFLOW,
@@ -1810,7 +1810,7 @@ static int parse_dma_tx_err_code_v2_hw(u32 err_msk)
 
 static int parse_sipc_rx_err_code_v2_hw(u32 err_msk)
 {
-	const u8 sipc_rx_err_code_prio[] = {
+	static const u8 sipc_rx_err_code_prio[] = {
 		SIPC_RX_FIS_STATUS_ERR_BIT_VLD,
 		SIPC_RX_PIO_WRSETUP_STATUS_DRQ_ERR,
 		SIPC_RX_FIS_STATUS_BSY_BIT_ERR,
@@ -1836,7 +1836,7 @@ static int parse_sipc_rx_err_code_v2_hw(u32 err_msk)
 
 static int parse_dma_rx_err_code_v2_hw(u32 err_msk)
 {
-	const u8 dma_rx_err_code_prio[] = {
+	static const u8 dma_rx_err_code_prio[] = {
 		DMA_RX_UNKNOWN_FRM_ERR,
 		DMA_RX_DATA_LEN_OVERFLOW,
 		DMA_RX_DATA_LEN_UNDERFLOW,
-- 
GitLab


From 6f37e2102778d3437a416684680cc5ce295f2042 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 Jul 2017 10:30:22 +0300
Subject: [PATCH 1295/1958] scsi: libfc: pass an error pointer to
 fc_disc_error()

This patch is basically to silence a static checker warning.

    drivers/scsi/libfc/fc_disc.c:326 fc_disc_error()
    warn: passing a valid pointer to 'PTR_ERR'

It doesn't affect runtime because it treats -ENOMEM and a valid pointer
the same.  But the documentation says we should be passing an error
pointer.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Johannes Thumshirn <jth@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libfc/fc_disc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index fd501f8dbb110..8660f923ace02 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -573,7 +573,7 @@ static void fc_disc_gpn_ft_resp(struct fc_seq *sp, struct fc_frame *fp,
 		event = DISC_EV_FAILED;
 	}
 	if (error)
-		fc_disc_error(disc, fp);
+		fc_disc_error(disc, ERR_PTR(error));
 	else if (event != DISC_EV_NONE)
 		fc_disc_done(disc, event);
 	fc_frame_free(fp);
-- 
GitLab


From 57fe14790b88122fb34b68308b9aa9a643457ea7 Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Wed, 12 Jul 2017 23:58:56 +0300
Subject: [PATCH 1296/1958] smsc911x: Add check for ioremap_nocache() return
 code

There is no check for return code of smsc911x_drv_probe()
in smsc911x_drv_probe(). The patch adds one.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smsc911x.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index ea1bbc355b4df..0b6a39b003a4e 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2467,6 +2467,10 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
 	pdata = netdev_priv(dev);
 	dev->irq = irq;
 	pdata->ioaddr = ioremap_nocache(res->start, res_size);
+	if (!pdata->ioaddr) {
+		retval = -ENOMEM;
+		goto out_ioremap_fail;
+	}
 
 	pdata->dev = dev;
 	pdata->msg_enable = ((1 << debug) - 1);
@@ -2572,6 +2576,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
 	smsc911x_free_resources(pdev);
 out_request_resources_fail:
 	iounmap(pdata->ioaddr);
+out_ioremap_fail:
 	free_netdev(dev);
 out_release_io_1:
 	release_mem_region(res->start, resource_size(res));
-- 
GitLab


From 771edcafa753aad304babd3bab8e413574d5db3b Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Wed, 12 Jul 2017 09:29:06 -0700
Subject: [PATCH 1297/1958] socket: add documentation for missing elements

Fill in missing kernel-doc for missing elements in struct sock.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index 8c85791fc196c..f69c8c2782dfb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -246,6 +246,7 @@ struct sock_common {
   *	@sk_policy: flow policy
   *	@sk_receive_queue: incoming packets
   *	@sk_wmem_alloc: transmit queue bytes committed
+  *	@sk_tsq_flags: TCP Small Queues flags
   *	@sk_write_queue: Packet sending queue
   *	@sk_omem_alloc: "o" is "option" or "other"
   *	@sk_wmem_queued: persistent queue size
@@ -257,6 +258,7 @@ struct sock_common {
   *	@sk_pacing_status: Pacing status (requested, handled by sch_fq)
   *	@sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
   *	@sk_sndbuf: size of send buffer in bytes
+  *	@__sk_flags_offset: empty field used to determine location of bitfield
   *	@sk_padding: unused element for alignment
   *	@sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
   *	@sk_no_check_rx: allow zero checksum in RX packets
@@ -277,6 +279,7 @@ struct sock_common {
   *	@sk_drops: raw/udp drops counter
   *	@sk_ack_backlog: current listen backlog
   *	@sk_max_ack_backlog: listen backlog set in listen()
+  *	@sk_uid: user id of owner
   *	@sk_priority: %SO_PRIORITY setting
   *	@sk_type: socket type (%SOCK_STREAM, etc)
   *	@sk_protocol: which protocol this socket belongs in this network family
-- 
GitLab


From d3f6cd9e6018db6c468238d902561c92d92cdffd Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Wed, 12 Jul 2017 09:29:07 -0700
Subject: [PATCH 1298/1958] datagram: fix kernel-doc comments

An underscore in the kernel-doc comment section has special meaning
and mis-use generates an errors.

./net/core/datagram.c:207: ERROR: Unknown target name: "msg".
./net/core/datagram.c:379: ERROR: Unknown target name: "msg".
./net/core/datagram.c:816: ERROR: Unknown target name: "t".

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 6877c43cc92d2..ee5647bd91b3f 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -203,7 +203,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 /**
  *	__skb_try_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
- *	@flags: MSG_ flags
+ *	@flags: MSG\_ flags
  *	@destructor: invoked under the receive lock on successful dequeue
  *	@peeked: returns non-zero if this packet has been seen before
  *	@off: an offset in bytes to peek skb from. Returns an offset
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
  *	skb_kill_datagram - Free a datagram skbuff forcibly
  *	@sk: socket
  *	@skb: datagram skbuff
- *	@flags: MSG_ flags
+ *	@flags: MSG\_ flags
  *
  *	This function frees a datagram skbuff that was received by
  *	skb_recv_datagram.  The flags argument must match the one
@@ -809,7 +809,7 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
  *	sequenced packet sockets providing the socket receive queue
  *	is only ever holding data ready to receive.
  *
- *	Note: when you _don't_ use this routine for this protocol,
+ *	Note: when you *don't* use this routine for this protocol,
  *	and you use a different write policy from sock_writeable()
  *	then please supply your own write_space callback.
  */
-- 
GitLab


From c70d68150f71b84cea6997a53493e17bf18a54db Mon Sep 17 00:00:00 2001
From: Bert Kenward <bkenward@solarflare.com>
Date: Wed, 12 Jul 2017 17:19:41 +0100
Subject: [PATCH 1299/1958] sfc: don't read beyond unicast address list

If we have more than 32 unicast MAC addresses assigned to an interface
we will read beyond the end of the address table in the driver when
adding filters. The next 256 entries store multicast addresses, so we
will end up attempting to insert duplicate filters, which is mostly
harmless. If we add more than 288 unicast addresses we will then read
past the multicast address table, which is likely to be more exciting.

Fixes: 12fb0da45c9a ("sfc: clean fallbacks between promisc/normal in efx_ef10_filter_sync_rx_mode")
Signed-off-by: Bert Kenward <bkenward@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 761c518b2f92e..13f72f5b18d20 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -5034,12 +5034,9 @@ static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx)
 	struct efx_ef10_filter_table *table = efx->filter_state;
 	struct net_device *net_dev = efx->net_dev;
 	struct netdev_hw_addr *uc;
-	int addr_count;
 	unsigned int i;
 
-	addr_count = netdev_uc_count(net_dev);
 	table->uc_promisc = !!(net_dev->flags & IFF_PROMISC);
-	table->dev_uc_count = 1 + addr_count;
 	ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr);
 	i = 1;
 	netdev_for_each_uc_addr(uc, net_dev) {
@@ -5050,6 +5047,8 @@ static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx)
 		ether_addr_copy(table->dev_uc_list[i].addr, uc->addr);
 		i++;
 	}
+
+	table->dev_uc_count = i;
 }
 
 static void efx_ef10_filter_mc_addr_list(struct efx_nic *efx)
@@ -5057,12 +5056,11 @@ static void efx_ef10_filter_mc_addr_list(struct efx_nic *efx)
 	struct efx_ef10_filter_table *table = efx->filter_state;
 	struct net_device *net_dev = efx->net_dev;
 	struct netdev_hw_addr *mc;
-	unsigned int i, addr_count;
+	unsigned int i;
 
 	table->mc_overflow = false;
 	table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI));
 
-	addr_count = netdev_mc_count(net_dev);
 	i = 0;
 	netdev_for_each_mc_addr(mc, net_dev) {
 		if (i >= EFX_EF10_FILTER_DEV_MC_MAX) {
-- 
GitLab


From d93b07f8a689cde962d4f97668a74ab76f55734d Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Wed, 12 Jul 2017 09:32:34 +0200
Subject: [PATCH 1300/1958] net: stmmac: revert "support future possible
 different internal phy mode"

Since internal phy-mode is reserved for non-xMII protocol we cannot use
it with dwmac-sun8i.
Furthermore, all DT patchs which comes with this patch were cleaned, so
the current state is broken.
This reverts commit 1c2fa5f84683 ("net: stmmac: support future possible different internal phy mode")

Fixes: 1c2fa5f84683 ("net: stmmac: support future possible different internal phy mode")
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 6c2d1da055888..fffd6d5fc907b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -638,7 +638,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 {
 	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
 	struct device_node *node = priv->device->of_node;
-	int ret, phy_interface;
+	int ret;
 	u32 reg, val;
 
 	regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val);
@@ -718,11 +718,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 	if (gmac->variant->support_rmii)
 		reg &= ~SYSCON_RMII_EN;
 
-	phy_interface = priv->plat->interface;
-	/* if PHY is internal, select the mode (xMII) used by the SoC */
-	if (gmac->use_internal_phy)
-		phy_interface = gmac->variant->internal_phy;
-	switch (phy_interface) {
+	switch (priv->plat->interface) {
 	case PHY_INTERFACE_MODE_MII:
 		/* default */
 		break;
@@ -936,7 +932,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	}
 
 	plat_dat->interface = of_get_phy_mode(dev->of_node);
-	if (plat_dat->interface == PHY_INTERFACE_MODE_INTERNAL) {
+	if (plat_dat->interface == gmac->variant->internal_phy) {
 		dev_info(&pdev->dev, "Will use internal PHY\n");
 		gmac->use_internal_phy = true;
 		gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);
-- 
GitLab


From 7d2b39ab9b0d67c3e6c66e3b68644522d4e15392 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Wed, 12 Jul 2017 16:39:31 -0600
Subject: [PATCH 1301/1958] docs: Include uaccess docs from the right file

Documentation/core-api/kernel-api.rst was including kerneldoc comments from
arch/x86/include/asm/uaccess_32.h, but the relevant comments moved to
.../uaccess.h some time ago.  Correct the include to pick up the comments
and eliminate a warning.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/kernel-api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 9ec8488319dca..17b00914c6aba 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -114,7 +114,7 @@ The Slab Cache
 User Space Memory Access
 ------------------------
 
-.. kernel-doc:: arch/x86/include/asm/uaccess_32.h
+.. kernel-doc:: arch/x86/include/asm/uaccess.h
    :internal:
 
 .. kernel-doc:: arch/x86/lib/usercopy_32.c
-- 
GitLab


From 6d16e9143e9ef7e4c3f497f1111dfd48eaf2bab6 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Wed, 12 Jul 2017 16:44:38 -0600
Subject: [PATCH 1302/1958] docs: Turn off section numbering for the input docs

The input docs enable section numbering at multiple levels, leading to a
lot of bright-red "nested numbered toctree" warnings in newer Sphinx
versions.  Just take that directive out for now to help alleviate the
global red-pixel shortage.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/input/index.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/input/index.rst b/Documentation/input/index.rst
index 7a3e71c2bd008..9888f5cbf6d5b 100644
--- a/Documentation/input/index.rst
+++ b/Documentation/input/index.rst
@@ -6,7 +6,6 @@ Contents:
 
 .. toctree::
    :maxdepth: 2
-   :numbered:
 
    input_uapi
    input_kapi
-- 
GitLab


From 3afadfd902a7477db0b121a3fcbd964d3606c29f Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj38.park@gmail.com>
Date: Fri, 7 Jul 2017 03:21:17 +0900
Subject: [PATCH 1303/1958] memory-barriers.txt: Fix broken link to
 atomic_ops.txt

Few obsolete links to atomic_ops.txt exist in memory-barriers.txt though
the file has moved to core-api/atomic_ops.rst.  This commit fixes the
obsolete links.

Signed-off-by: SeongJae Park <sj38.park@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/memory-barriers.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f1c9eaa45a577..3ed0d8bf2412e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1876,8 +1876,8 @@ There are some more advanced barrier functions:
      This makes sure that the death mark on the object is perceived to be set
      *before* the reference counter is decremented.
 
-     See Documentation/atomic_ops.txt for more information.  See the "Atomic
-     operations" subsection for information on where to use these.
+     See Documentation/core-api/atomic_ops.rst for more information.  See the
+     "Atomic operations" subsection for information on where to use these.
 
 
  (*) lockless_dereference();
@@ -2584,7 +2584,7 @@ situations because on some CPUs the atomic instructions used imply full memory
 barriers, and so barrier instructions are superfluous in conjunction with them,
 and in such cases the special barrier primitives will be no-ops.
 
-See Documentation/atomic_ops.txt for more information.
+See Documentation/core-api/atomic_ops.rst for more information.
 
 
 ACCESSING DEVICES
-- 
GitLab


From 51e988f4092428e3d2c9f141fba9f86583bc82f3 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj38.park@gmail.com>
Date: Fri, 7 Jul 2017 03:21:18 +0900
Subject: [PATCH 1304/1958] kokr/memory-barriers.txt: Fix obsolete link to
 atomic_ops.txt

Obsolete links to atomic_ops.txt exist in ko_KR/memory-barriers.txt
though the file has moved to core-api/atomic_ops.rst.  This commit fixes
the obsolete links.

Signed-off-by: SeongJae Park <sj38.park@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/ko_KR/memory-barriers.txt         | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index c6f4ead76ce7a..38310dcd66203 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -523,11 +523,11 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니
      즉, ACQUIRE 는 최소한의 "취득" 동작처럼, 그리고 RELEASE 는 최소한의 "공개"
      처럼 동작한다는 의미입니다.
 
-atomic_ops.txt 에서 설명되는 어토믹 오퍼레이션들 중에는 완전히 순서잡힌 것들과
-(배리어를 사용하지 않는) 완화된 순서의 것들 외에 ACQUIRE 와 RELEASE 부류의
-것들도 존재합니다.  로드와 스토어를 모두 수행하는 조합된 어토믹 오퍼레이션에서,
-ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 는 해당
-오퍼레이션의 스토어 부분에만 적용됩니다.
+core-api/atomic_ops.rst 에서 설명되는 어토믹 오퍼레이션들 중에는 완전히
+순서잡힌 것들과 (배리어를 사용하지 않는) 완화된 순서의 것들 외에 ACQUIRE 와
+RELEASE 부류의 것들도 존재합니다.  로드와 스토어를 모두 수행하는 조합된 어토믹
+오퍼레이션에서, ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 는
+해당 오퍼레이션의 스토어 부분에만 적용됩니다.
 
 메모리 배리어들은 두 CPU 간, 또는 CPU 와 디바이스 간에 상호작용의 가능성이 있을
 때에만 필요합니다.  만약 어떤 코드에 그런 상호작용이 없을 것이 보장된다면, 해당
@@ -1848,7 +1848,7 @@ Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효
      이 코드는 객체의 업데이트된 death 마크가 레퍼런스 카운터 감소 동작
      *전에* 보일 것을 보장합니다.
 
-     더 많은 정보를 위해선 Documentation/atomic_ops.txt 문서를 참고하세요.
+     더 많은 정보를 위해선 Documentation/core-api/atomic_ops.rst 문서를 참고하세요.
      어디서 이것들을 사용해야 할지 궁금하다면 "어토믹 오퍼레이션" 서브섹션을
      참고하세요.
 
@@ -2550,7 +2550,7 @@ CPU 에서는 사용되는 어토믹 인스트럭션 자체에 메모리 배리
 있는데, 그런 경우에 이 특수 메모리 배리어 도구들은 no-op 이 되어 실질적으로
 아무일도 하지 않습니다.
 
-더 많은 내용을 위해선 Documentation/atomic_ops.txt 를 참고하세요.
+더 많은 내용을 위해선 Documentation/core-api/atomic_ops.rst 를 참고하세요.
 
 
 디바이스 액세스
-- 
GitLab


From 0a2c13d9cd76c84f2520f573ff83f777eb7464aa Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 12 Jul 2017 14:33:01 -0700
Subject: [PATCH 1305/1958] include/linux/dcache.h: use unsigned chars in
 struct name_snapshot

"kernel.h: handle pointers to arrays better in container_of()" triggers:

In file included from include/uapi/linux/stddef.h:1:0,
                 from include/linux/stddef.h:4,
                 from include/uapi/linux/posix_types.h:4,
                 from include/uapi/linux/types.h:13,
                 from include/linux/types.h:5,
                 from include/linux/syscalls.h:71,
                 from fs/dcache.c:17:
fs/dcache.c: In function 'release_dentry_name_snapshot':
include/linux/compiler.h:542:38: error: call to '__compiletime_assert_305' declared with attribute error: pointer type mismatch in container_of()
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
                                      ^
include/linux/compiler.h:525:4: note: in definition of macro '__compiletime_assert'
    prefix ## suffix();    \
    ^
include/linux/compiler.h:542:2: note: in expansion of macro '_compiletime_assert'
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
  ^
include/linux/build_bug.h:46:37: note: in expansion of macro 'compiletime_assert'
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^
include/linux/kernel.h:860:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
  BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
  ^
fs/dcache.c:305:7: note: in expansion of macro 'container_of'
   p = container_of(name->name, struct external_name, name[0]);

Switch name_snapshot to use unsigned chars, matching struct qstr and
struct external_name.

Link: http://lkml.kernel.org/r/20170710152134.0f78c1e6@canb.auug.org.au
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 025727bf67974..c706eaac692e8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -592,8 +592,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 }
 
 struct name_snapshot {
-	const char *name;
-	char inline_name[DNAME_INLINE_LEN];
+	const unsigned char *name;
+	unsigned char inline_name[DNAME_INLINE_LEN];
 };
 void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
 void release_dentry_name_snapshot(struct name_snapshot *);
-- 
GitLab


From c7acec713d14c6ce8a20154f9dfda258d6bcad3b Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Wed, 12 Jul 2017 14:33:04 -0700
Subject: [PATCH 1306/1958] kernel.h: handle pointers to arrays better in
 container_of()

If the first parameter of container_of() is a pointer to a
non-const-qualified array type (and the third parameter names a
non-const-qualified array member), the local variable __mptr will be
defined with a const-qualified array type.  In ISO C, these types are
incompatible.  They work as expected in GNU C, but some versions will
issue warnings.  For example, GCC 4.9 produces the warning
"initialization from incompatible pointer type".

Here is an example of where the problem occurs:

-------------------------------------------------------
   #include <linux/kernel.h>
   #include <linux/module.h>

  MODULE_LICENSE("GPL");

  struct st {
  	int a;
  	char b[16];
  };

  static int __init example_init(void) {
  	struct st t = { .a = 101, .b = "hello" };
  	char (*p)[16] = &t.b;
  	struct st *x = container_of(p, struct st, b);
  	printk(KERN_DEBUG "%p %p\n", (void *)&t, (void *)x);
  	return 0;
  }

  static void __exit example_exit(void) {
  }

  module_init(example_init);
  module_exit(example_exit);
-------------------------------------------------------

Building the module with gcc-4.9 results in these warnings (where '{m}'
is the module source and '{k}' is the kernel source):

-------------------------------------------------------
  In file included from {m}/example.c:1:0:
  {m}/example.c: In function `example_init':
  {k}/include/linux/kernel.h:854:48: warning: initialization from incompatible pointer type
    const typeof( ((type *)0)->member ) *__mptr = (ptr); \
                                                  ^
  {m}/example.c:14:17: note: in expansion of macro `container_of'
    struct st *x = container_of(p, struct st, b);
                   ^
  {k}/include/linux/kernel.h:854:48: warning: (near initialization for `x')
    const typeof( ((type *)0)->member ) *__mptr = (ptr); \
                                                  ^
  {m}/example.c:14:17: note: in expansion of macro `container_of'
    struct st *x = container_of(p, struct st, b);
                   ^
-------------------------------------------------------

Replace the type checking performed by the macro to avoid these
warnings.  Make sure `*(ptr)` either has type compatible with the
member, or has type compatible with `void`, ignoring qualifiers.  Raise
compiler errors if this is not true.  This is stronger than the previous
behaviour, which only resulted in compiler warnings for a type mismatch.

[arnd@arndb.de: fix new warnings for container_of()]
  Link: http://lkml.kernel.org/r/20170620200940.90557-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170525120316.24473-7-abbotti@mev.co.uk
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 1c91f26e2996d..bd6d96cf80b17 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -11,6 +11,7 @@
 #include <linux/log2.h>
 #include <linux/typecheck.h>
 #include <linux/printk.h>
+#include <linux/build_bug.h>
 #include <asm/byteorder.h>
 #include <uapi/linux/kernel.h>
 
@@ -854,9 +855,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @member:	the name of the member within the struct.
  *
  */
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
+#define container_of(ptr, type, member) ({				\
+	void *__mptr = (void *)(ptr);					\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type *)(__mptr - offsetof(type, member))); })
 
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
-- 
GitLab


From 91a90140f9987101d730b7dad8c6406321285da8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 12 Jul 2017 14:33:08 -0700
Subject: [PATCH 1307/1958] mm/memory.c: mark create_huge_pmd() inline to
 prevent build failure

With gcc 4.1.2:

    mm/memory.o: In function `create_huge_pmd':
    memory.c:(.text+0x93e): undefined reference to `do_huge_pmd_anonymous_page'

Interestingly, create_huge_pmd() is emitted in the assembler output, but
never called.

Converting transparent_hugepage_enabled() from a macro to a static
inline function reduced the ability of the compiler to remove unused
code.

Fix this by marking create_huge_pmd() inline.

Fixes: 16981d763501c0e0 ("mm: improve readability of transparent_hugepage_enabled()")
Link: http://lkml.kernel.org/r/1499842660-10665-1-git-send-email-geert@linux-m68k.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index cbb57194687e3..0e517be91a89e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3591,7 +3591,7 @@ static int do_numa_page(struct vm_fault *vmf)
 	return 0;
 }
 
-static int create_huge_pmd(struct vm_fault *vmf)
+static inline int create_huge_pmd(struct vm_fault *vmf)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_anonymous_page(vmf);
-- 
GitLab


From 112166f88cf83dd11486cf1818672d42b540865b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Wed, 12 Jul 2017 14:33:11 -0700
Subject: [PATCH 1308/1958] kernel/fork.c: virtually mapped stacks: do not
 disable interrupts

The reason to disable interrupts seems to be to avoid switching to a
different processor while handling per cpu data using individual loads and
stores.  If we use per cpu RMV primitives we will not have to disable
interrupts.

Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1705171055130.5898@east.gentwo.org
Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/fork.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 0f69a3e5281ef..d2b9d7c31eaf8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -205,19 +205,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 	void *stack;
 	int i;
 
-	local_irq_disable();
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
-		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+		struct vm_struct *s;
+
+		s = this_cpu_xchg(cached_stacks[i], NULL);
 
 		if (!s)
 			continue;
-		this_cpu_write(cached_stacks[i], NULL);
 
 		tsk->stack_vm_area = s;
-		local_irq_enable();
 		return s->addr;
 	}
-	local_irq_enable();
 
 	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
 				     VMALLOC_START, VMALLOC_END,
@@ -245,19 +243,15 @@ static inline void free_thread_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
 	if (task_stack_vm_area(tsk)) {
-		unsigned long flags;
 		int i;
 
-		local_irq_save(flags);
 		for (i = 0; i < NR_CACHED_STACKS; i++) {
-			if (this_cpu_read(cached_stacks[i]))
+			if (this_cpu_cmpxchg(cached_stacks[i],
+					NULL, tsk->stack_vm_area) != NULL)
 				continue;
 
-			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
-			local_irq_restore(flags);
 			return;
 		}
-		local_irq_restore(flags);
 
 		vfree_atomic(tsk->stack);
 		return;
-- 
GitLab


From 203e9e41219b4e7357104e525e91ac609fba2c6c Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Wed, 12 Jul 2017 14:33:14 -0700
Subject: [PATCH 1309/1958] kexec: move vmcoreinfo out of the kernel's .bss
 section

As Eric said,
 "what we need to do is move the variable vmcoreinfo_note out of the
  kernel's .bss section. And modify the code to regenerate and keep this
  information in something like the control page.

  Definitely something like this needs a page all to itself, and ideally
  far away from any other kernel data structures. I clearly was not
  watching closely the data someone decided to keep this silly thing in
  the kernel's .bss section."

This patch allocates extra pages for these vmcoreinfo_XXX variables, one
advantage is that it enhances some safety of vmcoreinfo, because
vmcoreinfo now is kept far away from other kernel data structures.

Link: http://lkml.kernel.org/r/1493281021-20737-1-git-send-email-xlpang@redhat.com
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Suggested-by: Eric Biederman <ebiederm@xmission.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/machine_kexec.c |  5 -----
 arch/s390/kernel/machine_kexec.c |  1 +
 arch/s390/kernel/setup.c         |  6 ------
 arch/x86/kernel/crash.c          |  2 +-
 arch/x86/xen/mmu_pv.c            |  4 ++--
 include/linux/crash_core.h       |  4 ++--
 kernel/crash_core.c              | 26 ++++++++++++++++++++++----
 kernel/ksysfs.c                  |  2 +-
 8 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 599507bcec91f..c14815dca747d 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 }
 
-phys_addr_t paddr_vmcoreinfo_note(void)
-{
-	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
-}
-
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 49a6bd45957b5..3d0b14afa2325 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 }
 
 void machine_shutdown(void)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3ae756c0db3de..3d1d808ea8a97 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
 	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
 }
 
-static void __init setup_vmcoreinfo(void)
-{
-	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
-}
-
 #ifdef CONFIG_CRASH_DUMP
 
 /*
@@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	setup_resources();
-	setup_vmcoreinfo();
 	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_detect_mhz_feature();
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 22217ece26c88..44404e2307bbe 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
 	bufp += sizeof(Elf64_Phdr);
 	phdr->p_type = PT_NOTE;
 	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
-	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
 	(ehdr->e_phnum)++;
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1d7a7213a3109..cab28cf2cffbb 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2693,8 +2693,8 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 phys_addr_t paddr_vmcoreinfo_note(void)
 {
 	if (xen_pv_domain())
-		return virt_to_machine(&vmcoreinfo_note).maddr;
+		return virt_to_machine(vmcoreinfo_note).maddr;
 	else
-		return __pa_symbol(&vmcoreinfo_note);
+		return __pa(vmcoreinfo_note);
 }
 #endif /* CONFIG_KEXEC_CORE */
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 4090a42578a8f..87506a02e9140 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -19,7 +19,7 @@
 				     CRASH_CORE_NOTE_NAME_BYTES +	\
 				     CRASH_CORE_NOTE_DESC_BYTES)
 
-#define VMCOREINFO_BYTES	   (4096)
+#define VMCOREINFO_BYTES	   PAGE_SIZE
 #define VMCOREINFO_NOTE_NAME	   "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
 #define VMCOREINFO_NOTE_SIZE	   ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +	\
@@ -56,7 +56,7 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
-extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+extern u32 *vmcoreinfo_note;
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index fcbd568f1e950..2837d6164db81 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,10 +14,10 @@
 #include <asm/sections.h>
 
 /* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+static unsigned char *vmcoreinfo_data;
 size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
+u32 *vmcoreinfo_note;
 
 /*
  * parsing the "crashkernel" commandline
@@ -326,6 +326,9 @@ static void update_vmcoreinfo_note(void)
 
 void crash_save_vmcoreinfo(void)
 {
+	if (!vmcoreinfo_note)
+		return;
+
 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
 	update_vmcoreinfo_note();
 }
@@ -356,11 +359,26 @@ void __weak arch_crash_save_vmcoreinfo(void)
 
 phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
-	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+	return __pa(vmcoreinfo_note);
 }
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
+	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+	if (!vmcoreinfo_data) {
+		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+		return -ENOMEM;
+	}
+
+	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+						GFP_KERNEL | __GFP_ZERO);
+	if (!vmcoreinfo_note) {
+		free_page((unsigned long)vmcoreinfo_data);
+		vmcoreinfo_data = NULL;
+		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+		return -ENOMEM;
+	}
+
 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
 
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index df1a9aa602a08..46ba853656f63 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
 {
 	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
 	return sprintf(buf, "%pa %x\n", &vmcore_base,
-		       (unsigned int)sizeof(vmcoreinfo_note));
+			(unsigned int)VMCOREINFO_NOTE_SIZE);
 }
 KERNEL_ATTR_RO(vmcoreinfo);
 
-- 
GitLab


From 5203f4995d9a87952a83c2ce7866adbbe8f97bb5 Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Wed, 12 Jul 2017 14:33:17 -0700
Subject: [PATCH 1310/1958] powerpc/fadump: use the correct
 VMCOREINFO_NOTE_SIZE for phdr

vmcoreinfo_max_size stands for the vmcoreinfo_data, the correct one we
should use is vmcoreinfo_note whose total size is VMCOREINFO_NOTE_SIZE.

Like explained in commit 77019967f06b ("kdump: fix exported size of
vmcoreinfo note"), it should not affect the actual function, but we
better fix it, also this change should be safe and backward compatible.

After this, we can get rid of variable vmcoreinfo_max_size, let's use
the corresponding macros directly, fewer variables means more safety for
vmcoreinfo operation.

[xlpang@redhat.com: fix build warning]
  Link: http://lkml.kernel.org/r/1494830606-27736-1-git-send-email-xlpang@redhat.com
Link: http://lkml.kernel.org/r/1493281021-20737-2-git-send-email-xlpang@redhat.com
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Reviewed-by: Dave Young <dyoung@redhat.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/fadump.c | 3 +--
 include/linux/crash_core.h   | 1 -
 kernel/crash_core.c          | 3 +--
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3079518f2245c..dc0c49cfd90a0 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -999,8 +999,7 @@ static int fadump_create_elfcore_headers(char *bufp)
 
 	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
 	phdr->p_offset	= phdr->p_paddr;
-	phdr->p_memsz	= vmcoreinfo_max_size;
-	phdr->p_filesz	= vmcoreinfo_max_size;
+	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
 
 	/* Increment number of program headers. */
 	(elf->e_phnum)++;
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 87506a02e9140..e5df1b3cf0726 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -58,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 
 extern u32 *vmcoreinfo_note;
 extern size_t vmcoreinfo_size;
-extern size_t vmcoreinfo_max_size;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len);
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 2837d6164db81..315adbf9cb680 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -16,7 +16,6 @@
 /* vmcoreinfo stuff */
 static unsigned char *vmcoreinfo_data;
 size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
 u32 *vmcoreinfo_note;
 
 /*
@@ -343,7 +342,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
 	r = vscnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 
-	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+	r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
 
 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
 
-- 
GitLab


From 1229384f5b856d83698c38f9dedfd836e26711cb Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@redhat.com>
Date: Wed, 12 Jul 2017 14:33:21 -0700
Subject: [PATCH 1311/1958] kdump: protect vmcoreinfo data under the crash
 memory

Currently vmcoreinfo data is updated at boot time subsys_initcall(), it
has the risk of being modified by some wrong code during system is
running.

As a result, vmcore dumped may contain the wrong vmcoreinfo.  Later on,
when using "crash", "makedumpfile", etc utility to parse this vmcore, we
probably will get "Segmentation fault" or other unexpected errors.

E.g.  1) wrong code overwrites vmcoreinfo_data; 2) further crashes the
system; 3) trigger kdump, then we obviously will fail to recognize the
crash context correctly due to the corrupted vmcoreinfo.

Now except for vmcoreinfo, all the crash data is well
protected(including the cpu note which is fully updated in the crash
path, thus its correctness is guaranteed).  Given that vmcoreinfo data
is a large chunk prepared for kdump, we better protect it as well.

To solve this, we relocate and copy vmcoreinfo_data to the crash memory
when kdump is loading via kexec syscalls.  Because the whole crash
memory will be protected by existing arch_kexec_protect_crashkres()
mechanism, we naturally protect vmcoreinfo_data from write(even read)
access under kernel direct mapping after kdump is loaded.

Since kdump is usually loaded at the very early stage after boot, we can
trust the correctness of the vmcoreinfo data copied.

On the other hand, we still need to operate the vmcoreinfo safe copy
when crash happens to generate vmcoreinfo_note again, we rely on vmap()
to map out a new kernel virtual address and update to use this new one
instead in the following crash_save_vmcoreinfo().

BTW, we do not touch vmcoreinfo_note, because it will be fully updated
using the protected vmcoreinfo_data after crash which is surely correct
just like the cpu crash note.

Link: http://lkml.kernel.org/r/1493281021-20737-3-git-send-email-xlpang@redhat.com
Signed-off-by: Xunlei Pang <xlpang@redhat.com>
Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crash_core.h |  2 +-
 include/linux/kexec.h      |  2 ++
 kernel/crash_core.c        | 17 ++++++++++++++++-
 kernel/kexec.c             |  8 ++++++++
 kernel/kexec_core.c        | 39 ++++++++++++++++++++++++++++++++++++++
 kernel/kexec_file.c        |  8 ++++++++
 6 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index e5df1b3cf0726..2df2118fbe135 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -28,6 +28,7 @@
 
 typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
 
+void crash_update_vmcoreinfo_safecopy(void *ptr);
 void crash_save_vmcoreinfo(void);
 void arch_crash_save_vmcoreinfo(void);
 __printf(1, 2)
@@ -57,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
 extern u32 *vmcoreinfo_note;
-extern size_t vmcoreinfo_size;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len);
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 65888418fb694..dd056fab9e35c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -172,6 +172,7 @@ struct kimage {
 	unsigned long start;
 	struct page *control_code_page;
 	struct page *swap_page;
+	void *vmcoreinfo_data_copy; /* locates in the crash memory */
 
 	unsigned long nr_segments;
 	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
@@ -241,6 +242,7 @@ extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 int kexec_crash_loaded(void);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 315adbf9cb680..6db80fc0810b9 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -15,9 +15,12 @@
 
 /* vmcoreinfo stuff */
 static unsigned char *vmcoreinfo_data;
-size_t vmcoreinfo_size;
+static size_t vmcoreinfo_size;
 u32 *vmcoreinfo_note;
 
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
+
 /*
  * parsing the "crashkernel" commandline
  *
@@ -323,11 +326,23 @@ static void update_vmcoreinfo_note(void)
 	final_note(buf);
 }
 
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+	if (ptr)
+		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+	vmcoreinfo_data_safecopy = ptr;
+}
+
 void crash_save_vmcoreinfo(void)
 {
 	if (!vmcoreinfo_note)
 		return;
 
+	/* Use the safe copy to generate vmcoreinfo note if have */
+	if (vmcoreinfo_data_safecopy)
+		vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
 	update_vmcoreinfo_note();
 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 980936a90ee6e..e62ec4dc66206 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architecture(like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	for (i = 0; i < nr_segments; i++) {
 		ret = kimage_load_segment(image, &image->segment[i]);
 		if (ret)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 154ffb489b93d..1ae7c41c33c19 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -482,6 +482,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
 	return pages;
 }
 
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+	struct page *vmcoreinfo_page;
+	void *safecopy;
+
+	if (image->type != KEXEC_TYPE_CRASH)
+		return 0;
+
+	/*
+	 * For kdump, allocate one vmcoreinfo safe copy from the
+	 * crash memory. as we have arch_kexec_protect_crashkres()
+	 * after kexec syscall, we naturally protect it from write
+	 * (even read) access under kernel direct mapping. But on
+	 * the other hand, we still need to operate it when crash
+	 * happens to generate vmcoreinfo note, hereby we rely on
+	 * vmap for this purpose.
+	 */
+	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+	if (!vmcoreinfo_page) {
+		pr_warn("Could not allocate vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+	if (!safecopy) {
+		pr_warn("Could not vmap vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+
+	image->vmcoreinfo_data_copy = safecopy;
+	crash_update_vmcoreinfo_safecopy(safecopy);
+
+	return 0;
+}
+
 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
 {
 	if (*image->entry != 0)
@@ -569,6 +603,11 @@ void kimage_free(struct kimage *image)
 	if (!image)
 		return;
 
+	if (image->vmcoreinfo_data_copy) {
+		crash_update_vmcoreinfo_safecopy(NULL);
+		vunmap(image->vmcoreinfo_data_copy);
+	}
+
 	kimage_free_extra_pages(image);
 	for_each_kimage_entry(image, ptr, entry) {
 		if (entry & IND_INDIRECTION) {
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 766e7e4d3ad91..c8f7f77e9fa9f 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -298,6 +298,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architecture(like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	ret = kexec_calculate_store_digests(image);
 	if (ret)
 		goto out;
-- 
GitLab


From a711bdc095d2c9b6ad15e737d1cdc46409b09538 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <Bharat.Bhushan@nxp.com>
Date: Wed, 12 Jul 2017 14:33:24 -0700
Subject: [PATCH 1312/1958] kexec/kdump: minor Documentation updates for arm64
 and Image

Minor updates in Documentation for arm64 as relocatable kernel.  Also
this patch updates documentation for using uncompressed image "Image"
which is used for ARM64.

Link: http://lkml.kernel.org/r/1495104793-6563-1-git-send-email-Bharat.Bhushan@nxp.com
Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Pratyush Anand <panand@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kdump/kdump.txt | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 615434d81108e..51814450a7f80 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -112,8 +112,8 @@ There are two possible methods of using Kdump.
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architectures which support a relocatable kernel. As
-   of today, i386, x86_64, ppc64, ia64 and arm architectures support relocatable
-   kernel.
+   of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
+   relocatable kernel.
 
 Building a relocatable kernel is advantageous from the point of view that
 one does not have to build a second kernel for capturing the dump. But
@@ -339,7 +339,7 @@ For arm:
 For arm64:
 	- Use vmlinux or Image
 
-If you are using a uncompressed vmlinux image then use following command
+If you are using an uncompressed vmlinux image then use following command
 to load dump-capture kernel.
 
    kexec -p <dump-capture-kernel-vmlinux-image> \
@@ -361,6 +361,12 @@ to load dump-capture kernel.
    --dtb=<dtb-for-dump-capture-kernel> \
    --append="root=<root-dev> <arch-specific-options>"
 
+If you are using an uncompressed Image, then use following command
+to load dump-capture kernel.
+
+   kexec -p <dump-capture-kernel-Image> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
 
 Please note, that --args-linux does not need to be specified for ia64.
 It is planned to make this a no-op on that architecture, but for now
-- 
GitLab


From 89c5b53b16bf577079d4f0311406dbea3c71202c Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:27 -0700
Subject: [PATCH 1313/1958] sysctl: fix lax sysctl_check_table() sanity check

Patch series "sysctl: few fixes", v5.

I've been working on making kmod more deterministic, and as I did that I
couldn't help but notice a few issues with sysctl.  My end goal was just
to fix unsigned int support, which back then was completely broken.
Liping Zhang has sent up small atomic fixes, however it still missed yet
one more fix and Alexey Dobriyan had also suggested to just drop array
support given its complexity.

I have inspected array support using Coccinelle and indeed its not that
popular, so if in fact we can avoid it for new interfaces, I agree its
best.

I did develop a sysctl stress driver but will hold that off for another
series.

This patch (of 5):

Commit 7c60c48f58a7 ("sysctl: Improve the sysctl sanity checks")
improved sanity checks considerbly, however the enhancements on
sysctl_check_table() meant adding a functional change so that only the
last table entry's sanity error is propagated.  It also changed the way
errors were propagated so that each new check reset the err value, this
means only last sanity check computed is used for an error.  This has
been in the kernel since v3.4 days.

Fix this by carrying on errors from previous checks and iterations as we
traverse the table and ensuring we keep any error from previous checks.
We keep iterating on the table even if an error is found so we can
complain for all errors found in one shot.  This works as -EINVAL is
always returned on error anyway, and the check for error is any non-zero
value.

Fixes: 7c60c48f58a7 ("sysctl: Improve the sysctl sanity checks")
Link: http://lkml.kernel.org/r/20170519033554.18592-2-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_sysctl.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 67985a7233c24..32c9c5630507c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1066,7 +1066,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 	int err = 0;
 	for (; table->procname; table++) {
 		if (table->child)
-			err = sysctl_err(path, table, "Not a file");
+			err |= sysctl_err(path, table, "Not a file");
 
 		if ((table->proc_handler == proc_dostring) ||
 		    (table->proc_handler == proc_dointvec) ||
@@ -1078,15 +1078,15 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 		    (table->proc_handler == proc_doulongvec_minmax) ||
 		    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
 			if (!table->data)
-				err = sysctl_err(path, table, "No data");
+				err |= sysctl_err(path, table, "No data");
 			if (!table->maxlen)
-				err = sysctl_err(path, table, "No maxlen");
+				err |= sysctl_err(path, table, "No maxlen");
 		}
 		if (!table->proc_handler)
-			err = sysctl_err(path, table, "No proc_handler");
+			err |= sysctl_err(path, table, "No proc_handler");
 
 		if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
-			err = sysctl_err(path, table, "bogus .mode 0%o",
+			err |= sysctl_err(path, table, "bogus .mode 0%o",
 				table->mode);
 	}
 	return err;
-- 
GitLab


From a19ac3374995382a994653ff372b98ea7cbad548 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:30 -0700
Subject: [PATCH 1314/1958] sysctl: kdoc'ify sysctl_writes_strict

Document the different sysctl_writes_strict modes in code.

Link: http://lkml.kernel.org/r/20170519033554.18592-3-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dfba1a76cc36..02725178694a2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -174,11 +174,32 @@ extern int no_unaligned_warning;
 
 #ifdef CONFIG_PROC_SYSCTL
 
-#define SYSCTL_WRITES_LEGACY	-1
-#define SYSCTL_WRITES_WARN	 0
-#define SYSCTL_WRITES_STRICT	 1
+/**
+ * enum sysctl_writes_mode - supported sysctl write modes
+ *
+ * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
+ * 	to be written, and multiple writes on the same sysctl file descriptor
+ * 	will rewrite the sysctl value, regardless of file position. No warning
+ * 	is issued when the initial position is not 0.
+ * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
+ * 	not 0.
+ * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
+ * 	file position 0 and the value must be fully contained in the buffer
+ * 	sent to the write syscall. If dealing with strings respect the file
+ * 	position, but restrict this to the max length of the buffer, anything
+ * 	passed the max lenght will be ignored. Multiple writes will append
+ * 	to the buffer.
+ *
+ * These write modes control how current file position affects the behavior of
+ * updating sysctl values through the proc interface on each write.
+ */
+enum sysctl_writes_mode {
+	SYSCTL_WRITES_LEGACY		= -1,
+	SYSCTL_WRITES_WARN		= 0,
+	SYSCTL_WRITES_STRICT		= 1,
+};
 
-static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
+static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 
 static int proc_do_cad_pid(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
-- 
GitLab


From d383d48470819e86fe30eb72f0e9494e1ee0e2af Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:33 -0700
Subject: [PATCH 1315/1958] sysctl: fold sysctl_writes_strict checks into
 helper

The mode sysctl_writes_strict positional checks keep being copy and pasted
as we add new proc handlers.  Just add a helper to avoid code duplication.

Link: http://lkml.kernel.org/r/20170519033554.18592-4-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Suggested-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 56 ++++++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 02725178694a2..6f3bb1f099fa9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1970,6 +1970,32 @@ static void warn_sysctl_write(struct ctl_table *table)
 		current->comm, table->procname);
 }
 
+/**
+ * proc_first_pos_non_zero_ignore - check if firs position is allowed
+ * @ppos: file position
+ * @table: the sysctl table
+ *
+ * Returns true if the first position is non-zero and the sysctl_writes_strict
+ * mode indicates this is not allowed for numeric input types. String proc
+ * hadlers can ignore the return value.
+ */
+static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
+					   struct ctl_table *table)
+{
+	if (!*ppos)
+		return false;
+
+	switch (sysctl_writes_strict) {
+	case SYSCTL_WRITES_STRICT:
+		return true;
+	case SYSCTL_WRITES_WARN:
+		warn_sysctl_write(table);
+		return false;
+	default:
+		return false;
+	}
+}
+
 /**
  * proc_dostring - read a string sysctl
  * @table: the sysctl table
@@ -1990,8 +2016,8 @@ static void warn_sysctl_write(struct ctl_table *table)
 int proc_dostring(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
-		warn_sysctl_write(table);
+	if (write)
+		proc_first_pos_non_zero_ignore(ppos, table);
 
 	return _proc_do_string((char *)(table->data), table->maxlen, write,
 			       (char __user *)buffer, lenp, ppos);
@@ -2193,17 +2219,8 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 		conv = do_proc_dointvec_conv;
 
 	if (write) {
-		if (*ppos) {
-			switch (sysctl_writes_strict) {
-			case SYSCTL_WRITES_STRICT:
-				goto out;
-			case SYSCTL_WRITES_WARN:
-				warn_sysctl_write(table);
-				break;
-			default:
-				break;
-			}
-		}
+		if (proc_first_pos_non_zero_ignore(ppos, table))
+			goto out;
 
 		if (left > PAGE_SIZE - 1)
 			left = PAGE_SIZE - 1;
@@ -2468,17 +2485,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 	left = *lenp;
 
 	if (write) {
-		if (*ppos) {
-			switch (sysctl_writes_strict) {
-			case SYSCTL_WRITES_STRICT:
-				goto out;
-			case SYSCTL_WRITES_WARN:
-				warn_sysctl_write(table);
-				break;
-			default:
-				break;
-			}
-		}
+		if (proc_first_pos_non_zero_ignore(ppos, table))
+			goto out;
 
 		if (left > PAGE_SIZE - 1)
 			left = PAGE_SIZE - 1;
-- 
GitLab


From 4f2fec00afa60aa8e5d1b7f2a8e0526900f55623 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:36 -0700
Subject: [PATCH 1316/1958] sysctl: simplify unsigned int support

Commit e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32
fields") added proc_douintvec() to start help adding support for
unsigned int, this however was only half the work needed.  Two fixes
have come in since then for the following issues:

  o Printing the values shows a negative value, this happens since
    do_proc_dointvec() and this uses proc_put_long()

This was fixed by commit 5380e5644afbba9 ("sysctl: don't print negative
flag for proc_douintvec").

  o We can easily wrap around the int values: UINT_MAX is 4294967295, if
    we echo in 4294967295 + 1 we end up with 0, using 4294967295 + 2 we
    end up with 1.
  o We echo negative values in and they are accepted

This was fixed by commit 425fffd886ba ("sysctl: report EINVAL if value
is larger than UINT_MAX for proc_douintvec").

It still also failed to be added to sysctl_check_table()...  instead of
adding it with the current implementation just provide a proper and
simplified unsigned int support without any array unsigned int support
with no negative support at all.

Historically sysctl proc helpers have supported arrays, due to the
complexity this adds though we've taken a step back to evaluate array
users to determine if its worth upkeeping for unsigned int.  An
evaluation using Coccinelle has been done to perform a grammatical
search to ask ourselves:

  o How many sysctl proc_dointvec() (int) users exist which likely
    should be moved over to proc_douintvec() (unsigned int) ?
	Answer: about 8
	- Of these how many are array users ?
		Answer: Probably only 1
  o How many sysctl array users exist ?
	Answer: about 12

This last question gives us an idea just how popular arrays: they are not.
Array support should probably just be kept for strings.

The identified uint ports are:

  drivers/infiniband/core/ucma.c - max_backlog
  drivers/infiniband/core/iwcm.c - default_backlog
  net/core/sysctl_net_core.c - rps_sock_flow_sysctl()
  net/netfilter/nf_conntrack_timestamp.c - nf_conntrack_timestamp -- bool
  net/netfilter/nf_conntrack_acct.c nf_conntrack_acct -- bool
  net/netfilter/nf_conntrack_ecache.c - nf_conntrack_events -- bool
  net/netfilter/nf_conntrack_helper.c - nf_conntrack_helper -- bool
  net/phonet/sysctl.c proc_local_port_range()

The only possible array users is proc_local_port_range() but it does not
seem worth it to add array support just for this given the range support
works just as well.  Unsigned int support should be desirable more for
when you *need* more than INT_MAX or using int min/max support then does
not suffice for your ranges.

If you forget and by mistake happen to register an unsigned int proc
entry with an array, the driver will fail and you will get something as
follows:

sysctl table check failed: debug/test_sysctl//uint_0002 array now allowed
CPU: 2 PID: 1342 Comm: modprobe Tainted: G        W   E <etc>
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS <etc>
Call Trace:
 dump_stack+0x63/0x81
 __register_sysctl_table+0x350/0x650
 ? kmem_cache_alloc_trace+0x107/0x240
 __register_sysctl_paths+0x1b3/0x1e0
 ? 0xffffffffc005f000
 register_sysctl_table+0x1f/0x30
 test_sysctl_init+0x10/0x1000 [test_sysctl]
 do_one_initcall+0x52/0x1a0
 ? kmem_cache_alloc_trace+0x107/0x240
 do_init_module+0x5f/0x200
 load_module+0x1867/0x1bd0
 ? __symbol_put+0x60/0x60
 SYSC_finit_module+0xdf/0x110
 SyS_finit_module+0xe/0x10
 entry_SYSCALL_64_fastpath+0x1e/0xad
RIP: 0033:0x7f042b22d119
<etc>

Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields")
Link: http://lkml.kernel.org/r/20170519033554.18592-5-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Suggested-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Liping Zhang <zlpnobody@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Heinrich Schuchardt <xypron.glpk@gmx.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_sysctl.c |  14 ++++
 kernel/sysctl.c       | 153 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 160 insertions(+), 7 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 32c9c5630507c..ee6feba8b6c0a 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1061,6 +1061,18 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
 	return -EINVAL;
 }
 
+static int sysctl_check_table_array(const char *path, struct ctl_table *table)
+{
+	int err = 0;
+
+	if (table->proc_handler == proc_douintvec) {
+		if (table->maxlen != sizeof(unsigned int))
+			err |= sysctl_err(path, table, "array now allowed");
+	}
+
+	return err;
+}
+
 static int sysctl_check_table(const char *path, struct ctl_table *table)
 {
 	int err = 0;
@@ -1081,6 +1093,8 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 				err |= sysctl_err(path, table, "No data");
 			if (!table->maxlen)
 				err |= sysctl_err(path, table, "No maxlen");
+			else
+				err |= sysctl_check_table_array(path, table);
 		}
 		if (!table->proc_handler)
 			err |= sysctl_err(path, table, "No proc_handler");
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6f3bb1f099fa9..d12078fc215fe 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2175,19 +2175,18 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 	return 0;
 }
 
-static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
-				 int *valp,
-				 int write, void *data)
+static int do_proc_douintvec_conv(unsigned long *lvalp,
+				  unsigned int *valp,
+				  int write, void *data)
 {
 	if (write) {
-		if (*negp)
+		if (*lvalp > UINT_MAX)
 			return -EINVAL;
 		if (*lvalp > UINT_MAX)
 			return -EINVAL;
 		*valp = *lvalp;
 	} else {
 		unsigned int val = *valp;
-		*negp = false;
 		*lvalp = (unsigned long)val;
 	}
 	return 0;
@@ -2287,6 +2286,146 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
 			buffer, lenp, ppos, conv, data);
 }
 
+static int do_proc_douintvec_w(unsigned int *tbl_data,
+			       struct ctl_table *table,
+			       void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	unsigned long lval;
+	int err = 0;
+	size_t left;
+	bool neg;
+	char *kbuf = NULL, *p;
+
+	left = *lenp;
+
+	if (proc_first_pos_non_zero_ignore(ppos, table))
+		goto bail_early;
+
+	if (left > PAGE_SIZE - 1)
+		left = PAGE_SIZE - 1;
+
+	p = kbuf = memdup_user_nul(buffer, left);
+	if (IS_ERR(kbuf))
+		return -EINVAL;
+
+	left -= proc_skip_spaces(&p);
+	if (!left) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	err = proc_get_long(&p, &left, &lval, &neg,
+			     proc_wspace_sep,
+			     sizeof(proc_wspace_sep), NULL);
+	if (err || neg) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	if (conv(&lval, tbl_data, 1, data)) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	if (!err && left)
+		left -= proc_skip_spaces(&p);
+
+out_free:
+	kfree(kbuf);
+	if (err)
+		return -EINVAL;
+
+	return 0;
+
+	/* This is in keeping with old __do_proc_dointvec() */
+bail_early:
+	*ppos += *lenp;
+	return err;
+}
+
+static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	unsigned long lval;
+	int err = 0;
+	size_t left;
+
+	left = *lenp;
+
+	if (conv(&lval, tbl_data, 0, data)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = proc_put_long(&buffer, &left, lval, false);
+	if (err || !left)
+		goto out;
+
+	err = proc_put_char(&buffer, &left, '\n');
+
+out:
+	*lenp -= left;
+	*ppos += *lenp;
+
+	return err;
+}
+
+static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
+			       int write, void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	unsigned int *i, vleft;
+
+	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	i = (unsigned int *) tbl_data;
+	vleft = table->maxlen / sizeof(*i);
+
+	/*
+	 * Arrays are not supported, keep this simple. *Do not* add
+	 * support for them.
+	 */
+	if (vleft != 1) {
+		*lenp = 0;
+		return -EINVAL;
+	}
+
+	if (!conv)
+		conv = do_proc_douintvec_conv;
+
+	if (write)
+		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
+					   conv, data);
+	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
+}
+
+static int do_proc_douintvec(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos,
+			     int (*conv)(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data),
+			     void *data)
+{
+	return __do_proc_douintvec(table->data, table, write,
+				   buffer, lenp, ppos, conv, data);
+}
+
 /**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
@@ -2322,8 +2461,8 @@ int proc_dointvec(struct ctl_table *table, int write,
 int proc_douintvec(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_dointvec(table, write, buffer, lenp, ppos,
-				do_proc_douintvec_conv, NULL);
+	return do_proc_douintvec(table, write, buffer, lenp, ppos,
+				 do_proc_douintvec_conv, NULL);
 }
 
 /*
-- 
GitLab


From 61d9b56a89208d8cccd0b4cfec7e6959717e16e3 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:40 -0700
Subject: [PATCH 1317/1958] sysctl: add unsigned int range support

To keep parity with regular int interfaces provide the an unsigned int
proc_douintvec_minmax() which allows you to specify a range of allowed
valid numbers.

Adding proc_douintvec_minmax_sysadmin() is easy but we can wait for an
actual user for that.

Link: http://lkml.kernel.org/r/20170519033554.18592-6-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Heinrich Schuchardt <xypron.glpk@gmx.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_sysctl.c  |  4 ++-
 include/linux/sysctl.h |  3 ++
 kernel/sysctl.c        | 66 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index ee6feba8b6c0a..8f9d564d0969f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1065,7 +1065,8 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
 {
 	int err = 0;
 
-	if (table->proc_handler == proc_douintvec) {
+	if ((table->proc_handler == proc_douintvec) ||
+	    (table->proc_handler == proc_douintvec_minmax)) {
 		if (table->maxlen != sizeof(unsigned int))
 			err |= sysctl_err(path, table, "array now allowed");
 	}
@@ -1083,6 +1084,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 		if ((table->proc_handler == proc_dostring) ||
 		    (table->proc_handler == proc_dointvec) ||
 		    (table->proc_handler == proc_douintvec) ||
+		    (table->proc_handler == proc_douintvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_jiffies) ||
 		    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d07816def05..225001d437ae3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -47,6 +47,9 @@ extern int proc_douintvec(struct ctl_table *, int,
 			 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int,
 				void __user *, size_t *, loff_t *);
+extern int proc_douintvec_minmax(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d12078fc215fe..df9f2a3678821 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2567,6 +2567,65 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 				do_proc_dointvec_minmax_conv, &param);
 }
 
+struct do_proc_douintvec_minmax_conv_param {
+	unsigned int *min;
+	unsigned int *max;
+};
+
+static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data)
+{
+	struct do_proc_douintvec_minmax_conv_param *param = data;
+
+	if (write) {
+		unsigned int val = *lvalp;
+
+		if ((param->min && *param->min > val) ||
+		    (param->max && *param->max < val))
+			return -ERANGE;
+
+		if (*lvalp > UINT_MAX)
+			return -EINVAL;
+		*valp = val;
+	} else {
+		unsigned int val = *valp;
+		*lvalp = (unsigned long) val;
+	}
+
+	return 0;
+}
+
+/**
+ * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string. Negative
+ * strings are not allowed.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max). There is a final sanity
+ * check for UINT_MAX to avoid having to support wrap around uses from
+ * userspace.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct do_proc_douintvec_minmax_conv_param param = {
+		.min = (unsigned int *) table->extra1,
+		.max = (unsigned int *) table->extra2,
+	};
+	return do_proc_douintvec(table, write, buffer, lenp, ppos,
+				 do_proc_douintvec_minmax_conv, &param);
+}
+
 static void validate_coredump_safety(void)
 {
 #ifdef CONFIG_COREDUMP
@@ -3066,6 +3125,12 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
 int proc_dointvec_jiffies(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -3108,6 +3173,7 @@ EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
 EXPORT_SYMBOL(proc_dostring);
-- 
GitLab


From 9308f2f9e7f055cf3934645ec622bb5259dc1c14 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:43 -0700
Subject: [PATCH 1318/1958] test_sysctl: add dedicated proc sysctl test driver

The existing tools/testing/selftests/sysctl/ tests include two test
cases, but these use existing production kernel sysctl interfaces.  We
want to expand test coverage but we can't just be looking for random
safe production values to poke at, that's just insane!

Instead just dedicate a test driver for debugging purposes and port the
existing scripts to use it.  This will make it easier for further tests
to be added.

Subsequent patches will extend our test coverage for sysctl.

The stress test driver uses a new license (GPL on Linux, copyleft-next
outside of Linux).  Linus was fine with this [0] and later due to Ted's
and Alans's request ironed out an "or" language clause to use [1] which
is already present upstream.

[0] https://lkml.kernel.org/r/CA+55aFyhxcvD+q7tp+-yrSFDKfR0mOHgyEAe=f_94aKLsOu0Og@mail.gmail.com
[1] https://lkml.kernel.org/r/1495234558.7848.122.camel@linux.intel.com

Link: http://lkml.kernel.org/r/20170630224431.17374-2-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug                             |  11 ++
 lib/Makefile                                  |   1 +
 lib/test_sysctl.c                             | 113 ++++++++++++++++++
 tools/testing/selftests/sysctl/config         |   1 +
 .../testing/selftests/sysctl/run_numerictests |   4 +-
 .../testing/selftests/sysctl/run_stringtests  |   4 +-
 6 files changed, 130 insertions(+), 4 deletions(-)
 create mode 100644 lib/test_sysctl.c
 create mode 100644 tools/testing/selftests/sysctl/config

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e20fc079bebd6..f28f4252e54a9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1785,6 +1785,17 @@ config TEST_FIRMWARE
 
 	  If unsure, say N.
 
+config TEST_SYSCTL
+	tristate "sysctl test driver"
+	default n
+	depends on PROC_SYSCTL
+	help
+	  This builds the "test_sysctl" module. This driver enables to test the
+	  proc sysctl interfaces available to drivers safely without affecting
+	  production knobs which might alter system functionality.
+
+	  If unsure, say N.
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	default n
diff --git a/lib/Makefile b/lib/Makefile
index 5a008329324e7..85e91e51a9fed 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
new file mode 100644
index 0000000000000..b2163bfb6eb23
--- /dev/null
+++ b/lib/test_sysctl.c
@@ -0,0 +1,113 @@
+/*
+ * proc sysctl test driver
+ *
+ * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or at your option any
+ * later version; or, when distributed separately from the Linux kernel or
+ * when incorporated into other software packages, subject to the following
+ * license:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of copyleft-next (version 0.3.1 or later) as published
+ * at http://copyleft-next.org/.
+ */
+
+/*
+ * This module provides an interface to the the proc sysctl interfaces.  This
+ * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the
+ * system unless explicitly requested by name. You can also build this driver
+ * into your kernel.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/async.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+
+static int i_zero;
+static int i_one_hundred = 100;
+
+struct test_sysctl_data {
+	int int_0001;
+	char string_0001[65];
+};
+
+static struct test_sysctl_data test_data = {
+	.int_0001 = 60,
+	.string_0001 = "(none)",
+};
+
+/* These are all under /proc/sys/debug/test_sysctl/ */
+static struct ctl_table test_table[] = {
+	{
+		.procname	= "int_0001",
+		.data		= &test_data.int_0001,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &i_zero,
+		.extra2         = &i_one_hundred,
+	},
+	{
+		.procname	= "string_0001",
+		.data		= &test_data.string_0001,
+		.maxlen		= sizeof(test_data.string_0001),
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+	{ }
+};
+
+static struct ctl_table test_sysctl_table[] = {
+	{
+		.procname	= "test_sysctl",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= test_table,
+	},
+	{ }
+};
+
+static struct ctl_table test_sysctl_root_table[] = {
+	{
+		.procname	= "debug",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= test_sysctl_table,
+	},
+	{ }
+};
+
+static struct ctl_table_header *test_sysctl_header;
+
+static int __init test_sysctl_init(void)
+{
+	test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
+	if (!test_sysctl_header)
+		return -ENOMEM;
+	return 0;
+}
+late_initcall(test_sysctl_init);
+
+static void __exit test_sysctl_exit(void)
+{
+	if (test_sysctl_header)
+		unregister_sysctl_table(test_sysctl_header);
+}
+
+module_exit(test_sysctl_exit);
+
+MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
new file mode 100644
index 0000000000000..6ca14800d7553
--- /dev/null
+++ b/tools/testing/selftests/sysctl/config
@@ -0,0 +1 @@
+CONFIG_TEST_SYSCTL=y
diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests
index e6e76c93d9487..c375ce0f4c158 100755
--- a/tools/testing/selftests/sysctl/run_numerictests
+++ b/tools/testing/selftests/sysctl/run_numerictests
@@ -1,7 +1,7 @@
 #!/bin/sh
 
-SYSCTL="/proc/sys"
-TARGET="${SYSCTL}/vm/swappiness"
+SYSCTL="/proc/sys/debug/test_sysctl/"
+TARGET="${SYSCTL}/int_0001"
 ORIG=$(cat "${TARGET}")
 TEST_STR=$(( $ORIG + 1 ))
 
diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests
index 857ec667fb02c..a6f2618afeaac 100755
--- a/tools/testing/selftests/sysctl/run_stringtests
+++ b/tools/testing/selftests/sysctl/run_stringtests
@@ -1,7 +1,7 @@
 #!/bin/sh
 
-SYSCTL="/proc/sys"
-TARGET="${SYSCTL}/kernel/domainname"
+SYSCTL="/proc/sys/debug/test_sysctl/"
+TARGET="${SYSCTL}/string_0001"
 ORIG=$(cat "${TARGET}")
 TEST_STR="Testing sysctl"
 
-- 
GitLab


From 64b671204afd71591e774e7237b7c862ac5bbd97 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:46 -0700
Subject: [PATCH 1319/1958] test_sysctl: add generic script to expand on tests

This adds a generic script to let us more easily add more tests cases.
Since we really have only two types of tests cases just fold them into
the one file.  Each test unit is now identified into its separate
function:

    # ./sysctl.sh -l
  Test ID list:

  TEST_ID x NUM_TEST
  TEST_ID:   Test ID
  NUM_TESTS: Number of recommended times to run the test

  0001 x 1 - tests proc_dointvec_minmax()
  0002 x 1 - tests proc_dostring()

For now we start off with what we had before, and run only each test
once.  We can now watch a test case until it fails:

  ./sysctl.sh -w 0002

We can also run a test case x number of times, say we want to run a test
case 100 times:

  ./sysctl.sh -c 0001 100

To run a test case only once, for example:

  ./sysctl.sh -s 0002

The default settings are specified at the top of sysctl.sh.

Link: http://lkml.kernel.org/r/20170630224431.17374-3-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/sysctl/Makefile       |   3 +-
 tools/testing/selftests/sysctl/common_tests   | 131 -----
 .../testing/selftests/sysctl/run_numerictests |  10 -
 .../testing/selftests/sysctl/run_stringtests  |  77 ---
 tools/testing/selftests/sysctl/sysctl.sh      | 494 ++++++++++++++++++
 5 files changed, 495 insertions(+), 220 deletions(-)
 delete mode 100644 tools/testing/selftests/sysctl/common_tests
 delete mode 100755 tools/testing/selftests/sysctl/run_numerictests
 delete mode 100755 tools/testing/selftests/sysctl/run_stringtests
 create mode 100644 tools/testing/selftests/sysctl/sysctl.sh

diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile
index b3c33e071f100..95c320b354e81 100644
--- a/tools/testing/selftests/sysctl/Makefile
+++ b/tools/testing/selftests/sysctl/Makefile
@@ -4,8 +4,7 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
 all:
 
-TEST_PROGS := run_numerictests run_stringtests
-TEST_FILES := common_tests
+TEST_PROGS := sysctl.sh
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/sysctl/common_tests b/tools/testing/selftests/sysctl/common_tests
deleted file mode 100644
index b6862322962ff..0000000000000
--- a/tools/testing/selftests/sysctl/common_tests
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/bin/sh
-
-TEST_FILE=$(mktemp)
-
-echo "== Testing sysctl behavior against ${TARGET} =="
-
-set_orig()
-{
-	echo "${ORIG}" > "${TARGET}"
-}
-
-set_test()
-{
-	echo "${TEST_STR}" > "${TARGET}"
-}
-
-verify()
-{
-	local seen
-	seen=$(cat "$1")
-	if [ "${seen}" != "${TEST_STR}" ]; then
-		return 1
-	fi
-	return 0
-}
-
-exit_test()
-{
-	if [ ! -z ${old_strict} ]; then
-		echo ${old_strict} > ${WRITES_STRICT}
-	fi
-	exit $rc
-}
-
-trap 'set_orig; rm -f "${TEST_FILE}"' EXIT
-
-rc=0
-
-echo -n "Writing test file ... "
-echo "${TEST_STR}" > "${TEST_FILE}"
-if ! verify "${TEST_FILE}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl is not set to test value ... "
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Writing sysctl from shell ... "
-set_test
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Resetting sysctl to original value ... "
-set_orig
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Checking write strict setting ... "
-WRITES_STRICT="${SYSCTL}/kernel/sysctl_writes_strict"
-if [ ! -e ${WRITES_STRICT} ]; then
-	echo "FAIL, but skip in case of old kernel" >&2
-else
-	old_strict=$(cat ${WRITES_STRICT})
-	if [ "$old_strict" = "1" ]; then
-		echo "ok"
-	else
-		echo "FAIL, strict value is 0 but force to 1 to continue" >&2
-		echo "1" > ${WRITES_STRICT}
-	fi
-fi
-
-# Now that we've validated the sanity of "set_test" and "set_orig",
-# we can use those functions to set starting states before running
-# specific behavioral tests.
-
-echo -n "Writing entire sysctl in single write ... "
-set_orig
-dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Writing middle of sysctl after synchronized seek ... "
-set_test
-dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Writing beyond end of sysctl ... "
-set_orig
-dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
-if verify "${TARGET}"; then
-        echo "FAIL" >&2
-        rc=1
-else
-        echo "ok"
-fi
-
-echo -n "Writing sysctl with multiple long writes ... "
-set_orig
-(perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
-	dd of="${TARGET}" bs=50 2>/dev/null
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests
deleted file mode 100755
index c375ce0f4c158..0000000000000
--- a/tools/testing/selftests/sysctl/run_numerictests
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-SYSCTL="/proc/sys/debug/test_sysctl/"
-TARGET="${SYSCTL}/int_0001"
-ORIG=$(cat "${TARGET}")
-TEST_STR=$(( $ORIG + 1 ))
-
-. ./common_tests
-
-exit_test
diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests
deleted file mode 100755
index a6f2618afeaac..0000000000000
--- a/tools/testing/selftests/sysctl/run_stringtests
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/sh
-
-SYSCTL="/proc/sys/debug/test_sysctl/"
-TARGET="${SYSCTL}/string_0001"
-ORIG=$(cat "${TARGET}")
-TEST_STR="Testing sysctl"
-
-. ./common_tests
-
-# Only string sysctls support seeking/appending.
-MAXLEN=65
-
-echo -n "Writing entire sysctl in short writes ... "
-set_orig
-dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Writing middle of sysctl after unsynchronized seek ... "
-set_test
-dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
-	dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
-if ! grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl keeps original string on overflow append ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
-	dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
-if grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl stays NULL terminated on write ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
-	dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
-if grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl stays NULL terminated on overwrite ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
-	dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
-if grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-exit_test
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
new file mode 100644
index 0000000000000..cbe1345d7c1d7
--- /dev/null
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -0,0 +1,494 @@
+#!/bin/bash
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This performs a series tests against the proc sysctl interface.
+
+TEST_NAME="sysctl"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+TEST_FILE=$(mktemp)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:1:1"
+ALL_TESTS="$ALL_TESTS 0002:1:1"
+
+test_modprobe()
+{
+       if [ ! -d $DIR ]; then
+               echo "$0: $DIR not present" >&2
+               echo "You must have the following enabled in your kernel:" >&2
+               cat $TEST_DIR/config >&2
+               exit 1
+       fi
+}
+
+function allow_user_defaults()
+{
+	if [ -z $DIR ]; then
+		DIR="/sys/module/test_sysctl/"
+	fi
+	if [ -z $DEFAULT_NUM_TESTS ]; then
+		DEFAULT_NUM_TESTS=50
+	fi
+	if [ -z $SYSCTL ]; then
+		SYSCTL="/proc/sys/debug/test_sysctl"
+	fi
+	if [ -z $PROD_SYSCTL ]; then
+		PROD_SYSCTL="/proc/sys"
+	fi
+	if [ -z $WRITES_STRICT ]; then
+		WRITES_STRICT="${PROD_SYSCTL}/kernel/sysctl_writes_strict"
+	fi
+}
+
+function check_production_sysctl_writes_strict()
+{
+	echo -n "Checking production write strict setting ... "
+	if [ ! -e ${WRITES_STRICT} ]; then
+		echo "FAIL, but skip in case of old kernel" >&2
+	else
+		old_strict=$(cat ${WRITES_STRICT})
+		if [ "$old_strict" = "1" ]; then
+			echo "ok"
+		else
+			echo "FAIL, strict value is 0 but force to 1 to continue" >&2
+			echo "1" > ${WRITES_STRICT}
+		fi
+	fi
+}
+
+test_reqs()
+{
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo $msg must be run as root >&2
+		exit 0
+	fi
+
+	if ! which perl 2> /dev/null > /dev/null; then
+		echo "$0: You need perl installed"
+		exit 1
+	fi
+}
+
+function load_req_mod()
+{
+	trap "test_modprobe" EXIT
+
+	if [ ! -d $DIR ]; then
+		modprobe $TEST_DRIVER
+		if [ $? -ne 0 ]; then
+			exit
+		fi
+	fi
+}
+
+set_orig()
+{
+	if [ ! -z $TARGET ]; then
+		echo "${ORIG}" > "${TARGET}"
+	fi
+}
+
+set_test()
+{
+	echo "${TEST_STR}" > "${TARGET}"
+}
+
+verify()
+{
+	local seen
+	seen=$(cat "$1")
+	if [ "${seen}" != "${TEST_STR}" ]; then
+		return 1
+	fi
+	return 0
+}
+
+test_rc()
+{
+	if [[ $rc != 0 ]]; then
+		echo "Failed test, return value: $rc" >&2
+		exit $rc
+	fi
+}
+
+test_finish()
+{
+	set_orig
+	rm -f "${TEST_FILE}"
+
+	if [ ! -z ${old_strict} ]; then
+		echo ${old_strict} > ${WRITES_STRICT}
+	fi
+	exit $rc
+}
+
+run_numerictests()
+{
+	echo "== Testing sysctl behavior against ${TARGET} =="
+
+	rc=0
+
+	echo -n "Writing test file ... "
+	echo "${TEST_STR}" > "${TEST_FILE}"
+	if ! verify "${TEST_FILE}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl is not set to test value ... "
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing sysctl from shell ... "
+	set_test
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Resetting sysctl to original value ... "
+	set_orig
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	# Now that we've validated the sanity of "set_test" and "set_orig",
+	# we can use those functions to set starting states before running
+	# specific behavioral tests.
+
+	echo -n "Writing entire sysctl in single write ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing middle of sysctl after synchronized seek ... "
+	set_test
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing beyond end of sysctl ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing sysctl with multiple long writes ... "
+	set_orig
+	(perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" bs=50 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	test_rc
+}
+
+run_stringtests()
+{
+	echo -n "Writing entire sysctl in short writes ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing middle of sysctl after unsynchronized seek ... "
+	set_test
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
+		dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+	if ! grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl keeps original string on overflow append ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+		dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
+	if grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl stays NULL terminated on write ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+		dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+	if grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl stays NULL terminated on overwrite ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
+		dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
+	if grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	test_rc
+}
+
+sysctl_test_0001()
+{
+	TARGET="${SYSCTL}/int_0001"
+	ORIG=$(cat "${TARGET}")
+	TEST_STR=$(( $ORIG + 1 ))
+
+	run_numerictests
+}
+
+sysctl_test_0002()
+{
+	TARGET="${SYSCTL}/string_0001"
+	ORIG=$(cat "${TARGET}")
+	TEST_STR="Testing sysctl"
+	# Only string sysctls support seeking/appending.
+	MAXLEN=65
+
+	run_numerictests
+	run_stringtests
+}
+
+list_tests()
+{
+	echo "Test ID list:"
+	echo
+	echo "TEST_ID x NUM_TEST"
+	echo "TEST_ID:   Test ID"
+	echo "NUM_TESTS: Number of recommended times to run the test"
+	echo
+	echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
+	echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
+}
+
+test_reqs
+
+usage()
+{
+	NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+	let NUM_TESTS=$NUM_TESTS+1
+	MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+	echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+	echo "		 [ -s <4-number-digit> ] | [ -c <4-number-digit> <test- count>"
+	echo "           [ all ] [ -h | --help ] [ -l ]"
+	echo ""
+	echo "Valid tests: 0001-$MAX_TEST"
+	echo ""
+	echo "    all     Runs all tests (default)"
+	echo "    -t      Run test ID the number amount of times is recommended"
+	echo "    -w      Watch test ID run until it runs into an error"
+	echo "    -c      Run test ID once"
+	echo "    -s      Run test ID x test-count number of times"
+	echo "    -l      List all test ID list"
+	echo " -h|--help  Help"
+	echo
+	echo "If an error every occurs execution will immediately terminate."
+	echo "If you are adding a new test try using -w <test-ID> first to"
+	echo "make sure the test passes a series of tests."
+	echo
+	echo Example uses:
+	echo
+	echo "$TEST_NAME.sh            -- executes all tests"
+	echo "$TEST_NAME.sh -t 0002    -- Executes test ID 0002 number of times is recomended"
+	echo "$TEST_NAME.sh -w 0002    -- Watch test ID 0002 run until an error occurs"
+	echo "$TEST_NAME.sh -s 0002    -- Run test ID 0002 once"
+	echo "$TEST_NAME.sh -c 0002 3  -- Run test ID 0002 three times"
+	echo
+	list_tests
+	exit 1
+}
+
+function test_num()
+{
+	re='^[0-9]+$'
+	if ! [[ $1 =~ $re ]]; then
+		usage
+	fi
+}
+
+function get_test_count()
+{
+	test_num $1
+	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	LAST_TWO=${TEST_DATA#*:*}
+	echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+	test_num $1
+	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+	for i in $ALL_TESTS ; do
+		TEST_ID=${i%:*:*}
+		ENABLED=$(get_test_enabled $TEST_ID)
+		TEST_COUNT=$(get_test_count $TEST_ID)
+		if [[ $ENABLED -eq "1" ]]; then
+			test_case $TEST_ID $TEST_COUNT
+		fi
+	done
+}
+
+function watch_log()
+{
+	if [ $# -ne 3 ]; then
+		clear
+	fi
+	date
+	echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+	i=0
+	while [ 1 ]; do
+
+		if [ $# -eq 1 ]; then
+			test_num $1
+			watch_log $i ${TEST_NAME}_test_$1
+			${TEST_NAME}_test_$1
+		else
+			watch_log $i all
+			run_all_tests
+		fi
+		let i=$i+1
+	done
+}
+
+function test_case()
+{
+	NUM_TESTS=$DEFAULT_NUM_TESTS
+	if [ $# -eq 2 ]; then
+		NUM_TESTS=$2
+	fi
+
+	i=0
+	while [ $i -lt $NUM_TESTS ]; do
+		test_num $1
+		watch_log $i ${TEST_NAME}_test_$1 noclear
+		RUN_TEST=${TEST_NAME}_test_$1
+		$RUN_TEST
+		let i=$i+1
+	done
+}
+
+function parse_args()
+{
+	if [ $# -eq 0 ]; then
+		run_all_tests
+	else
+		if [[ "$1" = "all" ]]; then
+			run_all_tests
+		elif [[ "$1" = "-w" ]]; then
+			shift
+			watch_case $@
+		elif [[ "$1" = "-t" ]]; then
+			shift
+			test_num $1
+			test_case $1 $(get_test_count $1)
+		elif [[ "$1" = "-c" ]]; then
+			shift
+			test_num $1
+			test_num $2
+			test_case $1 $2
+		elif [[ "$1" = "-s" ]]; then
+			shift
+			test_case $1 1
+		elif [[ "$1" = "-l" ]]; then
+			list_tests
+		elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+			usage
+		else
+			usage
+		fi
+	fi
+}
+
+test_reqs
+allow_user_defaults
+check_production_sysctl_writes_strict
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
-- 
GitLab


From 1c0357c846452add7c2c863ec372010e3d2ca943 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:49 -0700
Subject: [PATCH 1320/1958] test_sysctl: test against PAGE_SIZE for int

Add the following tests to ensure we do not regress:

  o Test using a buffer full of space (PAGE_SIZE-1) followed by a
    single digit works

  o Test using a buffer full of spaces (PAGE_SIZE or over) will fail

As tests increase instead of unloading the module and reloading it we
can just do a shell reset_vals() with a reset to values we know are set
at init on the driver.

Link: http://lkml.kernel.org/r/20170630224431.17374-4-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/sysctl/sysctl.sh | 66 ++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index cbe1345d7c1d7..6ec807576f7c9 100644
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -75,6 +75,13 @@ function check_production_sysctl_writes_strict()
 			echo "1" > ${WRITES_STRICT}
 		fi
 	fi
+
+	if [ -z $PAGE_SIZE ]; then
+		PAGE_SIZE=$(getconf PAGESIZE)
+	fi
+	if [ -z $MAX_DIGITS ]; then
+		MAX_DIGITS=$(($PAGE_SIZE/8))
+	fi
 }
 
 test_reqs()
@@ -89,6 +96,10 @@ test_reqs()
 		echo "$0: You need perl installed"
 		exit 1
 	fi
+	if ! which getconf 2> /dev/null > /dev/null; then
+		echo "$0: You need getconf installed"
+		exit 1
+	fi
 }
 
 function load_req_mod()
@@ -103,6 +114,23 @@ function load_req_mod()
 	fi
 }
 
+reset_vals()
+{
+	VAL=""
+	TRIGGER=$(basename ${TARGET})
+	case "$TRIGGER" in
+		int_0001)
+			VAL="60"
+			;;
+		string_0001)
+			VAL="(none)"
+			;;
+		*)
+			;;
+	esac
+	echo -n $VAL > $TARGET
+}
+
 set_orig()
 {
 	if [ ! -z $TARGET ]; then
@@ -229,7 +257,42 @@ run_numerictests()
 	else
 		echo "ok"
 	fi
+	test_rc
+}
+
+# Your test must accept digits 3 and 4 to use this
+run_limit_digit()
+{
+	echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ..."
+	reset_vals
 
+	LIMIT=$((MAX_DIGITS -1))
+	TEST_STR="3"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Checking passing PAGE_SIZE of spaces fails on write ..."
+	reset_vals
+
+	LIMIT=$((MAX_DIGITS))
+	TEST_STR="4"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
 	test_rc
 }
 
@@ -305,15 +368,18 @@ run_stringtests()
 sysctl_test_0001()
 {
 	TARGET="${SYSCTL}/int_0001"
+	reset_vals
 	ORIG=$(cat "${TARGET}")
 	TEST_STR=$(( $ORIG + 1 ))
 
 	run_numerictests
+	run_limit_digit
 }
 
 sysctl_test_0002()
 {
 	TARGET="${SYSCTL}/string_0001"
+	reset_vals
 	ORIG=$(cat "${TARGET}")
 	TEST_STR="Testing sysctl"
 	# Only string sysctls support seeking/appending.
-- 
GitLab


From eb965eda1cabf26e62afc07a06cdf2fd5aaa2906 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:52 -0700
Subject: [PATCH 1321/1958] test_sysctl: add simple proc_dointvec() case

Test against a simple proc_dointvec() case.  While at it, add a test
against INT_MAX.  Make sure INT_MAX works, and INT_MAX+1 will fail.
Also test negative values work.

Link: http://lkml.kernel.org/r/20170630224431.17374-5-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_sysctl.c                        | 11 +++++
 tools/testing/selftests/sysctl/sysctl.sh | 62 ++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index b2163bfb6eb23..1472e1ae49317 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -41,11 +41,15 @@ static int i_one_hundred = 100;
 
 struct test_sysctl_data {
 	int int_0001;
+	int int_0002;
+
 	char string_0001[65];
 };
 
 static struct test_sysctl_data test_data = {
 	.int_0001 = 60,
+	.int_0002 = 1,
+
 	.string_0001 = "(none)",
 };
 
@@ -60,6 +64,13 @@ static struct ctl_table test_table[] = {
 		.extra1		= &i_zero,
 		.extra2         = &i_one_hundred,
 	},
+	{
+		.procname	= "int_0002",
+		.data		= &test_data.int_0002,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "string_0001",
 		.data		= &test_data.string_0001,
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 6ec807576f7c9..7ba3fa2bbd548 100644
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -31,6 +31,7 @@ TEST_FILE=$(mktemp)
 # we have tons of space.
 ALL_TESTS="0001:1:1"
 ALL_TESTS="$ALL_TESTS 0002:1:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
 
 test_modprobe()
 {
@@ -82,6 +83,9 @@ function check_production_sysctl_writes_strict()
 	if [ -z $MAX_DIGITS ]; then
 		MAX_DIGITS=$(($PAGE_SIZE/8))
 	fi
+	if [ -z $INT_MAX ]; then
+		INT_MAX=$(getconf INT_MAX)
+	fi
 }
 
 test_reqs()
@@ -122,6 +126,9 @@ reset_vals()
 		int_0001)
 			VAL="60"
 			;;
+		int_0002)
+			VAL="1"
+			;;
 		string_0001)
 			VAL="(none)"
 			;;
@@ -296,6 +303,48 @@ run_limit_digit()
 	test_rc
 }
 
+# You are using an int
+run_limit_digit_int()
+{
+	echo -n "Testing INT_MAX works ..."
+	reset_vals
+	TEST_STR="$INT_MAX"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing INT_MAX + 1 will fail as expected..."
+	reset_vals
+	let TEST_STR=$INT_MAX+1
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing negative values will work as expected..."
+	reset_vals
+	TEST_STR="-3"
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
 run_stringtests()
 {
 	echo -n "Writing entire sysctl in short writes ... "
@@ -389,6 +438,18 @@ sysctl_test_0002()
 	run_stringtests
 }
 
+sysctl_test_0003()
+{
+	TARGET="${SYSCTL}/int_0002"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+	TEST_STR=$(( $ORIG + 1 ))
+
+	run_numerictests
+	run_limit_digit
+	run_limit_digit_int
+}
+
 list_tests()
 {
 	echo "Test ID list:"
@@ -399,6 +460,7 @@ list_tests()
 	echo
 	echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
 	echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
+	echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
 }
 
 test_reqs
-- 
GitLab


From 2920fad3a5d394b66011c7f35c7b05278354055e Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:55 -0700
Subject: [PATCH 1322/1958] test_sysctl: add simple proc_douintvec() case

Test against a simple proc_douintvec() case.  While at it, add a test
against UINT_MAX.  Make sure UINT_MAX works, and UINT_MAX+1 will fail
and that negative values are not accepted.

Link: http://lkml.kernel.org/r/20170630224431.17374-6-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_sysctl.c                        | 11 +++++
 tools/testing/selftests/sysctl/sysctl.sh | 63 ++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 1472e1ae49317..53db3513ab082 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -43,6 +43,8 @@ struct test_sysctl_data {
 	int int_0001;
 	int int_0002;
 
+	unsigned int uint_0001;
+
 	char string_0001[65];
 };
 
@@ -50,6 +52,8 @@ static struct test_sysctl_data test_data = {
 	.int_0001 = 60,
 	.int_0002 = 1,
 
+	.uint_0001 = 314,
+
 	.string_0001 = "(none)",
 };
 
@@ -71,6 +75,13 @@ static struct ctl_table test_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "uint_0001",
+		.data		= &test_data.uint_0001,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec,
+	},
 	{
 		.procname	= "string_0001",
 		.data		= &test_data.string_0001,
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 7ba3fa2bbd548..abeef675a8845 100644
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -32,6 +32,7 @@ TEST_FILE=$(mktemp)
 ALL_TESTS="0001:1:1"
 ALL_TESTS="$ALL_TESTS 0002:1:1"
 ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
 
 test_modprobe()
 {
@@ -86,6 +87,9 @@ function check_production_sysctl_writes_strict()
 	if [ -z $INT_MAX ]; then
 		INT_MAX=$(getconf INT_MAX)
 	fi
+	if [ -z $UINT_MAX ]; then
+		UINT_MAX=$(getconf UINT_MAX)
+	fi
 }
 
 test_reqs()
@@ -129,6 +133,9 @@ reset_vals()
 		int_0002)
 			VAL="1"
 			;;
+		uint_0001)
+			VAL="314"
+			;;
 		string_0001)
 			VAL="(none)"
 			;;
@@ -345,6 +352,49 @@ run_limit_digit_int()
 	test_rc
 }
 
+# You are using an unsigned int
+run_limit_digit_uint()
+{
+	echo -n "Testing UINT_MAX works ..."
+	reset_vals
+	TEST_STR="$UINT_MAX"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing UINT_MAX + 1 will fail as expected..."
+	reset_vals
+	TEST_STR=$(($UINT_MAX+1))
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing negative values will not work as expected ..."
+	reset_vals
+	TEST_STR="-3"
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
 run_stringtests()
 {
 	echo -n "Writing entire sysctl in short writes ... "
@@ -450,6 +500,18 @@ sysctl_test_0003()
 	run_limit_digit_int
 }
 
+sysctl_test_0004()
+{
+	TARGET="${SYSCTL}/uint_0001"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+	TEST_STR=$(( $ORIG + 1 ))
+
+	run_numerictests
+	run_limit_digit
+	run_limit_digit_uint
+}
+
 list_tests()
 {
 	echo "Test ID list:"
@@ -461,6 +523,7 @@ list_tests()
 	echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
 	echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
 	echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
+	echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
 }
 
 test_reqs
-- 
GitLab


From 7c43a657a4beadeb6d2fe1a00732261e313a807f Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 12 Jul 2017 14:33:58 -0700
Subject: [PATCH 1323/1958] test_sysctl: test against int proc_dointvec() array
 support

Add a few initial respective tests for an array:

  o Echoing values separated by spaces works
  o Echoing only first elements will set first elements
  o Confirm PAGE_SIZE limit still applies even if an array is used

Link: http://lkml.kernel.org/r/20170630224431.17374-7-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_sysctl.c                        | 13 ++++
 tools/testing/selftests/sysctl/sysctl.sh | 89 ++++++++++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 53db3513ab082..3dd801c1c85b3 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -42,6 +42,7 @@ static int i_one_hundred = 100;
 struct test_sysctl_data {
 	int int_0001;
 	int int_0002;
+	int int_0003[4];
 
 	unsigned int uint_0001;
 
@@ -52,6 +53,11 @@ static struct test_sysctl_data test_data = {
 	.int_0001 = 60,
 	.int_0002 = 1,
 
+	.int_0003[0] = 0,
+	.int_0003[1] = 1,
+	.int_0003[2] = 2,
+	.int_0003[3] = 3,
+
 	.uint_0001 = 314,
 
 	.string_0001 = "(none)",
@@ -75,6 +81,13 @@ static struct ctl_table test_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "int_0003",
+		.data		= &test_data.int_0003,
+		.maxlen		= sizeof(test_data.int_0003),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "uint_0001",
 		.data		= &test_data.uint_0001,
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index abeef675a8845..ec232c3cfcaac 100644
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -33,6 +33,7 @@ ALL_TESTS="0001:1:1"
 ALL_TESTS="$ALL_TESTS 0002:1:1"
 ALL_TESTS="$ALL_TESTS 0003:1:1"
 ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:3:1"
 
 test_modprobe()
 {
@@ -108,6 +109,10 @@ test_reqs()
 		echo "$0: You need getconf installed"
 		exit 1
 	fi
+	if ! which diff 2> /dev/null > /dev/null; then
+		echo "$0: You need diff installed"
+		exit 1
+	fi
 }
 
 function load_req_mod()
@@ -167,6 +172,12 @@ verify()
 	return 0
 }
 
+verify_diff_w()
+{
+	echo "$TEST_STR" | diff -q -w -u - $1
+	return $?
+}
+
 test_rc()
 {
 	if [[ $rc != 0 ]]; then
@@ -352,6 +363,74 @@ run_limit_digit_int()
 	test_rc
 }
 
+# You used an int array
+run_limit_digit_int_array()
+{
+	echo -n "Testing array works as expected ... "
+	TEST_STR="4 3 2 1"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing skipping trailing array elements works ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# If we only echo in two digits the last two are left intact
+	TEST_STR="100 101"
+	echo -n $TEST_STR > $TARGET
+	# After we echo in, to help diff we need to set on TEST_STR what
+	# we expect the result to be.
+	TEST_STR="100 101 2 1"
+
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing PAGE_SIZE limit on array works ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# Even if you use an int array, you are still restricted to
+	# MAX_DIGITS, this is a known limitation. Test limit works.
+	LIMIT=$((MAX_DIGITS -1))
+	TEST_STR="9"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	TEST_STR="9 101 2 1"
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing exceeding PAGE_SIZE limit fails as expected ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# Now go over limit.
+	LIMIT=$((MAX_DIGITS))
+	TEST_STR="7"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	TEST_STR="7 101 2 1"
+	if verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
 # You are using an unsigned int
 run_limit_digit_uint()
 {
@@ -512,6 +591,15 @@ sysctl_test_0004()
 	run_limit_digit_uint
 }
 
+sysctl_test_0005()
+{
+	TARGET="${SYSCTL}/int_0003"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+
+	run_limit_digit_int_array
+}
+
 list_tests()
 {
 	echo "Test ID list:"
@@ -524,6 +612,7 @@ list_tests()
 	echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
 	echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
 	echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
+	echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
 }
 
 test_reqs
-- 
GitLab


From 9380fa60b10ebd6ee7c3fcdb2cf162f4d7cf9fc5 Mon Sep 17 00:00:00 2001
From: Mateusz Jurczyk <mjurczyk@google.com>
Date: Wed, 12 Jul 2017 14:34:01 -0700
Subject: [PATCH 1324/1958] kernel/sysctl_binary.c: check name array length in
 deprecated_sysctl_warning()

Prevent use of uninitialized memory (originating from the stack frame of
do_sysctl()) by verifying that the name array is filled with sufficient
input data before comparing its specific entries with integer constants.

Through timing measurement or analyzing the kernel debug logs, a
user-mode program could potentially infer the results of comparisons
against the uninitialized memory, and acquire some (very limited)
information about the state of the kernel stack.  The change also
eliminates possible future warnings by tools such as KMSAN and other
code checkers / instrumentations.

Link: http://lkml.kernel.org/r/20170524122139.21333-1-mjurczyk@google.com
Signed-off-by: Mateusz Jurczyk <mjurczyk@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Matthew Whitehead <tedheadster@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl_binary.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 939a158eab11d..02e1859f2ca82 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1346,7 +1346,7 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
 	 * CTL_KERN/KERN_VERSION is used by older glibc and cannot
 	 * ever go away.
 	 */
-	if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
+	if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
 		return;
 
 	if (printk_ratelimit()) {
-- 
GitLab


From ee7998c50c2697737c6530431709f77c852bf0d6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:34:04 -0700
Subject: [PATCH 1325/1958] random: do not ignore early device randomness

The add_device_randomness() function would ignore incoming bytes if the
crng wasn't ready.  This additionally makes sure to make an early enough
call to add_latent_entropy() to influence the initial stack canary,
which is especially important on non-x86 systems where it stays the same
through the life of the boot.

Link: http://lkml.kernel.org/r/20170626233038.GA48751@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Lokesh Vutla <lokeshvutla@ti.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/random.c | 5 +++++
 init/main.c           | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 01a260f674374..23cab7a8c1c19 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -987,6 +987,11 @@ void add_device_randomness(const void *buf, unsigned int size)
 	unsigned long time = random_get_entropy() ^ jiffies;
 	unsigned long flags;
 
+	if (!crng_ready()) {
+		crng_fast_load(buf, size);
+		return;
+	}
+
 	trace_add_device_randomness(size, _RET_IP_);
 	spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(&input_pool, buf, size);
diff --git a/init/main.c b/init/main.c
index df58a416dd1da..052481fbe3633 100644
--- a/init/main.c
+++ b/init/main.c
@@ -518,6 +518,7 @@ asmlinkage __visible void __init start_kernel(void)
 	/*
 	 * Set up the initial canary ASAP:
 	 */
+	add_latent_entropy();
 	boot_init_stack_canary();
 
 	cgroup_init_early();
-- 
GitLab


From 5f9f48f5b385457426b4b8f3f4a604019a8e7350 Mon Sep 17 00:00:00 2001
From: Rakesh Pandit <rakesh@tuxera.com>
Date: Wed, 12 Jul 2017 14:34:07 -0700
Subject: [PATCH 1326/1958] bfs: fix sanity checks for empty files

Mount fails if file system image has empty files because of sanity check
while reading superblock.  For empty files disk offset to end of file
(i_eoffset) is cpu_to_le32(-1).  Sanity check comparison, which compares
disk offset with file system size isn't valid for this value and hence
is ignored with this patch.

Steps to reproduce:

  $  dd if=/dev/zero of=bfs-image count=204800
  $  mkfs.bfs bfs-image
  $  mkdir bfs-mount-point
  $  sudo mount -t bfs -o loop bfs-image bfs-mount-point/
  $  cd bfs-mount-point/
  $  sudo touch a
  $  cd ..
  $  sudo umount bfs-mount-point/
  $  sudo mount -t bfs -o loop bfs-image bfs-mount-point/
  mount: /dev/loop0: can't read superblock

  $  dmesg
  [25526.689580] BFS-fs: bfs_fill_super(): Inode 0x00000003 corrupted

Tigran said:
 "If you had created the filesystem with the proper mkfs under SCO
  UnixWare 7 you (probably) wouldn't encounter this issue. But since
  commercial Unix-es are now part of history and the only proper way is
  the Linux mkfs.bfs utility, your patch is fine"

Link: http://lkml.kernel.org/r/20170505201625.GA3097@hercules.tuxera.com
Signed-off-by: Rakesh Pandit <rakesh@tuxera.com>
Acked-by: Tigran Aivazian <aivazian.tigran@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/bfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 25e312cb60716..9a69392f1fb37 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -419,7 +419,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		if (i_sblock > info->si_blocks ||
 			i_eblock > info->si_blocks ||
 			i_sblock > i_eblock ||
-			i_eoff > s_size ||
+			(i_eoff != le32_to_cpu(-1) && i_eoff > s_size) ||
 			i_sblock * BFS_BSIZE > i_eoff) {
 
 			printf("Inode 0x%08x corrupted\n", i);
-- 
GitLab


From 59224ac1cf9cacf76a82266854921ca6c9887f20 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 12 Jul 2017 14:34:10 -0700
Subject: [PATCH 1327/1958] fs/Kconfig: kill CONFIG_PERCPU_RWSEM some more

As of commit bf3eac84c42d ("percpu-rwsem: kill CONFIG_PERCPU_RWSEM") we
unconditionally build pcpu-rwsems.  Remove a leftover in for
FILE_LOCKING.

Link: http://lkml.kernel.org/r/20170518180115.2794-1-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index b0e42b6a96b97..7aee6d699fd6b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -80,7 +80,6 @@ config EXPORTFS_BLOCK_OPS
 config FILE_LOCKING
 	bool "Enable POSIX file locking API" if EXPERT
 	default y
-	select PERCPU_RWSEM
 	help
 	  This option enables standard file locking support, required
           for filesystems like NFS and for the flock() system
-- 
GitLab


From 821f74402a4c67de63cb6bab5bae7c7a3b298ac2 Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Wed, 12 Jul 2017 14:34:13 -0700
Subject: [PATCH 1328/1958] scripts/gdb: add lx-fdtdump command

lx-fdtdump dumps the flattened device tree passed to the kernel from the
bootloader to the filename specified as the command argument.  If no
argument is provided it defaults to fdtdump.dtb.  This then allows
further post processing on the machine running GDB.  The fdt header is
also also printed in the GDB console.  For example:

  (gdb) lx-fdtdump
  fdt_magic:         0xD00DFEED
  fdt_totalsize:     0xC108
  off_dt_struct:     0x38
  off_dt_strings:    0x3804
  off_mem_rsvmap:    0x28
  version:           17
  last_comp_version: 16
  Dumped fdt to fdtdump.dtb

  >fdtdump fdtdump.dtb | less

This command is useful as the bootloader can often re-write parts of the
device tree, and this can sometimes cause the kernel to not boot.

Link: http://lkml.kernel.org/r/1481280065-5336-2-git-send-email-kbingham@kernel.org
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Kieran Bingham <kbingham@kernel.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/gdb/linux/constants.py.in |  7 +++
 scripts/gdb/linux/proc.py         | 73 +++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index 7986f4e0da123..7aad824064225 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -14,6 +14,7 @@
 
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/of_fdt.h>
 
 /* We need to stringify expanded macros so that they can be parsed */
 
@@ -50,3 +51,9 @@ LX_VALUE(MNT_NOEXEC)
 LX_VALUE(MNT_NOATIME)
 LX_VALUE(MNT_NODIRATIME)
 LX_VALUE(MNT_RELATIME)
+
+/* linux/of_fdt.h> */
+LX_VALUE(OF_DT_HEADER)
+
+/* Kernel Configs */
+LX_CONFIG(CONFIG_OF)
diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py
index 38b1f09d1cd95..086d27223c0cf 100644
--- a/scripts/gdb/linux/proc.py
+++ b/scripts/gdb/linux/proc.py
@@ -16,6 +16,7 @@ from linux import constants
 from linux import utils
 from linux import tasks
 from linux import lists
+from struct import *
 
 
 class LxCmdLine(gdb.Command):
@@ -195,3 +196,75 @@ values of that process namespace"""
                         info_opts(MNT_INFO, m_flags)))
 
 LxMounts()
+
+
+class LxFdtDump(gdb.Command):
+    """Output Flattened Device Tree header and dump FDT blob to the filename
+       specified as the command argument. Equivalent to
+       'cat /proc/fdt > fdtdump.dtb' on a running target"""
+
+    def __init__(self):
+        super(LxFdtDump, self).__init__("lx-fdtdump", gdb.COMMAND_DATA,
+                                        gdb.COMPLETE_FILENAME)
+
+    def fdthdr_to_cpu(self, fdt_header):
+
+        fdt_header_be = ">IIIIIII"
+        fdt_header_le = "<IIIIIII"
+
+        if utils.get_target_endianness() == 1:
+            output_fmt = fdt_header_le
+        else:
+            output_fmt = fdt_header_be
+
+        return unpack(output_fmt, pack(fdt_header_be,
+                                       fdt_header['magic'],
+                                       fdt_header['totalsize'],
+                                       fdt_header['off_dt_struct'],
+                                       fdt_header['off_dt_strings'],
+                                       fdt_header['off_mem_rsvmap'],
+                                       fdt_header['version'],
+                                       fdt_header['last_comp_version']))
+
+    def invoke(self, arg, from_tty):
+
+        if not constants.LX_CONFIG_OF:
+            raise gdb.GdbError("Kernel not compiled with CONFIG_OF\n")
+
+        if len(arg) == 0:
+            filename = "fdtdump.dtb"
+        else:
+            filename = arg
+
+        py_fdt_header_ptr = gdb.parse_and_eval(
+            "(const struct fdt_header *) initial_boot_params")
+        py_fdt_header = py_fdt_header_ptr.dereference()
+
+        fdt_header = self.fdthdr_to_cpu(py_fdt_header)
+
+        if fdt_header[0] != constants.LX_OF_DT_HEADER:
+            raise gdb.GdbError("No flattened device tree magic found\n")
+
+        gdb.write("fdt_magic:         0x{:02X}\n".format(fdt_header[0]))
+        gdb.write("fdt_totalsize:     0x{:02X}\n".format(fdt_header[1]))
+        gdb.write("off_dt_struct:     0x{:02X}\n".format(fdt_header[2]))
+        gdb.write("off_dt_strings:    0x{:02X}\n".format(fdt_header[3]))
+        gdb.write("off_mem_rsvmap:    0x{:02X}\n".format(fdt_header[4]))
+        gdb.write("version:           {}\n".format(fdt_header[5]))
+        gdb.write("last_comp_version: {}\n".format(fdt_header[6]))
+
+        inf = gdb.inferiors()[0]
+        fdt_buf = utils.read_memoryview(inf, py_fdt_header_ptr,
+                                        fdt_header[1]).tobytes()
+
+        try:
+            f = open(filename, 'wb')
+        except:
+            raise gdb.GdbError("Could not open file to dump fdt")
+
+        f.write(fdt_buf)
+        f.close()
+
+        gdb.write("Dumped fdt blob to " + filename + "\n")
+
+LxFdtDump()
-- 
GitLab


From c454756f47277b651ad41a5a163499294529e35d Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Wed, 12 Jul 2017 14:34:16 -0700
Subject: [PATCH 1329/1958] scripts/gdb: lx-dmesg: cast log_buf to void* for
 addr fetch

In some cases it is possible for the str() conversion here to throw
encoding errors because log_buf might not point to valid ascii.  For
example:

  (gdb) python print str(gdb.parse_and_eval("log_buf"))
  Traceback (most recent call last):
    File "<string>", line 1, in <module>
  UnicodeEncodeError: 'ascii' codec can't encode character u'\u0303' in
  	position 24: ordinal not in range(128)

Avoid this by explicitly casting to (void *) inside the gdb expression.

Link: http://lkml.kernel.org/r/ba6f85dbb02ca980ebd0e2399b0649423399b565.1498481469.git.leonard.crestez@nxp.com
Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Kieran Bingham <kieran@ksquared.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/gdb/linux/dmesg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py
index 5afd1098e33a1..f5a030333dfd8 100644
--- a/scripts/gdb/linux/dmesg.py
+++ b/scripts/gdb/linux/dmesg.py
@@ -24,7 +24,7 @@ class LxDmesg(gdb.Command):
 
     def invoke(self, arg, from_tty):
         log_buf_addr = int(str(gdb.parse_and_eval(
-            "'printk.c'::log_buf")).split()[0], 16)
+            "(void *)'printk.c'::log_buf")).split()[0], 16)
         log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
         log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
         log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
-- 
GitLab


From 46d10a094353c05144f3b0530516bdac3ce7c435 Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Wed, 12 Jul 2017 14:34:19 -0700
Subject: [PATCH 1330/1958] scripts/gdb: lx-dmesg: use explicit encoding=utf8
 errors=replace

Use errors=replace because it is never desirable for lx-dmesg to fail on
string decoding errors, not even if the log buffer is corrupt and we
show incorrect info.

The kernel will sometimes print utf8, for example the copyright symbol
from jffs2.  In order to make this work specify 'utf8' everywhere
because python2 otherwise defaults to 'ascii'.

In theory the second errors='replace' is not be required because
everything that can be decoded as utf8 should also be encodable back to
utf8.  But it's better to be extra safe here.  It's worth noting that
this is definitely not true for encoding='ascii', unknown characters are
replaced with U+FFFD REPLACEMENT CHARACTER and they fail to encode back
to ascii.

Link: http://lkml.kernel.org/r/acee067f3345954ed41efb77b80eebdc038619c6.1498481469.git.leonard.crestez@nxp.com
Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Acked-by: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Kieran Bingham <kieran@ksquared.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/gdb/linux/dmesg.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py
index f5a030333dfd8..6d2e09a2ad2f9 100644
--- a/scripts/gdb/linux/dmesg.py
+++ b/scripts/gdb/linux/dmesg.py
@@ -12,6 +12,7 @@
 #
 
 import gdb
+import sys
 
 from linux import utils
 
@@ -52,13 +53,19 @@ class LxDmesg(gdb.Command):
                 continue
 
             text_len = utils.read_u16(log_buf[pos + 10:pos + 12])
-            text = log_buf[pos + 16:pos + 16 + text_len].decode()
+            text = log_buf[pos + 16:pos + 16 + text_len].decode(
+                encoding='utf8', errors='replace')
             time_stamp = utils.read_u64(log_buf[pos:pos + 8])
 
             for line in text.splitlines():
-                gdb.write("[{time:12.6f}] {line}\n".format(
+                msg = u"[{time:12.6f}] {line}\n".format(
                     time=time_stamp / 1000000000.0,
-                    line=line))
+                    line=line)
+                # With python2 gdb.write will attempt to convert unicode to
+                # ascii and might fail so pass an utf8-encoded str instead.
+                if sys.hexversion < 0x03000000:
+                    msg = msg.encode(encoding='utf8', errors='replace')
+                gdb.write(msg)
 
             pos += length
 
-- 
GitLab


From 9263969a46fc899092ba4f8c4206fa2340c9a64e Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 12 Jul 2017 14:34:22 -0700
Subject: [PATCH 1331/1958] kfifo: clean up example to not use page_link

This is a layering violation so we replace the uses with calls to
sg_page().  This is a prep patch for replacing page_link and this is one
of the very few uses outside of scatterlist.h.

Link: http://lkml.kernel.org/r/1495663199-22234-1-git-send-email-logang@deltatee.com
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Stephen Bates <sbates@raithlin.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 samples/kfifo/dma-example.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index aa243db93f01f..be0d4a5fdf536 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c
@@ -75,8 +75,8 @@ static int __init example_init(void)
 	for (i = 0; i < nents; i++) {
 		printk(KERN_INFO
 		"sg[%d] -> "
-		"page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-			i, sg[i].page_link, sg[i].offset, sg[i].length);
+		"page %p offset 0x%.8x length 0x%.8x\n",
+			i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
 
 		if (sg_is_last(&sg[i]))
 			break;
@@ -104,8 +104,8 @@ static int __init example_init(void)
 	for (i = 0; i < nents; i++) {
 		printk(KERN_INFO
 		"sg[%d] -> "
-		"page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-			i, sg[i].page_link, sg[i].offset, sg[i].length);
+		"page %p offset 0x%.8x length 0x%.8x\n",
+			i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
 
 		if (sg_is_last(&sg[i]))
 			break;
-- 
GitLab


From 77493f04b74cdff3a61fb3fb14b1f5a71d88fd5f Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 12 Jul 2017 14:34:25 -0700
Subject: [PATCH 1332/1958] procfs: fdinfo: extend information about epoll
 target files

Since it is possbile to have same number in tfd field (say file added,
closed, then nother file dup'ed to same number and added back) it is
imposible to distinguish such target files solely by their numbers.

Strictly speaking regular applications don't need to recognize these
targets at all but for checkpoint/restore sake we need to collect
targets to be able to push them back on restore stage in a proper order.

Thus lets add file position, inode and device number where this target
lays.  This three fields can be used as a primary key for sorting, and
together with kcmp help CRIU can find out an exact file target (from the
whole set of processes being checkpointed).

Link: http://lkml.kernel.org/r/20170424154423.436491881@gmail.com
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 6 +++++-
 fs/eventpoll.c                     | 8 ++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 4cddbce85ac9a..adba21b5ada7b 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1786,12 +1786,16 @@ pair provide additional information particular to the objects they represent.
 	pos:	0
 	flags:	02
 	mnt_id:	9
-	tfd:        5 events:       1d data: ffffffffffffffff
+	tfd:        5 events:       1d data: ffffffffffffffff pos:0 ino:61af sdev:7
 
 	where 'tfd' is a target file descriptor number in decimal form,
 	'events' is events mask being watched and the 'data' is data
 	associated with a target [see epoll(7) for more details].
 
+	The 'pos' is current offset of the target file in decimal form
+	[see lseek(2)], 'ino' and 'sdev' are inode and device numbers
+	where target file resides, all in hex format.
+
 	Fsnotify files
 	~~~~~~~~~~~~~~
 	For inotify files the format is the following
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a6d194831ed86..322904c3ebdf9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -960,10 +960,14 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
 	mutex_lock(&ep->mtx);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+		struct inode *inode = file_inode(epi->ffd.file);
 
-		seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+		seq_printf(m, "tfd: %8d events: %8x data: %16llx "
+			   " pos:%lli ino:%lx sdev:%x\n",
 			   epi->ffd.fd, epi->event.events,
-			   (long long)epi->event.data);
+			   (long long)epi->event.data,
+			   (long long)epi->ffd.file->f_pos,
+			   inode->i_ino, inode->i_sb->s_dev);
 		if (seq_has_overflowed(m))
 			break;
 	}
-- 
GitLab


From 0791e3644e5ef21646fe565b9061788d05ec71d4 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 12 Jul 2017 14:34:28 -0700
Subject: [PATCH 1333/1958] kcmp: add KCMP_EPOLL_TFD mode to compare epoll
 target files

With current epoll architecture target files are addressed with
file_struct and file descriptor number, where the last is not unique.
Moreover files can be transferred from another process via unix socket,
added into queue and closed then so we won't find this descriptor in the
task fdinfo list.

Thus to checkpoint and restore such processes CRIU needs to find out
where exactly the target file is present to add it into epoll queue.
For this sake one can use kcmp call where some particular target file
from the queue is compared with arbitrary file passed as an argument.

Because epoll target files can have same file descriptor number but
different file_struct a caller should explicitly specify the offset
within.

To test if some particular file is matching entry inside epoll one have
to

 - fill kcmp_epoll_slot structure with epoll file descriptor,
   target file number and target file offset (in case if only
   one target is present then it should be 0)

 - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot)
    - the kernel fetch file pointer matching file descriptor @fd of pid1
    - lookups for file struct in epoll queue of pid2 and returns traditional
      0,1,2 result for sorting purpose

Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c            | 42 +++++++++++++++++++++++++++++
 include/linux/eventpoll.h |  3 +++
 include/uapi/linux/kcmp.h | 10 +++++++
 kernel/kcmp.c             | 57 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 112 insertions(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 322904c3ebdf9..e7e9901c37907 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1077,6 +1077,48 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	return epir;
 }
 
+static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
+{
+	struct rb_node *rbp;
+	struct epitem *epi;
+
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		epi = rb_entry(rbp, struct epitem, rbn);
+		if (epi->ffd.fd == tfd) {
+			if (toff == 0)
+				return epi;
+			else
+				toff--;
+		}
+		cond_resched();
+	}
+
+	return NULL;
+}
+
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
+				     unsigned long toff)
+{
+	struct file *file_raw;
+	struct eventpoll *ep;
+	struct epitem *epi;
+
+	if (!is_file_epoll(file))
+		return ERR_PTR(-EINVAL);
+
+	ep = file->private_data;
+
+	mutex_lock(&ep->mtx);
+	epi = ep_find_tfd(ep, tfd, toff);
+	if (epi)
+		file_raw = epi->ffd.file;
+	else
+		file_raw = ERR_PTR(-ENOENT);
+	mutex_unlock(&ep->mtx);
+
+	return file_raw;
+}
+
 /*
  * This is the callback that is passed to the wait queue wakeup
  * mechanism. It is called by the stored file descriptors when they
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 6daf6d4971f65..d8625d214ea7a 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -14,6 +14,7 @@
 #define _LINUX_EVENTPOLL_H
 
 #include <uapi/linux/eventpoll.h>
+#include <uapi/linux/kcmp.h>
 
 
 /* Forward declarations to avoid compiler errors */
@@ -22,6 +23,8 @@ struct file;
 
 #ifdef CONFIG_EPOLL
 
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+
 /* Used to initialize the epoll bits inside the "struct file" */
 static inline void eventpoll_init_file(struct file *file)
 {
diff --git a/include/uapi/linux/kcmp.h b/include/uapi/linux/kcmp.h
index 84df14b373601..481e103da78ed 100644
--- a/include/uapi/linux/kcmp.h
+++ b/include/uapi/linux/kcmp.h
@@ -1,6 +1,8 @@
 #ifndef _UAPI_LINUX_KCMP_H
 #define _UAPI_LINUX_KCMP_H
 
+#include <linux/types.h>
+
 /* Comparison type */
 enum kcmp_type {
 	KCMP_FILE,
@@ -10,8 +12,16 @@ enum kcmp_type {
 	KCMP_SIGHAND,
 	KCMP_IO,
 	KCMP_SYSVSEM,
+	KCMP_EPOLL_TFD,
 
 	KCMP_TYPES,
 };
 
+/* Slot for KCMP_EPOLL_TFD */
+struct kcmp_epoll_slot {
+	__u32 efd;		/* epoll file descriptor */
+	__u32 tfd;		/* target file number */
+	__u32 toff;		/* target offset within same numbered sequence */
+};
+
 #endif /* _UAPI_LINUX_KCMP_H */
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 3a47fa998fe07..ea34ed8bb9529 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -11,6 +11,10 @@
 #include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
 
 #include <asm/unistd.h>
 
@@ -94,6 +98,56 @@ static int kcmp_lock(struct mutex *m1, struct mutex *m2)
 	return err;
 }
 
+#ifdef CONFIG_EPOLL
+static int kcmp_epoll_target(struct task_struct *task1,
+			     struct task_struct *task2,
+			     unsigned long idx1,
+			     struct kcmp_epoll_slot __user *uslot)
+{
+	struct file *filp, *filp_epoll, *filp_tgt;
+	struct kcmp_epoll_slot slot;
+	struct files_struct *files;
+
+	if (copy_from_user(&slot, uslot, sizeof(slot)))
+		return -EFAULT;
+
+	filp = get_file_raw_ptr(task1, idx1);
+	if (!filp)
+		return -EBADF;
+
+	files = get_files_struct(task2);
+	if (!files)
+		return -EBADF;
+
+	spin_lock(&files->file_lock);
+	filp_epoll = fcheck_files(files, slot.efd);
+	if (filp_epoll)
+		get_file(filp_epoll);
+	else
+		filp_tgt = ERR_PTR(-EBADF);
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+
+	if (filp_epoll) {
+		filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+		fput(filp_epoll);
+	} else
+
+	if (IS_ERR(filp_tgt))
+		return PTR_ERR(filp_tgt);
+
+	return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+			     struct task_struct *task2,
+			     unsigned long idx1,
+			     struct kcmp_epoll_slot __user *uslot)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
 		unsigned long, idx1, unsigned long, idx2)
 {
@@ -165,6 +219,9 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
 		ret = -EOPNOTSUPP;
 #endif
 		break;
+	case KCMP_EPOLL_TFD:
+		ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
-- 
GitLab


From 92ef6da3d06ff551a86de41ae37df9cc4b58d7a0 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 12 Jul 2017 14:34:31 -0700
Subject: [PATCH 1334/1958] kcmp: fs/epoll: wrap kcmp code with
 CONFIG_CHECKPOINT_RESTORE

kcmp syscall is build iif CONFIG_CHECKPOINT_RESTORE is selected, so wrap
appropriate helpers in epoll code with the config to build it
conditionally.

Link: http://lkml.kernel.org/r/20170513083456.GG1881@uranus.lan
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reported-by: Andrew Morton <akpm@linuxfoundation.org>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c            | 2 ++
 include/linux/eventpoll.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e7e9901c37907..e767e4389cb13 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1077,6 +1077,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	return epir;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
 {
 	struct rb_node *rbp;
@@ -1118,6 +1119,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
 
 	return file_raw;
 }
+#endif /* CONFIG_CHECKPOINT_RESTORE */
 
 /*
  * This is the callback that is passed to the wait queue wakeup
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index d8625d214ea7a..2f14ac73d01d5 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -23,7 +23,9 @@ struct file;
 
 #ifdef CONFIG_EPOLL
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+#endif
 
 /* Used to initialize the epoll bits inside the "struct file" */
 static inline void eventpoll_init_file(struct file *file)
-- 
GitLab


From e41d58185f1444368873d4d7422f7664a68be61d Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Wed, 12 Jul 2017 14:34:35 -0700
Subject: [PATCH 1335/1958] fault-inject: support systematic fault injection

Add /proc/self/task/<current-tid>/fail-nth file that allows failing
0-th, 1-st, 2-nd and so on calls systematically.
Excerpt from the added documentation:

 "Write to this file of integer N makes N-th call in the current task
  fail (N is 0-based). Read from this file returns a single char 'Y' or
  'N' that says if the fault setup with a previous write to this file
  was injected or not, and disables the fault if it wasn't yet injected.
  Note that this file enables all types of faults (slab, futex, etc).
  This setting takes precedence over all other generic settings like
  probability, interval, times, etc. But per-capability settings (e.g.
  fail_futex/ignore-private) take precedence over it. This feature is
  intended for systematic testing of faults in a single system call. See
  an example below"

Why add a new setting:
1. Existing settings are global rather than per-task.
   So parallel testing is not possible.
2. attr->interval is close but it depends on attr->count
   which is non reset to 0, so interval does not work as expected.
3. Trying to model this with existing settings requires manipulations
   of all of probability, interval, times, space, task-filter and
   unexposed count and per-task make-it-fail files.
4. Existing settings are per-failure-type, and the set of failure
   types is potentially expanding.
5. make-it-fail can't be changed by unprivileged user and aggressive
   stress testing better be done from an unprivileged user.
   Similarly, this would require opening the debugfs files to the
   unprivileged user, as he would need to reopen at least times file
   (not possible to pre-open before dropping privs).

The proposed interface solves all of the above (see the example).

We want to integrate this into syzkaller fuzzer.  A prototype has found
10 bugs in kernel in first day of usage:

  https://groups.google.com/forum/#!searchin/syzkaller/%22FAULT_INJECTION%22%7Csort:relevance

I've made the current interface work with all types of our sandboxes.
For setuid the secret sauce was prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) to
make /proc entries non-root owned.  So I am fine with the current
version of the code.

[akpm@linux-foundation.org: fix build]
Link: http://lkml.kernel.org/r/20170328130128.101773-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../fault-injection/fault-injection.txt       | 78 +++++++++++++++++++
 fs/proc/base.c                                | 52 +++++++++++++
 include/linux/sched.h                         |  1 +
 kernel/fork.c                                 |  4 +
 lib/fault-inject.c                            |  7 ++
 5 files changed, 142 insertions(+)

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 415484f3d59a2..192d8cbcc5f99 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -134,6 +134,22 @@ use the boot option:
 	fail_futex=
 	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
+o proc entries
+
+- /proc/self/task/<current-tid>/fail-nth:
+
+	Write to this file of integer N makes N-th call in the current task fail
+	(N is 0-based). Read from this file returns a single char 'Y' or 'N'
+	that says if the fault setup with a previous write to this file was
+	injected or not, and disables the fault if it wasn't yet injected.
+	Note that this file enables all types of faults (slab, futex, etc).
+	This setting takes precedence over all other generic debugfs settings
+	like probability, interval, times, etc. But per-capability settings
+	(e.g. fail_futex/ignore-private) take precedence over it.
+
+	This feature is intended for systematic testing of faults in a single
+	system call. See an example below.
+
 How to add new fault injection capability
 -----------------------------------------
 
@@ -278,3 +294,65 @@ allocation failure.
 	# env FAILCMD_TYPE=fail_page_alloc \
 		./tools/testing/fault-injection/failcmd.sh --times=100 \
                 -- make -C tools/testing/selftests/ run_tests
+
+Systematic faults using fail-nth
+---------------------------------
+
+The following code systematically faults 0-th, 1-st, 2-nd and so on
+capabilities in the socketpair() system call.
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+int main()
+{
+	int i, err, res, fail_nth, fds[2];
+	char buf[128];
+
+	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
+	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
+	fail_nth = open(buf, O_RDWR);
+	for (i = 0;; i++) {
+		sprintf(buf, "%d", i);
+		write(fail_nth, buf, strlen(buf));
+		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
+		err = errno;
+		read(fail_nth, buf, 1);
+		if (res == 0) {
+			close(fds[0]);
+			close(fds[1]);
+		}
+		printf("%d-th fault %c: res=%d/%d\n", i, buf[0], res, err);
+		if (buf[0] != 'Y')
+			break;
+	}
+	return 0;
+}
+
+An example output:
+
+0-th fault Y: res=-1/23
+1-th fault Y: res=-1/23
+2-th fault Y: res=-1/23
+3-th fault Y: res=-1/12
+4-th fault Y: res=-1/12
+5-th fault Y: res=-1/23
+6-th fault Y: res=-1/23
+7-th fault Y: res=-1/23
+8-th fault Y: res=-1/12
+9-th fault Y: res=-1/12
+10-th fault Y: res=-1/12
+11-th fault Y: res=-1/12
+12-th fault Y: res=-1/12
+13-th fault Y: res=-1/12
+14-th fault Y: res=-1/12
+15-th fault Y: res=-1/12
+16-th fault N: res=0/12
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f1e1927ccd484..88b773f318cd0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1355,6 +1355,53 @@ static const struct file_operations proc_fault_inject_operations = {
 	.write		= proc_fault_inject_write,
 	.llseek		= generic_file_llseek,
 };
+
+static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	int err, n;
+
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		return -ESRCH;
+	put_task_struct(task);
+	if (task != current)
+		return -EPERM;
+	err = kstrtoint_from_user(buf, count, 10, &n);
+	if (err)
+		return err;
+	if (n < 0 || n == INT_MAX)
+		return -EINVAL;
+	current->fail_nth = n + 1;
+	return count;
+}
+
+static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	int err;
+
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		return -ESRCH;
+	put_task_struct(task);
+	if (task != current)
+		return -EPERM;
+	if (count < 1)
+		return -EINVAL;
+	err = put_user((char)(current->fail_nth ? 'N' : 'Y'), buf);
+	if (err)
+		return err;
+	current->fail_nth = 0;
+	return 1;
+}
+
+static const struct file_operations proc_fail_nth_operations = {
+	.read		= proc_fail_nth_read,
+	.write		= proc_fail_nth_write,
+};
 #endif
 
 
@@ -3311,6 +3358,11 @@ static const struct pid_entry tid_base_stuff[] = {
 #endif
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
+	/*
+	 * Operations on the file check that the task is current,
+	 * so we create it with 0666 to support testing under unprivileged user.
+	 */
+	REG("fail-nth", 0666, proc_fail_nth_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	ONE("io",	S_IRUSR, proc_tid_io_accounting),
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 20814b7d7d70e..3822d749fc9ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,6 +974,7 @@ struct task_struct {
 
 #ifdef CONFIG_FAULT_INJECTION
 	int				make_it_fail;
+	int fail_nth;
 #endif
 	/*
 	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
diff --git a/kernel/fork.c b/kernel/fork.c
index d2b9d7c31eaf8..ade237a963088 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -573,6 +573,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 	kcov_task_init(tsk);
 
+#ifdef CONFIG_FAULT_INJECTION
+	tsk->fail_nth = 0;
+#endif
+
 	return tsk;
 
 free_stack:
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4ff157159a0d4..09ac73c177fd5 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -107,6 +107,12 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
 
 bool should_fail(struct fault_attr *attr, ssize_t size)
 {
+	if (in_task() && current->fail_nth) {
+		if (--current->fail_nth == 0)
+			goto fail;
+		return false;
+	}
+
 	/* No need to check any other properties if the probability is 0 */
 	if (attr->probability == 0)
 		return false;
@@ -134,6 +140,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
 	if (!fail_stacktrace(attr))
 		return false;
 
+fail:
 	fail_dump(attr);
 
 	if (atomic_read(&attr->times) != -1)
-- 
GitLab


From 1a23395672658969a4035dcc518ea6cab835c579 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:34:38 -0700
Subject: [PATCH 1336/1958] ipc/sem.c: remove sem_base, embed struct sem

sma->sem_base is initialized with

	sma->sem_base = (struct sem *) &sma[1];

The current code has four problems:
 - There is an unnecessary pointer dereference - sem_base is not needed.
 - Alignment for struct sem only works by chance.
 - The current code causes false positive for static code analysis.
 - This is a cast between different non-void types, which the future
   randstruct GCC plugin warns on.

And, as bonus, the code size gets smaller:

  Before:
    0 .text         00003770
  After:
    0 .text         0000374e

[manfred@colorfullife.com: s/[0]/[]/, per hch]
  Link: http://lkml.kernel.org/r/20170525185107.12869-2-manfred@colorfullife.com
Link: http://lkml.kernel.org/r/20170515171912.6298-2-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: <1vier1@web.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Fabian Frederick <fabf@skynet.be>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h | 22 +++++++++++-
 ipc/sem.c           | 88 ++++++++++++++++++---------------------------
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9edec926e9d96..9db14093b73c8 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -8,11 +8,29 @@
 
 struct task_struct;
 
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+	int	semval;		/* current value */
+	/*
+	 * PID of the process that last modified the semaphore. For
+	 * Linux, specifically these are:
+	 *  - semop
+	 *  - semctl, via SETVAL and SETALL.
+	 *  - at task exit when performing undo adjustments (see exit_sem).
+	 */
+	int	sempid;
+	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
+	struct list_head pending_alter; /* pending single-sop operations */
+					/* that alter the semaphore */
+	struct list_head pending_const; /* pending single-sop operations */
+					/* that do not alter the semaphore*/
+	time_t	sem_otime;	/* candidate for sem_otime */
+} ____cacheline_aligned_in_smp;
+
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
 	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
 	time_t			sem_ctime;	/* last change time */
-	struct sem		*sem_base;	/* ptr to first semaphore in array */
 	struct list_head	pending_alter;	/* pending operations */
 						/* that alter the array */
 	struct list_head	pending_const;	/* pending complex operations */
@@ -21,6 +39,8 @@ struct sem_array {
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
 	unsigned int		use_global_lock;/* >0: global lock required */
+
+	struct sem		sems[];
 };
 
 #ifdef CONFIG_SYSVIPC
diff --git a/ipc/sem.c b/ipc/sem.c
index 947dc2348271f..fff8337ebab3e 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -87,24 +87,6 @@
 #include <linux/uaccess.h>
 #include "util.h"
 
-/* One semaphore structure for each semaphore in the system. */
-struct sem {
-	int	semval;		/* current value */
-	/*
-	 * PID of the process that last modified the semaphore. For
-	 * Linux, specifically these are:
-	 *  - semop
-	 *  - semctl, via SETVAL and SETALL.
-	 *  - at task exit when performing undo adjustments (see exit_sem).
-	 */
-	int	sempid;
-	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
-	struct list_head pending_alter; /* pending single-sop operations */
-					/* that alter the semaphore */
-	struct list_head pending_const; /* pending single-sop operations */
-					/* that do not alter the semaphore*/
-	time_t	sem_otime;	/* candidate for sem_otime */
-} ____cacheline_aligned_in_smp;
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
@@ -175,7 +157,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
  *	sem_array.sem_undo
  *
  * b) global or semaphore sem_lock() for read/write:
- *	sem_array.sem_base[i].pending_{const,alter}:
+ *	sem_array.sems[i].pending_{const,alter}:
  *
  * c) special:
  *	sem_undo_list.list_proc:
@@ -250,7 +232,7 @@ static void unmerge_queues(struct sem_array *sma)
 	 */
 	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
 		struct sem *curr;
-		curr = &sma->sem_base[q->sops[0].sem_num];
+		curr = &sma->sems[q->sops[0].sem_num];
 
 		list_add_tail(&q->list, &curr->pending_alter);
 	}
@@ -270,7 +252,7 @@ static void merge_queues(struct sem_array *sma)
 {
 	int i;
 	for (i = 0; i < sma->sem_nsems; i++) {
-		struct sem *sem = sma->sem_base + i;
+		struct sem *sem = &sma->sems[i];
 
 		list_splice_init(&sem->pending_alter, &sma->pending_alter);
 	}
@@ -306,7 +288,7 @@ static void complexmode_enter(struct sem_array *sma)
 	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 
 	for (i = 0; i < sma->sem_nsems; i++) {
-		sem = sma->sem_base + i;
+		sem = &sma->sems[i];
 		spin_lock(&sem->lock);
 		spin_unlock(&sem->lock);
 	}
@@ -366,7 +348,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	 *
 	 * Both facts are tracked by use_global_mode.
 	 */
-	sem = sma->sem_base + sops->sem_num;
+	sem = &sma->sems[sops->sem_num];
 
 	/*
 	 * Initial check for use_global_lock. Just an optimization,
@@ -421,7 +403,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
 		complexmode_tryleave(sma);
 		ipc_unlock_object(&sma->sem_perm);
 	} else {
-		struct sem *sem = sma->sem_base + locknum;
+		struct sem *sem = &sma->sems[locknum];
 		spin_unlock(&sem->lock);
 	}
 }
@@ -487,7 +469,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	if (ns->used_sems + nsems > ns->sc_semmns)
 		return -ENOSPC;
 
-	size = sizeof(*sma) + nsems * sizeof(struct sem);
+	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
 	sma = ipc_rcu_alloc(size);
 	if (!sma)
 		return -ENOMEM;
@@ -504,12 +486,10 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 		return retval;
 	}
 
-	sma->sem_base = (struct sem *) &sma[1];
-
 	for (i = 0; i < nsems; i++) {
-		INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
-		INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
-		spin_lock_init(&sma->sem_base[i].lock);
+		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
+		INIT_LIST_HEAD(&sma->sems[i].pending_const);
+		spin_lock_init(&sma->sems[i].lock);
 	}
 
 	sma->complex_count = 0;
@@ -612,7 +592,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	un = q->undo;
 
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -639,7 +619,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	sop--;
 	pid = q->pid;
 	while (sop >= sops) {
-		sma->sem_base[sop->sem_num].sempid = pid;
+		sma->sems[sop->sem_num].sempid = pid;
 		sop--;
 	}
 
@@ -661,7 +641,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	sop--;
 	while (sop >= sops) {
 		sem_op = sop->sem_op;
-		sma->sem_base[sop->sem_num].semval -= sem_op;
+		sma->sems[sop->sem_num].semval -= sem_op;
 		if (sop->sem_flg & SEM_UNDO)
 			un->semadj[sop->sem_num] += sem_op;
 		sop--;
@@ -692,7 +672,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 	 * until the operations can go through.
 	 */
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -716,7 +696,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 	}
 
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -815,7 +795,7 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
 	if (semnum == -1)
 		pending_list = &sma->pending_const;
 	else
-		pending_list = &sma->sem_base[semnum].pending_const;
+		pending_list = &sma->sems[semnum].pending_const;
 
 	list_for_each_entry_safe(q, tmp, pending_list, list) {
 		int error = perform_atomic_semop(sma, q);
@@ -856,7 +836,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 		for (i = 0; i < nsops; i++) {
 			int num = sops[i].sem_num;
 
-			if (sma->sem_base[num].semval == 0) {
+			if (sma->sems[num].semval == 0) {
 				got_zero = 1;
 				semop_completed |= wake_const_ops(sma, num, wake_q);
 			}
@@ -867,7 +847,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 		 * Assume all were changed.
 		 */
 		for (i = 0; i < sma->sem_nsems; i++) {
-			if (sma->sem_base[i].semval == 0) {
+			if (sma->sems[i].semval == 0) {
 				got_zero = 1;
 				semop_completed |= wake_const_ops(sma, i, wake_q);
 			}
@@ -909,7 +889,7 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
 	if (semnum == -1)
 		pending_list = &sma->pending_alter;
 	else
-		pending_list = &sma->sem_base[semnum].pending_alter;
+		pending_list = &sma->sems[semnum].pending_alter;
 
 again:
 	list_for_each_entry_safe(q, tmp, pending_list, list) {
@@ -922,7 +902,7 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
 		 * be in the  per semaphore pending queue, and decrements
 		 * cannot be successful if the value is already 0.
 		 */
-		if (semnum != -1 && sma->sem_base[semnum].semval == 0)
+		if (semnum != -1 && sma->sems[semnum].semval == 0)
 			break;
 
 		error = perform_atomic_semop(sma, q);
@@ -959,9 +939,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
 static void set_semotime(struct sem_array *sma, struct sembuf *sops)
 {
 	if (sops == NULL) {
-		sma->sem_base[0].sem_otime = get_seconds();
+		sma->sems[0].sem_otime = get_seconds();
 	} else {
-		sma->sem_base[sops[0].sem_num].sem_otime =
+		sma->sems[sops[0].sem_num].sem_otime =
 							get_seconds();
 	}
 }
@@ -1067,9 +1047,9 @@ static int count_semcnt(struct sem_array *sma, ushort semnum,
 	semcnt = 0;
 	/* First: check the simple operations. They are easy to evaluate */
 	if (count_zero)
-		l = &sma->sem_base[semnum].pending_const;
+		l = &sma->sems[semnum].pending_const;
 	else
-		l = &sma->sem_base[semnum].pending_alter;
+		l = &sma->sems[semnum].pending_alter;
 
 	list_for_each_entry(q, l, list) {
 		/* all task on a per-semaphore list sleep on exactly
@@ -1124,7 +1104,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
 	}
 	for (i = 0; i < sma->sem_nsems; i++) {
-		struct sem *sem = sma->sem_base + i;
+		struct sem *sem = &sma->sems[i];
 		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
 			unlink_queue(sma, q);
 			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
@@ -1174,9 +1154,9 @@ static time_t get_semotime(struct sem_array *sma)
 	int i;
 	time_t res;
 
-	res = sma->sem_base[0].sem_otime;
+	res = sma->sems[0].sem_otime;
 	for (i = 1; i < sma->sem_nsems; i++) {
-		time_t to = sma->sem_base[i].sem_otime;
+		time_t to = sma->sems[i].sem_otime;
 
 		if (to > res)
 			res = to;
@@ -1325,7 +1305,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 		return -EIDRM;
 	}
 
-	curr = &sma->sem_base[semnum];
+	curr = &sma->sems[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
 	list_for_each_entry(un, &sma->list_id, list_id)
@@ -1402,7 +1382,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
-			sem_io[i] = sma->sem_base[i].semval;
+			sem_io[i] = sma->sems[i].semval;
 		sem_unlock(sma, -1);
 		rcu_read_unlock();
 		err = 0;
@@ -1450,8 +1430,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		}
 
 		for (i = 0; i < nsems; i++) {
-			sma->sem_base[i].semval = sem_io[i];
-			sma->sem_base[i].sempid = task_tgid_vnr(current);
+			sma->sems[i].semval = sem_io[i];
+			sma->sems[i].sempid = task_tgid_vnr(current);
 		}
 
 		ipc_assert_locked_object(&sma->sem_perm);
@@ -1476,7 +1456,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		err = -EIDRM;
 		goto out_unlock;
 	}
-	curr = &sma->sem_base[semnum];
+	curr = &sma->sems[semnum];
 
 	switch (cmd) {
 	case GETVAL:
@@ -1932,7 +1912,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 */
 	if (nsops == 1) {
 		struct sem *curr;
-		curr = &sma->sem_base[sops->sem_num];
+		curr = &sma->sems[sops->sem_num];
 
 		if (alter) {
 			if (sma->complex_count) {
@@ -2146,7 +2126,7 @@ void exit_sem(struct task_struct *tsk)
 
 		/* perform adjustments registered in un */
 		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *semaphore = &sma->sem_base[i];
+			struct sem *semaphore = &sma->sems[i];
 			if (un->semadj[i]) {
 				semaphore->semval += un->semadj[i];
 				/*
-- 
GitLab


From dba4cdd39e698d8dcdad0656825423052ac90ccd Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:34:41 -0700
Subject: [PATCH 1337/1958] ipc: merge ipc_rcu and kern_ipc_perm

ipc has two management structures that exist for every id:
 - struct kern_ipc_perm, it contains e.g. the permissions.
 - struct ipc_rcu, it contains the rcu head for rcu handling and the
   refcount.

The patch merges both structures.

As a bonus, we may save one cacheline, because both structures are
cacheline aligned.  In addition, it reduces the number of casts, instead
most codepaths can use container_of.

To simplify code, the ipc_rcu_alloc initializes the allocation to 0.

[manfred@colorfullife.com: really include the memset() into ipc_alloc_rcu()]
  Link: http://lkml.kernel.org/r/564f8612-0601-b267-514f-a9f650ec9b32@colorfullife.com
Link: http://lkml.kernel.org/r/20170525185107.12869-3-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc.h |  3 +++
 ipc/msg.c           | 19 +++++++++++--------
 ipc/sem.c           | 34 +++++++++++++++++-----------------
 ipc/shm.c           | 18 +++++++++++-------
 ipc/util.c          | 35 +++++++++++++++++------------------
 ipc/util.h          | 18 +++++++-----------
 6 files changed, 66 insertions(+), 61 deletions(-)

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 71fd92d81b266..5591f055e13fd 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -20,6 +20,9 @@ struct kern_ipc_perm {
 	umode_t		mode;
 	unsigned long	seq;
 	void		*security;
+
+	struct rcu_head rcu;
+	atomic_t refcount;
 } ____cacheline_aligned_in_smp;
 
 #endif /* _LINUX_IPC_H */
diff --git a/ipc/msg.c b/ipc/msg.c
index 104926dc72be4..0ed7dae7d4e8b 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -97,8 +97,8 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 
 static void msg_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct msg_queue *msq = ipc_rcu_to_struct(p);
+	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+	struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 
 	security_msg_queue_free(msq);
 	ipc_rcu_free(head);
@@ -118,7 +118,10 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	key_t key = params->key;
 	int msgflg = params->flg;
 
-	msq = ipc_rcu_alloc(sizeof(*msq));
+	BUILD_BUG_ON(offsetof(struct msg_queue, q_perm) != 0);
+
+	msq = container_of(ipc_rcu_alloc(sizeof(*msq)), struct msg_queue,
+				q_perm);
 	if (!msq)
 		return -ENOMEM;
 
@@ -128,7 +131,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(msq, ipc_rcu_free);
+		ipc_rcu_putref(&msq->q_perm, ipc_rcu_free);
 		return retval;
 	}
 
@@ -144,7 +147,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	/* ipc_addid() locks msq upon success. */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 	if (id < 0) {
-		ipc_rcu_putref(msq, msg_rcu_free);
+		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 		return id;
 	}
 
@@ -249,7 +252,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-	ipc_rcu_putref(msq, msg_rcu_free);
+	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 }
 
 /*
@@ -688,7 +691,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		/* enqueue the sender and prepare to block */
 		ss_add(msq, &s, msgsz);
 
-		if (!ipc_rcu_getref(msq)) {
+		if (!ipc_rcu_getref(&msq->q_perm)) {
 			err = -EIDRM;
 			goto out_unlock0;
 		}
@@ -700,7 +703,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		rcu_read_lock();
 		ipc_lock_object(&msq->q_perm);
 
-		ipc_rcu_putref(msq, msg_rcu_free);
+		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 		/* raced with RMID? */
 		if (!ipc_valid_object(&msq->q_perm)) {
 			err = -EIDRM;
diff --git a/ipc/sem.c b/ipc/sem.c
index fff8337ebab3e..bdff6d93d2c78 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -260,8 +260,8 @@ static void merge_queues(struct sem_array *sma)
 
 static void sem_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct sem_array *sma = ipc_rcu_to_struct(p);
+	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
 
 	security_sem_free(sma);
 	ipc_rcu_free(head);
@@ -438,7 +438,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma, sem_rcu_free);
+	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -469,20 +469,20 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	if (ns->used_sems + nsems > ns->sc_semmns)
 		return -ENOSPC;
 
+	BUILD_BUG_ON(offsetof(struct sem_array, sem_perm) != 0);
+
 	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
-	sma = ipc_rcu_alloc(size);
+	sma = container_of(ipc_rcu_alloc(size), struct sem_array, sem_perm);
 	if (!sma)
 		return -ENOMEM;
 
-	memset(sma, 0, size);
-
 	sma->sem_perm.mode = (semflg & S_IRWXUGO);
 	sma->sem_perm.key = key;
 
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma, ipc_rcu_free);
+		ipc_rcu_putref(&sma->sem_perm, ipc_rcu_free);
 		return retval;
 	}
 
@@ -502,7 +502,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		ipc_rcu_putref(sma, sem_rcu_free);
+		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
@@ -1122,7 +1122,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_q(&wake_q);
 	ns->used_sems -= sma->sem_nsems;
-	ipc_rcu_putref(sma, sem_rcu_free);
+	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1362,7 +1362,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			goto out_unlock;
 		}
 		if (nsems > SEMMSL_FAST) {
-			if (!ipc_rcu_getref(sma)) {
+			if (!ipc_rcu_getref(&sma->sem_perm)) {
 				err = -EIDRM;
 				goto out_unlock;
 			}
@@ -1370,7 +1370,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			rcu_read_unlock();
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if (sem_io == NULL) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
 			}
 
@@ -1395,7 +1395,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 		struct sem_undo *un;
 
-		if (!ipc_rcu_getref(sma)) {
+		if (!ipc_rcu_getref(&sma->sem_perm)) {
 			err = -EIDRM;
 			goto out_rcu_wakeup;
 		}
@@ -1404,20 +1404,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		if (nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if (sem_io == NULL) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
-			ipc_rcu_putref(sma, sem_rcu_free);
+			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1699,7 +1699,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	}
 
 	nsems = sma->sem_nsems;
-	if (!ipc_rcu_getref(sma)) {
+	if (!ipc_rcu_getref(&sma->sem_perm)) {
 		rcu_read_unlock();
 		un = ERR_PTR(-EIDRM);
 		goto out;
@@ -1709,7 +1709,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		ipc_rcu_putref(sma, sem_rcu_free);
+		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
diff --git a/ipc/shm.c b/ipc/shm.c
index f45c7959b2640..5ef6d31a52c51 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -174,9 +174,10 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 
 static void shm_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
-
+	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
+							rcu);
+	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
+							shm_perm);
 	security_shm_free(shp);
 	ipc_rcu_free(head);
 }
@@ -241,7 +242,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		user_shm_unlock(i_size_read(file_inode(shm_file)),
 				shp->mlock_user);
 	fput(shm_file);
-	ipc_rcu_putref(shp, shm_rcu_free);
+	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 }
 
 /*
@@ -542,7 +543,10 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 			ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
-	shp = ipc_rcu_alloc(sizeof(*shp));
+	BUILD_BUG_ON(offsetof(struct shmid_kernel, shm_perm) != 0);
+
+	shp = container_of(ipc_rcu_alloc(sizeof(*shp)), struct shmid_kernel,
+				shm_perm);
 	if (!shp)
 		return -ENOMEM;
 
@@ -553,7 +557,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp, ipc_rcu_free);
+		ipc_rcu_putref(&shp->shm_perm, ipc_rcu_free);
 		return error;
 	}
 
@@ -624,7 +628,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	ipc_rcu_putref(shp, shm_rcu_free);
+	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 	return error;
 }
 
diff --git a/ipc/util.c b/ipc/util.c
index caec7b1bfaa33..5d1ff1035efe3 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -418,46 +418,45 @@ void ipc_free(void *ptr)
 }
 
 /**
- * ipc_rcu_alloc - allocate ipc and rcu space
+ * ipc_rcu_alloc - allocate ipc space
  * @size: size desired
  *
- * Allocate memory for the rcu header structure +  the object.
- * Returns the pointer to the object or NULL upon failure.
+ * Allocate memory for an ipc object.
+ * The first member must be struct kern_ipc_perm.
  */
-void *ipc_rcu_alloc(int size)
+struct kern_ipc_perm *ipc_rcu_alloc(int size)
 {
 	/*
 	 * We prepend the allocation with the rcu struct
 	 */
-	struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size);
+	struct kern_ipc_perm *out = ipc_alloc(size);
 	if (unlikely(!out))
 		return NULL;
+
+	memset(out, 0, size);
 	atomic_set(&out->refcount, 1);
-	return out + 1;
+	return out;
 }
 
-int ipc_rcu_getref(void *ptr)
+int ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
-	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-	return atomic_inc_not_zero(&p->refcount);
+	return atomic_inc_not_zero(&ptr->refcount);
 }
 
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+			void (*func)(struct rcu_head *head))
 {
-	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-	if (!atomic_dec_and_test(&p->refcount))
+	if (!atomic_dec_and_test(&ptr->refcount))
 		return;
 
-	call_rcu(&p->rcu, func);
+	call_rcu(&ptr->rcu, func);
 }
 
-void ipc_rcu_free(struct rcu_head *head)
+void ipc_rcu_free(struct rcu_head *h)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct kern_ipc_perm *ptr = container_of(h, struct kern_ipc_perm, rcu);
 
-	kvfree(p);
+	kvfree(ptr);
 }
 
 /**
diff --git a/ipc/util.h b/ipc/util.h
index 60ddccca464dd..09d0f918c3e23 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,13 +47,6 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
-struct ipc_rcu {
-	struct rcu_head rcu;
-	atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
-#define ipc_rcu_to_struct(p)  ((void *)(p+1))
-
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
@@ -125,11 +118,14 @@ void ipc_free(void *ptr);
  * Objects are reference counted, they start with reference count 1.
  * getref increases the refcount, the putref call that reduces the recount
  * to 0 schedules the rcu destruction. Caller must guarantee locking.
+ *
+ * struct kern_ipc_perm must be the first member in the allocated structure.
  */
-void *ipc_rcu_alloc(int size);
-int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
-void ipc_rcu_free(struct rcu_head *head);
+struct kern_ipc_perm *ipc_rcu_alloc(int size);
+int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+			void (*func)(struct rcu_head *head));
+void ipc_rcu_free(struct rcu_head *h);
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
-- 
GitLab


From 2cd648c110b5570c3280bd645797658cabbe5f5c Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:34:44 -0700
Subject: [PATCH 1338/1958] include/linux/sem.h: correctly document sem_ctime

sem_ctime is initialized to the semget() time and then updated at every
semctl() that changes the array.

Thus it does not represent the time of the last change.

Especially, semop() calls are only stored in sem_otime, not in
sem_ctime.

This is already described in ipc/sem.c, I just overlooked that there is
a comment in include/linux/sem.h and man semctl(2) as well.

So: Correct wrong comments.

Link: http://lkml.kernel.org/r/20170515171912.6298-4-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: <1vier1@web.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h      | 2 +-
 include/uapi/linux/sem.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9db14093b73c8..be5cf2ea14ade 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -30,7 +30,7 @@ struct sem {
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
 	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
-	time_t			sem_ctime;	/* last change time */
+	time_t			sem_ctime;	/* create/last semctl() time */
 	struct list_head	pending_alter;	/* pending operations */
 						/* that alter the array */
 	struct list_head	pending_const;	/* pending complex operations */
diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h
index dd73b908b2f32..67eb90361692f 100644
--- a/include/uapi/linux/sem.h
+++ b/include/uapi/linux/sem.h
@@ -23,7 +23,7 @@
 struct semid_ds {
 	struct ipc_perm	sem_perm;		/* permissions .. see ipc.h */
 	__kernel_time_t	sem_otime;		/* last semop time */
-	__kernel_time_t	sem_ctime;		/* last change time */
+	__kernel_time_t	sem_ctime;		/* create/last semctl() time */
 	struct sem	*sem_base;		/* ptr to first semaphore in array */
 	struct sem_queue *sem_pending;		/* pending operations to be processed */
 	struct sem_queue **sem_pending_last;	/* last pending operation */
-- 
GitLab


From f8dbe8d290637ac3f68600e30d092393fe9b40a5 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:34:47 -0700
Subject: [PATCH 1339/1958] ipc: drop non-RCU allocation

The only users of ipc_alloc() were ipc_rcu_alloc() and the on-heap
sem_io fall-back memory.  Better to just open-code these to make things
easier to read.

[manfred@colorfullife.com: Rediff due to inclusion of memset() into ipc_rcu_alloc()]
Link: http://lkml.kernel.org/r/20170525185107.12869-5-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c  |  8 +++++---
 ipc/util.c | 25 +------------------------
 ipc/util.h |  6 ------
 3 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index bdff6d93d2c78..484ccf83cf856 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1368,7 +1368,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
-			sem_io = ipc_alloc(sizeof(ushort)*nsems);
+			sem_io = kvmalloc_array(nsems, sizeof(ushort),
+						GFP_KERNEL);
 			if (sem_io == NULL) {
 				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
@@ -1402,7 +1403,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_unlock();
 
 		if (nsems > SEMMSL_FAST) {
-			sem_io = ipc_alloc(sizeof(ushort)*nsems);
+			sem_io = kvmalloc_array(nsems, sizeof(ushort),
+						GFP_KERNEL);
 			if (sem_io == NULL) {
 				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
@@ -1480,7 +1482,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	wake_up_q(&wake_q);
 out_free:
 	if (sem_io != fast_sem_io)
-		ipc_free(sem_io);
+		kvfree(sem_io);
 	return err;
 }
 
diff --git a/ipc/util.c b/ipc/util.c
index 5d1ff1035efe3..dd73feb1569a9 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -394,29 +394,6 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 	ipcp->deleted = true;
 }
 
-/**
- * ipc_alloc -	allocate ipc space
- * @size: size desired
- *
- * Allocate memory from the appropriate pools and return a pointer to it.
- * NULL is returned if the allocation fails
- */
-void *ipc_alloc(int size)
-{
-	return kvmalloc(size, GFP_KERNEL);
-}
-
-/**
- * ipc_free - free ipc space
- * @ptr: pointer returned by ipc_alloc
- *
- * Free a block created with ipc_alloc().
- */
-void ipc_free(void *ptr)
-{
-	kvfree(ptr);
-}
-
 /**
  * ipc_rcu_alloc - allocate ipc space
  * @size: size desired
@@ -429,7 +406,7 @@ struct kern_ipc_perm *ipc_rcu_alloc(int size)
 	/*
 	 * We prepend the allocation with the rcu struct
 	 */
-	struct kern_ipc_perm *out = ipc_alloc(size);
+	struct kern_ipc_perm *out = kvmalloc(size, GFP_KERNEL);
 	if (unlikely(!out))
 		return NULL;
 
diff --git a/ipc/util.h b/ipc/util.h
index 09d0f918c3e23..2578fd9be8356 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -107,12 +107,6 @@ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 /* must be called with ipcp locked */
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 
-/* for rare, potentially huge allocations.
- * both function can sleep
- */
-void *ipc_alloc(int size);
-void ipc_free(void *ptr);
-
 /*
  * For allocation that need to be freed by RCU.
  * Objects are reference counted, they start with reference count 1.
-- 
GitLab


From 1b4654ef72f61c84704b3c79b50fdeed8747fc56 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:34:50 -0700
Subject: [PATCH 1340/1958] ipc/sem: do not use ipc_rcu_free()

Avoid using ipc_rcu_free, since it just re-finds the original structure
pointer.  For the pre-list-init failure path, there is no RCU needed,
since it was just allocated.  It can be directly freed.

Link: http://lkml.kernel.org/r/20170525185107.12869-6-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index 484ccf83cf856..a04c4d6d120ce 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -258,13 +258,18 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
+static void __sem_free(struct sem_array *sma)
+{
+	kvfree(sma);
+}
+
 static void sem_rcu_free(struct rcu_head *head)
 {
 	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
 	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
 
 	security_sem_free(sma);
-	ipc_rcu_free(head);
+	__sem_free(sma);
 }
 
 /*
@@ -482,7 +487,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(&sma->sem_perm, ipc_rcu_free);
+		__sem_free(sma);
 		return retval;
 	}
 
-- 
GitLab


From 66470b18179f8839a62b4c20ffd3903a4025fe90 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:34:53 -0700
Subject: [PATCH 1341/1958] ipc/shm: do not use ipc_rcu_free()

Avoid using ipc_rcu_free, since it just re-finds the original structure
pointer.  For the pre-list-init failure path, there is no RCU needed,
since it was just allocated.  It can be directly freed.

Link: http://lkml.kernel.org/r/20170525185107.12869-7-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/shm.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 5ef6d31a52c51..566c1e193ee13 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -172,6 +172,11 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 	ipc_lock_object(&ipcp->shm_perm);
 }
 
+static void __shm_free(struct shmid_kernel *shp)
+{
+	kvfree(shp);
+}
+
 static void shm_rcu_free(struct rcu_head *head)
 {
 	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
@@ -179,7 +184,7 @@ static void shm_rcu_free(struct rcu_head *head)
 	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
 							shm_perm);
 	security_shm_free(shp);
-	ipc_rcu_free(head);
+	__shm_free(shp);
 }
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
@@ -557,7 +562,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(&shp->shm_perm, ipc_rcu_free);
+		__shm_free(shp);
 		return error;
 	}
 
-- 
GitLab


From 9ef5932f8a1134b9d93676ee26701b2be90c7a95 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:34:56 -0700
Subject: [PATCH 1342/1958] ipc/msg: do not use ipc_rcu_free()

Avoid using ipc_rcu_free, since it just re-finds the original structure
pointer.  For the pre-list-init failure path, there is no RCU needed,
since it was just allocated.  It can be directly freed.

Link: http://lkml.kernel.org/r/20170525185107.12869-8-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/msg.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index 0ed7dae7d4e8b..25d43e27ef12c 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -95,13 +95,18 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 	ipc_rmid(&msg_ids(ns), &s->q_perm);
 }
 
+static void __msg_free(struct msg_queue *msq)
+{
+	kvfree(msq);
+}
+
 static void msg_rcu_free(struct rcu_head *head)
 {
 	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
 	struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 
 	security_msg_queue_free(msq);
-	ipc_rcu_free(head);
+	__msg_free(msq);
 }
 
 /**
@@ -131,7 +136,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(&msq->q_perm, ipc_rcu_free);
+		__msg_free(msq);
 		return retval;
 	}
 
-- 
GitLab


From 5ccc8fb54f1c083f1dc7e073150ec18d43457872 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:34:59 -0700
Subject: [PATCH 1343/1958] ipc/util: drop ipc_rcu_free()

There are no more callers of ipc_rcu_free(), so remove it.

Link: http://lkml.kernel.org/r/20170525185107.12869-9-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/util.c | 7 -------
 ipc/util.h | 1 -
 2 files changed, 8 deletions(-)

diff --git a/ipc/util.c b/ipc/util.c
index dd73feb1569a9..556884bab698c 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -429,13 +429,6 @@ void ipc_rcu_putref(struct kern_ipc_perm *ptr,
 	call_rcu(&ptr->rcu, func);
 }
 
-void ipc_rcu_free(struct rcu_head *h)
-{
-	struct kern_ipc_perm *ptr = container_of(h, struct kern_ipc_perm, rcu);
-
-	kvfree(ptr);
-}
-
 /**
  * ipcperms - check ipc permissions
  * @ns: ipc namespace
diff --git a/ipc/util.h b/ipc/util.h
index 2578fd9be8356..44efbc0b635b4 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -119,7 +119,6 @@ struct kern_ipc_perm *ipc_rcu_alloc(int size);
 int ipc_rcu_getref(struct kern_ipc_perm *ptr);
 void ipc_rcu_putref(struct kern_ipc_perm *ptr,
 			void (*func)(struct rcu_head *head));
-void ipc_rcu_free(struct rcu_head *h);
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
-- 
GitLab


From 101ede01dfd5072651965e974bc6e30c8d0748e2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:02 -0700
Subject: [PATCH 1344/1958] ipc/sem: avoid ipc_rcu_alloc()

Instead of using ipc_rcu_alloc() which only performs the refcount bump,
open code it to perform better sem-specific checks.  This also allows
for sem_array structure layout to be randomized in the future.

[manfred@colorfullife.com: Rediff, because the memset was temporarily inside ipc_rcu_alloc()]
Link: http://lkml.kernel.org/r/20170525185107.12869-10-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index a04c4d6d120ce..445a5b5eb88fd 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -451,6 +451,25 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
 	ipc_rmid(&sem_ids(ns), &s->sem_perm);
 }
 
+static struct sem_array *sem_alloc(size_t nsems)
+{
+	struct sem_array *sma;
+	size_t size;
+
+	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
+		return NULL;
+
+	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
+	sma = kvmalloc(size, GFP_KERNEL);
+	if (unlikely(!sma))
+		return NULL;
+
+	memset(sma, 0, size);
+	atomic_set(&sma->sem_perm.refcount, 1);
+
+	return sma;
+}
+
 /**
  * newary - Create a new semaphore set
  * @ns: namespace
@@ -463,7 +482,6 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	int id;
 	int retval;
 	struct sem_array *sma;
-	int size;
 	key_t key = params->key;
 	int nsems = params->u.nsems;
 	int semflg = params->flg;
@@ -474,10 +492,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	if (ns->used_sems + nsems > ns->sc_semmns)
 		return -ENOSPC;
 
-	BUILD_BUG_ON(offsetof(struct sem_array, sem_perm) != 0);
-
-	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
-	sma = container_of(ipc_rcu_alloc(size), struct sem_array, sem_perm);
+	sma = sem_alloc(nsems);
 	if (!sma)
 		return -ENOMEM;
 
-- 
GitLab


From 3e0c24042e5aa55eee817caeca67246df69931e1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:05 -0700
Subject: [PATCH 1345/1958] ipc/shm: avoid ipc_rcu_alloc()

Instead of using ipc_rcu_alloc() which only performs the refcount bump,
open code it.  This also allows for shmid_kernel structure layout to be
randomized in the future.

Link: http://lkml.kernel.org/r/20170525185107.12869-11-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/shm.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 566c1e193ee13..2ac489ef56e4e 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -518,6 +518,19 @@ static const struct vm_operations_struct shm_vm_ops = {
 #endif
 };
 
+static struct shmid_kernel *shm_alloc(void)
+{
+	struct shmid_kernel *shp;
+
+	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
+	if (unlikely(!shp))
+		return NULL;
+
+	atomic_set(&shp->shm_perm.refcount, 1);
+
+	return shp;
+}
+
 /**
  * newseg - Create a new shared memory segment
  * @ns: namespace
@@ -548,10 +561,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 			ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
-	BUILD_BUG_ON(offsetof(struct shmid_kernel, shm_perm) != 0);
-
-	shp = container_of(ipc_rcu_alloc(sizeof(*shp)), struct shmid_kernel,
-				shm_perm);
+	shp = shm_alloc();
 	if (!shp)
 		return -ENOMEM;
 
-- 
GitLab


From 52f908904e7e05b6300162faa48152df073be645 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:07 -0700
Subject: [PATCH 1346/1958] ipc/msg: avoid ipc_rcu_alloc()

Instead of using ipc_rcu_alloc() which only performs the refcount bump,
open code it.  This also allows for msg_queue structure layout to be
randomized in the future.

Link: http://lkml.kernel.org/r/20170525185107.12869-12-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/msg.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index 25d43e27ef12c..10094a731b8ee 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -109,6 +109,19 @@ static void msg_rcu_free(struct rcu_head *head)
 	__msg_free(msq);
 }
 
+static struct msg_queue *msg_alloc(void)
+{
+	struct msg_queue *msq;
+
+	msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
+	if (unlikely(!msq))
+		return NULL;
+
+	atomic_set(&msq->q_perm.refcount, 1);
+
+	return msq;
+}
+
 /**
  * newque - Create a new msg queue
  * @ns: namespace
@@ -123,10 +136,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	key_t key = params->key;
 	int msgflg = params->flg;
 
-	BUILD_BUG_ON(offsetof(struct msg_queue, q_perm) != 0);
-
-	msq = container_of(ipc_rcu_alloc(sizeof(*msq)), struct msg_queue,
-				q_perm);
+	msq = msg_alloc();
 	if (!msq)
 		return -ENOMEM;
 
-- 
GitLab


From c3f6fb6fe4e638a58eae558cb209c463cd46c2fc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:10 -0700
Subject: [PATCH 1347/1958] ipc/util: drop ipc_rcu_alloc()

No callers remain for ipc_rcu_alloc().  Drop the function.

[manfred@colorfullife.com: Rediff because the memset was temporarily inside ipc_rcu_free()]
Link: http://lkml.kernel.org/r/20170525185107.12869-13-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/util.c | 21 ---------------------
 ipc/util.h |  3 ---
 2 files changed, 24 deletions(-)

diff --git a/ipc/util.c b/ipc/util.c
index 556884bab698c..2428dd44ca979 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -394,27 +394,6 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 	ipcp->deleted = true;
 }
 
-/**
- * ipc_rcu_alloc - allocate ipc space
- * @size: size desired
- *
- * Allocate memory for an ipc object.
- * The first member must be struct kern_ipc_perm.
- */
-struct kern_ipc_perm *ipc_rcu_alloc(int size)
-{
-	/*
-	 * We prepend the allocation with the rcu struct
-	 */
-	struct kern_ipc_perm *out = kvmalloc(size, GFP_KERNEL);
-	if (unlikely(!out))
-		return NULL;
-
-	memset(out, 0, size);
-	atomic_set(&out->refcount, 1);
-	return out;
-}
-
 int ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
 	return atomic_inc_not_zero(&ptr->refcount);
diff --git a/ipc/util.h b/ipc/util.h
index 44efbc0b635b4..77336c2bdb9c9 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -112,10 +112,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
  * Objects are reference counted, they start with reference count 1.
  * getref increases the refcount, the putref call that reduces the recount
  * to 0 schedules the rcu destruction. Caller must guarantee locking.
- *
- * struct kern_ipc_perm must be the first member in the allocated structure.
  */
-struct kern_ipc_perm *ipc_rcu_alloc(int size);
 int ipc_rcu_getref(struct kern_ipc_perm *ptr);
 void ipc_rcu_putref(struct kern_ipc_perm *ptr,
 			void (*func)(struct rcu_head *head));
-- 
GitLab


From 2ec55f8024db859d70f14c26e91ca044328dd50d Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:35:13 -0700
Subject: [PATCH 1348/1958] ipc/sem.c: avoid ipc_rcu_putref for failed
 ipc_addid()

Loosely based on a patch from Kees Cook <keescook@chromium.org>:
 - id and retval can be merged
 - if ipc_addid() fails, then use call_rcu() directly.

The difference is that call_rcu is used for failed ipc_addid() calls, to
continue to guaranteed an rcu delay for security_sem_free().

Link: http://lkml.kernel.org/r/20170525185107.12869-14-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index 445a5b5eb88fd..2b2ed56e0fde2 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -479,7 +479,6 @@ static struct sem_array *sem_alloc(size_t nsems)
  */
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
-	int id;
 	int retval;
 	struct sem_array *sma;
 	key_t key = params->key;
@@ -520,10 +519,10 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
 
-	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
-	if (id < 0) {
-		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
-		return id;
+	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
+	if (retval < 0) {
+		call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
+		return retval;
 	}
 	ns->used_sems += nsems;
 
-- 
GitLab


From a2642f8770993a1598500bb6a8082b7c32faff8b Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:35:16 -0700
Subject: [PATCH 1349/1958] ipc/shm.c: avoid ipc_rcu_putref for failed
 ipc_addid()

Loosely based on a patch from Kees Cook <keescook@chromium.org>:
 - id and error can be merged
 - if operations before ipc_addid() fail, then use call_rcu() directly.

The difference is that call_rcu is used for failures after
security_shm_alloc(), to continue to guaranteed an rcu delay for
security_sem_free().

Link: http://lkml.kernel.org/r/20170525185107.12869-15-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/shm.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 2ac489ef56e4e..c5976d318ed17 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -548,7 +548,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	struct file *file;
 	char name[13];
-	int id;
 	vm_flags_t acctflag = 0;
 
 	if (size < SHMMIN || size > ns->shm_ctlmax)
@@ -617,11 +616,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_file = file;
 	shp->shm_creator = current;
 
-	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
-	if (id < 0) {
-		error = id;
+	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
+	if (error < 0)
 		goto no_id;
-	}
 
 	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
 
@@ -643,7 +640,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+	call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
 	return error;
 }
 
-- 
GitLab


From 51c23b7b7db52493d4fc869cec8c3e8fe27bfcd3 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:35:19 -0700
Subject: [PATCH 1350/1958] ipc/msg.c: avoid ipc_rcu_putref for failed
 ipc_addid()

Loosely based on a patch from Kees Cook <keescook@chromium.org>:
 - id and retval can be merged
 - if ipc_addid() fails, then use call_rcu() directly.

The difference is that call_rcu is used for failed ipc_addid() calls, to
continue to guaranteed an rcu delay for security_msg_queue_free().

Link: http://lkml.kernel.org/r/20170525185107.12869-16-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/msg.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index 10094a731b8ee..cd90bfde89a46 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -132,7 +132,7 @@ static struct msg_queue *msg_alloc(void)
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	struct msg_queue *msq;
-	int id, retval;
+	int retval;
 	key_t key = params->key;
 	int msgflg = params->flg;
 
@@ -160,10 +160,10 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	INIT_LIST_HEAD(&msq->q_senders);
 
 	/* ipc_addid() locks msq upon success. */
-	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
-	if (id < 0) {
-		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
-		return id;
+	retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+	if (retval < 0) {
+		call_rcu(&msq->q_perm.rcu, msg_rcu_free);
+		return retval;
 	}
 
 	ipc_unlock_object(&msq->q_perm);
-- 
GitLab


From 3d3653f9732c73feb8c4addfc1cbdaa292a399fa Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:22 -0700
Subject: [PATCH 1351/1958] ipc: move atomic_set() to where it is needed

Only after ipc_addid() has succeeded will refcounting be used, so move
initialization into ipc_addid() and remove from open-coded *_alloc()
routines.

Link: http://lkml.kernel.org/r/20170525185107.12869-17-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/msg.c  | 2 --
 ipc/sem.c  | 1 -
 ipc/shm.c  | 2 --
 ipc/util.c | 1 +
 4 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index cd90bfde89a46..770342e1d3274 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -117,8 +117,6 @@ static struct msg_queue *msg_alloc(void)
 	if (unlikely(!msq))
 		return NULL;
 
-	atomic_set(&msq->q_perm.refcount, 1);
-
 	return msq;
 }
 
diff --git a/ipc/sem.c b/ipc/sem.c
index 2b2ed56e0fde2..5f137738819d7 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -465,7 +465,6 @@ static struct sem_array *sem_alloc(size_t nsems)
 		return NULL;
 
 	memset(sma, 0, size);
-	atomic_set(&sma->sem_perm.refcount, 1);
 
 	return sma;
 }
diff --git a/ipc/shm.c b/ipc/shm.c
index c5976d318ed17..d1988ef821a17 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -526,8 +526,6 @@ static struct shmid_kernel *shm_alloc(void)
 	if (unlikely(!shp))
 		return NULL;
 
-	atomic_set(&shp->shm_perm.refcount, 1);
-
 	return shp;
 }
 
diff --git a/ipc/util.c b/ipc/util.c
index 2428dd44ca979..1a2cb02467ab5 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -232,6 +232,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 
 	idr_preload(GFP_KERNEL);
 
+	atomic_set(&new->refcount, 1);
 	spin_lock_init(&new->lock);
 	new->deleted = false;
 	rcu_read_lock();
-- 
GitLab


From 42e618f77dabc82c697915c193d729e9d16e2a75 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:25 -0700
Subject: [PATCH 1352/1958] ipc/shm: remove special shm_alloc/free

There is nothing special about the shm_alloc/free routines any more, so
remove them to make code more readable.

[manfred@colorfullife.com: Rediff, to continue to keep rcu for free calls after a successful security_shm_alloc()]
Link: http://lkml.kernel.org/r/20170525185107.12869-18-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/shm.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index d1988ef821a17..28a444861a8f4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -172,11 +172,6 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 	ipc_lock_object(&ipcp->shm_perm);
 }
 
-static void __shm_free(struct shmid_kernel *shp)
-{
-	kvfree(shp);
-}
-
 static void shm_rcu_free(struct rcu_head *head)
 {
 	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
@@ -184,7 +179,7 @@ static void shm_rcu_free(struct rcu_head *head)
 	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
 							shm_perm);
 	security_shm_free(shp);
-	__shm_free(shp);
+	kvfree(shp);
 }
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
@@ -518,17 +513,6 @@ static const struct vm_operations_struct shm_vm_ops = {
 #endif
 };
 
-static struct shmid_kernel *shm_alloc(void)
-{
-	struct shmid_kernel *shp;
-
-	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
-	if (unlikely(!shp))
-		return NULL;
-
-	return shp;
-}
-
 /**
  * newseg - Create a new shared memory segment
  * @ns: namespace
@@ -558,8 +542,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 			ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
-	shp = shm_alloc();
-	if (!shp)
+	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
+	if (unlikely(!shp))
 		return -ENOMEM;
 
 	shp->shm_perm.key = key;
@@ -569,7 +553,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		__shm_free(shp);
+		kvfree(shp);
 		return error;
 	}
 
-- 
GitLab


From fb259c310f79d295c2da2934ff2282e1b7c30529 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:28 -0700
Subject: [PATCH 1353/1958] ipc/msg: remove special msg_alloc/free

There is nothing special about the msg_alloc/free routines any more, so
remove them to make code more readable.

[manfred@colorfullife.com: Rediff to keep rcu protection for security_msg_queue_alloc()]
Link: http://lkml.kernel.org/r/20170525185107.12869-19-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/msg.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index 770342e1d3274..5b25e0755656a 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -95,29 +95,13 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 	ipc_rmid(&msg_ids(ns), &s->q_perm);
 }
 
-static void __msg_free(struct msg_queue *msq)
-{
-	kvfree(msq);
-}
-
 static void msg_rcu_free(struct rcu_head *head)
 {
 	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
 	struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 
 	security_msg_queue_free(msq);
-	__msg_free(msq);
-}
-
-static struct msg_queue *msg_alloc(void)
-{
-	struct msg_queue *msq;
-
-	msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
-	if (unlikely(!msq))
-		return NULL;
-
-	return msq;
+	kvfree(msq);
 }
 
 /**
@@ -134,8 +118,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	key_t key = params->key;
 	int msgflg = params->flg;
 
-	msq = msg_alloc();
-	if (!msq)
+	msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
+	if (unlikely(!msq))
 		return -ENOMEM;
 
 	msq->q_perm.mode = msgflg & S_IRWXUGO;
@@ -144,7 +128,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		__msg_free(msq);
+		kvfree(msq);
 		return retval;
 	}
 
-- 
GitLab


From e2029dfeef7b09f08ac8572e8be3d4c624d1f79a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:31 -0700
Subject: [PATCH 1354/1958] ipc/sem: drop __sem_free()

The remaining users of __sem_free() can simply call kvfree() instead for
better readability.

[manfred@colorfullife.com: Rediff to keep rcu protection for security_sem_alloc()]
Link: http://lkml.kernel.org/r/20170525185107.12869-20-manfred@colorfullife.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index 5f137738819d7..9e70cd7a17da7 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -258,18 +258,13 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
-static void __sem_free(struct sem_array *sma)
-{
-	kvfree(sma);
-}
-
 static void sem_rcu_free(struct rcu_head *head)
 {
 	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
 	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
 
 	security_sem_free(sma);
-	__sem_free(sma);
+	kvfree(sma);
 }
 
 /*
@@ -500,7 +495,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		__sem_free(sma);
+		kvfree(sma);
 		return retval;
 	}
 
-- 
GitLab


From 62b49c9908bcee88347efe8b4ed1b4f53c60ee66 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Wed, 12 Jul 2017 14:35:34 -0700
Subject: [PATCH 1355/1958] ipc/util.h: update documentation for ipc_getref()
 and ipc_putref()

Now that ipc_rcu_alloc() and ipc_rcu_free() are removed, document when
it is valid to use ipc_getref() and ipc_putref().

Link: http://lkml.kernel.org/r/20170525185107.12869-21-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/util.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ipc/util.h b/ipc/util.h
index 77336c2bdb9c9..c692010e6f0a3 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -112,6 +112,9 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
  * Objects are reference counted, they start with reference count 1.
  * getref increases the refcount, the putref call that reduces the recount
  * to 0 schedules the rcu destruction. Caller must guarantee locking.
+ *
+ * refcount is initialized by ipc_addid(), before that point call_rcu()
+ * must be used.
  */
 int ipc_rcu_getref(struct kern_ipc_perm *ptr);
 void ipc_rcu_putref(struct kern_ipc_perm *ptr,
-- 
GitLab


From eacd86ca3b036e55e172b7279f101cef4a6ff3a4 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:35:37 -0700
Subject: [PATCH 1356/1958] net/netfilter/x_tables.c: use kvmalloc() in
 xt_alloc_table_info()

xt_alloc_table_info() basically opencodes kvmalloc() so use the library
function instead.

Link: http://lkml.kernel.org/r/20170531155145.17111-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/netfilter/x_tables.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 1770c1d9b37fc..e1648238a9c99 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1003,14 +1003,10 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
 	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
 		return NULL;
 
-	if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-		info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-	if (!info) {
-		info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
-				 PAGE_KERNEL);
-		if (!info)
-			return NULL;
-	}
+	info = kvmalloc(sz, GFP_KERNEL);
+	if (!info)
+		return NULL;
+
 	memset(info, 0, sizeof(*info));
 	info->size = size;
 	return info;
-- 
GitLab


From 24bb44612c5f93a1dff1f7e71b7b7b109a988791 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:40 -0700
Subject: [PATCH 1357/1958] kernel/watchdog: remove unused declaration

Patch series "Improve watchdog config for arch watchdogs", v4.

A series to make the hardlockup watchdog more easily replaceable by arch
code.  The last patch provides some justification for why we want to do
this (existing sparc watchdog is another that could benefit).

This patch (of 5):

Remove unused declaration.

Link: http://lkml.kernel.org/r/20170616065715.18390-2-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nmi.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index aa3cd08782703..5e2e57536d98c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -12,9 +12,6 @@ extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
-				  void __user *buffer,
-				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern unsigned int  hardlockup_panic;
 void lockup_detector_init(void);
-- 
GitLab


From f2e0cff85ed111a3cf24d894c3fa11697dfae628 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:43 -0700
Subject: [PATCH 1358/1958] kernel/watchdog: introduce
 arch_touch_nmi_watchdog()

For architectures that define HAVE_NMI_WATCHDOG, instead of having them
provide the complete touch_nmi_watchdog() function, just have them
provide arch_touch_nmi_watchdog().

This gives the generic code more flexibility in implementing this
function, and arch implementations don't miss out on touching the
softlockup watchdog or other generic details.

Link: http://lkml.kernel.org/r/20170616065715.18390-3-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/blackfin/include/asm/nmi.h            |  2 ++
 arch/blackfin/kernel/nmi.c                 |  2 +-
 arch/mn10300/include/asm/nmi.h             |  2 ++
 arch/mn10300/kernel/mn10300-watchdog-low.S |  8 +++----
 arch/mn10300/kernel/mn10300-watchdog.c     |  2 +-
 arch/sparc/include/asm/nmi.h               |  1 +
 arch/sparc/kernel/nmi.c                    |  6 ++---
 include/linux/nmi.h                        | 27 +++++++++++++---------
 kernel/watchdog_hld.c                      |  5 ++--
 9 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h
index b9caac4fcfd85..107d23705f465 100644
--- a/arch/blackfin/include/asm/nmi.h
+++ b/arch/blackfin/include/asm/nmi.h
@@ -9,4 +9,6 @@
 
 #include <linux/nmi.h>
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif
diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
index 633c37083e877..1e714329fe8a1 100644
--- a/arch/blackfin/kernel/nmi.c
+++ b/arch/blackfin/kernel/nmi.c
@@ -190,7 +190,7 @@ static int __init init_nmi_wdt(void)
 }
 device_initcall(init_nmi_wdt);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	atomic_set(&nmi_touched[smp_processor_id()], 1);
 }
diff --git a/arch/mn10300/include/asm/nmi.h b/arch/mn10300/include/asm/nmi.h
index f3671cbbc1177..b05627597b1ba 100644
--- a/arch/mn10300/include/asm/nmi.h
+++ b/arch/mn10300/include/asm/nmi.h
@@ -11,4 +11,6 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif /* _ASM_NMI_H */
diff --git a/arch/mn10300/kernel/mn10300-watchdog-low.S b/arch/mn10300/kernel/mn10300-watchdog-low.S
index f2f5c9cfaabd8..34f8773de7d03 100644
--- a/arch/mn10300/kernel/mn10300-watchdog-low.S
+++ b/arch/mn10300/kernel/mn10300-watchdog-low.S
@@ -50,9 +50,9 @@ watchdog_handler:
 #   we can't inline it)
 #
 ###############################################################################
-	.globl	touch_nmi_watchdog
-	.type	touch_nmi_watchdog,@function
-touch_nmi_watchdog:
+	.globl	arch_touch_nmi_watchdog
+	.type	arch_touch_nmi_watchdog,@function
+arch_touch_nmi_watchdog:
 	clr	d0
 	clr	d1
 	mov	watchdog_alert_counter, a0
@@ -63,4 +63,4 @@ touch_nmi_watchdog:
 	lne
 	ret	[],0
 
-	.size	touch_nmi_watchdog,.-touch_nmi_watchdog
+	.size	arch_touch_nmi_watchdog,.-arch_touch_nmi_watchdog
diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c
index a2d8e6938d671..0d5641beadf5c 100644
--- a/arch/mn10300/kernel/mn10300-watchdog.c
+++ b/arch/mn10300/kernel/mn10300-watchdog.c
@@ -31,7 +31,7 @@ static unsigned int watchdog;
 static unsigned int watchdog_hz = 1;
 unsigned int watchdog_alert_counter[NR_CPUS];
 
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 /*
  * the best way to detect whether a CPU has a 'hard lockup' problem
diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h
index 26ad2b2607c66..284eac3ffaf2c 100644
--- a/arch/sparc/include/asm/nmi.h
+++ b/arch/sparc/include/asm/nmi.h
@@ -7,6 +7,7 @@ void nmi_adjust_hz(unsigned int new_hz);
 
 extern atomic_t nmi_active;
 
+void arch_touch_nmi_watchdog(void);
 void start_nmi_watchdog(void *unused);
 void stop_nmi_watchdog(void *unused);
 
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 95e73c63c99d4..048ad783ea3fe 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -51,7 +51,7 @@ static DEFINE_PER_CPU(unsigned int, last_irq_sum);
 static DEFINE_PER_CPU(long, alert_counter);
 static DEFINE_PER_CPU(int, nmi_touch);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	if (atomic_read(&nmi_active)) {
 		int cpu;
@@ -61,10 +61,8 @@ void touch_nmi_watchdog(void)
 				per_cpu(nmi_touch, cpu) = 1;
 		}
 	}
-
-	touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
 {
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 5e2e57536d98c..bd387ef8bccd2 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -6,6 +6,9 @@
 
 #include <linux/sched.h>
 #include <asm/irq.h>
+#if defined(CONFIG_HAVE_NMI_WATCHDOG)
+#include <asm/nmi.h>
+#endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
@@ -58,6 +61,18 @@ static inline void reset_hung_task_detector(void)
 #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
 #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR)
+extern void hardlockup_detector_disable(void);
+#else
+static inline void hardlockup_detector_disable(void) {}
+#endif
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+extern void arch_touch_nmi_watchdog(void);
+#else
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  * 
@@ -65,21 +80,11 @@ static inline void reset_hung_task_detector(void)
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-#include <asm/nmi.h>
-extern void touch_nmi_watchdog(void);
-#else
 static inline void touch_nmi_watchdog(void)
 {
+	arch_touch_nmi_watchdog();
 	touch_softlockup_watchdog();
 }
-#endif
-
-#if defined(CONFIG_HARDLOCKUP_DETECTOR)
-extern void hardlockup_detector_disable(void);
-#else
-static inline void hardlockup_detector_disable(void) {}
-#endif
 
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 54a427d1f3445..90d688df6ce1c 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -56,7 +56,7 @@ static int __init hardlockup_panic_setup(char *str)
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	/*
 	 * Using __raw here because some code paths have
@@ -66,9 +66,8 @@ void touch_nmi_watchdog(void)
 	 * going off.
 	 */
 	raw_cpu_write(watchdog_nmi_touch, true);
-	touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static struct perf_event_attr wd_hw_attr = {
 	.type		= PERF_TYPE_HARDWARE,
-- 
GitLab


From 05a4a95279311c3a4633b4277a5d21cfd616c6c7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:46 -0700
Subject: [PATCH 1359/1958] kernel/watchdog: split up config options

Split SOFTLOCKUP_DETECTOR from LOCKUP_DETECTOR, and split
HARDLOCKUP_DETECTOR_PERF from HARDLOCKUP_DETECTOR.

LOCKUP_DETECTOR implies the general boot, sysctl, and programming
interfaces for the lockup detectors.

An architecture that wants to use a hard lockup detector must define
HAVE_HARDLOCKUP_DETECTOR_PERF or HAVE_HARDLOCKUP_DETECTOR_ARCH.

Alternatively an arch can define HAVE_NMI_WATCHDOG, which provides the
minimum arch_touch_nmi_watchdog, and it otherwise does its own thing and
does not implement the LOCKUP_DETECTOR interfaces.

sparc is unusual in that it has started to implement some of the
interfaces, but not fully yet.  It should probably be converted to a full
HAVE_HARDLOCKUP_DETECTOR_ARCH.

[npiggin@gmail.com: fix]
  Link: http://lkml.kernel.org/r/20170617223522.66c0ad88@roar.ozlabs.ibm.com
Link: http://lkml.kernel.org/r/20170616065715.18390-4-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig                   |  25 +++-
 arch/powerpc/Kconfig           |   1 +
 arch/powerpc/kernel/setup_64.c |   2 +-
 arch/x86/Kconfig               |   1 +
 arch/x86/kernel/apic/hw_nmi.c  |   2 +-
 include/linux/nmi.h            |  29 ++--
 kernel/Makefile                |   2 +-
 kernel/sysctl.c                |  31 +++--
 kernel/watchdog.c              | 243 +++++++++++++++++++++------------
 kernel/watchdog_hld.c          |  32 -----
 lib/Kconfig.debug              |  45 +++---
 11 files changed, 251 insertions(+), 162 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index cae0958a2298d..fb9bd7d36b05a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -198,9 +198,6 @@ config HAVE_KPROBES_ON_FTRACE
 config HAVE_NMI
 	bool
 
-config HAVE_NMI_WATCHDOG
-	depends on HAVE_NMI
-	bool
 #
 # An arch should select this if it provides all these things:
 #
@@ -288,6 +285,28 @@ config HAVE_PERF_EVENTS_NMI
 	  subsystem.  Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_HARDLOCKUP_DETECTOR_PERF
+	bool
+	depends on HAVE_PERF_EVENTS_NMI
+	help
+	  The arch chooses to use the generic perf-NMI-based hardlockup
+	  detector. Must define HAVE_PERF_EVENTS_NMI.
+
+config HAVE_NMI_WATCHDOG
+	depends on HAVE_NMI
+	bool
+	help
+	  The arch provides a low level NMI watchdog. It provides
+	  asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
+
+config HAVE_HARDLOCKUP_DETECTOR_ARCH
+	bool
+	select HAVE_NMI_WATCHDOG
+	help
+	  The arch chooses to provide its own hardlockup detector, which is
+	  a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
+	  interfaces and parameters provided by hardlockup detector subsystem.
+
 config HAVE_PERF_REGS
 	bool
 	help
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7177a3f4f4181..63ed758e1d202 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -197,6 +197,7 @@ config PPC
 	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE		if SMP
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4640f6d64f8b4..074a075a9cdb3 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -752,7 +752,7 @@ struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
 	return ppc_proc_freq * watchdog_thresh;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 94a18681353d5..3d2b8ce54e007 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -162,6 +162,7 @@ config X86
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c73c9fb281e18..d6f387780849a 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -19,7 +19,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
 	return (u64)(cpu_khz) * 1000 * watchdog_thresh;
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index bd387ef8bccd2..8aa01fd859fb8 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -11,13 +11,21 @@
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
+void lockup_detector_init(void);
+#else
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int  softlockup_panic;
-extern unsigned int  hardlockup_panic;
-void lockup_detector_init(void);
+extern int soft_watchdog_enabled;
+extern atomic_t watchdog_park_in_progress;
 #else
 static inline void touch_softlockup_watchdog_sched(void)
 {
@@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
-static inline void lockup_detector_init(void)
-{
-}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
@@ -63,15 +68,18 @@ static inline void reset_hung_task_detector(void)
 
 #if defined(CONFIG_HARDLOCKUP_DETECTOR)
 extern void hardlockup_detector_disable(void);
+extern unsigned int hardlockup_panic;
 #else
 static inline void hardlockup_detector_disable(void) {}
 #endif
 
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 extern void arch_touch_nmi_watchdog(void);
 #else
+#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
+#endif
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
@@ -141,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
 }
 #endif
 
-#ifdef CONFIG_LOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
+extern struct cpumask watchdog_cpumask;
 extern unsigned long *watchdog_cpumask_bits;
-extern atomic_t watchdog_park_in_progress;
+extern int __read_mostly watchdog_suspended;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
diff --git a/kernel/Makefile b/kernel/Makefile
index 72aa080f91f04..4cb8e8b23c6ec 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,7 +82,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index df9f2a3678821..6648fbbb8157f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -900,6 +900,14 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &zero,
 #endif
 	},
+	{
+		.procname	= "watchdog_cpumask",
+		.data		= &watchdog_cpumask_bits,
+		.maxlen		= NR_CPUS,
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_cpumask,
+	},
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 	{
 		.procname       = "soft_watchdog",
 		.data           = &soft_watchdog_enabled,
@@ -909,13 +917,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-	{
-		.procname	= "watchdog_cpumask",
-		.data		= &watchdog_cpumask_bits,
-		.maxlen		= NR_CPUS,
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_cpumask,
-	},
 	{
 		.procname	= "softlockup_panic",
 		.data		= &softlockup_panic,
@@ -925,27 +926,29 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_SMP
 	{
-		.procname	= "hardlockup_panic",
-		.data		= &hardlockup_panic,
+		.procname	= "softlockup_all_cpu_backtrace",
+		.data		= &sysctl_softlockup_all_cpu_backtrace,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif /* CONFIG_SMP */
 #endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 	{
-		.procname	= "softlockup_all_cpu_backtrace",
-		.data		= &sysctl_softlockup_all_cpu_backtrace,
+		.procname	= "hardlockup_panic",
+		.data		= &hardlockup_panic,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "hardlockup_all_cpu_backtrace",
 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
@@ -957,6 +960,8 @@ static struct ctl_table kern_table[] = {
 	},
 #endif /* CONFIG_SMP */
 #endif
+#endif
+
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 03e0b69bb5bfd..1fba9c3d66dcc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,15 +29,58 @@
 #include <linux/kvm_para.h>
 #include <linux/kthread.h>
 
+/* Watchdog configuration */
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+						NMI_WATCHDOG_ENABLED;
 #else
 unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		hardlockup_panic = 1;
+	else if (!strncmp(str, "nopanic", 7))
+		hardlockup_panic = 0;
+	else if (!strncmp(str, "0", 1))
+		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+	else if (!strncmp(str, "1", 1))
+		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 int __read_mostly soft_watchdog_enabled;
+#endif
+
 int __read_mostly watchdog_user_enabled;
 int __read_mostly watchdog_thresh = 10;
 
@@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10;
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
 /*
  * The 'watchdog_running' variable is set to 1 when the watchdog threads
  * are registered/started and is set to 0 when the watchdog threads are
@@ -72,7 +109,27 @@ static int __read_mostly watchdog_running;
  * of 'watchdog_running' cannot change while the watchdog is deactivated
  * temporarily (see related code in 'proc' handlers).
  */
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+	return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
 
 static u64 __read_mostly sample_period;
 
@@ -120,6 +177,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
 	return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 {
 	sysctl_hardlockup_all_cpu_backtrace =
@@ -128,6 +186,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 }
 __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -213,18 +272,6 @@ void touch_softlockup_watchdog_sync(void)
 	__this_cpu_write(watchdog_touch_ts, 0);
 }
 
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
-	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
-	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-		return true;
-
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
-	return false;
-}
-
 static int is_softlockup(unsigned long touch_ts)
 {
 	unsigned long now = get_timestamp();
@@ -237,21 +284,21 @@ static int is_softlockup(unsigned long touch_ts)
 	return 0;
 }
 
-static void watchdog_interrupt_count(void)
+/* watchdog detector functions */
+bool is_hardlockup(void)
 {
-	__this_cpu_inc(hrtimer_interrupts);
-}
+	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
-	return 0;
+	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+		return true;
+
+	__this_cpu_write(hrtimer_interrupts_saved, hrint);
+	return false;
 }
-void __weak watchdog_nmi_disable(unsigned int cpu)
+
+static void watchdog_interrupt_count(void)
 {
+	__this_cpu_inc(hrtimer_interrupts);
 }
 
 static int watchdog_enable_all_cpus(void);
@@ -502,57 +549,6 @@ static void watchdog_unpark_threads(void)
 		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
 }
 
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
-	int ret = 0;
-
-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
-	/*
-	 * Multiple suspend requests can be active in parallel (counted by
-	 * the 'watchdog_suspended' variable). If the watchdog threads are
-	 * running, the first caller takes care that they will be parked.
-	 * The state of 'watchdog_running' cannot change while a suspend
-	 * request is active (see related code in 'proc' handlers).
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		ret = watchdog_park_threads();
-
-	if (ret == 0)
-		watchdog_suspended++;
-	else {
-		watchdog_disable_all_cpus();
-		pr_err("Failed to suspend lockup detectors, disabled\n");
-		watchdog_enabled = 0;
-	}
-
-	mutex_unlock(&watchdog_proc_mutex);
-
-	return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
-	mutex_lock(&watchdog_proc_mutex);
-
-	watchdog_suspended--;
-	/*
-	 * The watchdog threads are unparked if they were previously running
-	 * and if there is no more active suspend request.
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		watchdog_unpark_threads();
-
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
-}
-
 static int update_watchdog_all_cpus(void)
 {
 	int ret;
@@ -604,6 +600,81 @@ static void watchdog_disable_all_cpus(void)
 	}
 }
 
+#else /* SOFTLOCKUP */
+static int watchdog_park_threads(void)
+{
+	return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+	return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+	int ret = 0;
+
+	get_online_cpus();
+	mutex_lock(&watchdog_proc_mutex);
+	/*
+	 * Multiple suspend requests can be active in parallel (counted by
+	 * the 'watchdog_suspended' variable). If the watchdog threads are
+	 * running, the first caller takes care that they will be parked.
+	 * The state of 'watchdog_running' cannot change while a suspend
+	 * request is active (see related code in 'proc' handlers).
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		ret = watchdog_park_threads();
+
+	if (ret == 0)
+		watchdog_suspended++;
+	else {
+		watchdog_disable_all_cpus();
+		pr_err("Failed to suspend lockup detectors, disabled\n");
+		watchdog_enabled = 0;
+	}
+
+	mutex_unlock(&watchdog_proc_mutex);
+
+	return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
+{
+	mutex_lock(&watchdog_proc_mutex);
+
+	watchdog_suspended--;
+	/*
+	 * The watchdog threads are unparked if they were previously running
+	 * and if there is no more active suspend request.
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		watchdog_unpark_threads();
+
+	mutex_unlock(&watchdog_proc_mutex);
+	put_online_cpus();
+}
+
 #ifdef CONFIG_SYSCTL
 
 /*
@@ -810,9 +881,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 			 * a temporary cpumask, so we are likely not in a
 			 * position to do much else to make things better.
 			 */
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 			if (smpboot_update_cpumask_percpu_thread(
 				    &watchdog_threads, &watchdog_cpumask) != 0)
 				pr_err("cpumask update failed\n");
+#endif
 		}
 	}
 out:
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 90d688df6ce1c..295a0d84934cb 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -22,39 +22,7 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
-			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
-	if (!strncmp(str, "panic", 5))
-		hardlockup_panic = 1;
-	else if (!strncmp(str, "nopanic", 7))
-		hardlockup_panic = 0;
-	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
 
 void arch_touch_nmi_watchdog(void)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f28f4252e54a9..b0d01c6d4e03a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -778,34 +778,45 @@ config DEBUG_SHIRQ
 menu "Debug Lockups and Hangs"
 
 config LOCKUP_DETECTOR
-	bool "Detect Hard and Soft Lockups"
+	bool
+
+config SOFTLOCKUP_DETECTOR
+	bool "Detect Soft Lockups"
 	depends on DEBUG_KERNEL && !S390
+	select LOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to act as a watchdog to detect
-	  hard and soft lockups.
+	  soft lockups.
 
 	  Softlockups are bugs that cause the kernel to loop in kernel
 	  mode for more than 20 seconds, without giving other tasks a
 	  chance to run.  The current stack trace is displayed upon
 	  detection and the system will stay locked up.
 
+config HARDLOCKUP_DETECTOR_PERF
+	bool
+	select SOFTLOCKUP_DETECTOR
+
+#
+# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+# lockup detector rather than the perf based detector.
+#
+config HARDLOCKUP_DETECTOR
+	bool "Detect Hard Lockups"
+	depends on DEBUG_KERNEL && !S390
+	depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+	select LOCKUP_DETECTOR
+	select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+	select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+	help
+	  Say Y here to enable the kernel to act as a watchdog to detect
+	  hard lockups.
+
 	  Hardlockups are bugs that cause the CPU to loop in kernel mode
 	  for more than 10 seconds, without letting other interrupts have a
 	  chance to run.  The current stack trace is displayed upon detection
 	  and the system will stay locked up.
 
-	  The overhead should be minimal.  A periodic hrtimer runs to
-	  generate interrupts and kick the watchdog task every 4 seconds.
-	  An NMI is generated every 10 seconds or so to check for hardlockups.
-
-	  The frequency of hrtimer and NMI events and the soft and hard lockup
-	  thresholds can be controlled through the sysctl watchdog_thresh.
-
-config HARDLOCKUP_DETECTOR
-	def_bool y
-	depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
-	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
 config BOOTPARAM_HARDLOCKUP_PANIC
 	bool "Panic (Reboot) On Hard Lockups"
 	depends on HARDLOCKUP_DETECTOR
@@ -826,7 +837,7 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to panic on "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
@@ -843,7 +854,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
 
 config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 	int
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	range 0 1
 	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
 	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -851,7 +862,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 config DETECT_HUNG_TASK
 	bool "Detect Hung Tasks"
 	depends on DEBUG_KERNEL
-	default LOCKUP_DETECTOR
+	default SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to detect "hung tasks",
 	  which are bugs that cause the task to be stuck in
-- 
GitLab


From a10a842ff81a7e3810817b3b04e4c432b6191e21 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:49 -0700
Subject: [PATCH 1360/1958] kernel/watchdog: provide watchdog_nmi_reconfigure()
 for arch watchdogs

After reconfiguring watchdog sysctls etc., architecture specific
watchdogs may not get all their parameters updated.

watchdog_nmi_reconfigure() can be implemented to pull the new values in
and set the arch NMI watchdog.

[npiggin@gmail.com: add code comments]
  Link: http://lkml.kernel.org/r/20170617125933.774d3858@roar.ozlabs.ibm.com
[arnd@arndb.de: hide unused function]
  Link: http://lkml.kernel.org/r/20170620204854.966601-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170616065715.18390-5-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/watchdog.c | 48 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1fba9c3d66dcc..cabe3e9fb620f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -114,6 +114,10 @@ int __read_mostly watchdog_suspended;
 /*
  * These functions can be overridden if an architecture implements its
  * own hardlockup detector.
+ *
+ * watchdog_nmi_enable/disable can be implemented to start and stop when
+ * softlockup watchdog threads start and stop. The arch must select the
+ * SOFTLOCKUP_DETECTOR Kconfig.
  */
 int __weak watchdog_nmi_enable(unsigned int cpu)
 {
@@ -123,6 +127,22 @@ void __weak watchdog_nmi_disable(unsigned int cpu)
 {
 }
 
+/*
+ * watchdog_nmi_reconfigure can be implemented to be notified after any
+ * watchdog configuration change. The arch hardlockup watchdog should
+ * respond to the following variables:
+ * - nmi_watchdog_enabled
+ * - watchdog_thresh
+ * - watchdog_cpumask
+ * - sysctl_hardlockup_all_cpu_backtrace
+ * - hardlockup_panic
+ * - watchdog_suspended
+ */
+void __weak watchdog_nmi_reconfigure(void)
+{
+}
+
+
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 
 /* Helper for online, unparked cpus. */
@@ -600,6 +620,14 @@ static void watchdog_disable_all_cpus(void)
 	}
 }
 
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+	return smpboot_update_cpumask_percpu_thread(
+		    &watchdog_threads, &watchdog_cpumask);
+}
+#endif
+
 #else /* SOFTLOCKUP */
 static int watchdog_park_threads(void)
 {
@@ -619,6 +647,13 @@ static void watchdog_disable_all_cpus(void)
 {
 }
 
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+	return 0;
+}
+#endif
+
 static void set_sample_period(void)
 {
 }
@@ -651,6 +686,8 @@ int lockup_detector_suspend(void)
 		watchdog_enabled = 0;
 	}
 
+	watchdog_nmi_reconfigure();
+
 	mutex_unlock(&watchdog_proc_mutex);
 
 	return ret;
@@ -671,6 +708,8 @@ void lockup_detector_resume(void)
 	if (watchdog_running && !watchdog_suspended)
 		watchdog_unpark_threads();
 
+	watchdog_nmi_reconfigure();
+
 	mutex_unlock(&watchdog_proc_mutex);
 	put_online_cpus();
 }
@@ -696,6 +735,8 @@ static int proc_watchdog_update(void)
 	else
 		watchdog_disable_all_cpus();
 
+	watchdog_nmi_reconfigure();
+
 	return err;
 
 }
@@ -881,12 +922,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 			 * a temporary cpumask, so we are likely not in a
 			 * position to do much else to make things better.
 			 */
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-			if (smpboot_update_cpumask_percpu_thread(
-				    &watchdog_threads, &watchdog_cpumask) != 0)
+			if (watchdog_update_cpus() != 0)
 				pr_err("cpumask update failed\n");
-#endif
 		}
+
+		watchdog_nmi_reconfigure();
 	}
 out:
 	mutex_unlock(&watchdog_proc_mutex);
-- 
GitLab


From 2104180a53698df5aec35aed5f840a26ade0551d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 12 Jul 2017 14:35:52 -0700
Subject: [PATCH 1361/1958] powerpc/64s: implement arch-specific hardlockup
 watchdog

Implement an arch-speicfic watchdog rather than use the perf-based
hardlockup detector.

The new watchdog takes the soft-NMI directly, rather than going through
perf.  Perf interrupts are to be made maskable in future, so that would
prevent the perf detector from working in those regions.

Additionally, implement a SMP based detector where all CPUs watch one
another by pinging a shared cpumask.  This is because powerpc Book3S
does not have a true periodic local NMI, but some platforms do implement
a true NMI IPI.

If a CPU is stuck with interrupts hard disabled, the soft-NMI watchdog
does not work, but the SMP watchdog will.  Even on platforms without a
true NMI IPI to get a good trace from the stuck CPU, other CPUs will
notice the lockup sufficiently to report it and panic.

[npiggin@gmail.com: honor watchdog disable at boot/hotplug]
  Link: http://lkml.kernel.org/r/20170621001346.5bb337c9@roar.ozlabs.ibm.com
[npiggin@gmail.com: fix false positive warning at CPU unplug]
  Link: http://lkml.kernel.org/r/20170630080740.20766-1-npiggin@gmail.com
[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/20170616065715.18390-6-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Tested-by: Babu Moger <babu.moger@oracle.com>	[sparc]
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/Kconfig                 |   7 +-
 arch/powerpc/include/asm/nmi.h       |  11 +
 arch/powerpc/include/asm/smp.h       |   2 +
 arch/powerpc/kernel/Makefile         |   1 +
 arch/powerpc/kernel/exceptions-64s.S |  30 ++-
 arch/powerpc/kernel/kvm.c            |   7 +
 arch/powerpc/kernel/setup_64.c       |  19 --
 arch/powerpc/kernel/smp.c            |  20 +-
 arch/powerpc/kernel/watchdog.c       | 386 +++++++++++++++++++++++++++
 9 files changed, 458 insertions(+), 25 deletions(-)
 create mode 100644 arch/powerpc/kernel/watchdog.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 63ed758e1d202..fce2f4f208917 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -82,7 +82,7 @@ config NR_IRQS
 
 config NMI_IPI
 	bool
-	depends on SMP && (DEBUGGER || KEXEC_CORE)
+	depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
 	default y
 
 config STACKTRACE_SUPPORT
@@ -192,12 +192,13 @@ config PPC
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
-	select HAVE_NMI				if PERF_EVENTS
+	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
+	select HAVE_HARDLOCKUP_DETECTOR_ARCH	if (PPC64 && PPC_BOOK3S)
 	select HAVE_OPROFILE
 	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
-	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE		if SMP
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index ff1ccb375e606..6f8e79cd35d87 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -1,4 +1,15 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+extern void arch_touch_nmi_watchdog(void);
+
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+					   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
+#else
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+
 #endif /* _ASM_NMI_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ebddb2111d870..8ea98504f9007 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -55,6 +55,8 @@ struct smp_ops_t {
 	int   (*cpu_bootable)(unsigned int nr);
 };
 
+extern void smp_flush_nmi_ipi(u64 delay_us);
+extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
 extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
 extern void smp_generic_give_timebase(void);
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0845eebc5af38..4aa7c147e4472 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_VDSO32)		+= vdso32/
+obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c18a5fbb4bbf..e6d8354d79ef2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1314,6 +1314,31 @@ EXC_REAL_NONE(0x1800, 0x100)
 EXC_VIRT_NONE(0x5800, 0x100)
 #endif
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+
+#define MASKED_DEC_HANDLER_LABEL 3f
+
+#define MASKED_DEC_HANDLER(_H)				\
+3: /* soft-nmi */					\
+	std	r12,PACA_EXGEN+EX_R12(r13);		\
+	GET_SCRATCH0(r10);				\
+	std	r10,PACA_EXGEN+EX_R13(r13);		\
+	EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+
+EXC_COMMON_BEGIN(soft_nmi_common)
+	mr	r10,r1
+	ld	r1,PACAEMERGSP(r13)
+	ld	r1,PACA_NMI_EMERG_SP(r13)
+	subi	r1,r1,INT_FRAME_SIZE
+	EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
+			system_reset, soft_nmi_interrupt,
+			ADD_NVGPRS;ADD_RECONCILE)
+	b	ret_from_except
+
+#else
+#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
+#define MASKED_DEC_HANDLER(_H)
+#endif
 
 /*
  * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1336,7 +1361,7 @@ masked_##_H##interrupt:					\
 	lis	r10,0x7fff;				\
 	ori	r10,r10,0xffff;				\
 	mtspr	SPRN_DEC,r10;				\
-	b	2f;					\
+	b	MASKED_DEC_HANDLER_LABEL;		\
 1:	cmpwi	r10,PACA_IRQ_DBELL;			\
 	beq	2f;					\
 	cmpwi	r10,PACA_IRQ_HMI;			\
@@ -1351,7 +1376,8 @@ masked_##_H##interrupt:					\
 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
 	GET_SCRATCH0(r13);				\
 	##_H##rfid;					\
-	b	.
+	b	.;					\
+	MASKED_DEC_HANDLER(_H)
 
 /*
  * Real mode exceptions actually use this too, but alternate
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 9ad37f827a975..1086ea37c8324 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,6 +25,7 @@
 #include <linux/kvm_para.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/nmi.h> /* hardlockup_detector_disable() */
 
 #include <asm/reg.h>
 #include <asm/sections.h>
@@ -718,6 +719,12 @@ static __init void kvm_free_tmp(void)
 
 static int __init kvm_guest_init(void)
 {
+	/*
+	 * The hardlockup detector is likely to get false positives in
+	 * KVM guests, so disable it by default.
+	 */
+	hardlockup_detector_disable();
+
 	if (!kvm_para_available())
 		goto free_tmp;
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 074a075a9cdb3..af23d4b576ec5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -751,22 +751,3 @@ unsigned long memory_block_size_bytes(void)
 struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
-	return ppc_proc_freq * watchdog_thresh;
-}
-
-/*
- * The hardlockup detector breaks PMU event based branches and is likely
- * to get false positives in KVM guests, so disable it by default.
- */
-static int __init disable_hardlockup_detector(void)
-{
-	hardlockup_detector_disable();
-
-	return 0;
-}
-early_initcall(disable_hardlockup_detector);
-#endif
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c6b8bace17669..997c88d54acf2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -435,13 +435,31 @@ static void do_smp_send_nmi_ipi(int cpu)
 	}
 }
 
+void smp_flush_nmi_ipi(u64 delay_us)
+{
+	unsigned long flags;
+
+	nmi_ipi_lock_start(&flags);
+	while (nmi_ipi_busy_count) {
+		nmi_ipi_unlock_end(&flags);
+		udelay(1);
+		if (delay_us) {
+			delay_us--;
+			if (!delay_us)
+				return;
+		}
+		nmi_ipi_lock_start(&flags);
+	}
+	nmi_ipi_unlock_end(&flags);
+}
+
 /*
  * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
  * - fn is the target callback function.
  * - delay_us > 0 is the delay before giving up waiting for targets to
  *   enter the handler, == 0 specifies indefinite delay.
  */
-static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
 {
 	unsigned long flags;
 	int me = raw_smp_processor_id();
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
new file mode 100644
index 0000000000000..b67f8b03a32d0
--- /dev/null
+++ b/arch/powerpc/kernel/watchdog.c
@@ -0,0 +1,386 @@
+/*
+ * Watchdog support on powerpc systems.
+ *
+ * Copyright 2017, IBM Corporation.
+ *
+ * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/sched/debug.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+
+#include <asm/paca.h>
+
+/*
+ * The watchdog has a simple timer that runs on each CPU, once per timer
+ * period. This is the heartbeat.
+ *
+ * Then there are checks to see if the heartbeat has not triggered on a CPU
+ * for the panic timeout period. Currently the watchdog only supports an
+ * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ *
+ * This is not an NMI watchdog, but Linux uses that name for a generic
+ * watchdog in some cases, so NMI gets used in some places.
+ */
+
+static cpumask_t wd_cpus_enabled __read_mostly;
+
+static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
+static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
+
+static u64 wd_timer_period_ms __read_mostly;  /* interval between heartbeat */
+
+static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(u64, wd_timer_tb);
+
+/*
+ * These are for the SMP checker. CPUs clear their pending bit in their
+ * heartbeat. If the bitmask becomes empty, the time is noted and the
+ * bitmask is refilled.
+ *
+ * All CPUs clear their bit in the pending mask every timer period.
+ * Once all have cleared, the time is noted and the bits are reset.
+ * If the time since all clear was greater than the panic timeout,
+ * we can panic with the list of stuck CPUs.
+ *
+ * This will work best with NMI IPIs for crash code so the stuck CPUs
+ * can be pulled out to get their backtraces.
+ */
+static unsigned long __wd_smp_lock;
+static cpumask_t wd_smp_cpus_pending;
+static cpumask_t wd_smp_cpus_stuck;
+static u64 wd_smp_last_reset_tb;
+
+static inline void wd_smp_lock(unsigned long *flags)
+{
+	/*
+	 * Avoid locking layers if possible.
+	 * This may be called from low level interrupt handlers at some
+	 * point in future.
+	 */
+	local_irq_save(*flags);
+	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
+		cpu_relax();
+}
+
+static inline void wd_smp_unlock(unsigned long *flags)
+{
+	clear_bit_unlock(0, &__wd_smp_lock);
+	local_irq_restore(*flags);
+}
+
+static void wd_lockup_ipi(struct pt_regs *regs)
+{
+	pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+	print_modules();
+	print_irqtrace_events(current);
+	if (regs)
+		show_regs(regs);
+	else
+		dump_stack();
+
+	if (hardlockup_panic)
+		nmi_panic(regs, "Hard LOCKUP");
+}
+
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+	if (cpumask_empty(&wd_smp_cpus_pending)) {
+		wd_smp_last_reset_tb = tb;
+		cpumask_andnot(&wd_smp_cpus_pending,
+				&wd_cpus_enabled,
+				&wd_smp_cpus_stuck);
+	}
+}
+
+static void watchdog_smp_panic(int cpu, u64 tb)
+{
+	unsigned long flags;
+	int c;
+
+	wd_smp_lock(&flags);
+	/* Double check some things under lock */
+	if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+		goto out;
+	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
+		goto out;
+	if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+		goto out;
+
+	pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
+			cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+
+	/*
+	 * Try to trigger the stuck CPUs.
+	 */
+	for_each_cpu(c, &wd_smp_cpus_pending) {
+		if (c == cpu)
+			continue;
+		smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+	}
+	smp_flush_nmi_ipi(1000000);
+
+	/* Take the stuck CPU out of the watch group */
+	for_each_cpu(c, &wd_smp_cpus_pending)
+		set_cpu_stuck(c, tb);
+
+out:
+	wd_smp_unlock(&flags);
+
+	printk_safe_flush();
+	/*
+	 * printk_safe_flush() seems to require another print
+	 * before anything actually goes out to console.
+	 */
+	if (sysctl_hardlockup_all_cpu_backtrace)
+		trigger_allbutself_cpu_backtrace();
+
+	if (hardlockup_panic)
+		nmi_panic(NULL, "Hard LOCKUP");
+}
+
+static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+{
+	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
+		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
+			unsigned long flags;
+
+			pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+			wd_smp_lock(&flags);
+			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+			wd_smp_unlock(&flags);
+		}
+		return;
+	}
+	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+	if (cpumask_empty(&wd_smp_cpus_pending)) {
+		unsigned long flags;
+
+		wd_smp_lock(&flags);
+		if (cpumask_empty(&wd_smp_cpus_pending)) {
+			wd_smp_last_reset_tb = tb;
+			cpumask_andnot(&wd_smp_cpus_pending,
+					&wd_cpus_enabled,
+					&wd_smp_cpus_stuck);
+		}
+		wd_smp_unlock(&flags);
+	}
+}
+
+static void watchdog_timer_interrupt(int cpu)
+{
+	u64 tb = get_tb();
+
+	per_cpu(wd_timer_tb, cpu) = tb;
+
+	wd_smp_clear_cpu_pending(cpu, tb);
+
+	if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
+		watchdog_smp_panic(cpu, tb);
+}
+
+void soft_nmi_interrupt(struct pt_regs *regs)
+{
+	unsigned long flags;
+	int cpu = raw_smp_processor_id();
+	u64 tb;
+
+	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+		return;
+
+	nmi_enter();
+	tb = get_tb();
+	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+		per_cpu(wd_timer_tb, cpu) = tb;
+
+		wd_smp_lock(&flags);
+		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
+			wd_smp_unlock(&flags);
+			goto out;
+		}
+		set_cpu_stuck(cpu, tb);
+
+		pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+		print_modules();
+		print_irqtrace_events(current);
+		if (regs)
+			show_regs(regs);
+		else
+			dump_stack();
+
+		wd_smp_unlock(&flags);
+
+		if (sysctl_hardlockup_all_cpu_backtrace)
+			trigger_allbutself_cpu_backtrace();
+
+		if (hardlockup_panic)
+			nmi_panic(regs, "Hard LOCKUP");
+	}
+	if (wd_panic_timeout_tb < 0x7fffffff)
+		mtspr(SPRN_DEC, wd_panic_timeout_tb);
+
+out:
+	nmi_exit();
+}
+
+static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
+{
+	t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
+	if (wd_timer_period_ms > 1000)
+		t->expires = __round_jiffies_up(t->expires, cpu);
+	add_timer_on(t, cpu);
+}
+
+static void wd_timer_fn(unsigned long data)
+{
+	struct timer_list *t = this_cpu_ptr(&wd_timer);
+	int cpu = smp_processor_id();
+
+	watchdog_timer_interrupt(cpu);
+
+	wd_timer_reset(cpu, t);
+}
+
+void arch_touch_nmi_watchdog(void)
+{
+	int cpu = smp_processor_id();
+
+	watchdog_timer_interrupt(cpu);
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+static void start_watchdog_timer_on(unsigned int cpu)
+{
+	struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+	per_cpu(wd_timer_tb, cpu) = get_tb();
+
+	setup_pinned_timer(t, wd_timer_fn, 0);
+	wd_timer_reset(cpu, t);
+}
+
+static void stop_watchdog_timer_on(unsigned int cpu)
+{
+	struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+	del_timer_sync(t);
+}
+
+static int start_wd_on_cpu(unsigned int cpu)
+{
+	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
+		WARN_ON(1);
+		return 0;
+	}
+
+	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+		return 0;
+
+	if (watchdog_suspended)
+		return 0;
+
+	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+		return 0;
+
+	cpumask_set_cpu(cpu, &wd_cpus_enabled);
+	if (cpumask_weight(&wd_cpus_enabled) == 1) {
+		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
+		wd_smp_last_reset_tb = get_tb();
+	}
+	smp_wmb();
+	start_watchdog_timer_on(cpu);
+
+	return 0;
+}
+
+static int stop_wd_on_cpu(unsigned int cpu)
+{
+	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+		return 0; /* Can happen in CPU unplug case */
+
+	stop_watchdog_timer_on(cpu);
+
+	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+	wd_smp_clear_cpu_pending(cpu, get_tb());
+
+	return 0;
+}
+
+static void watchdog_calc_timeouts(void)
+{
+	wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;
+
+	/* Have the SMP detector trigger a bit later */
+	wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
+
+	/* 2/5 is the factor that the perf based detector uses */
+	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
+}
+
+void watchdog_nmi_reconfigure(void)
+{
+	int cpu;
+
+	watchdog_calc_timeouts();
+
+	for_each_cpu(cpu, &wd_cpus_enabled)
+		stop_wd_on_cpu(cpu);
+
+	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+		start_wd_on_cpu(cpu);
+}
+
+/*
+ * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ */
+static int __init powerpc_watchdog_init(void)
+{
+	int err;
+
+	watchdog_calc_timeouts();
+
+	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
+				start_wd_on_cpu, stop_wd_on_cpu);
+	if (err < 0)
+		pr_warn("Watchdog could not be initialized");
+
+	return 0;
+}
+arch_initcall(powerpc_watchdog_init);
+
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+	nmi_cpu_backtrace(regs);
+}
+
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+		if (cpu == smp_processor_id())
+			handle_backtrace_ipi(NULL);
+		else
+			smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
+	}
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+}
-- 
GitLab


From 3e2c044a54e6b6373606f8ffad42a4a0759fcf3d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:55 -0700
Subject: [PATCH 1362/1958] efi: avoid fortify checks in EFI stub

This avoids CONFIG_FORTIFY_SOURCE from being enabled during the EFI stub
build, as adding a panic() implementation may not work well.  This can
be adjusted in the future.

Link: http://lkml.kernel.org/r/1497903987-21002-2-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Daniel Micay <danielmicay@gmail.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/efi/libstub/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index f7425960f6a57..37e24f525162e 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -17,6 +17,7 @@ cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
 cflags-$(CONFIG_EFI_ARMSTUB)	+= -I$(srctree)/scripts/dtc/libfdt
 
 KBUILD_CFLAGS			:= $(cflags-y) -DDISABLE_BRANCH_PROFILING \
+				   -D__NO_FORTIFY \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector)
 
-- 
GitLab


From e2ae8ab4b571e2e4094a28acb60649bc2732c67f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:35:58 -0700
Subject: [PATCH 1363/1958] kexec_file: adjust declaration of kexec_purgatory

Defining kexec_purgatory as a zero-length char array upsets compile time
size checking.  Since this is built on a per-arch basis, define it as an
unsized char array (like is done for other similar things, e.g.  linker
sections).  This silences the warning generated by the future
CONFIG_FORTIFY_SOURCE, which did not like the memcmp() of a "0 byte"
array.  This drops the __weak and uses an extern instead, since both
users define kexec_purgatory.

Link: http://lkml.kernel.org/r/1497903987-21002-4-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kexec_file.c     | 7 -------
 kernel/kexec_internal.h | 2 ++
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index c8f7f77e9fa9f..9f48f44122972 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -26,13 +26,6 @@
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
 
-/*
- * Declare these symbols weak so that if architecture provides a purgatory,
- * these will be overridden.
- */
-char __weak kexec_purgatory[0];
-size_t __weak kexec_purgatory_size = 0;
-
 static int kexec_calculate_store_digests(struct kimage *image);
 
 /* Architectures can provide this probe function */
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 799a8a4521870..50dfcb039a417 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -17,6 +17,8 @@ extern struct mutex kexec_mutex;
 #ifdef CONFIG_KEXEC_FILE
 #include <linux/purgatory.h>
 void kimage_file_post_load_cleanup(struct kimage *image);
+extern char kexec_purgatory[];
+extern size_t kexec_purgatory_size;
 #else /* CONFIG_KEXEC_FILE */
 static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
 #endif /* CONFIG_KEXEC_FILE */
-- 
GitLab


From 4c93496f18ce5044d78e4f7f9e018682a4f44b3d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:36:01 -0700
Subject: [PATCH 1364/1958] IB/rxe: do not copy extra stack memory to skb

This fixes a over-read condition detected by FORTIFY_SOURCE for this
line:

	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));

The error was:

  In file included from ./include/linux/bitmap.h:8:0,
                   from ./include/linux/cpumask.h:11,
                   from ./include/linux/mm_types_task.h:13,
                   from ./include/linux/mm_types.h:4,
                   from ./include/linux/kmemcheck.h:4,
                   from ./include/linux/skbuff.h:18,
                   from drivers/infiniband/sw/rxe/rxe_resp.c:34:
  In function 'memcpy',
      inlined from 'send_atomic_ack.constprop' at drivers/infiniband/sw/rxe/rxe_resp.c:998:2,
      inlined from 'acknowledge' at drivers/infiniband/sw/rxe/rxe_resp.c:1026:3,
      inlined from 'rxe_responder' at drivers/infiniband/sw/rxe/rxe_resp.c:1286:10:
  ./include/linux/string.h:309:4: error: call to '__read_overflow2' declared with attribute error: detected read beyond size of object passed as 2nd parameter
      __read_overflow2();

Daniel Micay noted that struct rxe_pkt_info is 32 bytes on 32-bit
architectures, but skb->cb is still 64.  The memcpy() over-reads 32
bytes.  This fixes it by zeroing the unused bytes in skb->cb.

Link: http://lkml.kernel.org/r/1497903987-21002-5-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Moni Shoua <monis@mellanox.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 23039768f5416..be944d5aa9afc 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -995,7 +995,9 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	free_rd_atomic_resource(qp, res);
 	rxe_advance_resp_resource(qp);
 
-	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt));
+	memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
+	       sizeof(skb->cb) - sizeof(ack_pkt));
 
 	res->type = RXE_ATOMIC_MASK;
 	res->atomic.skb = skb;
-- 
GitLab


From 054f367a32381b5640c5d150fe0b7ba285564998 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Wed, 12 Jul 2017 14:36:04 -0700
Subject: [PATCH 1365/1958] powerpc: don't fortify prom_init

prom_init is a bit special; in theory it should be able to be linked
separately to the kernel.  To keep this from getting too complex, the
symbols that prom_init.c uses are checked.

Fortification adds symbols, and it gets quite messy as it includes
things like panic().  So just don't fortify prom_init.c for now.

Link: http://lkml.kernel.org/r/1497903987-21002-6-git-send-email-keescook@chromium.org
Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/prom_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index dd8a04f3053a7..613f79f03877d 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -15,6 +15,9 @@
 
 #undef DEBUG_PROM
 
+/* we cannot use FORTIFY as it brings in new symbols */
+#define __NO_FORTIFY
+
 #include <stdarg.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-- 
GitLab


From c69a48cdb301a18697bc8c9935baf4f32861cf9e Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Wed, 12 Jul 2017 14:36:07 -0700
Subject: [PATCH 1366/1958] powerpc: make feature-fixup tests fortify-safe

Testing the fortified string functions[1] would cause a kernel panic on
boot in test_feature_fixups() due to a buffer overflow in memcmp.

This boils down to things like this:

  extern unsigned int ftr_fixup_test1;
  extern unsigned int ftr_fixup_test1_orig;

  check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);

We know that these are asm labels so it is safe to read up to 'size'
bytes at those addresses.

However, because we have passed the address of a single unsigned int to
memcmp, the compiler believes the underlying object is in fact a single
unsigned int.  So if size > sizeof(unsigned int), there will be a panic
at runtime.

We can fix this by changing the types: instead of calling the asm labels
unsigned ints, call them unsigned int[]s.  Therefore the size isn't
incorrectly determined at compile time and we get a regular unsafe
memcmp and no panic.

[1] http://openwall.com/lists/kernel-hardening/2017/05/09/2

Link: http://lkml.kernel.org/r/1497903987-21002-7-git-send-email-keescook@chromium.org
Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Tested-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/lib/feature-fixups.c | 180 +++++++++++++++---------------
 1 file changed, 90 insertions(+), 90 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index f3917705c686c..41cf5ae273cf7 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -233,192 +233,192 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p)
 
 static void test_basic_patching(void)
 {
-	extern unsigned int ftr_fixup_test1;
-	extern unsigned int end_ftr_fixup_test1;
-	extern unsigned int ftr_fixup_test1_orig;
-	extern unsigned int ftr_fixup_test1_expected;
-	int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;
+	extern unsigned int ftr_fixup_test1[];
+	extern unsigned int end_ftr_fixup_test1[];
+	extern unsigned int ftr_fixup_test1_orig[];
+	extern unsigned int ftr_fixup_test1_expected[];
+	int size = end_ftr_fixup_test1 - ftr_fixup_test1;
 
 	fixup.value = fixup.mask = 8;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
 	fixup.alt_start_off = fixup.alt_end_off = 0;
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 
 	/* Check we don't patch if the value matches */
 	patch_feature_section(8, &fixup);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 
 	/* Check we do patch if the value doesn't match */
 	patch_feature_section(0, &fixup);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
 
 	/* Check we do patch if the mask doesn't match */
-	memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+	memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 	patch_feature_section(~8, &fixup);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
 }
 
 static void test_alternative_patching(void)
 {
-	extern unsigned int ftr_fixup_test2;
-	extern unsigned int end_ftr_fixup_test2;
-	extern unsigned int ftr_fixup_test2_orig;
-	extern unsigned int ftr_fixup_test2_alt;
-	extern unsigned int ftr_fixup_test2_expected;
-	int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;
+	extern unsigned int ftr_fixup_test2[];
+	extern unsigned int end_ftr_fixup_test2[];
+	extern unsigned int ftr_fixup_test2_orig[];
+	extern unsigned int ftr_fixup_test2_alt[];
+	extern unsigned int ftr_fixup_test2_expected[];
+	int size = end_ftr_fixup_test2 - ftr_fixup_test2;
 
 	fixup.value = fixup.mask = 0xF;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
-	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
-	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 
 	/* Check we don't patch if the value matches */
 	patch_feature_section(0xF, &fixup);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 
 	/* Check we do patch if the value doesn't match */
 	patch_feature_section(0, &fixup);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
 
 	/* Check we do patch if the mask doesn't match */
-	memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+	memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 	patch_feature_section(~0xF, &fixup);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
 }
 
 static void test_alternative_case_too_big(void)
 {
-	extern unsigned int ftr_fixup_test3;
-	extern unsigned int end_ftr_fixup_test3;
-	extern unsigned int ftr_fixup_test3_orig;
-	extern unsigned int ftr_fixup_test3_alt;
-	int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;
+	extern unsigned int ftr_fixup_test3[];
+	extern unsigned int end_ftr_fixup_test3[];
+	extern unsigned int ftr_fixup_test3_orig[];
+	extern unsigned int ftr_fixup_test3_alt[];
+	int size = end_ftr_fixup_test3 - ftr_fixup_test3;
 
 	fixup.value = fixup.mask = 0xC;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
-	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
-	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 
 	/* Expect nothing to be patched, and the error returned to us */
 	check(patch_feature_section(0xF, &fixup) == 1);
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 	check(patch_feature_section(0, &fixup) == 1);
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 	check(patch_feature_section(~0xF, &fixup) == 1);
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 }
 
 static void test_alternative_case_too_small(void)
 {
-	extern unsigned int ftr_fixup_test4;
-	extern unsigned int end_ftr_fixup_test4;
-	extern unsigned int ftr_fixup_test4_orig;
-	extern unsigned int ftr_fixup_test4_alt;
-	extern unsigned int ftr_fixup_test4_expected;
-	int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
+	extern unsigned int ftr_fixup_test4[];
+	extern unsigned int end_ftr_fixup_test4[];
+	extern unsigned int ftr_fixup_test4_orig[];
+	extern unsigned int ftr_fixup_test4_alt[];
+	extern unsigned int ftr_fixup_test4_expected[];
+	int size = end_ftr_fixup_test4 - ftr_fixup_test4;
 	unsigned long flag;
 
 	/* Check a high-bit flag */
 	flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
 	fixup.value = fixup.mask = flag;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
-	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
-	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 
 	/* Check we don't patch if the value matches */
 	patch_feature_section(flag, &fixup);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 
 	/* Check we do patch if the value doesn't match */
 	patch_feature_section(0, &fixup);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
 
 	/* Check we do patch if the mask doesn't match */
-	memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+	memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 	patch_feature_section(~flag, &fixup);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
 }
 
 static void test_alternative_case_with_branch(void)
 {
-	extern unsigned int ftr_fixup_test5;
-	extern unsigned int end_ftr_fixup_test5;
-	extern unsigned int ftr_fixup_test5_expected;
-	int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;
+	extern unsigned int ftr_fixup_test5[];
+	extern unsigned int end_ftr_fixup_test5[];
+	extern unsigned int ftr_fixup_test5_expected[];
+	int size = end_ftr_fixup_test5 - ftr_fixup_test5;
 
-	check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
+	check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
 }
 
 static void test_alternative_case_with_external_branch(void)
 {
-	extern unsigned int ftr_fixup_test6;
-	extern unsigned int end_ftr_fixup_test6;
-	extern unsigned int ftr_fixup_test6_expected;
-	int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;
+	extern unsigned int ftr_fixup_test6[];
+	extern unsigned int end_ftr_fixup_test6[];
+	extern unsigned int ftr_fixup_test6_expected[];
+	int size = end_ftr_fixup_test6 - ftr_fixup_test6;
 
-	check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
+	check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
 }
 
 static void test_cpu_macros(void)
 {
-	extern u8 ftr_fixup_test_FTR_macros;
-	extern u8 ftr_fixup_test_FTR_macros_expected;
-	unsigned long size = &ftr_fixup_test_FTR_macros_expected -
-			     &ftr_fixup_test_FTR_macros;
+	extern u8 ftr_fixup_test_FTR_macros[];
+	extern u8 ftr_fixup_test_FTR_macros_expected[];
+	unsigned long size = ftr_fixup_test_FTR_macros_expected -
+			     ftr_fixup_test_FTR_macros;
 
 	/* The fixups have already been done for us during boot */
-	check(memcmp(&ftr_fixup_test_FTR_macros,
-		     &ftr_fixup_test_FTR_macros_expected, size) == 0);
+	check(memcmp(ftr_fixup_test_FTR_macros,
+		     ftr_fixup_test_FTR_macros_expected, size) == 0);
 }
 
 static void test_fw_macros(void)
 {
 #ifdef CONFIG_PPC64
-	extern u8 ftr_fixup_test_FW_FTR_macros;
-	extern u8 ftr_fixup_test_FW_FTR_macros_expected;
-	unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
-			     &ftr_fixup_test_FW_FTR_macros;
+	extern u8 ftr_fixup_test_FW_FTR_macros[];
+	extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+	unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
+			     ftr_fixup_test_FW_FTR_macros;
 
 	/* The fixups have already been done for us during boot */
-	check(memcmp(&ftr_fixup_test_FW_FTR_macros,
-		     &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+	check(memcmp(ftr_fixup_test_FW_FTR_macros,
+		     ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
 #endif
 }
 
 static void test_lwsync_macros(void)
 {
-	extern u8 lwsync_fixup_test;
-	extern u8 end_lwsync_fixup_test;
-	extern u8 lwsync_fixup_test_expected_LWSYNC;
-	extern u8 lwsync_fixup_test_expected_SYNC;
-	unsigned long size = &end_lwsync_fixup_test -
-			     &lwsync_fixup_test;
+	extern u8 lwsync_fixup_test[];
+	extern u8 end_lwsync_fixup_test[];
+	extern u8 lwsync_fixup_test_expected_LWSYNC[];
+	extern u8 lwsync_fixup_test_expected_SYNC[];
+	unsigned long size = end_lwsync_fixup_test -
+			     lwsync_fixup_test;
 
 	/* The fixups have already been done for us during boot */
 	if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
-		check(memcmp(&lwsync_fixup_test,
-			     &lwsync_fixup_test_expected_LWSYNC, size) == 0);
+		check(memcmp(lwsync_fixup_test,
+			     lwsync_fixup_test_expected_LWSYNC, size) == 0);
 	} else {
-		check(memcmp(&lwsync_fixup_test,
-			     &lwsync_fixup_test_expected_SYNC, size) == 0);
+		check(memcmp(lwsync_fixup_test,
+			     lwsync_fixup_test_expected_SYNC, size) == 0);
 	}
 }
 
-- 
GitLab


From 6974f0c4555e285ab217cee58b6e874f776ff409 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 12 Jul 2017 14:36:10 -0700
Subject: [PATCH 1367/1958] include/linux/string.h: add the option of fortified
 string.h functions

This adds support for compiling with a rough equivalent to the glibc
_FORTIFY_SOURCE=1 feature, providing compile-time and runtime buffer
overflow checks for string.h functions when the compiler determines the
size of the source or destination buffer at compile-time.  Unlike glibc,
it covers buffer reads in addition to writes.

GNU C __builtin_*_chk intrinsics are avoided because they would force a
much more complex implementation.  They aren't designed to detect read
overflows and offer no real benefit when using an implementation based
on inline checks.  Inline checks don't add up to much code size and
allow full use of the regular string intrinsics while avoiding the need
for a bunch of _chk functions and per-arch assembly to avoid wrapper
overhead.

This detects various overflows at compile-time in various drivers and
some non-x86 core kernel code.  There will likely be issues caught in
regular use at runtime too.

Future improvements left out of initial implementation for simplicity,
as it's all quite optional and can be done incrementally:

* Some of the fortified string functions (strncpy, strcat), don't yet
  place a limit on reads from the source based on __builtin_object_size of
  the source buffer.

* Extending coverage to more string functions like strlcat.

* It should be possible to optionally use __builtin_object_size(x, 1) for
  some functions (C strings) to detect intra-object overflows (like
  glibc's _FORTIFY_SOURCE=2), but for now this takes the conservative
  approach to avoid likely compatibility issues.

* The compile-time checks should be made available via a separate config
  option which can be enabled by default (or always enabled) once enough
  time has passed to get the issues it catches fixed.

Kees said:
 "This is great to have. While it was out-of-tree code, it would have
  blocked at least CVE-2016-3858 from being exploitable (improper size
  argument to strlcpy()). I've sent a number of fixes for
  out-of-bounds-reads that this detected upstream already"

[arnd@arndb.de: x86: fix fortified memcpy]
  Link: http://lkml.kernel.org/r/20170627150047.660360-1-arnd@arndb.de
[keescook@chromium.org: avoid panic() in favor of BUG()]
  Link: http://lkml.kernel.org/r/20170626235122.GA25261@beast
[keescook@chromium.org: move from -mm, add ARCH_HAS_FORTIFY_SOURCE, tweak Kconfig help]
Link: http://lkml.kernel.org/r/20170526095404.20439-1-danielmicay@gmail.com
Link: http://lkml.kernel.org/r/1497903987-21002-8-git-send-email-keescook@chromium.org
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Daniel Axtens <dja@axtens.net>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig                     |   6 +
 arch/arm64/Kconfig               |   1 +
 arch/arm64/include/asm/string.h  |   5 +
 arch/powerpc/Kconfig             |   1 +
 arch/x86/Kconfig                 |   1 +
 arch/x86/boot/compressed/misc.c  |   5 +
 arch/x86/include/asm/string_32.h |   9 ++
 arch/x86/include/asm/string_64.h |   7 ++
 arch/x86/lib/memcpy_32.c         |   2 +-
 include/linux/string.h           | 200 +++++++++++++++++++++++++++++++
 lib/string.c                     |   7 ++
 security/Kconfig                 |   7 ++
 12 files changed, 250 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index fb9bd7d36b05a..21d0089117fe9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -223,6 +223,12 @@ config GENERIC_SMP_IDLE_THREAD
 config GENERIC_IDLE_POLL_SETUP
        bool
 
+config ARCH_HAS_FORTIFY_SOURCE
+	bool
+	help
+	  An architecture should select this when it can successfully
+	  build and run with CONFIG_FORTIFY_SOURCE.
+
 # Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h
 config ARCH_HAS_SET_MEMORY
 	bool
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8addb851ab5e3..dfd908630631a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -12,6 +12,7 @@ config ARM64
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select ARCH_HAS_KCOV
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 2eb714c4639f5..d0aa42907569b 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -63,6 +63,11 @@ extern int memcmp(const void *, const void *, size_t);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #endif
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fce2f4f208917..36f858c37ca70 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -125,6 +125,7 @@ config PPC
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_SET_COHERENT_MASK
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3d2b8ce54e007..781521b7cf9ef 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,6 +50,7 @@ config X86
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 00241c8155244..a0838ab929f22 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -411,3 +411,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	debug_putstr("done.\nBooting the kernel.\n");
 	return output;
 }
+
+void fortify_panic(const char *name)
+{
+	error("detected buffer overflow");
+}
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 3d3e8353ee5c0..e9ee84873de50 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -142,7 +142,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
 }
 
 #define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, size_t);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifdef CONFIG_X86_USE_3DNOW
 
 #include <asm/mmx.h>
@@ -195,11 +197,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
 #endif
 
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMMOVE
 void *memmove(void *dest, const void *src, size_t n);
 
+extern int memcmp(const void *, const void *, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #define memcmp __builtin_memcmp
+#endif
 
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *cs, int c, size_t count);
@@ -321,6 +327,8 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 	 : __memset_generic((s), (c), (count)))
 
 #define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #if (__GNUC__ >= 4)
 #define memset(s, c, count) __builtin_memset(s, c, count)
 #else
@@ -330,6 +338,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 				 (count))				\
 	 : __memset((s), (c), (count)))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 /*
  * find the first occurrence of byte 'c', or 1 past the area if none
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 1f22bc277c455..2a8c822de1fc6 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -31,6 +31,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
 extern void *memcpy(void *to, const void *from, size_t len);
 extern void *__memcpy(void *to, const void *from, size_t len);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifndef CONFIG_KMEMCHECK
 #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
 #define memcpy(dst, src, len)					\
@@ -51,6 +52,7 @@ extern void *__memcpy(void *to, const void *from, size_t len);
  */
 #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
@@ -77,6 +79,11 @@ int strcmp(const char *cs, const char *ct);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #define __HAVE_ARCH_MEMCPY_MCSAFE 1
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index cad12634d6bd8..2eab7d0bfeddc 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -6,7 +6,7 @@
 
 __visible void *memcpy(void *to, const void *from, size_t n)
 {
-#ifdef CONFIG_X86_USE_3DNOW
+#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
 	return __memcpy3d(to, from, n);
 #else
 	return __memcpy(to, from, n);
diff --git a/include/linux/string.h b/include/linux/string.h
index 7439d83eaa336..96f5a5fd03774 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -193,4 +193,204 @@ static inline const char *kbasename(const char *path)
 	return tail ? tail + 1 : path;
 }
 
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
+
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strcpy(p, q);
+	if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0)
+		fortify_panic(__func__);
+	return p;
+}
+
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__write_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (p_size == (size_t)-1)
+		return __builtin_strcat(p, q);
+	if (strlcat(p, q, p_size) >= p_size)
+		fortify_panic(__func__);
+	return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+	__kernel_size_t ret;
+	size_t p_size = __builtin_object_size(p, 0);
+	if (p_size == (size_t)-1)
+		return __builtin_strlen(p);
+	ret = strnlen(p, p_size);
+	if (p_size <= ret)
+		fortify_panic(__func__);
+	return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+	if (p_size <= ret && maxlen != ret)
+		fortify_panic(__func__);
+	return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+	size_t ret;
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __real_strlcpy(p, q, size);
+	ret = strlen(q);
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		if (__builtin_constant_p(len) && len >= p_size)
+			__write_overflow();
+		if (len >= p_size)
+			fortify_panic(__func__);
+		__builtin_memcpy(p, q, len);
+		p[len] = '\0';
+	}
+	return ret;
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+	size_t p_len, copy_len;
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strncat(p, q, count);
+	p_len = strlen(p);
+	copy_len = strnlen(q, count);
+	if (p_size < p_len + copy_len + 1)
+		fortify_panic(__func__);
+	__builtin_memcpy(p + p_len, q, copy_len);
+	p[p_len + copy_len] = '\0';
+	return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__write_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__write_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__write_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__read_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_kmemdup(p, size, gfp);
+}
+#endif
+
 #endif /* _LINUX_STRING_H_ */
diff --git a/lib/string.c b/lib/string.c
index 1c1fc9187b054..ebbb99c775bda 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -978,3 +978,10 @@ char *strreplace(char *s, char old, char new)
 	return s;
 }
 EXPORT_SYMBOL(strreplace);
+
+void fortify_panic(const char *name)
+{
+	pr_emerg("detected buffer overflow in %s\n", name);
+	BUG();
+}
+EXPORT_SYMBOL(fortify_panic);
diff --git a/security/Kconfig b/security/Kconfig
index d540bfe731903..e8e449444e658 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -163,6 +163,13 @@ config HARDENED_USERCOPY_PAGESPAN
 	  been removed. This config is intended to be used only while
 	  trying to find such users.
 
+config FORTIFY_SOURCE
+	bool "Harden common str/mem functions against buffer overflows"
+	depends on ARCH_HAS_FORTIFY_SOURCE
+	help
+	  Detect overflows of buffers in common string and memory functions
+	  where the compiler can determine and validate the buffer sizes.
+
 config STATIC_USERMODEHELPER
 	bool "Force all usermode helper calls through a single binary"
 	help
-- 
GitLab


From 579e14524c6593cb651a806a6563e14b263c00e2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 12 Jul 2017 14:36:13 -0700
Subject: [PATCH 1368/1958] sh: mark end of BUG() implementation as unreachable

When building the sh architecture, the compiler doesn't realize that
BUG() doesn't return, so it will complain about functions using BUG()
that are marked with the noreturn attribute:

   lib/string.c: In function 'fortify_panic':
>> lib/string.c:986:1: warning: 'noreturn' function does return
    }
    ^

Link: http://lkml.kernel.org/r/20170627192050.GA66784@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/include/asm/bug.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/include/asm/bug.h b/arch/sh/include/asm/bug.h
index 1b77f068be2b1..c9828f785ca0b 100644
--- a/arch/sh/include/asm/bug.h
+++ b/arch/sh/include/asm/bug.h
@@ -48,6 +48,7 @@ do {							\
 		   "i" (__FILE__),			\
 		   "i" (__LINE__), "i" (0),		\
 		   "i" (sizeof(struct bug_entry)));	\
+	unreachable();					\
 } while (0)
 
 #define __WARN_FLAGS(flags)				\
-- 
GitLab


From 022c204040f3fd22d6445bc35517786195b7ae80 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:17 -0700
Subject: [PATCH 1369/1958] random,stackprotect: introduce get_random_canary
 function

Patch series "stackprotector: ascii armor the stack canary", v2.

Zero out the first byte of the stack canary value on 64 bit systems, in
order to mitigate unterminated C string overflows.

The null byte both prevents C string functions from reading the canary,
and from writing it if the canary value were guessed or obtained through
some other means.

Reducing the entropy by 8 bits is acceptable on 64-bit systems, which
will still have 56 bits of entropy left, but not on 32 bit systems, so
the "ascii armor" canary is only implemented on 64-bit systems.

Inspired by the "ascii armor" code in execshield and Daniel Micay's
linux-hardened tree.

Also see https://github.com/thestinger/linux-hardened/

This patch (of 5):

Introduce get_random_canary(), which provides a random unsigned long
canary value with the first byte zeroed out on 64 bit architectures, in
order to mitigate non-terminated C string overflows.

The null byte both prevents C string functions from reading the canary,
and from writing it if the canary value were guessed or obtained through
some other means.

Reducing the entropy by 8 bits is acceptable on 64-bit systems, which
will still have 56 bits of entropy left, but not on 32 bit systems, so
the "ascii armor" canary is only implemented on 64-bit systems.

Inspired by the "ascii armor" code in the old execshield patches, and
Daniel Micay's linux-hardened tree.

Link: http://lkml.kernel.org/r/20170524155751.424-2-riel@redhat.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/random.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/include/linux/random.h b/include/linux/random.h
index ed5c3838780de..1fa0dc880bd78 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -57,6 +57,27 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+/*
+ * On 64-bit architectures, protect against non-terminated C string overflows
+ * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ */
+#ifdef CONFIG_64BIT
+# ifdef __LITTLE_ENDIAN
+#  define CANARY_MASK 0xffffffffffffff00UL
+# else /* big endian, 64 bits: */
+#  define CANARY_MASK 0x00ffffffffffffffUL
+# endif
+#else /* 32 bits: */
+# define CANARY_MASK 0xffffffffUL
+#endif
+
+static inline unsigned long get_random_canary(void)
+{
+	unsigned long val = get_random_long();
+
+	return val & CANARY_MASK;
+}
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
-- 
GitLab


From 7cd815bce828220deffd1654265f0ef891567774 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:20 -0700
Subject: [PATCH 1370/1958] fork,random: use get_random_canary() to set
 tsk->stack_canary

Use the ascii-armor canary to prevent unterminated C string overflows
from being able to successfully overwrite the canary, even if they
somehow obtain the canary value.

Inspired by execshield ascii-armor and Daniel Micay's linux-hardened
tree.

Link: http://lkml.kernel.org/r/20170524155751.424-3-riel@redhat.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index ade237a963088..17921b0390b4f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -554,7 +554,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	set_task_stack_end_magic(tsk);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-	tsk->stack_canary = get_random_long();
+	tsk->stack_canary = get_random_canary();
 #endif
 
 	/*
-- 
GitLab


From bf9eb5443844e288b73f7963dfc67a0d2fbc3849 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:23 -0700
Subject: [PATCH 1371/1958] x86: ascii armor the x86_64 boot init stack canary

Use the ascii-armor canary to prevent unterminated C string overflows
from being able to successfully overwrite the canary, even if they
somehow obtain the canary value.

Inspired by execshield ascii-armor and Daniel Micay's linux-hardened
tree.

Link: http://lkml.kernel.org/r/20170524155751.424-4-riel@redhat.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/stackprotector.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index dcbd9bcce7144..8abedf1d650e9 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -74,6 +74,7 @@ static __always_inline void boot_init_stack_canary(void)
 	get_random_bytes(&canary, sizeof(canary));
 	tsc = rdtsc();
 	canary += tsc + (tsc << 32UL);
+	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
 #ifdef CONFIG_X86_64
-- 
GitLab


From d21f54988bb2927341757d42152440c3abcae4eb Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:26 -0700
Subject: [PATCH 1372/1958] arm64: ascii armor the arm64 boot init stack canary

Use the ascii-armor canary to prevent unterminated C string overflows
from being able to successfully overwrite the canary, even if they
somehow obtain the canary value.

Inspired by execshield ascii-armor and Daniel Micay's linux-hardened
tree.

Link: http://lkml.kernel.org/r/20170524155751.424-5-riel@redhat.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/stackprotector.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index fe5e287dc56b7..b86a0865ddf16 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -30,6 +30,7 @@ static __always_inline void boot_init_stack_canary(void)
 	/* Try to get a semi random initial value. */
 	get_random_bytes(&canary, sizeof(canary));
 	canary ^= LINUX_VERSION_CODE;
+	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
 	__stack_chk_guard = current->stack_canary;
-- 
GitLab


From 2c8340eda45cee1784f7b29e6d7dcc740ea3d02a Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:29 -0700
Subject: [PATCH 1373/1958] sh64: ascii armor the sh64 boot init stack canary

Use the ascii-armor canary to prevent unterminated C string overflows
from being able to successfully overwrite the canary, even if they
somehow obtain the canary value.

Inspired by execshield ascii-armor and Daniel Micay's linux-hardened
tree.

Link: http://lkml.kernel.org/r/20170524123446.78510066@annuminas.surriel.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/include/asm/stackprotector.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/include/asm/stackprotector.h b/arch/sh/include/asm/stackprotector.h
index d9df3a76847c3..141515a43b781 100644
--- a/arch/sh/include/asm/stackprotector.h
+++ b/arch/sh/include/asm/stackprotector.h
@@ -19,6 +19,7 @@ static __always_inline void boot_init_stack_canary(void)
 	/* Try to get a semi random initial value. */
 	get_random_bytes(&canary, sizeof(canary));
 	canary ^= LINUX_VERSION_CODE;
+	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
 	__stack_chk_guard = current->stack_canary;
-- 
GitLab


From c204d21f2232d875e36b8774c36ffd027dc1d606 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:33 -0700
Subject: [PATCH 1374/1958] x86/mmap: properly account for stack randomization
 in mmap_base

When RLIMIT_STACK is, for example, 256MB, the current code results in a
gap between the top of the task and mmap_base of 256MB, failing to take
into account the amount by which the stack address was randomized.  In
other words, the stack gets less than RLIMIT_STACK space.

Ensure that the gap between the stack and mmap_base always takes stack
randomization and the stack guard gap into account.

Obtained from Daniel Micay's linux-hardened tree.

Link: http://lkml.kernel.org/r/20170622200033.25714-2-riel@redhat.com
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Reported-by: Florian Weimer <fweimer@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/mmap.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 797295e792b2f..229d04a83f856 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -92,13 +92,18 @@ unsigned long arch_mmap_rnd(void)
 static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
 	unsigned long gap_min, gap_max;
 
+	/* Values close to RLIM_INFINITY can overflow. */
+	if (gap + pad > gap)
+		gap += pad;
+
 	/*
 	 * Top of mmap area (just below the process stack).
 	 * Leave an at least ~128 MB hole with possible stack randomization.
 	 */
-	gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+	gap_min = SIZE_128M;
 	gap_max = (task_size / 6) * 5;
 
 	if (gap < gap_min)
-- 
GitLab


From cf92251dc52d2dfae6c6adaeae8b12ea50686946 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:36 -0700
Subject: [PATCH 1375/1958] arm64/mmap: properly account for stack
 randomization in mmap_base

When RLIMIT_STACK is, for example, 256MB, the current code results in a
gap between the top of the task and mmap_base of 256MB, failing to take
into account the amount by which the stack address was randomized.  In
other words, the stack gets less than RLIMIT_STACK space.

Ensure that the gap between the stack and mmap_base always takes stack
randomization and the stack guard gap into account.

Obtained from Daniel Micay's linux-hardened tree.

Link: http://lkml.kernel.org/r/20170622200033.25714-3-riel@redhat.com
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Reported-by: Florian Weimer <fweimer@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/mmap.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index adc208c2ae9c0..decccffb03cac 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -35,7 +35,7 @@
  * Leave enough space between the mmap area and the stack to honour ulimit in
  * the face of randomisation.
  */
-#define MIN_GAP (SZ_128M + ((STACK_RND_MASK << PAGE_SHIFT) + 1))
+#define MIN_GAP (SZ_128M)
 #define MAX_GAP	(STACK_TOP/6*5)
 
 static int mmap_is_legacy(void)
@@ -65,6 +65,11 @@ unsigned long arch_mmap_rnd(void)
 static unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
+
+	/* Values close to RLIM_INFINITY can overflow. */
+	if (gap + pad > gap)
+		gap += pad;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
-- 
GitLab


From 0a782dc31f4d867921eb7caf1f4bb1222e45bb0e Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 12 Jul 2017 14:36:39 -0700
Subject: [PATCH 1376/1958] powerpc,mmap: properly account for stack
 randomization in mmap_base

When RLIMIT_STACK is, for example, 256MB, the current code results in a
gap between the top of the task and mmap_base of 256MB, failing to take
into account the amount by which the stack address was randomized.  In
other words, the stack gets less than RLIMIT_STACK space.

Ensure that the gap between the stack and mmap_base always takes stack
randomization and the stack guard gap into account.

Inspired by Daniel Micay's linux-hardened tree.

Link: http://lkml.kernel.org/r/20170622200033.25714-4-riel@redhat.com
Signed-off-by: Rik van Riel <riel@redhat.com>
Reported-by: Florian Weimer <fweimer@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/mmap.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0ee6be4f1ba44..5d78b193fec41 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -34,16 +34,9 @@
 /*
  * Top of mmap area (just below the process stack).
  *
- * Leave at least a ~128 MB hole on 32bit applications.
- *
- * On 64bit applications we randomise the stack by 1GB so we need to
- * space our mmap start address by a further 1GB, otherwise there is a
- * chance the mmap area will end up closer to the stack than our ulimit
- * requires.
+ * Leave at least a ~128 MB hole.
  */
-#define MIN_GAP32 (128*1024*1024)
-#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
-#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
+#define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
 static inline int mmap_is_legacy(void)
@@ -71,9 +64,26 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
+static inline unsigned long stack_maxrandom_size(void)
+{
+	if (!(current->flags & PF_RANDOMIZE))
+		return 0;
+
+	/* 8MB for 32bit, 1GB for 64bit */
+	if (is_32bit_task())
+		return (1<<23);
+	else
+		return (1<<30);
+}
+
 static inline unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
+
+	/* Values close to RLIM_INFINITY can overflow. */
+	if (gap + pad > gap)
+		gap += pad;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
-- 
GitLab


From 473738eb78c3e379d682fb8a3cf7e1d17beded9f Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:42 -0700
Subject: [PATCH 1377/1958] MIPS: do not use __GFP_REPEAT for order-0 request

Patch series "mm: give __GFP_REPEAT a better semantic".

The main motivation for the change is that the current implementation of
__GFP_REPEAT is not very much useful.

The documentation says:
 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
 *   _might_ fail.  This depends upon the particular VM implementation.

It just fails to mention that this is true only for large (costly) high
order which has been the case since the flag was introduced.  A similar
semantic would be really helpful for smal orders as well, though,
because we have places where a failure with a specific fallback error
handling is preferred to a potential endless loop inside the page
allocator.

The earlier cleanup dropped __GFP_REPEAT usage for low (!costly) order
users so only those which might use larger orders have stayed.  One new
user added in the meantime is addressed in patch 1.

Let's rename the flag to something more verbose and use it for existing
users.  Semantic for those will not change.  Then implement low
(!costly) orders failure path which is hit after the page allocator is
about to invoke the oom killer.  With that we have a good counterpart
for __GFP_NORETRY and finally can tell try as hard as possible without
the OOM killer.

Xfs code already has an existing annotation for allocations which are
allowed to fail and we can trivially map them to the new gfp flag
because it will provide the semantic KM_MAYFAIL wants.  Christoph didn't
consider the new flag really necessary but didn't respond to the OOM
killer aspect of the change so I have kept the patch.  If this is still
seen as not really needed I can drop the patch.

kvmalloc will allow also !costly high order allocations to retry hard
before falling back to the vmalloc.

drm/i915 asked for the new semantic explicitly.

Memory migration code, especially for the memory hotplug, should back
off rather than invoking the OOM killer as well.

This patch (of 6):

Commit 3377e227af44 ("MIPS: Add 48-bit VA space (and 4-level page
tables) for 4K pages.") has added a new __GFP_REPEAT user but using this
flag doesn't really make any sense for order-0 request which is the case
here because PUD_ORDER is 0.  __GFP_REPEAT has historically effect only
on allocation requests with order > PAGE_ALLOC_COSTLY_ORDER.

This doesn't introduce any functional change.  This is a preparatory
patch for later work which renames the flag and redefines its semantic.

Link: http://lkml.kernel.org/r/20170623085345.11304-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/pgalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index a1bdb1ea52347..39b9f311c4ef4 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -116,7 +116,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pud_t *pud;
 
-	pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER);
+	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
 	if (pud)
 		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
 	return pud;
-- 
GitLab


From dcda9b04713c3f6ff0875652924844fae28286ea Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:45 -0700
Subject: [PATCH 1378/1958] mm, tree wide: replace __GFP_REPEAT by
 __GFP_RETRY_MAYFAIL with more useful semantic

__GFP_REPEAT was designed to allow retry-but-eventually-fail semantic to
the page allocator.  This has been true but only for allocations
requests larger than PAGE_ALLOC_COSTLY_ORDER.  It has been always
ignored for smaller sizes.  This is a bit unfortunate because there is
no way to express the same semantic for those requests and they are
considered too important to fail so they might end up looping in the
page allocator for ever, similarly to GFP_NOFAIL requests.

Now that the whole tree has been cleaned up and accidental or misled
usage of __GFP_REPEAT flag has been removed for !costly requests we can
give the original flag a better name and more importantly a more useful
semantic.  Let's rename it to __GFP_RETRY_MAYFAIL which tells the user
that the allocator would try really hard but there is no promise of a
success.  This will work independent of the order and overrides the
default allocator behavior.  Page allocator users have several levels of
guarantee vs.  cost options (take GFP_KERNEL as an example)

 - GFP_KERNEL & ~__GFP_RECLAIM - optimistic allocation without _any_
   attempt to free memory at all. The most light weight mode which even
   doesn't kick the background reclaim. Should be used carefully because
   it might deplete the memory and the next user might hit the more
   aggressive reclaim

 - GFP_KERNEL & ~__GFP_DIRECT_RECLAIM (or GFP_NOWAIT)- optimistic
   allocation without any attempt to free memory from the current
   context but can wake kswapd to reclaim memory if the zone is below
   the low watermark. Can be used from either atomic contexts or when
   the request is a performance optimization and there is another
   fallback for a slow path.

 - (GFP_KERNEL|__GFP_HIGH) & ~__GFP_DIRECT_RECLAIM (aka GFP_ATOMIC) -
   non sleeping allocation with an expensive fallback so it can access
   some portion of memory reserves. Usually used from interrupt/bh
   context with an expensive slow path fallback.

 - GFP_KERNEL - both background and direct reclaim are allowed and the
   _default_ page allocator behavior is used. That means that !costly
   allocation requests are basically nofail but there is no guarantee of
   that behavior so failures have to be checked properly by callers
   (e.g. OOM killer victim is allowed to fail currently).

 - GFP_KERNEL | __GFP_NORETRY - overrides the default allocator behavior
   and all allocation requests fail early rather than cause disruptive
   reclaim (one round of reclaim in this implementation). The OOM killer
   is not invoked.

 - GFP_KERNEL | __GFP_RETRY_MAYFAIL - overrides the default allocator
   behavior and all allocation requests try really hard. The request
   will fail if the reclaim cannot make any progress. The OOM killer
   won't be triggered.

 - GFP_KERNEL | __GFP_NOFAIL - overrides the default allocator behavior
   and all allocation requests will loop endlessly until they succeed.
   This might be really dangerous especially for larger orders.

Existing users of __GFP_REPEAT are changed to __GFP_RETRY_MAYFAIL
because they already had their semantic.  No new users are added.
__alloc_pages_slowpath is changed to bail out for __GFP_RETRY_MAYFAIL if
there is no progress and we have already passed the OOM point.

This means that all the reclaim opportunities have been exhausted except
the most disruptive one (the OOM killer) and a user defined fallback
behavior is more sensible than keep retrying in the page allocator.

[akpm@linux-foundation.org: fix arch/sparc/kernel/mdesc.c]
[mhocko@suse.com: semantic fix]
  Link: http://lkml.kernel.org/r/20170626123847.GM11534@dhcp22.suse.cz
[mhocko@kernel.org: address other thing spotted by Vlastimil]
  Link: http://lkml.kernel.org/r/20170626124233.GN11534@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/20170623085345.11304-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DMA-ISA-LPC.txt                |  2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c          |  2 +-
 arch/sparc/kernel/mdesc.c                    |  2 +-
 drivers/mmc/host/wbsd.c                      |  2 +-
 drivers/s390/char/vmcp.c                     |  2 +-
 drivers/target/target_core_transport.c       |  2 +-
 drivers/vhost/net.c                          |  2 +-
 drivers/vhost/scsi.c                         |  2 +-
 drivers/vhost/vsock.c                        |  2 +-
 include/linux/gfp.h                          | 56 +++++++++++++++-----
 include/linux/slab.h                         |  3 +-
 include/trace/events/mmflags.h               |  2 +-
 mm/hugetlb.c                                 |  4 +-
 mm/internal.h                                |  2 +-
 mm/page_alloc.c                              | 14 +++--
 mm/sparse-vmemmap.c                          |  4 +-
 mm/util.c                                    |  6 +--
 mm/vmalloc.c                                 |  2 +-
 mm/vmscan.c                                  |  8 +--
 net/core/dev.c                               |  6 +--
 net/core/skbuff.c                            |  2 +-
 net/sched/sch_fq.c                           |  2 +-
 tools/perf/builtin-kmem.c                    |  2 +-
 24 files changed, 86 insertions(+), 47 deletions(-)

diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
index c413313987521..7a065ac4a9d1c 100644
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/DMA-ISA-LPC.txt
@@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc.
 
 Unfortunately the memory available for ISA DMA is scarce so unless you
 allocate the memory during boot-up it's a good idea to also pass
-__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
+__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
 
 (This scarcity also means that you should allocate the buffer as
 early as possible and not release it until the driver is unloaded.)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 20b1485ff1e88..e2329db9d6f40 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
 	return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
 #else
 	struct page *page;
-	page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+	page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL),
 				4);
 	if (!page)
 		return NULL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 710e491206ed0..8cb0190e2a737 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -93,7 +93,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
 	}
 
 	if (!hpt)
-		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
 				       |__GFP_NOWARN, order - PAGE_SHIFT);
 
 	if (!hpt)
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index e4b4e790bf899..fa466ce45bc9b 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -205,7 +205,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
 	handle_size = (sizeof(struct mdesc_handle) -
 		       sizeof(struct mdesc_hdr) +
 		       mdesc_size);
-	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT);
+	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!base)
 		return NULL;
 
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index e15a9733fcfdd..9668616faf16b 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1386,7 +1386,7 @@ static void wbsd_request_dma(struct wbsd_host *host, int dma)
 	 * order for ISA to be able to DMA to it.
 	 */
 	host->dma_buffer = kmalloc(WBSD_DMA_SIZE,
-		GFP_NOIO | GFP_DMA | __GFP_REPEAT | __GFP_NOWARN);
+		GFP_NOIO | GFP_DMA | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (!host->dma_buffer)
 		goto free;
 
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 65f5a794f26d0..98749fa817dac 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -98,7 +98,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count,
 	}
 	if (!session->response)
 		session->response = (char *)__get_free_pages(GFP_KERNEL
-						| __GFP_REPEAT | GFP_DMA,
+						| __GFP_RETRY_MAYFAIL | GFP_DMA,
 						get_order(session->bufsize));
 	if (!session->response) {
 		mutex_unlock(&session->mutex);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index f1b3a46bdcaff..1bdc10651bcd1 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -252,7 +252,7 @@ int transport_alloc_session_tags(struct se_session *se_sess,
 	int rc;
 
 	se_sess->sess_cmd_map = kzalloc(tag_num * tag_size,
-					GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+					GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
 	if (!se_sess->sess_cmd_map) {
 		se_sess->sess_cmd_map = vzalloc(tag_num * tag_size);
 		if (!se_sess->sess_cmd_map) {
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e3d7ea1288c68..06d044862e589 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -897,7 +897,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	struct sk_buff **queue;
 	int i;
 
-	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT);
+	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!n)
 		return -ENOMEM;
 	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index fd6c8b66f06fd..ff02a942c4d5f 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1404,7 +1404,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
 	struct vhost_virtqueue **vqs;
 	int r = -ENOMEM, i;
 
-	vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
 	if (!vs) {
 		vs = vzalloc(sizeof(*vs));
 		if (!vs)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 3f63e03de8e80..c9de9c41aa976 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -508,7 +508,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
 	/* This struct is large and allocation could fail, fall back to vmalloc
 	 * if there is no other way.
 	 */
-	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT);
+	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!vsock)
 		return -ENOMEM;
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4c6656f1fee7a..bcfb9f7c46f5d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -25,7 +25,7 @@ struct vm_area_struct;
 #define ___GFP_FS		0x80u
 #define ___GFP_COLD		0x100u
 #define ___GFP_NOWARN		0x200u
-#define ___GFP_REPEAT		0x400u
+#define ___GFP_RETRY_MAYFAIL	0x400u
 #define ___GFP_NOFAIL		0x800u
 #define ___GFP_NORETRY		0x1000u
 #define ___GFP_MEMALLOC		0x2000u
@@ -136,26 +136,56 @@ struct vm_area_struct;
  *
  * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
  *
- * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- *   _might_ fail.  This depends upon the particular VM implementation.
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules
+ *
+ * __GFP_NORETRY: The VM implementation will try only very lightweight
+ *   memory direct reclaim to get some memory under memory pressure (thus
+ *   it can sleep). It will avoid disruptive actions like OOM killer. The
+ *   caller must handle the failure which is quite likely to happen under
+ *   heavy memory pressure. The flag is suitable when failure can easily be
+ *   handled at small cost, such as reduced throughput
+ *
+ * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ *   procedures that have previously failed if there is some indication
+ *   that progress has been made else where.  It can wait for other
+ *   tasks to attempt high level approaches to freeing memory such as
+ *   compaction (which removes fragmentation) and page-out.
+ *   There is still a definite limit to the number of retries, but it is
+ *   a larger limit than with __GFP_NORETRY.
+ *   Allocations with this flag may fail, but only when there is
+ *   genuinely little unused memory. While these allocations do not
+ *   directly trigger the OOM killer, their failure indicates that
+ *   the system is likely to need to use the OOM killer soon.  The
+ *   caller must handle failure, but can reasonably do so by failing
+ *   a higher-level request, or completing it only in a much less
+ *   efficient manner.
+ *   If the allocation does fail, and the caller is in a position to
+ *   free some non-essential memory, doing so could benefit the system
+ *   as a whole.
  *
  * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- *   cannot handle allocation failures. New users should be evaluated carefully
- *   (and the flag should be used only when there is no reasonable failure
- *   policy) but it is definitely preferable to use the flag rather than
- *   opencode endless loop around allocator.
- *
- * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- *   return NULL when direct reclaim and memory compaction have failed to allow
- *   the allocation to succeed.  The OOM killer is not called with the current
- *   implementation.
+ *   cannot handle allocation failures. The allocation could block
+ *   indefinitely but will never return with failure. Testing for
+ *   failure is pointless.
+ *   New users should be evaluated carefully (and the flag should be
+ *   used only when there is no reasonable failure policy) but it is
+ *   definitely preferable to use the flag rather than opencode endless
+ *   loop around allocator.
+ *   Using this flag for costly allocations is _highly_ discouraged.
  */
 #define __GFP_IO	((__force gfp_t)___GFP_IO)
 #define __GFP_FS	((__force gfp_t)___GFP_FS)
 #define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
 #define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
 #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
-#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)
+#define __GFP_RETRY_MAYFAIL	((__force gfp_t)___GFP_RETRY_MAYFAIL)
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 04a7f7993e678..41473df6dfb07 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
  *
  * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
  *
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail
+ *   eventually.
  *
  * There are other flags available as well, but these are not intended
  * for general use, and so are not documented here. For a full list of
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 10e3663a75a67..8e50d01c645fc 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -34,7 +34,7 @@
 	{(unsigned long)__GFP_FS,		"__GFP_FS"},		\
 	{(unsigned long)__GFP_COLD,		"__GFP_COLD"},		\
 	{(unsigned long)__GFP_NOWARN,		"__GFP_NOWARN"},	\
-	{(unsigned long)__GFP_REPEAT,		"__GFP_REPEAT"},	\
+	{(unsigned long)__GFP_RETRY_MAYFAIL,	"__GFP_RETRY_MAYFAIL"},	\
 	{(unsigned long)__GFP_NOFAIL,		"__GFP_NOFAIL"},	\
 	{(unsigned long)__GFP_NORETRY,		"__GFP_NORETRY"},	\
 	{(unsigned long)__GFP_COMP,		"__GFP_COMP"},		\
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1e516520433dc..bc48ee783dd9e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1384,7 +1384,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 
 	page = __alloc_pages_node(nid,
 		htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
-						__GFP_REPEAT|__GFP_NOWARN,
+						__GFP_RETRY_MAYFAIL|__GFP_NOWARN,
 		huge_page_order(h));
 	if (page) {
 		prep_new_huge_page(h, page, nid);
@@ -1525,7 +1525,7 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
 {
 	int order = huge_page_order(h);
 
-	gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+	gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 	return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
diff --git a/mm/internal.h b/mm/internal.h
index 0e4f558412fb1..24d88f0847059 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -23,7 +23,7 @@
  * hints such as HIGHMEM usage.
  */
 #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+			__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
 			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
 			__GFP_ATOMIC)
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 64b7d82a9b1ab..6d30e914afb6c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3284,6 +3284,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	/* The OOM killer will not help higher order allocs */
 	if (order > PAGE_ALLOC_COSTLY_ORDER)
 		goto out;
+	/*
+	 * We have already exhausted all our reclaim opportunities without any
+	 * success so it is time to admit defeat. We will skip the OOM killer
+	 * because it is very likely that the caller has a more reasonable
+	 * fallback than shooting a random task.
+	 */
+	if (gfp_mask & __GFP_RETRY_MAYFAIL)
+		goto out;
 	/* The OOM killer does not needlessly kill tasks for lowmem */
 	if (ac->high_zoneidx < ZONE_NORMAL)
 		goto out;
@@ -3413,7 +3421,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
 	}
 
 	/*
-	 * !costly requests are much more important than __GFP_REPEAT
+	 * !costly requests are much more important than __GFP_RETRY_MAYFAIL
 	 * costly ones because they are de facto nofail and invoke OOM
 	 * killer to move on while costly can fail and users are ready
 	 * to cope with that. 1/4 retries is rather arbitrary but we
@@ -3920,9 +3928,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 	/*
 	 * Do not retry costly high order allocations unless they are
-	 * __GFP_REPEAT
+	 * __GFP_RETRY_MAYFAIL
 	 */
-	if (costly_order && !(gfp_mask & __GFP_REPEAT))
+	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
 		goto nopage;
 
 	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a56c3989f7731..c50b1a14d55ec 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -56,11 +56,11 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 
 		if (node_state(node, N_HIGH_MEMORY))
 			page = alloc_pages_node(
-				node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+				node, GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
 				get_order(size));
 		else
 			page = alloc_pages(
-				GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+				GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
 				get_order(size));
 		if (page)
 			return page_address(page);
diff --git a/mm/util.c b/mm/util.c
index 26be6407abd7e..6520f2d4a2269 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -339,7 +339,7 @@ EXPORT_SYMBOL(vm_mmap);
  * Uses kmalloc to get the memory but if the allocation fails then falls back
  * to the vmalloc allocator. Use kvfree for freeing the memory.
  *
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_RETRY_MAYFAIL
  * is supported only for large (>32kB) allocations, and it should be used only if
  * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
  *
@@ -367,11 +367,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 		kmalloc_flags |= __GFP_NOWARN;
 
 		/*
-		 * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly
+		 * We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY for !costly
 		 * requests because there is no other way to tell the allocator
 		 * that we want to fail rather than retry endlessly.
 		 */
-		if (!(kmalloc_flags & __GFP_REPEAT) ||
+		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL) ||
 				(size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
 			kmalloc_flags |= __GFP_NORETRY;
 	}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6016ab079e2bd..8698c1c86c4db 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1795,7 +1795,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
  *	allocator with @gfp_mask flags.  Map them into contiguous
  *	kernel virtual space, using a pagetable protection of @prot.
  *
- *	Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
+ *	Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
  *	and __GFP_NOFAIL are not supported
  *
  *	Any use of gfp flags outside of GFP_KERNEL should be consulted
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e9210f825219c..a1af041930a6b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2506,18 +2506,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 		return false;
 
 	/* Consider stopping depending on scan and reclaim activity */
-	if (sc->gfp_mask & __GFP_REPEAT) {
+	if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
 		/*
-		 * For __GFP_REPEAT allocations, stop reclaiming if the
+		 * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
 		 * full LRU list has been scanned and we are still failing
 		 * to reclaim pages. This full LRU scan is potentially
-		 * expensive but a __GFP_REPEAT caller really wants to succeed
+		 * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
 		 */
 		if (!nr_reclaimed && !nr_scanned)
 			return false;
 	} else {
 		/*
-		 * For non-__GFP_REPEAT allocations which can presumably
+		 * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
 		 * fail without consequence, stop if we failed to reclaim
 		 * any pages from the last SWAP_CLUSTER_MAX number of
 		 * pages that were scanned. This will return to the
diff --git a/net/core/dev.c b/net/core/dev.c
index 02440518dd69e..8515f8fe0460a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7384,7 +7384,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	BUG_ON(count < 1);
 
-	rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+	rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!rx)
 		return -ENOMEM;
 
@@ -7424,7 +7424,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	if (count < 1 || count > 0xffff)
 		return -EINVAL;
 
-	tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+	tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!tx)
 		return -ENOMEM;
 
@@ -7965,7 +7965,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	/* ensure 32-byte alignment of whole construct */
 	alloc_size += NETDEV_ALIGN - 1;
 
-	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT);
+	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!p)
 		return NULL;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b11341ed69ad..f990eb8b30a9c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4747,7 +4747,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
 	gfp_head = gfp_mask;
 	if (gfp_head & __GFP_DIRECT_RECLAIM)
-		gfp_head |= __GFP_REPEAT;
+		gfp_head |= __GFP_RETRY_MAYFAIL;
 
 	*errcode = -ENOBUFS;
 	skb = alloc_skb(header_len, gfp_head);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 147fde73a0f56..263d16e3219e6 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -648,7 +648,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
 		return 0;
 
 	/* If XPS was setup, we can allocate memory on right NUMA node */
-	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT,
+	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL,
 			      netdev_queue_numa_node_read(sch->dev_queue));
 	if (!array)
 		return -ENOMEM;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0a8a1c45af87d..a1497c516d85a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -643,7 +643,7 @@ static const struct {
 	{ "__GFP_FS",			"F" },
 	{ "__GFP_COLD",			"CO" },
 	{ "__GFP_NOWARN",		"NWR" },
-	{ "__GFP_REPEAT",		"R" },
+	{ "__GFP_RETRY_MAYFAIL",	"R" },
 	{ "__GFP_NOFAIL",		"NF" },
 	{ "__GFP_NORETRY",		"NR" },
 	{ "__GFP_COMP",			"C" },
-- 
GitLab


From 91c63ecda75d3004da8ffd5d2590383e0ba47722 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:49 -0700
Subject: [PATCH 1379/1958] xfs: map KM_MAYFAIL to __GFP_RETRY_MAYFAIL

KM_MAYFAIL didn't have any suitable GFP_FOO counterpart until recently
so it relied on the default page allocator behavior for the given set of
flags.  This means that small allocations actually never failed.

Now that we have __GFP_RETRY_MAYFAIL flag which works independently on
the allocation request size we can map KM_MAYFAIL to it.  The allocator
will try as hard as it can to fulfill the request but fails eventually
if the progress cannot be made.  It does so without triggering the OOM
killer which can be seen as an improvement because KM_MAYFAIL users
should be able to deal with allocation failures.

Link: http://lkml.kernel.org/r/20170623085345.11304-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: David Daney <david.daney@cavium.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/xfs/kmem.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index d6ea520162b26..4d85992d75b22 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -54,6 +54,16 @@ kmem_flags_convert(xfs_km_flags_t flags)
 			lflags &= ~__GFP_FS;
 	}
 
+	/*
+	 * Default page/slab allocator behavior is to retry for ever
+	 * for small allocations. We can override this behavior by using
+	 * __GFP_RETRY_MAYFAIL which will tell the allocator to retry as long
+	 * as it is feasible but rather fail than retry forever for all
+	 * request sizes.
+	 */
+	if (flags & KM_MAYFAIL)
+		lflags |= __GFP_RETRY_MAYFAIL;
+
 	if (flags & KM_ZERO)
 		lflags |= __GFP_ZERO;
 
-- 
GitLab


From cc965a29db172c28e25b9742db86a85766a08bf5 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:52 -0700
Subject: [PATCH 1380/1958] mm: kvmalloc support __GFP_RETRY_MAYFAIL for all
 sizes

Now that __GFP_RETRY_MAYFAIL has a reasonable semantic regardless of the
request size we can drop the hackish implementation for !costly orders.
__GFP_RETRY_MAYFAIL retries as long as the reclaim makes a forward
progress and backs of when we are out of memory for the requested size.
Therefore we do not need to enforce__GFP_NORETRY for !costly orders just
to silent the oom killer anymore.

Link: http://lkml.kernel.org/r/20170623085345.11304-5-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/util.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/mm/util.c b/mm/util.c
index 6520f2d4a2269..ee250e2cde344 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -339,9 +339,9 @@ EXPORT_SYMBOL(vm_mmap);
  * Uses kmalloc to get the memory but if the allocation fails then falls back
  * to the vmalloc allocator. Use kvfree for freeing the memory.
  *
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_RETRY_MAYFAIL
- * is supported only for large (>32kB) allocations, and it should be used only if
- * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
+ * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
+ * preferable to the vmalloc fallback, due to visible performance drawbacks.
  *
  * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people.
  */
@@ -366,13 +366,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	if (size > PAGE_SIZE) {
 		kmalloc_flags |= __GFP_NOWARN;
 
-		/*
-		 * We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY for !costly
-		 * requests because there is no other way to tell the allocator
-		 * that we want to fail rather than retry endlessly.
-		 */
-		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL) ||
-				(size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
 			kmalloc_flags |= __GFP_NORETRY;
 	}
 
-- 
GitLab


From dbb329561ae9ccfb942b6ba330030a4654e8908e Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:55 -0700
Subject: [PATCH 1381/1958] drm/i915: use __GFP_RETRY_MAYFAIL

Commit 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than
incur the oom for gfx allocations") has tried to remove disruptive OOM
killer because the userspace should be able to cope with allocation
failures.

At the time only __GFP_NORETRY could achieve that and it turned out that
this would fail the allocations just too easily.  So "drm/i915: Remove
__GFP_NORETRY from our buffer allocator" removed it and hoped for a
better solution.  __GFP_RETRY_MAYFAIL is that solution.  It will keep
retrying the allocation until there is no more progress and we would go
OOM.  Instead we fail the allocation and let the caller to deal with it.

Link: http://lkml.kernel.org/r/20170623085345.11304-6-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_gem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7dcac3bfb7719..969bac8404f18 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2434,8 +2434,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 				 * again with !__GFP_NORETRY. However, we still
 				 * want to fail this allocation rather than
 				 * trigger the out-of-memory killer and for
-				 * this we want the future __GFP_MAYFAIL.
+				 * this we want __GFP_RETRY_MAYFAIL.
 				 */
+				gfp |= __GFP_RETRY_MAYFAIL;
 			}
 		} while (1);
 
-- 
GitLab


From 0f55685627d6dd2beda55a82abc02297f0f8e5c2 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 12 Jul 2017 14:36:58 -0700
Subject: [PATCH 1382/1958] mm, migration: do not trigger OOM killer when
 migrating memory

Page migration (for memory hotplug, soft_offline_page or mbind) needs to
allocate a new memory.  This can trigger an oom killer if the target
memory is depleated.  Although quite unlikely, still possible,
especially for the memory hotplug (offlining of memoery).

Up to now we didn't really have reasonable means to back off.
__GFP_NORETRY can fail just too easily and __GFP_THISNODE sticks to a
single node and that is not suitable for all callers.

But now that we have __GFP_RETRY_MAYFAIL we should use it.  It is
preferable to fail the migration than disrupt the system by killing some
processes.

Link: http://lkml.kernel.org/r/20170623085345.11304-7-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Belits <alex.belits@cavium.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 2 +-
 mm/mempolicy.c          | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 4634da521238f..3e0d405dc842d 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -34,7 +34,7 @@ extern char *migrate_reason_names[MR_TYPES];
 static inline struct page *new_page_nodemask(struct page *page,
 				int preferred_nid, nodemask_t *nodemask)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
 
 	if (PageHuge(page))
 		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7d8e56214ac09..d911fa5cb2a73 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1078,7 +1078,8 @@ static struct page *new_page(struct page *page, unsigned long start, int **x)
 	/*
 	 * if !vma, alloc_page_vma() will use task or system default policy
 	 */
-	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL,
+			vma, address);
 }
 #else
 
-- 
GitLab


From 596ed45b5b5b7e4624c813ddeffe0e100f8b13ba Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:02 -0700
Subject: [PATCH 1383/1958] checkpatch: improve the STORAGE_CLASS test

Make sure static, extern, and asmlinkage appear before a specific type.

e.g.:
	int asmlinkage foo(void)
is better written
       asmlinkage int foo(void)

Link: http://lkml.kernel.org/r/31704c96df2d5fd9df0b41165940a7a4feb16a63.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 8f940c09918f0..2287a0bca863b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5576,10 +5576,18 @@ sub process {
 			    "architecture specific defines should be avoided\n" .  $herecurr);
 		}
 
+# check that the storage class is not after a type
+		if ($line =~ /\b($Type)\s+($Storage)\b/) {
+			WARN("STORAGE_CLASS",
+			     "storage class '$2' should be located before type '$1'\n" . $herecurr);
+		}
 # Check that the storage class is at the beginning of a declaration
-		if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) {
+		if ($line =~ /\b$Storage\b/ &&
+		    $line !~ /^.\s*$Storage/ &&
+		    $line =~ /^.\s*(.+?)\$Storage\s/ &&
+		    $1 !~ /[\,\)]\s*$/) {
 			WARN("STORAGE_CLASS",
-			     "storage class should be at the beginning of the declaration\n" . $herecurr)
+			     "storage class should be at the beginning of the declaration\n" . $herecurr);
 		}
 
 # check the location of the inline attribute, that it is between
-- 
GitLab


From ead9fba6b8e9a81107807f5a221576cb01206a6e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:05 -0700
Subject: [PATCH 1384/1958] ARM: KVM: move asmlinkage before type

asmlinkage is either 'extern "C"' or blank.

Move the uses of asmlinkage before the return types to be similar
to the rest of the kernel.

Link: http://lkml.kernel.org/r/005b8e120650c6a13b541e420f4e3605603fe9e6.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/kvm_hyp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 58508900c4bb2..14b5903f02246 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -110,8 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
 
-void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
-void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
 static inline bool __vfp_enabled(void)
 {
 	return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
@@ -120,8 +120,8 @@ static inline bool __vfp_enabled(void)
 void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
 void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
 
-int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
+asmlinkage int __guest_enter(struct kvm_vcpu *vcpu,
 			     struct kvm_cpu_context *host);
-int asmlinkage __hyp_do_panic(const char *, int, u32);
+asmlinkage int __hyp_do_panic(const char *, int, u32);
 
 #endif /* __ARM_KVM_HYP_H__ */
-- 
GitLab


From 1e90d0ed327a289c6fee4f36b25f67d3d7864a43 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:08 -0700
Subject: [PATCH 1385/1958] ARM: HP Jornada 7XX: move inline before return type

Convert 'u8 inline' to 'inline u8' to be the same style used by the rest
of the kernel.

Miscellanea:

jornada_ssp_reverse is an odd function.
It is declared inline but is also EXPORT_SYMBOL.
It is also apparently only used by jornada720_ssp.c
Likely the EXPORT_SYMBOL could be removed and the function
converted to static.

The addition of static and removal of EXPORT_SYMBOL was not done.

Link: http://lkml.kernel.org/r/5bd3b2bf39c6c9caf773949f18158f8f5ec08582.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mach-sa1100/jornada720_ssp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
index b143c4659346d..7fc11a3c17b4c 100644
--- a/arch/arm/mach-sa1100/jornada720_ssp.c
+++ b/arch/arm/mach-sa1100/jornada720_ssp.c
@@ -33,7 +33,7 @@ static unsigned long jornada_ssp_flags;
  * we need to reverse all data we receive from the mcu due to its physical location
  * returns : 01110111 -> 11101110
  */
-u8 inline jornada_ssp_reverse(u8 byte)
+inline u8 jornada_ssp_reverse(u8 byte)
 {
 	return
 		((0x80 & byte) >> 7) |
-- 
GitLab


From 8d95a3dca060b09ddafa4cd1c82f27ab19490fad Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:10 -0700
Subject: [PATCH 1386/1958] CRIS: gpio: move inline before return type

Move inline to be like the rest of the kernel.

Link: http://lkml.kernel.org/r/6bf1bec049897c4158f698b866810f47c728f233.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/cris/arch-v10/drivers/gpio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/cris/arch-v10/drivers/gpio.c b/arch/cris/arch-v10/drivers/gpio.c
index 64285e0d34815..dfd3b3ba5e4ec 100644
--- a/arch/cris/arch-v10/drivers/gpio.c
+++ b/arch/cris/arch-v10/drivers/gpio.c
@@ -399,7 +399,7 @@ gpio_release(struct inode *inode, struct file *filp)
 /* Main device API. ioctl's to read/set/clear bits, as well as to
  * set alarms to wait for using a subsequent select().
  */
-unsigned long inline setget_input(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg)
 {
 	/* Set direction 0=unchanged 1=input,
 	 * return mask with 1=input */
@@ -450,7 +450,7 @@ unsigned long inline setget_input(struct gpio_private *priv, unsigned long arg)
 	return dir_g_in_bits;
 } /* setget_input */
 
-unsigned long inline setget_output(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_output(struct gpio_private *priv, unsigned long arg)
 {
 	if (USE_PORTS(priv)) {
 		*priv->dir = *priv->dir_shadow |=
-- 
GitLab


From 1d731bb77247b9456680f379c31c30f9774473ce Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:13 -0700
Subject: [PATCH 1387/1958] FRV: tlbflush: move asmlinkage before return type

Make the use of asmlinkage like the rest of the kernel.

Link: http://lkml.kernel.org/r/efb2dfed4d9315bf68ec0334c81b65af176a0174.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/include/asm/tlbflush.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/frv/include/asm/tlbflush.h b/arch/frv/include/asm/tlbflush.h
index 7ac5eafc5d983..75879420f578e 100644
--- a/arch/frv/include/asm/tlbflush.h
+++ b/arch/frv/include/asm/tlbflush.h
@@ -18,10 +18,10 @@
 #ifdef CONFIG_MMU
 
 #ifndef __ASSEMBLY__
-extern void asmlinkage __flush_tlb_all(void);
-extern void asmlinkage __flush_tlb_mm(unsigned long contextid);
-extern void asmlinkage __flush_tlb_page(unsigned long contextid, unsigned long start);
-extern void asmlinkage __flush_tlb_range(unsigned long contextid,
+extern asmlinkage void __flush_tlb_all(void);
+extern asmlinkage void __flush_tlb_mm(unsigned long contextid);
+extern asmlinkage void __flush_tlb_page(unsigned long contextid, unsigned long start);
+extern asmlinkage void __flush_tlb_range(unsigned long contextid,
 					 unsigned long start, unsigned long end);
 #endif /* !__ASSEMBLY__ */
 
-- 
GitLab


From d778931d7b32ada39de0a45267b0c4af0600e277 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:16 -0700
Subject: [PATCH 1388/1958] ia64: move inline before return type

Make the use of inline like the rest of the kernel.

Link: http://lkml.kernel.org/r/d47074493af80ce12590340294bc49618165c30d.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/mca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 79c7c46d7dc17..555b111801560 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -334,7 +334,7 @@ static void ia64_mlogbuf_dump_from_init(void)
 	ia64_mlogbuf_dump();
 }
 
-static void inline
+static inline void
 ia64_mca_spin(const char *func)
 {
 	if (monarch_cpu == smp_processor_id())
-- 
GitLab


From c02f2a911f2bb833f982d869e3a4ae67e1468969 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:19 -0700
Subject: [PATCH 1389/1958] ia64: sn: pci: move inline before type

Make the use of inline like the rest of the kernel.

Link: http://lkml.kernel.org/r/f42b2202bd0d4e7ccf79ce5348bb255a035e67bb.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/sn/pci/pcibr/pcibr_ate.c | 2 +-
 arch/ia64/sn/pci/tioce_provider.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
index 5bc34eac9e019..b67bb4cb73ff7 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -140,7 +140,7 @@ static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info,
 /*
  * Update the ate.
  */
-void inline
+inline void
 ate_write(struct pcibus_info *pcibus_info, int ate_index, int count,
 	  volatile u64 ate)
 {
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 46d3df4b03a16..3bd9abc35485e 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -52,7 +52,7 @@
  * All registers defined in struct tioce will meet that criteria.
  */
 
-static void inline
+static inline void
 tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
 	u64 mmr_base;
@@ -78,7 +78,7 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
 	}
 }
 
-static void inline
+static inline void
 tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
 	u64 mmr_base;
-- 
GitLab


From 9d8a9ae2818fa44099195146a7fa85963409a2e9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:22 -0700
Subject: [PATCH 1390/1958] m68k: coldfire: move inline before return type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/14db9c166d5b68efa77e337cfe49bb9b29bca3f7.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/coldfire/intc-simr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/m68k/coldfire/intc-simr.c b/arch/m68k/coldfire/intc-simr.c
index 7cf2c156f72d2..15c4b7a6e38f4 100644
--- a/arch/m68k/coldfire/intc-simr.c
+++ b/arch/m68k/coldfire/intc-simr.c
@@ -35,7 +35,7 @@
 #define	EINT7	67	/* EDGE Port interrupt 7 */
 
 static unsigned int irqebitmap[] = { 0, 1, 4, 7 };
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
 {
 	return irqebitmap[irq - EINT0];
 }
@@ -51,7 +51,7 @@ static unsigned int inline irq2ebit(unsigned int irq)
 #define	EINT1	65	/* EDGE Port interrupt 1 */
 #define	EINT7	71	/* EDGE Port interrupt 7 */
 
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
 {
 	return irq - EINT0;
 }
-- 
GitLab


From b745fcb949f90762e7e126737683e2a26a4e8035 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:25 -0700
Subject: [PATCH 1391/1958] MIPS: SMP: move asmlinkage before return type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/756d3fb543e981b9284e756fa27616725a354b28.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/smp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 98a117a05fbce..bab3d41e5987e 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -47,7 +47,7 @@ extern int __cpu_logical_map[NR_CPUS];
 /* Mask of CPUs which are currently definitely operating coherently */
 extern cpumask_t cpu_coherent_mask;
 
-extern void asmlinkage smp_bootstrap(void);
+extern asmlinkage void smp_bootstrap(void);
 
 extern void calculate_cpu_foreign_map(void);
 
-- 
GitLab


From 0cef25c1d89fb617683fc76645bb27ab297f2a1e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:28 -0700
Subject: [PATCH 1392/1958] sh: move inline before return type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/f81bb2a67a97b1fd8b6ea99bd350d8a0f6864fb1.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/mm/cache-sh5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index d94dadedf74f5..445b5e69b73cb 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -234,7 +234,7 @@ static void sh64_icache_inv_current_user_range(unsigned long start, unsigned lon
 #define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
 static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
 
-static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
+static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
 {
 	/* Purge all ways in a particular block of sets, specified by the base
 	   set number and number of sets.  Can handle wrap-around, if that's
-- 
GitLab


From 0825f49f225bd24c3ae46ff2bb716067e3684fc3 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:31 -0700
Subject: [PATCH 1393/1958] x86/efi: move asmlinkage before return type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/1cd3d401626e51ea0e2333a860e76e80bc560a4c.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/efi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index d2ff779f347e5..796ff6c1aa539 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -33,7 +33,7 @@
 
 #ifdef CONFIG_X86_32
 
-extern unsigned long asmlinkage efi_call_phys(void *, ...);
+extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
 #define arch_efi_call_virt_setup()	kernel_fpu_begin()
 #define arch_efi_call_virt_teardown()	kernel_fpu_end()
@@ -52,7 +52,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
 #define EFI_LOADER_SIGNATURE	"EL64"
 
-extern u64 asmlinkage efi_call(void *fp, ...);
+extern asmlinkage u64 efi_call(void *fp, ...);
 
 #define efi_call_phys(f, args...)		efi_call((f), args)
 
-- 
GitLab


From e0710e510c59602e4fa80d784c946e02e8968523 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:34 -0700
Subject: [PATCH 1394/1958] drivers: s390: move static and inline before return
 type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/3f980cd89084ae09716353aba3171e4b3815e690.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/s390/net/ctcm_main.c    | 2 +-
 drivers/s390/net/qeth_l3_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 1563b1458e44d..2ade6131a89f2 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -1115,7 +1115,7 @@ static const struct net_device_ops ctcm_mpc_netdev_ops = {
 	.ndo_start_xmit		= ctcmpc_tx,
 };
 
-void static ctcm_dev_setup(struct net_device *dev)
+static void ctcm_dev_setup(struct net_device *dev)
 {
 	dev->type = ARPHRD_SLIP;
 	dev->tx_queue_len = 100;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 3062cde33a3db..8975cd3213904 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2408,7 +2408,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	return rc;
 }
 
-int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
+inline int qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 {
 	int cast_type = RTN_UNSPEC;
 	struct neighbour *n = NULL;
-- 
GitLab


From a9e5bfdb9d786efbc6995edfe6902788527457ad Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:37 -0700
Subject: [PATCH 1395/1958] drivers: tty: serial: move inline before return
 type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/55d3e89d50bb03d603bfb28019fab07f48bdc714.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Pat Gefre <pfg@sgi.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/serial/ioc3_serial.c | 4 ++--
 drivers/tty/serial/ioc4_serial.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/ioc3_serial.c b/drivers/tty/serial/ioc3_serial.c
index 2a61dd6b40094..906ee770ff4a8 100644
--- a/drivers/tty/serial/ioc3_serial.c
+++ b/drivers/tty/serial/ioc3_serial.c
@@ -377,7 +377,7 @@ static struct ioc3_port *get_ioc3_port(struct uart_port *the_port)
  *			called per port from attach...
  * @port: port to initialize
  */
-static int inline port_init(struct ioc3_port *port)
+static inline int port_init(struct ioc3_port *port)
 {
 	uint32_t sio_cr;
 	struct port_hooks *hooks = port->ip_hooks;
@@ -1430,7 +1430,7 @@ static int receive_chars(struct uart_port *the_port)
  * @pending: interrupts to handle
  */
 
-static int inline
+static inline int
 ioc3uart_intr_one(struct ioc3_submodule *is,
 			struct ioc3_driver_data *idd,
 			unsigned int pending)
diff --git a/drivers/tty/serial/ioc4_serial.c b/drivers/tty/serial/ioc4_serial.c
index f96bcf9bee259..43d7d32eb1500 100644
--- a/drivers/tty/serial/ioc4_serial.c
+++ b/drivers/tty/serial/ioc4_serial.c
@@ -824,7 +824,7 @@ pending_intrs(struct ioc4_soft *soft, int type)
  *			called per port from attach...
  * @port: port to initialize
  */
-static int inline port_init(struct ioc4_port *port)
+static inline int port_init(struct ioc4_port *port)
 {
 	uint32_t sio_cr;
 	struct hooks *hooks = port->ip_hooks;
@@ -1048,7 +1048,7 @@ static irqreturn_t ioc4_intr(int irq, void *arg)
  *			IOC4 with serial ports in the system.
  * @idd: Master module data for this IOC4
  */
-static int inline ioc4_attach_local(struct ioc4_driver_data *idd)
+static inline int ioc4_attach_local(struct ioc4_driver_data *idd)
 {
 	struct ioc4_port *port;
 	struct ioc4_port *ports[IOC4_NUM_SERIAL_PORTS];
-- 
GitLab


From 4abf87f41a6cedf7a780af63991d6bebec303be0 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:40 -0700
Subject: [PATCH 1396/1958] USB: serial: safe_serial: move __inline__ before
 return type

Make the code like the rest of the kernel.
Also use inline instead of __inline__.

Link: http://lkml.kernel.org/r/a5072b74b6c293e6ec93c4900482e9d3267f15b2.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Johan Hovold <johan@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/serial/safe_serial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index 8a069aa154eda..27d7a70162987 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -180,7 +180,7 @@ static const __u16 crc10_table[256] = {
  * Perform a memcpy and calculate fcs using ppp 10bit CRC algorithm. Return
  * new 10 bit FCS.
  */
-static __u16 __inline__ fcs_compute10(unsigned char *sp, int len, __u16 fcs)
+static inline __u16 fcs_compute10(unsigned char *sp, int len, __u16 fcs)
 {
 	for (; len-- > 0; fcs = CRC10_FCS(fcs, *sp++));
 	return fcs;
-- 
GitLab


From dce3944717523bfe65e2b3cbd1d3af0ccc27ace9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:43 -0700
Subject: [PATCH 1397/1958] video: fbdev: intelfb: move inline before return
 type

Make the code like the rest of the kernel.

But there is an oddity here because the inline should probably be removed.

It's an extern function in intelfb.h and it is used in intelfbdrv.c and
intelfbhw.c.

The inline is kept here as I suppose it's possible for some compiler to
make the uses inline in intelfbdrv and and also create an external
function for intelfbhw.

Link: http://lkml.kernel.org/r/8ba151a1fdc84e42cbf4aafc798513c0158edee1.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Maik Broemme <mbroemme@libmpq.org>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbdev/intelfb/intelfbdrv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c
index 6b444400a86c2..ffc391208b271 100644
--- a/drivers/video/fbdev/intelfb/intelfbdrv.c
+++ b/drivers/video/fbdev/intelfb/intelfbdrv.c
@@ -907,7 +907,7 @@ static void intelfb_pci_unregister(struct pci_dev *pdev)
  *                       helper functions                      *
  ***************************************************************/
 
-int __inline__ intelfb_var_to_depth(const struct fb_var_screeninfo *var)
+__inline__ int intelfb_var_to_depth(const struct fb_var_screeninfo *var)
 {
 	DBG_MSG("intelfb_var_to_depth: bpp: %d, green.length is %d\n",
 		var->bits_per_pixel, var->green.length);
-- 
GitLab


From ba168a46b0b9e06f38f63ef9adf182950c82d6e0 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:46 -0700
Subject: [PATCH 1398/1958] video: fbdev: omap: move inline before return type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/bc5927726abc70d7c066df7ab4cb7cfce4a7b577.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbdev/omap/lcdc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c
index e3d9b9ea5498e..938cba0d24aef 100644
--- a/drivers/video/fbdev/omap/lcdc.c
+++ b/drivers/video/fbdev/omap/lcdc.c
@@ -79,12 +79,12 @@ static struct omap_lcd_controller {
 	unsigned long		vram_size;
 } lcdc;
 
-static void inline enable_irqs(int mask)
+static inline void enable_irqs(int mask)
 {
 	lcdc.irq_mask |= mask;
 }
 
-static void inline disable_irqs(int mask)
+static inline void disable_irqs(int mask)
 {
 	lcdc.irq_mask &= ~mask;
 }
@@ -466,7 +466,7 @@ static void calc_ck_div(int is_tft, int pck, int *pck_div)
 	}
 }
 
-static void inline setup_regs(void)
+static inline void setup_regs(void)
 {
 	u32 l;
 	struct lcd_panel *panel = lcdc.fbdev->panel;
-- 
GitLab


From c945dccc80856107f109c36a7d0e29a371b5d1b5 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Jul 2017 14:37:48 -0700
Subject: [PATCH 1399/1958] ARM: samsung: usb-ohci: move inline before return
 type

Make the code like the rest of the kernel.

Link: http://lkml.kernel.org/r/667a515b8d0f10f2465d519f8595edd91552fc5e.1499284835.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/platform_data/usb-ohci-s3c2410.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/platform_data/usb-ohci-s3c2410.h b/include/linux/platform_data/usb-ohci-s3c2410.h
index 7fa1fbefc3f22..cc7554ae6e8bd 100644
--- a/include/linux/platform_data/usb-ohci-s3c2410.h
+++ b/include/linux/platform_data/usb-ohci-s3c2410.h
@@ -31,7 +31,7 @@ struct s3c2410_hcd_info {
 	void		(*report_oc)(struct s3c2410_hcd_info *, int ports);
 };
 
-static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
+static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
 {
 	if (info->report_oc != NULL) {
 		(info->report_oc)(info, ports);
-- 
GitLab


From 3e8f399da490e6ac20a3cfd6aa404c9aa961a9a2 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 12 Jul 2017 14:37:51 -0700
Subject: [PATCH 1400/1958] writeback: rework wb_[dec|inc]_stat family of
 functions

Currently the writeback statistics code uses a percpu counters to hold
various statistics.  Furthermore we have 2 families of functions - those
which disable local irq and those which doesn't and whose names begin
with double underscore.  However, they both end up calling
__add_wb_stats which in turn calls percpu_counter_add_batch which is
already irq-safe.

Exploiting this fact allows to eliminated the __wb_* functions since
they don't add any further protection than we already have.
Furthermore, refactor the wb_* function to call __add_wb_stat directly
without the irq-disabling dance.  This will likely result in better
runtime of code which deals with modifying the stat counters.

While at it also document why percpu_counter_add_batch is in fact
preempt and irq-safe since at least 3 people got confused.

Link: http://lkml.kernel.org/r/1498029937-27293-1-git-send-email-nborisov@suse.com
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c           |  8 ++++----
 include/linux/backing-dev.h | 24 ++----------------------
 lib/percpu_counter.c        |  7 +++++++
 mm/page-writeback.c         | 10 +++++-----
 4 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8b426f83909f6..245c430a2e418 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -380,8 +380,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 		struct page *page = radix_tree_deref_slot_protected(slot,
 							&mapping->tree_lock);
 		if (likely(page) && PageDirty(page)) {
-			__dec_wb_stat(old_wb, WB_RECLAIMABLE);
-			__inc_wb_stat(new_wb, WB_RECLAIMABLE);
+			dec_wb_stat(old_wb, WB_RECLAIMABLE);
+			inc_wb_stat(new_wb, WB_RECLAIMABLE);
 		}
 	}
 
@@ -391,8 +391,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 							&mapping->tree_lock);
 		if (likely(page)) {
 			WARN_ON_ONCE(!PageWriteback(page));
-			__dec_wb_stat(old_wb, WB_WRITEBACK);
-			__inc_wb_stat(new_wb, WB_WRITEBACK);
+			dec_wb_stat(old_wb, WB_WRITEBACK);
+			inc_wb_stat(new_wb, WB_WRITEBACK);
 		}
 	}
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 334165c911f0a..854e1bdd0b2a4 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
 	percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
-static inline void __inc_wb_stat(struct bdi_writeback *wb,
-				 enum wb_stat_item item)
-{
-	__add_wb_stat(wb, item, 1);
-}
-
 static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__inc_wb_stat(wb, item);
-	local_irq_restore(flags);
-}
-
-static inline void __dec_wb_stat(struct bdi_writeback *wb,
-				 enum wb_stat_item item)
-{
-	__add_wb_stat(wb, item, -1);
+	__add_wb_stat(wb, item, 1);
 }
 
 static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__dec_wb_stat(wb, item);
-	local_irq_restore(flags);
+	__add_wb_stat(wb, item, -1);
 }
 
 static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 8ee7e5ec21be2..3bf4a9984f4cb 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,6 +72,13 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
+/**
+ * This function is both preempt and irq safe. The former is due to explicit
+ * preemption disable. The latter is guaranteed by the fact that the slow path
+ * is explicitly protected by an irq-safe spinlock whereas the fast patch uses
+ * this_cpu_add which is irq-safe by definition. Hence there is no need muck
+ * with irq state before calling this one
+ */
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b60cc7ddac2b..96e93b214d317 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -601,7 +601,7 @@ static inline void __wb_writeout_inc(struct bdi_writeback *wb)
 {
 	struct wb_domain *cgdom;
 
-	__inc_wb_stat(wb, WB_WRITTEN);
+	inc_wb_stat(wb, WB_WRITTEN);
 	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
 			       wb->bdi->max_prop_frac);
 
@@ -2435,8 +2435,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
 		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		__inc_node_page_state(page, NR_DIRTIED);
-		__inc_wb_stat(wb, WB_RECLAIMABLE);
-		__inc_wb_stat(wb, WB_DIRTIED);
+		inc_wb_stat(wb, WB_RECLAIMABLE);
+		inc_wb_stat(wb, WB_DIRTIED);
 		task_io_account_write(PAGE_SIZE);
 		current->nr_dirtied++;
 		this_cpu_inc(bdp_ratelimits);
@@ -2741,7 +2741,7 @@ int test_clear_page_writeback(struct page *page)
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
-				__dec_wb_stat(wb, WB_WRITEBACK);
+				dec_wb_stat(wb, WB_WRITEBACK);
 				__wb_writeout_inc(wb);
 			}
 		}
@@ -2786,7 +2786,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi))
-				__inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 
 			/*
 			 * We can come through here when swapping anonymous
-- 
GitLab


From c7b5a4e6e8fbef8fdf2c529f953000d24c561339 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 6 Jul 2017 02:32:02 +0200
Subject: [PATCH 1401/1958] PCI / PM: Fix native PME handling during system
 suspend/resume

Commit 76cde7e49590 (PCI / PM: Make PCIe PME interrupts wake up from
suspend-to-idle) went too far with preventing pcie_pme_work_fn() from
clearing the root port's PME Status and re-enabling the PME interrupt
which should be done for PMEs to work correctly after system resume.

The failing scenario is as follows:

 1. pcie_pme_suspend() finds that the PME IRQ should be designated
    for system wakeup, so it calls enable_irq_wake() and then sets
    data->suspend_level to PME_SUSPEND_WAKEUP.

 2. PME interrupt happens at this point.

 3. pcie_pme_irq() runs, disables the PME interrupt and queues up
    the execution of pcie_pme_work_fn().

 4. pcie_pme_work_fn() runs before pcie_pme_resume() and breaks out
    of the loop right away, because data->suspend_level is not
    PME_SUSPEND_NONE, and it doesn't re-enable the PME interrupt
    for the same reason.

 5. pcie_pme_resume() runs and simply calls disable_irq_wake()
    without re-enabling the PME interrupt (because data->suspend_level
    is not PME_SUSPEND_NONE), so the PME interrupt remains disabled
    and the PME Status remains set.

To fix this notice that there is no reason why pcie_pme_work_fn()
should behave in a special way during system resume if the PME
interrupt is not disabled by pcie_pme_suspend() and partially revert
commit 76cde7e49590 and restore the previous (and correct) behavior
of pcie_pme_work_fn().

Fixes: 76cde7e49590 (PCI / PM: Make PCIe PME interrupts wake up from suspend-to-idle)
Reported-and-tested-by: Naresh Solanki <naresh.solanki@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/pme.c | 35 +++++++++++++----------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 80e58d25006d0..fafdb165dd2ed 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -40,17 +40,11 @@ static int __init pcie_pme_setup(char *str)
 }
 __setup("pcie_pme=", pcie_pme_setup);
 
-enum pme_suspend_level {
-	PME_SUSPEND_NONE = 0,
-	PME_SUSPEND_WAKEUP,
-	PME_SUSPEND_NOIRQ,
-};
-
 struct pcie_pme_service_data {
 	spinlock_t lock;
 	struct pcie_device *srv;
 	struct work_struct work;
-	enum pme_suspend_level suspend_level;
+	bool noirq; /* If set, keep the PME interrupt disabled. */
 };
 
 /**
@@ -228,7 +222,7 @@ static void pcie_pme_work_fn(struct work_struct *work)
 	spin_lock_irq(&data->lock);
 
 	for (;;) {
-		if (data->suspend_level != PME_SUSPEND_NONE)
+		if (data->noirq)
 			break;
 
 		pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
@@ -255,7 +249,7 @@ static void pcie_pme_work_fn(struct work_struct *work)
 		spin_lock_irq(&data->lock);
 	}
 
-	if (data->suspend_level == PME_SUSPEND_NONE)
+	if (!data->noirq)
 		pcie_pme_interrupt_enable(port, true);
 
 	spin_unlock_irq(&data->lock);
@@ -378,7 +372,7 @@ static int pcie_pme_suspend(struct pcie_device *srv)
 {
 	struct pcie_pme_service_data *data = get_service_data(srv);
 	struct pci_dev *port = srv->port;
-	bool wakeup, wake_irq_enabled = false;
+	bool wakeup;
 	int ret;
 
 	if (device_may_wakeup(&port->dev)) {
@@ -388,19 +382,16 @@ static int pcie_pme_suspend(struct pcie_device *srv)
 		wakeup = pcie_pme_check_wakeup(port->subordinate);
 		up_read(&pci_bus_sem);
 	}
-	spin_lock_irq(&data->lock);
 	if (wakeup) {
 		ret = enable_irq_wake(srv->irq);
-		if (ret == 0) {
-			data->suspend_level = PME_SUSPEND_WAKEUP;
-			wake_irq_enabled = true;
-		}
-	}
-	if (!wake_irq_enabled) {
-		pcie_pme_interrupt_enable(port, false);
-		pcie_clear_root_pme_status(port);
-		data->suspend_level = PME_SUSPEND_NOIRQ;
+		if (!ret)
+			return 0;
 	}
+
+	spin_lock_irq(&data->lock);
+	pcie_pme_interrupt_enable(port, false);
+	pcie_clear_root_pme_status(port);
+	data->noirq = true;
 	spin_unlock_irq(&data->lock);
 
 	synchronize_irq(srv->irq);
@@ -417,15 +408,15 @@ static int pcie_pme_resume(struct pcie_device *srv)
 	struct pcie_pme_service_data *data = get_service_data(srv);
 
 	spin_lock_irq(&data->lock);
-	if (data->suspend_level == PME_SUSPEND_NOIRQ) {
+	if (data->noirq) {
 		struct pci_dev *port = srv->port;
 
 		pcie_clear_root_pme_status(port);
 		pcie_pme_interrupt_enable(port, true);
+		data->noirq = false;
 	} else {
 		disable_irq_wake(srv->irq);
 	}
-	data->suspend_level = PME_SUSPEND_NONE;
 	spin_unlock_irq(&data->lock);
 
 	return 0;
-- 
GitLab


From bd664f6b3e376a8ef4990f87d08271cc2d01ba9a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 12 Jul 2017 19:25:47 -0700
Subject: [PATCH 1402/1958] disable new gcc-7.1.1 warnings for now

I made the mistake of upgrading my desktop to the new Fedora 26 that
comes with gcc-7.1.1.

There's nothing wrong per se that I've noticed, but I now have 1500
lines of warnings, mostly from the new format-truncation warning
triggering all over the tree.

We use 'snprintf()' and friends in a lot of places, and often know that
the numbers are fairly small (ie a controller index or similar), but gcc
doesn't know that, and sees an 'int', and thinks that it could be some
huge number.  And then complains when our buffers are not able to fit
the name for the ten millionth controller.

These warnings aren't necessarily bad per se, and we probably want to
look through them subsystem by subsystem, but at least during the merge
window they just mean that I can't even see if somebody is introducing
any *real* problems when I pull.

So warnings disabled for now.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Makefile b/Makefile
index 06ef9947cf7c0..547947ff87de9 100644
--- a/Makefile
+++ b/Makefile
@@ -622,6 +622,9 @@ include arch/$(SRCARCH)/Makefile
 
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, format-truncation)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, format-overflow)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, int-in-bool-context)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= $(call cc-option,-Oz,-Os)
-- 
GitLab


From 3c48d86cc959309ee168fb87737a8cb3f97c5224 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 12 Jul 2017 15:04:16 -0700
Subject: [PATCH 1403/1958] clk: Provide bulk prepare_enable disable_unprepare
 variants

This extends the existing set of bulk helpers with prepare_enable and
disable_unprepare variants.

Cc: Russell King <linux@armlinux.org.uk>,
Cc: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 include/linux/clk.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/include/linux/clk.h b/include/linux/clk.h
index c673f0b917512..690e6a6921e14 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -657,6 +657,28 @@ static inline void clk_disable_unprepare(struct clk *clk)
 	clk_unprepare(clk);
 }
 
+static inline int clk_bulk_prepare_enable(int num_clks,
+					  struct clk_bulk_data *clks)
+{
+	int ret;
+
+	ret = clk_bulk_prepare(num_clks, clks);
+	if (ret)
+		return ret;
+	ret = clk_bulk_enable(num_clks, clks);
+	if (ret)
+		clk_bulk_unprepare(num_clks, clks);
+
+	return ret;
+}
+
+static inline void clk_bulk_disable_unprepare(int num_clks,
+					      struct clk_bulk_data *clks)
+{
+	clk_bulk_disable(num_clks, clks);
+	clk_bulk_unprepare(num_clks, clks);
+}
+
 #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK)
 struct clk *of_clk_get(struct device_node *np, int index);
 struct clk *of_clk_get_by_name(struct device_node *np, const char *name);
-- 
GitLab


From b855629b9a4107c37999723c09cc9fd357b53caf Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Jul 2017 14:08:25 +0200
Subject: [PATCH 1404/1958] s390/perf: fix problem state detection

The P sample bit indicates problem state and not PER.

Fixes: commit a752598254 ("s390: rename struct psw_bits members")
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 0c82f7903fc7a..c1bf75ffb8756 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -998,7 +998,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 	psw_bits(regs.psw).ia	= sfr->basic.ia;
 	psw_bits(regs.psw).dat	= sfr->basic.T;
 	psw_bits(regs.psw).wait = sfr->basic.W;
-	psw_bits(regs.psw).per	= sfr->basic.P;
+	psw_bits(regs.psw).pstate = sfr->basic.P;
 	psw_bits(regs.psw).as	= sfr->basic.AS;
 
 	/*
-- 
GitLab


From 2daace78a8c94e2cd20164b8efc18171c56e92ec Mon Sep 17 00:00:00 2001
From: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Date: Thu, 6 Jul 2017 04:00:45 +0200
Subject: [PATCH 1405/1958] s390: chp: handle CRW_ERC_INIT for channel-path
 status change

When channel path is identified as the report source code (RSC)
of a CRW, and initialized (CRW_ERC_INIT) is recognized as the
error recovery code (ERC) by the channel subsystem, it indicates
a "path has come" event.

Let's handle this case in chp_process_crw().

Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 7e0d4f724ddae..432fc40990bd2 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -559,6 +559,7 @@ static void chp_process_crw(struct crw *crw0, struct crw *crw1,
 	chpid.id = crw0->rsid;
 	switch (crw0->erc) {
 	case CRW_ERC_IPARM: /* Path has come. */
+	case CRW_ERC_INIT:
 		if (!chp_is_registered(chpid))
 			chp_new(chpid);
 		chsc_chp_online(chpid);
-- 
GitLab


From 97ca7bfc19605bc08e9183441b8b8545e84032d6 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Jul 2017 10:12:58 +0200
Subject: [PATCH 1406/1958] s390/mm: set change and reference bit on lazy key
 enablement

When we enable storage keys for a guest lazily, we reset the ACC and F
values. That is correct assuming that these are 0 on a clear reset and
the guest obviously has not used any key setting instruction.

We also zero out the change and reference bit. This is not correct as
the architecture prefers over-indication instead of under-indication
for the keyless->keyed transition.

This patch fixes the behaviour and always sets guest change and guest
reference for all guest storage keys on the keyless -> keyed switch.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgtable.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index d4d409ba206b2..4a1f7366b17ae 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -591,11 +591,11 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 	unsigned long ptev;
 	pgste_t pgste;
 
-	/* Clear storage key */
+	/* Clear storage key ACC and F, but set R/C */
 	preempt_disable();
 	pgste = pgste_get_lock(ptep);
-	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
-			      PGSTE_GR_BIT | PGSTE_GC_BIT);
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
 	ptev = pte_val(*ptep);
 	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
 		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
-- 
GitLab


From bbdc74dc19e09ac4e71bfb219596b3d5bc786720 Mon Sep 17 00:00:00 2001
From: Grzegorz Sluja <grzegorzx.sluja@intel.com>
Date: Thu, 13 Jul 2017 11:17:58 +0200
Subject: [PATCH 1407/1958] mmc: block: Prevent new req entering queue after
 its cleanup

The commit 304419d8a7e9 ("mmc: core: Allocate per-request data using the
block layer core"), refactored the mechanism of queue handling, but also
made mmc_init_request() to be called after mmc_cleanup_queue(). This
triggers a null pointer dereference:

[  683.123791] BUG: unable to handle kernel NULL pointer dereference at (null)
[  683.123801] IP: mmc_init_request+0x2c/0xf0 [mmc_block]
...
[  683.123905] Call Trace:
[  683.123913]  alloc_request_size+0x4f/0x70
[  683.123919]  mempool_alloc+0x5f/0x150
[  683.123925]  ? __enqueue_entity+0x6c/0x70
[  683.123928]  get_request+0x3ad/0x720
[  683.123933]  ? prepare_to_wait_event+0x110/0x110
[  683.123937]  blk_queue_bio+0xc1/0x3a0
[  683.123940]  generic_make_request+0xf8/0x2a0
[  683.123942]  submit_bio+0x75/0x150
[  683.123947]  submit_bio_wait+0x51/0x70
[  683.123951]  blkdev_issue_flush+0x5c/0x90
[  683.123956]  ext4_sync_fs+0x171/0x1b0
[  683.123961]  sync_filesystem+0x73/0x90
[  683.123965]  fsync_bdev+0x24/0x50
[  683.123971]  invalidate_partition+0x24/0x50
[  683.123973]  del_gendisk+0xb2/0x2a0
[  683.123977]  mmc_blk_remove_req.part.38+0x71/0xa0 [mmc_block]
[  683.123980]  mmc_blk_remove+0xba/0x190 [mmc_block]
[  683.123990]  mmc_bus_remove+0x1a/0x20 [mmc_core]
[  683.123995]  device_release_driver_internal+0x141/0x200
[  683.123999]  device_release_driver+0x12/0x20
[  683.124001]  bus_remove_device+0xfd/0x170
[  683.124004]  device_del+0x1e8/0x330
[  683.124012]  mmc_remove_card+0x60/0xc0 [mmc_core]
[  683.124019]  mmc_remove+0x19/0x30 [mmc_core]
[  683.124025]  mmc_stop_host+0xfb/0x1a0 [mmc_core]
[  683.124032]  mmc_remove_host+0x1a/0x40 [mmc_core]
[  683.124037]  sdhci_remove_host+0x2e/0x1c0 [mmc_sdhci]
[  683.124042]  sdhci_pci_remove_slot+0x3f/0x80 [sdhci_pci]
[  683.124045]  sdhci_pci_remove+0x39/0x70 [sdhci_pci]
[  683.124049]  pci_device_remove+0x39/0xc0
[  683.124052]  device_release_driver_internal+0x141/0x200
[  683.124056]  driver_detach+0x3f/0x80
[  683.124059]  bus_remove_driver+0x55/0xd0
[  683.124062]  driver_unregister+0x2c/0x50
[  683.124065]  pci_unregister_driver+0x29/0x90
[  683.124069]  sdhci_driver_exit+0x10/0x4f3 [sdhci_pci]
[  683.124073]  SyS_delete_module+0x171/0x250
[  683.124078]  entry_SYSCALL_64_fastpath+0x1e/0xa9

Fix this by setting the queue DYING flag before cleanup the queue, as it
prevents new reqs from entering the queue.

Signed-off-by: Grzegorz Sluja <grzegorzx.sluja@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Fixes: 304419d8a7e9 ("mmc: core: Allocate per-request data using the...")
[Ulf: Updated the changelog]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index e0363223996e6..8ac59dc80f23e 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2170,6 +2170,7 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
 		 * from being accepted.
 		 */
 		card = md->queue.card;
+		blk_set_queue_dying(md->queue.queue);
 		mmc_cleanup_queue(&md->queue);
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
-- 
GitLab


From 69f0d429c413fe96db2c187475cebcc6e3a8c7f5 Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@linaro.org>
Date: Thu, 13 Jul 2017 14:18:24 +0800
Subject: [PATCH 1408/1958] locking/rtmutex: Remove unnecessary priority
 adjustment

We don't need to adjust priority before adding a new pi_waiter, the
priority only needs to be updated after pi_waiter change or task
priority change.

Steven Rostedt pointed out:

  "Interesting, I did some git mining and this was added with the original
   entry of the rtmutex.c (23f78d4a03c5). Looking at even that version, I
   don't see the purpose of adjusting the task prio here. It is done
   before anything changes in the task."

Signed-off-by: Alex Shi <alex.shi@linaro.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1499926704-28841-1-git-send-email-alex.shi@linaro.org
[ Enhance the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rtmutex.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 78069895032a9..649dc9d3951a5 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -963,7 +963,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 		return -EDEADLK;
 
 	raw_spin_lock(&task->pi_lock);
-	rt_mutex_adjust_prio(task);
 	waiter->task = task;
 	waiter->lock = lock;
 	waiter->prio = task->prio;
-- 
GitLab


From 9c284c41c0886f09e75c323a16278b6d353b0b4a Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 12 Jul 2017 08:40:01 -0700
Subject: [PATCH 1409/1958] mmc: tmio-mmc: fix bad pointer math

The existing code gives an incorrect pointer value.
The buffer pointer 'buf' was of type unsigned short *, and 'count' was a
number in bytes. A cast of buf should have been used.

However, instead of casting, just change the code to use u32 pointers.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 8185e51f358a: ("mmc: tmio-mmc: add support for 32bit data port")
Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc_core.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 82b80d42f7ae9..88a94355ac901 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -409,30 +409,29 @@ static void tmio_mmc_transfer_data(struct tmio_mmc_host *host,
 	 * Transfer the data
 	 */
 	if (host->pdata->flags & TMIO_MMC_32BIT_DATA_PORT) {
-		u8 data[4] = { };
+		u32 data = 0;
+		u32 *buf32 = (u32 *)buf;
 
 		if (is_read)
-			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT, (u32 *)buf,
+			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT, buf32,
 					   count >> 2);
 		else
-			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT, (u32 *)buf,
+			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT, buf32,
 					    count >> 2);
 
 		/* if count was multiple of 4 */
 		if (!(count & 0x3))
 			return;
 
-		buf8 = (u8 *)(buf + (count >> 2));
+		buf32 += count >> 2;
 		count %= 4;
 
 		if (is_read) {
-			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT,
-					   (u32 *)data, 1);
-			memcpy(buf8, data, count);
+			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT, &data, 1);
+			memcpy(buf32, &data, count);
 		} else {
-			memcpy(data, buf8, count);
-			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT,
-					    (u32 *)data, 1);
+			memcpy(&data, buf32, count);
+			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT, &data, 1);
 		}
 
 		return;
-- 
GitLab


From 4d9bcaddacf23861c5ee088b0c03e7034c3d59d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?=
 <ernesto.mnd.fernandez@gmail.com>
Date: Fri, 23 Jun 2017 21:37:21 -0300
Subject: [PATCH 1410/1958] ext2: Fix memory leak when truncate races
 ext2_get_blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Buffer heads referencing indirect blocks may not be released if the file
is truncated at the right time. This happens because ext2_get_branch()
returns NULL when it finds the whole chain of indirect blocks already
set, and when truncate alters the chain this value of NULL is
treated as the address of the last head to be released. Handle this in the
same way as it's done after the got_it label.

Signed-off-by: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2dcbd56988843..30163d007b2f2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -659,6 +659,7 @@ static int ext2_get_blocks(struct inode *inode,
 				 */
 				err = -EAGAIN;
 				count = 0;
+				partial = chain + depth - 1;
 				break;
 			}
 			blk = le32_to_cpu(*(chain[depth-1].p + count));
-- 
GitLab


From 768516894fb2a0addee6b802af0f8789c0b4c322 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Thu, 13 Jul 2017 19:20:44 +0800
Subject: [PATCH 1411/1958] nbd: kill unused ret in recv_work

No need to return value in queue work, kill ret variable.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index dea7d85134ee6..87a0a29f6e7e3 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -626,7 +626,6 @@ static void recv_work(struct work_struct *work)
 	struct nbd_device *nbd = args->nbd;
 	struct nbd_config *config = nbd->config;
 	struct nbd_cmd *cmd;
-	int ret = 0;
 
 	while (1) {
 		cmd = nbd_read_stat(nbd, args->index);
@@ -636,7 +635,6 @@ static void recv_work(struct work_struct *work)
 			mutex_lock(&nsock->tx_lock);
 			nbd_mark_nsock_dead(nbd, nsock, 1);
 			mutex_unlock(&nsock->tx_lock);
-			ret = PTR_ERR(cmd);
 			break;
 		}
 
-- 
GitLab


From a826faf108e2d855929342268e68c43ba667379a Mon Sep 17 00:00:00 2001
From: Ladi Prosek <lprosek@redhat.com>
Date: Mon, 26 Jun 2017 09:56:43 +0200
Subject: [PATCH 1412/1958] KVM: x86: make backwards_tsc_observed a per-VM
 variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The backwards_tsc_observed global introduced in commit 16a9602 is never
reset to false. If a VM happens to be running while the host is suspended
(a common source of the TSC jumping backwards), master clock will never
be enabled again for any VM. In contrast, if no VM is running while the
host is suspended, master clock is unaffected. This is inconsistent and
unnecessarily strict. Let's track the backwards_tsc_observed variable
separately and let each VM start with a clean slate.

Real world impact: My Windows VMs get slower after my laptop undergoes a
suspend/resume cycle. The only way to get the perf back is unloading and
reloading the kvm module.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/x86.c              | 6 ++----
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1588e9e3dc01f..ef37d0dc61bd8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -803,6 +803,7 @@ struct kvm_arch {
 	int audit_point;
 	#endif
 
+	bool backwards_tsc_observed;
 	bool boot_vcpu_runs_old_kvmclock;
 	u32 bsp_vcpu_id;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ca128a9c9cc4e..08aa5e442aa79 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -134,8 +134,6 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
 
-static bool __read_mostly backwards_tsc_observed = false;
-
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -1719,7 +1717,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 					&ka->master_cycle_now);
 
 	ka->use_master_clock = host_tsc_clocksource && vcpus_matched
-				&& !backwards_tsc_observed
+				&& !ka->backwards_tsc_observed
 				&& !ka->boot_vcpu_runs_old_kvmclock;
 
 	if (ka->use_master_clock)
@@ -7835,8 +7833,8 @@ int kvm_arch_hardware_enable(void)
 	 */
 	if (backwards_tsc) {
 		u64 delta_cyc = max_tsc - local_tsc;
-		backwards_tsc_observed = true;
 		list_for_each_entry(kvm, &vm_list, vm_list) {
+			kvm->arch.backwards_tsc_observed = true;
 			kvm_for_each_vcpu(i, vcpu, kvm) {
 				vcpu->arch.tsc_offset_adjustment += delta_cyc;
 				vcpu->arch.last_host_tsc = local_tsc;
-- 
GitLab


From efc479e6900c22bad9a2b649d13405ed9cde2d53 Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan@virtuozzo.com>
Date: Thu, 22 Jun 2017 16:51:01 +0300
Subject: [PATCH 1413/1958] kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a flaw in the Hyper-V SynIC implementation in KVM: when message
page or event flags page is enabled by setting the corresponding msr,
KVM zeroes it out.  This is problematic because on migration the
corresponding MSRs are loaded on the destination, so the content of
those pages is lost.

This went unnoticed so far because the only user of those pages was
in-KVM hyperv synic timers, which could continue working despite that
zeroing.

Newer QEMU uses those pages for Hyper-V VMBus implementation, and
zeroing them breaks the migration.

Besides, in newer QEMU the content of those pages is fully managed by
QEMU, so zeroing them is undesirable even when writing the MSRs from the
guest side.

To support this new scheme, introduce a new capability,
KVM_CAP_HYPERV_SYNIC2, which, when enabled, makes sure that the synic
pages aren't zeroed out in KVM.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  9 +++++++++
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/hyperv.c             | 13 +++++++++----
 arch/x86/kvm/hyperv.h             |  2 +-
 arch/x86/kvm/x86.c                |  7 ++++++-
 include/uapi/linux/kvm.h          |  1 +
 6 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3a9831b72945a..78ac577c93781 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4329,3 +4329,12 @@ Querying this capability returns a bitmap indicating the possible
 virtual SMT modes that can be set using KVM_CAP_PPC_SMT.  If bit N
 (counting from the right) is set, then a virtual SMT mode of 2^N is
 available.
+
+8.11 KVM_CAP_HYPERV_SYNIC2
+
+Architectures: x86
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC).  The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ef37d0dc61bd8..9d8de5dd7546d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -462,6 +462,7 @@ struct kvm_vcpu_hv_synic {
 	DECLARE_BITMAP(auto_eoi_bitmap, 256);
 	DECLARE_BITMAP(vec_bitmap, 256);
 	bool active;
+	bool dont_zero_synic_pages;
 };
 
 /* Hyper-V per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ebae57ac59024..a8084406707e3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -221,7 +221,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 		synic->version = data;
 		break;
 	case HV_X64_MSR_SIEFP:
-		if (data & HV_SYNIC_SIEFP_ENABLE)
+		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
+		    !synic->dont_zero_synic_pages)
 			if (kvm_clear_guest(vcpu->kvm,
 					    data & PAGE_MASK, PAGE_SIZE)) {
 				ret = 1;
@@ -232,7 +233,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 			synic_exit(synic, msr);
 		break;
 	case HV_X64_MSR_SIMP:
-		if (data & HV_SYNIC_SIMP_ENABLE)
+		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
+		    !synic->dont_zero_synic_pages)
 			if (kvm_clear_guest(vcpu->kvm,
 					    data & PAGE_MASK, PAGE_SIZE)) {
 				ret = 1;
@@ -687,14 +689,17 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 		stimer_init(&hv_vcpu->stimer[i], i);
 }
 
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 {
+	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+
 	/*
 	 * Hyper-V SynIC auto EOI SINT's are
 	 * not compatible with APICV, so deactivate APICV
 	 */
 	kvm_vcpu_deactivate_apicv(vcpu);
-	vcpu_to_synic(vcpu)->active = true;
+	synic->active = true;
+	synic->dont_zero_synic_pages = dont_zero_synic_pages;
 	return 0;
 }
 
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index cd1119538add9..12f65fe1011d5 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -56,7 +56,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
 void kvm_hv_irq_routing_update(struct kvm *kvm);
 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 
 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 08aa5e442aa79..4f41c5222ecd4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2659,6 +2659,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_VAPIC:
 	case KVM_CAP_HYPERV_SPIN:
 	case KVM_CAP_HYPERV_SYNIC:
+	case KVM_CAP_HYPERV_SYNIC2:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3382,10 +3383,14 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	switch (cap->cap) {
+	case KVM_CAP_HYPERV_SYNIC2:
+		if (cap->args[0])
+			return -EINVAL;
 	case KVM_CAP_HYPERV_SYNIC:
 		if (!irqchip_in_kernel(vcpu->kvm))
 			return -EINVAL;
-		return kvm_hv_activate_synic(vcpu);
+		return kvm_hv_activate_synic(vcpu, cap->cap ==
+					     KVM_CAP_HYPERV_SYNIC2);
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ebd604c222d89..38b2cfbc81120 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -927,6 +927,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
+#define KVM_CAP_HYPERV_SYNIC2 148
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From e4a6a3424b75f23f6bb1cc479974fc305a4b9f78 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Thu, 13 Jul 2017 14:27:58 +0800
Subject: [PATCH 1414/1958] bpf: fix return in bpf_skb_adjust_net

The bpf_skb_adjust_net() ignores the return value of bpf_skb_net_shrink/grow,
and always return 0, fix it by return 'ret'.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index c7f737058d896..f44fc22fd45ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2248,7 +2248,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 		       bpf_skb_net_grow(skb, len_diff_abs);
 
 	bpf_compute_data_end(skb);
-	return 0;
+	return ret;
 }
 
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
-- 
GitLab


From 0db01097cabd97897d123b4c5d805d1a7b061d82 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Date: Thu, 13 Jul 2017 10:57:40 +0200
Subject: [PATCH 1415/1958] xgene: Don't fail probe, if there is no clk
 resource for SGMII interfaces

This change fixes following problem

[    1.827940] xgene-enet: probe of 1f210030.ethernet failed with error -2

which leads to a missing ethernet interface (reproducable at least on
Gigabyte MP30-AR0 and APM Mustang systems).

The check for a valid clk resource fails, because DT doesn't provide a
clock for sgenet1. But the driver doesn't use this clk, if the ethernet
port is connected via SGMII. Therefore this patch avoids probing for clk
on SGMII interfaces.

Fixes: 9aea7779b764 ("drivers: net: xgene: Fix crash on DT systems")
Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/apm/xgene/xgene_enet_main.c  | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index d3906f6b01bd1..86058a9f3417b 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -1785,16 +1785,18 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 
 	xgene_enet_gpiod_get(pdata);
 
-	pdata->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(pdata->clk)) {
-		/* Abort if the clock is defined but couldn't be retrived.
-		 * Always abort if the clock is missing on DT system as
-		 * the driver can't cope with this case.
-		 */
-		if (PTR_ERR(pdata->clk) != -ENOENT || dev->of_node)
-			return PTR_ERR(pdata->clk);
-		/* Firmware may have set up the clock already. */
-		dev_info(dev, "clocks have been setup already\n");
+	if (pdata->phy_mode != PHY_INTERFACE_MODE_SGMII) {
+		pdata->clk = devm_clk_get(&pdev->dev, NULL);
+		if (IS_ERR(pdata->clk)) {
+			/* Abort if the clock is defined but couldn't be
+			 * retrived. Always abort if the clock is missing on
+			 * DT system as the driver can't cope with this case.
+			 */
+			if (PTR_ERR(pdata->clk) != -ENOENT || dev->of_node)
+				return PTR_ERR(pdata->clk);
+			/* Firmware may have set up the clock already. */
+			dev_info(dev, "clocks have been setup already\n");
+		}
 	}
 
 	if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII)
-- 
GitLab


From 22c608919bd4f0fae8e70e1284c83e0f1d507f34 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 11 Jul 2017 11:52:23 +0100
Subject: [PATCH 1416/1958] net: broadcom: bnx2x: make a couple of const arrays
 static

Don't populate various tables on the stack but make them static const.
Makes the object code smaller by nearly 200 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
 113468	  11200	      0	 124668	  1e6fc	bnx2x_ethtool.o

After:
   text	   data	    bss	    dec	    hex	filename
 113129	  11344	      0	 124473	  1e639	bnx2x_ethtool.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 43423744fdfa8..21bc4bed6b260 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -2886,7 +2886,7 @@ static int bnx2x_test_nvram_tbl(struct bnx2x *bp,
 
 static int bnx2x_test_nvram(struct bnx2x *bp)
 {
-	const struct crc_pair nvram_tbl[] = {
+	static const struct crc_pair nvram_tbl[] = {
 		{     0,  0x14 }, /* bootstrap */
 		{  0x14,  0xec }, /* dir */
 		{ 0x100, 0x350 }, /* manuf_info */
@@ -2895,7 +2895,7 @@ static int bnx2x_test_nvram(struct bnx2x *bp)
 		{ 0x708,  0x70 }, /* manuf_key_info */
 		{     0,     0 }
 	};
-	const struct crc_pair nvram_tbl2[] = {
+	static const struct crc_pair nvram_tbl2[] = {
 		{ 0x7e8, 0x350 }, /* manuf_info2 */
 		{ 0xb38,  0xf0 }, /* feature_info */
 		{     0,     0 }
-- 
GitLab


From f56ff774871d95f60c3f858419cf7424e6c361cf Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 11 Jul 2017 12:18:48 +0100
Subject: [PATCH 1417/1958] net: stmmac: make const array route_possibilities
 static

Don't populate array route_possibilities on the stack but make it
static const.  Makes the object code a little smaller by 85 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
   9901	   2448	      0	  12349	   303d	dwmac4_core.o

After:
   text	   data	    bss	    dec	    hex	filename
   9760	   2504	      0	  12264	   2fe8	dwmac4_core.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index f233bf8b4ebb8..c4407e8e39a35 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -117,7 +117,7 @@ static void dwmac4_tx_queue_routing(struct mac_device_info *hw,
 	void __iomem *ioaddr = hw->pcsr;
 	u32 value;
 
-	const struct stmmac_rx_routing route_possibilities[] = {
+	static const struct stmmac_rx_routing route_possibilities[] = {
 		{ GMAC_RXQCTRL_AVCPQ_MASK, GMAC_RXQCTRL_AVCPQ_SHIFT },
 		{ GMAC_RXQCTRL_PTPQ_MASK, GMAC_RXQCTRL_PTPQ_SHIFT },
 		{ GMAC_RXQCTRL_DCBCPQ_MASK, GMAC_RXQCTRL_DCBCPQ_SHIFT },
-- 
GitLab


From bf98bd0be1ba509c5e6d77524ffac192f1edb2dd Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 11 Jul 2017 12:47:33 +0100
Subject: [PATCH 1418/1958] rt2x00: make const array glrt_table static

Don't populate array glrt_table on the stack but make it static.
Makes the object code a smaller by over 670 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
 131772	   4733	      0	 136505	  21539	rt2800lib.o

After:
   text	   data	    bss	    dec	    hex	filename
 131043	   4789	      0	 135832	  21298	rt2800lib.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
index 6e2e760d98b1b..0b75def39c6c4 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
@@ -5704,7 +5704,7 @@ static void rt2800_init_freq_calibration(struct rt2x00_dev *rt2x00dev)
 
 static void rt2800_init_bbp_5592_glrt(struct rt2x00_dev *rt2x00dev)
 {
-	const u8 glrt_table[] = {
+	static const u8 glrt_table[] = {
 		0xE0, 0x1F, 0X38, 0x32, 0x08, 0x28, 0x19, 0x0A, 0xFF, 0x00, /* 128 ~ 137 */
 		0x16, 0x10, 0x10, 0x0B, 0x36, 0x2C, 0x26, 0x24, 0x42, 0x36, /* 138 ~ 147 */
 		0x30, 0x2D, 0x4C, 0x46, 0x3D, 0x40, 0x3E, 0x42, 0x3D, 0x40, /* 148 ~ 157 */
-- 
GitLab


From 5e349fc03cdd44c670aaf3affaa6f0426c5e553f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 13 Jul 2017 12:22:24 +0100
Subject: [PATCH 1419/1958] dccp: make const array error_code static

Don't populate array error_code on the stack but make it static. Makes
the object code smaller by almost 250 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
  10366	    983	      0	  11349	   2c55	net/dccp/input.o

After:
   text	   data	    bss	    dec	    hex	filename
  10161	   1039	      0	  11200	   2bc0	net/dccp/input.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 4a05d78768502..fa6be9750bb46 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -126,7 +126,7 @@ static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
 
 static u16 dccp_reset_code_convert(const u8 code)
 {
-	const u16 error_code[] = {
+	static const u16 error_code[] = {
 	[DCCP_RESET_CODE_CLOSED]	     = 0,	/* normal termination */
 	[DCCP_RESET_CODE_UNSPECIFIED]	     = 0,	/* nothing known */
 	[DCCP_RESET_CODE_ABORTED]	     = ECONNRESET,
-- 
GitLab


From 803d5b6ebfb06a0d2ee3699fea4f1c7593958566 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Thu, 13 Jul 2017 18:45:07 +0530
Subject: [PATCH 1420/1958] cxgb4: add new T5 pci device id's

Add 0x50a3 and 0x50a4 T5 device id's

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index 99987d8e437e2..aa28299aef5f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -174,6 +174,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x50a0), /* Custom T540-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x50a1), /* Custom T540-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x50a2), /* Custom T540-KR4 */
+	CH_PCI_ID_TABLE_FENTRY(0x50a3), /* Custom T580-KR4 */
+	CH_PCI_ID_TABLE_FENTRY(0x50a4), /* Custom 2x T540-CR */
 
 	/* T6 adapters:
 	 */
-- 
GitLab


From 2683701201ddb9a2a4d0fda5d950ab50ca8e77e4 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Thu, 13 Jul 2017 16:46:43 +0200
Subject: [PATCH 1421/1958] netlink: correctly document nla_put_u64_64bit()

Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 01709172b3d38..ef8e6c3a80a63 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -98,8 +98,8 @@
  *   nla_put_u8(skb, type, value)	add u8 attribute to skb
  *   nla_put_u16(skb, type, value)	add u16 attribute to skb
  *   nla_put_u32(skb, type, value)	add u32 attribute to skb
- *   nla_put_u64_64bits(skb, type,
- *			value, padattr)	add u64 attribute to skb
+ *   nla_put_u64_64bit(skb, type,
+ *                     value, padattr)	add u64 attribute to skb
  *   nla_put_s8(skb, type, value)	add s8 attribute to skb
  *   nla_put_s16(skb, type, value)	add s16 attribute to skb
  *   nla_put_s32(skb, type, value)	add s32 attribute to skb
-- 
GitLab


From fdb254db21bb4aed44a0bc7fe993e58d3848c926 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 11 Jul 2017 16:22:59 +0100
Subject: [PATCH 1422/1958] isofs: Fix isofs_show_options()

The isofs patch needs a small fix to handle a signed/unsigned comparison that
the compiler didn't flag - thanks to Dan for catching it.

It should be noted, however, the session number handing appears to be incorrect
between where it is parsed and where it is used.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/isofs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index f80ee600d1bcd..8cf898a59730d 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -492,7 +492,7 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
 
 	if (sbi->s_check)		seq_printf(m, ",check=%c", sbi->s_check);
 	if (sbi->s_mapping)		seq_printf(m, ",map=%c", sbi->s_mapping);
-	if (sbi->s_session != -1)	seq_printf(m, ",session=%u", sbi->s_session);
+	if (sbi->s_session != 255)	seq_printf(m, ",session=%u", sbi->s_session - 1);
 	if (sbi->s_sbsector != -1)	seq_printf(m, ",sbsector=%u", sbi->s_sbsector);
 
 	if (root->d_sb->s_blocksize != 1024)
-- 
GitLab


From 7d84120b5ba61912a5333f5fe2c4e8f35ef9514f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Sun, 25 Jun 2017 13:11:54 +0200
Subject: [PATCH 1423/1958] Documentation: ABI: mtd: describe "offset" more
 precisely
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So far Linux supported only two levels of MTD devices so we didn't need
a very precise description for this sysfs file. With commit
97519dc52b44a ("mtd: partitions: add support for subpartitions") there
is support for a tree structure so we should have more precise
description. Using "parent" and "flash device" makes it more accurate.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 Documentation/ABI/testing/sysfs-class-mtd | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
index 3b5c3bca9186d..f34e592301d1d 100644
--- a/Documentation/ABI/testing/sysfs-class-mtd
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -229,6 +229,6 @@ KernelVersion:	4.1
 Contact:	linux-mtd@lists.infradead.org
 Description:
 		For a partition, the offset of that partition from the start
-		of the master device in bytes. This attribute is absent on
-		main devices, so it can be used to distinguish between
-		partitions and devices that aren't partitions.
+		of the parent (another partition or a flash device) in bytes.
+		This attribute is absent on flash devices, so it can be used
+		to distinguish them from partitions.
-- 
GitLab


From b20dae70bfa5014fdf201d0a8589271b888e4927 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 13 Jul 2017 18:51:15 +0100
Subject: [PATCH 1424/1958] svcrdma: fix an incorrect check on -E2BIG and
 -EINVAL

The current check will always be true and will always jump to
err1, this looks dubious to me. I believe && should be used
instead of ||.

Detected by CoverityScan, CID#1450120 ("Logically Dead Code")

Fixes: 107c1d0a991a ("svcrdma: Avoid Send Queue overflow")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 19fd01e4b6900..7c3a211e0e9a9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -684,7 +684,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	return 0;
 
  err2:
-	if (ret != -E2BIG || ret != -EINVAL)
+	if (ret != -E2BIG && ret != -EINVAL)
 		goto err1;
 
 	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
-- 
GitLab


From 266530b34cb679188d5b2bd6e580b8105f4cb593 Mon Sep 17 00:00:00 2001
From: Jeffy Chen <jeffy.chen@rock-chips.com>
Date: Wed, 12 Jul 2017 14:18:32 +0800
Subject: [PATCH 1425/1958] drm: Add missing field copy in compat_drm_version

DRM_IOCTL_VERSION is supposed to update the name_len/date_len/desc_len
fields to user.

Fixes: 012c6741c6aa ("switch compat_drm_version() to drm_ioctl_kernel()")
Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_ioc32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 0b2d8c4a2fa52..d1f2028520288 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -112,6 +112,9 @@ static int compat_drm_version(struct file *file, unsigned int cmd,
 	v32.version_major = v.version_major;
 	v32.version_minor = v.version_minor;
 	v32.version_patchlevel = v.version_patchlevel;
+	v32.name_len = v.name_len;
+	v32.date_len = v.date_len;
+	v32.desc_len = v.desc_len;
 	if (copy_to_user((void __user *)arg, &v32, sizeof(v32)))
 		return -EFAULT;
 	return 0;
-- 
GitLab


From 6592e58c6b68e61f003a01ba29a3716e7e2e9484 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 12 Jul 2017 23:36:02 +0100
Subject: [PATCH 1426/1958] Btrfs: fix write corruption due to bio cloning on
 raid5/6

The recent changes to make bio cloning faster (added in the 4.13 merge
window) by using the bio_clone_fast() API introduced a regression on
raid5/6 modes, because cloned bios have an invalid bi_vcnt field
(therefore it can not be used) and the raid5/6 code uses the
bio_for_each_segment_all() API to iterate the segments of a bio, and this
API uses a bio's bi_vcnt field.

The issue is very simple to trigger by doing for example a direct IO write
against a raid5 or raid6 filesystem and then attempting to read what we
wrote before:

  $ mkfs.btrfs -m raid5 -d raid5 -f /dev/sdc /dev/sdd /dev/sde /dev/sdf
  $ mount /dev/sdc /mnt
  $ xfs_io -f -d -c "pwrite -S 0xab 0 1M" /mnt/foobar
  $ od -t x1 /mnt/foobar
  od: /mnt/foobar: read error: Input/output error

For that example, the following is also reported in dmesg/syslog:

  [18274.985557] btrfs_print_data_csum_error: 18 callbacks suppressed
  [18274.995277] BTRFS warning (device sdf): csum failed root 5 ino 257 off 0 csum 0x98f94189 expected csum 0x94374193 mirror 1
  [18274.997205] BTRFS warning (device sdf): csum failed root 5 ino 257 off 4096 csum 0x98f94189 expected csum 0x94374193 mirror 1
  [18275.025221] BTRFS warning (device sdf): csum failed root 5 ino 257 off 8192 csum 0x98f94189 expected csum 0x94374193 mirror 1
  [18275.047422] BTRFS warning (device sdf): csum failed root 5 ino 257 off 12288 csum 0x98f94189 expected csum 0x94374193 mirror 1
  [18275.054818] BTRFS warning (device sdf): csum failed root 5 ino 257 off 4096 csum 0x98f94189 expected csum 0x94374193 mirror 1
  [18275.054834] BTRFS warning (device sdf): csum failed root 5 ino 257 off 8192 csum 0x98f94189 expected csum 0x94374193 mirror 1
  [18275.054943] BTRFS warning (device sdf): csum failed root 5 ino 257 off 8192 csum 0x98f94189 expected csum 0x94374193 mirror 2
  [18275.055207] BTRFS warning (device sdf): csum failed root 5 ino 257 off 8192 csum 0x98f94189 expected csum 0x94374193 mirror 3
  [18275.055571] BTRFS warning (device sdf): csum failed root 5 ino 257 off 0 csum 0x98f94189 expected csum 0x94374193 mirror 1
  [18275.062171] BTRFS warning (device sdf): csum failed root 5 ino 257 off 12288 csum 0x98f94189 expected csum 0x94374193 mirror 1

A scrub will also fail correcting bad copies, mentioning the following in
dmesg/syslog:

  [18276.128696] scrub_handle_errored_block: 498 callbacks suppressed
  [18276.129617] BTRFS warning (device sdf): checksum error at logical 2186346496 on dev /dev/sde, sector 2116608, root 5, inode 257, offset 65536, length 4096, links $
  [18276.149235] btrfs_dev_stat_print_on_error: 498 callbacks suppressed
  [18276.157897] BTRFS error (device sdf): bdev /dev/sde errs: wr 0, rd 0, flush 0, corrupt 1, gen 0
  [18276.206059] BTRFS warning (device sdf): checksum error at logical 2186477568 on dev /dev/sdd, sector 2116736, root 5, inode 257, offset 196608, length 4096, links$
  [18276.206059] BTRFS error (device sdf): bdev /dev/sdd errs: wr 0, rd 0, flush 0, corrupt 1, gen 0
  [18276.306552] BTRFS warning (device sdf): checksum error at logical 2186543104 on dev /dev/sdd, sector 2116864, root 5, inode 257, offset 262144, length 4096, links$
  [18276.319152] BTRFS error (device sdf): bdev /dev/sdd errs: wr 0, rd 0, flush 0, corrupt 2, gen 0
  [18276.394316] BTRFS warning (device sdf): checksum error at logical 2186739712 on dev /dev/sdf, sector 2116992, root 5, inode 257, offset 458752, length 4096, links$
  [18276.396348] BTRFS error (device sdf): bdev /dev/sdf errs: wr 0, rd 0, flush 0, corrupt 1, gen 0
  [18276.434127] BTRFS warning (device sdf): checksum error at logical 2186870784 on dev /dev/sde, sector 2117120, root 5, inode 257, offset 589824, length 4096, links$
  [18276.434127] BTRFS error (device sdf): bdev /dev/sde errs: wr 0, rd 0, flush 0, corrupt 2, gen 0
  [18276.500504] BTRFS error (device sdf): unable to fixup (regular) error at logical 2186477568 on dev /dev/sdd
  [18276.538400] BTRFS warning (device sdf): checksum error at logical 2186481664 on dev /dev/sdd, sector 2116744, root 5, inode 257, offset 200704, length 4096, links$
  [18276.540452] BTRFS error (device sdf): bdev /dev/sdd errs: wr 0, rd 0, flush 0, corrupt 3, gen 0
  [18276.542012] BTRFS error (device sdf): unable to fixup (regular) error at logical 2186481664 on dev /dev/sdd
  [18276.585030] BTRFS error (device sdf): unable to fixup (regular) error at logical 2186346496 on dev /dev/sde
  [18276.598306] BTRFS warning (device sdf): checksum error at logical 2186412032 on dev /dev/sde, sector 2116736, root 5, inode 257, offset 131072, length 4096, links$
  [18276.598310] BTRFS error (device sdf): bdev /dev/sde errs: wr 0, rd 0, flush 0, corrupt 3, gen 0
  [18276.598582] BTRFS error (device sdf): unable to fixup (regular) error at logical 2186350592 on dev /dev/sde
  [18276.603455] BTRFS error (device sdf): bdev /dev/sde errs: wr 0, rd 0, flush 0, corrupt 4, gen 0
  [18276.638362] BTRFS warning (device sdf): checksum error at logical 2186354688 on dev /dev/sde, sector 2116624, root 5, inode 257, offset 73728, length 4096, links $
  [18276.640445] BTRFS error (device sdf): bdev /dev/sde errs: wr 0, rd 0, flush 0, corrupt 5, gen 0
  [18276.645942] BTRFS error (device sdf): unable to fixup (regular) error at logical 2186354688 on dev /dev/sde
  [18276.657204] BTRFS error (device sdf): unable to fixup (regular) error at logical 2186412032 on dev /dev/sde
  [18276.660563] BTRFS warning (device sdf): checksum error at logical 2186416128 on dev /dev/sde, sector 2116744, root 5, inode 257, offset 135168, length 4096, links$
  [18276.664609] BTRFS error (device sdf): bdev /dev/sde errs: wr 0, rd 0, flush 0, corrupt 6, gen 0
  [18276.664609] BTRFS error (device sdf): unable to fixup (regular) error at logical 2186358784 on dev /dev/sde

So fix this by using the bio_for_each_segment() API and setting before
the bio's bi_iter field to the value of the corresponding btrfs bio
container's saved iterator if we are processing a cloned bio in the
raid5/6 code (the same code processes both cloned and non-cloned bios).

This incorrect iteration of cloned bios was also causing some occasional
BUG_ONs when running fstest btrfs/064, which have a trace like the
following:

  [ 6674.416156] ------------[ cut here ]------------
  [ 6674.416157] kernel BUG at fs/btrfs/raid56.c:1897!
  [ 6674.416159] invalid opcode: 0000 [#1] PREEMPT SMP
  [ 6674.416160] Modules linked in: dm_flakey dm_mod dax ppdev tpm_tis parport_pc tpm_tis_core evdev tpm psmouse sg i2c_piix4 pcspkr parport i2c_core serio_raw button s
  [ 6674.416184] CPU: 3 PID: 19236 Comm: kworker/u32:10 Not tainted 4.12.0-rc6-btrfs-next-44+ #1
  [ 6674.416185] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
  [ 6674.416210] Workqueue: btrfs-endio btrfs_endio_helper [btrfs]
  [ 6674.416211] task: ffff880147f6c740 task.stack: ffffc90001fb8000
  [ 6674.416229] RIP: 0010:__raid_recover_end_io+0x1ac/0x370 [btrfs]
  [ 6674.416230] RSP: 0018:ffffc90001fbbb90 EFLAGS: 00010217
  [ 6674.416231] RAX: ffff8801ff4b4f00 RBX: 0000000000000002 RCX: 0000000000000001
  [ 6674.416232] RDX: ffff880099b045d8 RSI: ffffffff81a5f6e0 RDI: 0000000000000004
  [ 6674.416232] RBP: ffffc90001fbbbc8 R08: 0000000000000001 R09: 0000000000000001
  [ 6674.416233] R10: ffffc90001fbbac8 R11: 0000000000001000 R12: 0000000000000002
  [ 6674.416234] R13: ffff880099b045c0 R14: 0000000000000004 R15: ffff88012bff2000
  [ 6674.416235] FS:  0000000000000000(0000) GS:ffff88023f2c0000(0000) knlGS:0000000000000000
  [ 6674.416235] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [ 6674.416236] CR2: 00007f28cf282000 CR3: 00000001000c6000 CR4: 00000000000006e0
  [ 6674.416239] Call Trace:
  [ 6674.416259]  __raid56_parity_recover+0xfc/0x16e [btrfs]
  [ 6674.416276]  raid56_parity_recover+0x157/0x16b [btrfs]
  [ 6674.416293]  btrfs_map_bio+0xe0/0x259 [btrfs]
  [ 6674.416310]  btrfs_submit_bio_hook+0xbf/0x147 [btrfs]
  [ 6674.416327]  end_bio_extent_readpage+0x27b/0x4a0 [btrfs]
  [ 6674.416331]  bio_endio+0x17d/0x1b3
  [ 6674.416346]  end_workqueue_fn+0x3c/0x3f [btrfs]
  [ 6674.416362]  btrfs_scrubparity_helper+0x1aa/0x3b8 [btrfs]
  [ 6674.416379]  btrfs_endio_helper+0xe/0x10 [btrfs]
  [ 6674.416381]  process_one_work+0x276/0x4b6
  [ 6674.416384]  worker_thread+0x1ac/0x266
  [ 6674.416386]  ? rescuer_thread+0x278/0x278
  [ 6674.416387]  kthread+0x106/0x10e
  [ 6674.416389]  ? __list_del_entry+0x22/0x22
  [ 6674.416391]  ret_from_fork+0x27/0x40
  [ 6674.416395] Code: 44 89 e2 be 00 10 00 00 ff 15 b0 ab ef ff eb 72 4d 89 e8 89 d9 44 89 e2 be 00 10 00 00 ff 15 a3 ab ef ff eb 5d 41 83 fc ff 74 02 <0f> 0b 49 63 97
  [ 6674.416432] RIP: __raid_recover_end_io+0x1ac/0x370 [btrfs] RSP: ffffc90001fbbb90
  [ 6674.416434] ---[ end trace 74d56ebe7489dd6a ]---

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/raid56.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index b9abb0b01021c..b89d070036979 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1136,20 +1136,27 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
 static void index_rbio_pages(struct btrfs_raid_bio *rbio)
 {
 	struct bio *bio;
-	struct bio_vec *bvec;
 	u64 start;
 	unsigned long stripe_offset;
 	unsigned long page_index;
-	int i;
 
 	spin_lock_irq(&rbio->bio_list_lock);
 	bio_list_for_each(bio, &rbio->bio_list) {
+		struct bio_vec bvec;
+		struct bvec_iter iter;
+		int i = 0;
+
 		start = (u64)bio->bi_iter.bi_sector << 9;
 		stripe_offset = start - rbio->bbio->raid_map[0];
 		page_index = stripe_offset >> PAGE_SHIFT;
 
-		bio_for_each_segment_all(bvec, bio, i)
-			rbio->bio_pages[page_index + i] = bvec->bv_page;
+		if (bio_flagged(bio, BIO_CLONED))
+			bio->bi_iter = btrfs_io_bio(bio)->iter;
+
+		bio_for_each_segment(bvec, bio, iter) {
+			rbio->bio_pages[page_index + i] = bvec.bv_page;
+			i++;
+		}
 	}
 	spin_unlock_irq(&rbio->bio_list_lock);
 }
@@ -1423,11 +1430,14 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
  */
 static void set_bio_pages_uptodate(struct bio *bio)
 {
-	struct bio_vec *bvec;
-	int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+
+	if (bio_flagged(bio, BIO_CLONED))
+		bio->bi_iter = btrfs_io_bio(bio)->iter;
 
-	bio_for_each_segment_all(bvec, bio, i)
-		SetPageUptodate(bvec->bv_page);
+	bio_for_each_segment(bvec, bio, iter)
+		SetPageUptodate(bvec.bv_page);
 }
 
 /*
-- 
GitLab


From 0aebdc52ca824c38837a652548028e45da72628f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:31:19 +0200
Subject: [PATCH 1427/1958] sunrpc: properly type argument to kxdreproc_t

Pass struct rpc_request as the first argument instead of an untyped blob,
and mark the data object as const.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 include/linux/sunrpc/xdr.h | 5 ++++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 054c8cde18f33..290f189de2009 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -17,6 +17,8 @@
 #include <asm/unaligned.h>
 #include <linux/scatterlist.h>
 
+struct rpc_rqst;
+
 /*
  * Buffer adjustment
  */
@@ -222,7 +224,8 @@ struct xdr_stream {
 /*
  * These are the xdr_stream style generic XDR encode and decode functions.
  */
-typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj);
 typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b5cb921775a0b..9fee20dc0c809 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2476,7 +2476,8 @@ rpc_verify_header(struct rpc_task *task)
 	goto out_garbage;
 }
 
-static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj)
 {
 }
 
-- 
GitLab


From 8be9d07f0ca68a188ab448e5fad8ea5d7420f535 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:59:45 +0200
Subject: [PATCH 1428/1958] sunrpc: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/rpcb_clnt.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 5b30603596d0c..d0269a39afdfa 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -843,8 +843,9 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
  */
 
 static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct rpcbind_args *rpcb)
+			     const void *data)
 {
+	const struct rpcbind_args *rpcb = data;
 	__be32 *p;
 
 	dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
@@ -917,8 +918,9 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
 }
 
 static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct rpcbind_args *rpcb)
+			     const void *data)
 {
+	const struct rpcbind_args *rpcb = data;
 	__be32 *p;
 
 	dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
@@ -992,7 +994,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_encode	= rpcb_enc_mapping,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1002,7 +1004,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_encode	= rpcb_enc_mapping,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1012,7 +1014,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_encode	= rpcb_enc_mapping,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
@@ -1025,7 +1027,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1035,7 +1037,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1045,7 +1047,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
@@ -1058,7 +1060,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1068,7 +1070,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -1078,7 +1080,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
 		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
-- 
GitLab


From d16073389bb48a2caf7c62cd7089a2009c8717b8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 09:34:04 +0200
Subject: [PATCH 1429/1958] lockd: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c | 22 +++++++++++++++-------
 fs/lockd/clntxdr.c  | 22 +++++++++++++++-------
 fs/lockd/mon.c      |  8 ++++----
 3 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index d3e40db289302..3cbad662120a5 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -381,8 +381,9 @@ static void encode_nlm4_lock(struct xdr_stream *xdr,
  */
 static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -402,8 +403,9 @@ static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -424,8 +426,9 @@ static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -442,8 +445,9 @@ static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nlm_args *args)
+				    const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -458,8 +462,10 @@ static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_res(struct rpc_rqst *req,
 			     struct xdr_stream *xdr,
-			     const struct nlm_res *result)
+			     const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm4_stat(xdr, result->status);
 }
@@ -479,8 +485,10 @@ static void nlm4_xdr_enc_res(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_res *result)
+				 const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm4_stat(xdr, result->status);
 	if (result->status == nlm_lck_denied)
@@ -566,7 +574,7 @@ static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 #define PROC(proc, argtype, restype)					\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
+	.p_encode    = nlm4_xdr_enc_##argtype,				\
 	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 3e9f7874b9755..825c0fde8c80b 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -374,8 +374,9 @@ static void encode_nlm_lock(struct xdr_stream *xdr,
  */
 static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -395,8 +396,9 @@ static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -417,8 +419,9 @@ static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -435,8 +438,9 @@ static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nlm_args *args)
+				   const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -451,8 +455,10 @@ static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_res(struct rpc_rqst *req,
 			    struct xdr_stream *xdr,
-			    const struct nlm_res *result)
+			    const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm_stat(xdr, result->status);
 }
@@ -479,8 +485,10 @@ static void encode_nlm_testrply(struct xdr_stream *xdr,
 
 static void nlm_xdr_enc_testres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				const struct nlm_res *result)
+				const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm_stat(xdr, result->status);
 	encode_nlm_testrply(xdr, result);
@@ -564,7 +572,7 @@ static int nlm_xdr_dec_res(struct rpc_rqst *req,
 #define PROC(proc, argtype, restype)	\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
+	.p_encode    = nlm_xdr_enc_##argtype,		\
 	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 19166d4a8d313..8043fd4b8a5c2 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -476,14 +476,14 @@ static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
 }
 
 static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    const struct nsm_args *argp)
+			    const void *argp)
 {
 	encode_mon_id(xdr, argp);
 	encode_priv(xdr, argp);
 }
 
 static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      const struct nsm_args *argp)
+			      const void *argp)
 {
 	encode_mon_id(xdr, argp);
 }
@@ -532,7 +532,7 @@ static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
-		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
+		.p_encode	= nsm_xdr_enc_mon,
 		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
@@ -541,7 +541,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
-		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
+		.p_encode	= nsm_xdr_enc_unmon,
 		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
-- 
GitLab


From fcc85819ee4826f52b0d6c959c759329e6d87bfc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 10:01:49 +0200
Subject: [PATCH 1430/1958] nfs: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/mount_clnt.c |  10 +--
 fs/nfs/nfs2xdr.c    |  49 +++++++++---
 fs/nfs/nfs3xdr.c    |  83 +++++++++++++------
 fs/nfs/nfs42xdr.c   |  34 ++++----
 fs/nfs/nfs4xdr.c    | 188 ++++++++++++++++++++++++++++----------------
 5 files changed, 242 insertions(+), 122 deletions(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 09b190015df41..f435d640d5527 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -304,7 +304,7 @@ static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
 }
 
 static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const char *dirpath)
+				const void *dirpath)
 {
 	encode_mntdirpath(xdr, dirpath);
 }
@@ -467,7 +467,7 @@ static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
@@ -476,7 +476,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 	[MOUNTPROC_UMNT] = {
 		.p_proc		= MOUNTPROC_UMNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_name		= "UMOUNT",
@@ -486,7 +486,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
@@ -495,7 +495,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 	[MOUNTPROC3_UMNT] = {
 		.p_proc		= MOUNTPROC3_UMNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_name		= "UMOUNT",
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index b4e03ed8599de..8ecd58597228d 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -568,8 +568,10 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
 
 static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nfs_fh *fh)
+				 const void *data)
 {
+	const struct nfs_fh *fh = data;
+
 	encode_fhandle(xdr, fh);
 }
 
@@ -583,23 +585,29 @@ static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_sattrargs *args)
+				   const void *data)
 {
+	const struct nfs_sattrargs *args = data;
+
 	encode_fhandle(xdr, args->fh);
 	encode_sattr(xdr, args->sattr);
 }
 
 static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_diropargs *args)
+				   const void *data)
 {
+	const struct nfs_diropargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name, args->len);
 }
 
 static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs_readlinkargs *args)
+				      const void *data)
 {
+	const struct nfs_readlinkargs *args = data;
+
 	encode_fhandle(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS_readlinkres_sz);
@@ -632,8 +640,10 @@ static void encode_readargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_pgio_args *args)
+				  const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS_readres_sz);
@@ -672,8 +682,10 @@ static void encode_writeargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_pgio_args *args)
+				   const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_writeargs(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
@@ -688,16 +700,20 @@ static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_createargs *args)
+				    const void *data)
 {
+	const struct nfs_createargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name, args->len);
 	encode_sattr(xdr, args->sattr);
 }
 
 static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_removeargs *args)
+				    const void *data)
 {
+	const struct nfs_removeargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
 }
 
@@ -711,8 +727,9 @@ static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_renameargs *args)
+				    const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
 
@@ -730,8 +747,10 @@ static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_linkargs *args)
+				  const void *data)
 {
+	const struct nfs_linkargs *args = data;
+
 	encode_fhandle(xdr, args->fromfh);
 	encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
 }
@@ -747,8 +766,10 @@ static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_symlinkargs *args)
+				     const void *data)
 {
+	const struct nfs_symlinkargs *args = data;
+
 	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_path(xdr, args->pages, args->pathlen);
 	encode_sattr(xdr, args->sattr);
@@ -777,8 +798,10 @@ static void encode_readdirargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_readdirargs *args)
+				     const void *data)
 {
+	const struct nfs_readdirargs *args = data;
+
 	encode_readdirargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 					args->count, NFS_readdirres_sz);
@@ -1118,7 +1141,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
-	.p_encode   =  (kxdreproc_t)nfs2_xdr_enc_##argtype,		\
+	.p_encode   =  nfs2_xdr_enc_##argtype,				\
 	.p_decode   =  (kxdrdproc_t)nfs2_xdr_dec_##restype,		\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 267126d32ec0f..773150678633f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -846,8 +846,10 @@ static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
  */
 static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs_fh *fh)
+				      const void *data)
 {
+	const struct nfs_fh *fh = data;
+
 	encode_nfs_fh3(xdr, fh);
 }
 
@@ -884,8 +886,9 @@ static void encode_sattrguard3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_sattrargs *args)
+				      const void *data)
 {
+	const struct nfs3_sattrargs *args = data;
 	encode_nfs_fh3(xdr, args->fh);
 	encode_sattr3(xdr, args->sattr);
 	encode_sattrguard3(xdr, args);
@@ -900,8 +903,10 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_diropargs *args)
+				     const void *data)
 {
+	const struct nfs3_diropargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 }
 
@@ -922,8 +927,10 @@ static void encode_access3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_accessargs *args)
+				     const void *data)
 {
+	const struct nfs3_accessargs *args = data;
+
 	encode_access3args(xdr, args);
 }
 
@@ -936,8 +943,10 @@ static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
 				       struct xdr_stream *xdr,
-				       const struct nfs3_readlinkargs *args)
+				       const void *data)
 {
+	const struct nfs3_readlinkargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS3_readlinkres_sz);
@@ -966,8 +975,10 @@ static void encode_read3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_pgio_args *args)
+				   const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS3_readres_sz);
@@ -1008,8 +1019,10 @@ static void encode_write3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_pgio_args *args)
+				    const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_write3args(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
@@ -1055,8 +1068,10 @@ static void encode_createhow3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_createargs *args)
+				     const void *data)
 {
+	const struct nfs3_createargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_createhow3(xdr, args);
 }
@@ -1071,8 +1086,10 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs3_mkdirargs *args)
+				    const void *data)
 {
+	const struct nfs3_mkdirargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_sattr3(xdr, args->sattr);
 }
@@ -1091,16 +1108,20 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_symlinkdata3(struct xdr_stream *xdr,
-				const struct nfs3_symlinkargs *args)
+				const void *data)
 {
+	const struct nfs3_symlinkargs *args = data;
+
 	encode_sattr3(xdr, args->sattr);
 	encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
 static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_symlinkargs *args)
+				      const void *data)
 {
+	const struct nfs3_symlinkargs *args = data;
+
 	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_symlinkdata3(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
@@ -1160,8 +1181,10 @@ static void encode_mknoddata3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs3_mknodargs *args)
+				    const void *data)
 {
+	const struct nfs3_mknodargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_mknoddata3(xdr, args);
 }
@@ -1175,8 +1198,10 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_removeargs *args)
+				     const void *data)
 {
+	const struct nfs_removeargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
 }
 
@@ -1190,8 +1215,9 @@ static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_renameargs *args)
+				     const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
 
@@ -1209,8 +1235,10 @@ static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs3_linkargs *args)
+				   const void *data)
 {
+	const struct nfs3_linkargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fromfh);
 	encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
 }
@@ -1240,8 +1268,10 @@ static void encode_readdir3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_readdirargs *args)
+				      const void *data)
 {
+	const struct nfs3_readdirargs *args = data;
+
 	encode_readdir3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
@@ -1280,8 +1310,10 @@ static void encode_readdirplus3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
 					  struct xdr_stream *xdr,
-					  const struct nfs3_readdirargs *args)
+					  const void *data)
 {
+	const struct nfs3_readdirargs *args = data;
+
 	encode_readdirplus3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
@@ -1310,8 +1342,10 @@ static void encode_commit3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_commitargs *args)
+				     const void *data)
 {
+	const struct nfs_commitargs *args = data;
+
 	encode_commit3args(xdr, args);
 }
 
@@ -1319,8 +1353,10 @@ static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 
 static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_getaclargs *args)
+				     const void *data)
 {
+	const struct nfs3_getaclargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fh);
 	encode_uint32(xdr, args->mask);
 	if (args->mask & (NFS_ACL | NFS_DFACL))
@@ -1331,8 +1367,9 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 
 static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_setaclargs *args)
+				     const void *data)
 {
+	const struct nfs3_setaclargs *args = data;
 	unsigned int base;
 	int error;
 
@@ -2495,7 +2532,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args,	\
+	.p_encode    = nfs3_xdr_enc_##argtype##3args,			\
 	.p_decode    = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res,	\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
@@ -2538,7 +2575,7 @@ const struct rpc_version nfs_version3 = {
 static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
-		.p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
+		.p_encode = nfs3_xdr_enc_getacl3args,
 		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
@@ -2547,7 +2584,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
-		.p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
+		.p_encode = nfs3_xdr_enc_setacl3args,
 		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 528362f69cc1b..0a1bd60a1f8e8 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -112,7 +112,7 @@
 					 decode_getattr_maxsz)
 
 static void encode_fallocate(struct xdr_stream *xdr,
-			     struct nfs42_falloc_args *args)
+			     const struct nfs42_falloc_args *args)
 {
 	encode_nfs4_stateid(xdr, &args->falloc_stateid);
 	encode_uint64(xdr, args->falloc_offset);
@@ -120,7 +120,7 @@ static void encode_fallocate(struct xdr_stream *xdr,
 }
 
 static void encode_allocate(struct xdr_stream *xdr,
-			    struct nfs42_falloc_args *args,
+			    const struct nfs42_falloc_args *args,
 			    struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr);
@@ -128,7 +128,7 @@ static void encode_allocate(struct xdr_stream *xdr,
 }
 
 static void encode_copy(struct xdr_stream *xdr,
-			struct nfs42_copy_args *args,
+			const struct nfs42_copy_args *args,
 			struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr);
@@ -145,7 +145,7 @@ static void encode_copy(struct xdr_stream *xdr,
 }
 
 static void encode_deallocate(struct xdr_stream *xdr,
-			      struct nfs42_falloc_args *args,
+			      const struct nfs42_falloc_args *args,
 			      struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr);
@@ -153,7 +153,7 @@ static void encode_deallocate(struct xdr_stream *xdr,
 }
 
 static void encode_seek(struct xdr_stream *xdr,
-			struct nfs42_seek_args *args,
+			const struct nfs42_seek_args *args,
 			struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_SEEK, decode_seek_maxsz, hdr);
@@ -163,7 +163,7 @@ static void encode_seek(struct xdr_stream *xdr,
 }
 
 static void encode_layoutstats(struct xdr_stream *xdr,
-			       struct nfs42_layoutstat_args *args,
+			       const struct nfs42_layoutstat_args *args,
 			       struct nfs42_layoutstat_devinfo *devinfo,
 			       struct compound_hdr *hdr)
 {
@@ -191,7 +191,7 @@ static void encode_layoutstats(struct xdr_stream *xdr,
 }
 
 static void encode_clone(struct xdr_stream *xdr,
-			 struct nfs42_clone_args *args,
+			 const struct nfs42_clone_args *args,
 			 struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -210,8 +210,9 @@ static void encode_clone(struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  struct nfs42_falloc_args *args)
+				  const void *data)
 {
+	const struct nfs42_falloc_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -225,7 +226,7 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 }
 
 static void encode_copy_commit(struct xdr_stream *xdr,
-			  struct nfs42_copy_args *args,
+			  const struct nfs42_copy_args *args,
 			  struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -241,8 +242,9 @@ static void encode_copy_commit(struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 			      struct xdr_stream *xdr,
-			      struct nfs42_copy_args *args)
+			      const void *data)
 {
+	const struct nfs42_copy_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -262,8 +264,9 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs42_falloc_args *args)
+				    const void *data)
 {
+	const struct nfs42_falloc_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -281,8 +284,9 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
 			      struct xdr_stream *xdr,
-			      struct nfs42_seek_args *args)
+			      const void *data)
 {
+	const struct nfs42_seek_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -299,8 +303,9 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs42_layoutstat_args *args)
+				     const void *data)
 {
+	const struct nfs42_layoutstat_args *args = data;
 	int i;
 
 	struct compound_hdr hdr = {
@@ -321,8 +326,9 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
 			       struct xdr_stream *xdr,
-			       struct nfs42_clone_args *args)
+			       const void *data)
 {
+	const struct nfs42_clone_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3aebfdc82b303..c5036ef770f9a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1651,7 +1651,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 }
 
 static void
-encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
+encode_setacl(struct xdr_stream *xdr, const struct nfs_setaclargs *arg,
+		struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -1735,7 +1736,7 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
 #if defined(CONFIG_NFS_V4_1)
 /* NFSv4.1 operations */
 static void encode_bind_conn_to_session(struct xdr_stream *xdr,
-				   struct nfs41_bind_conn_to_session_args *args,
+				   const struct nfs41_bind_conn_to_session_args *args,
 				   struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1748,7 +1749,7 @@ static void encode_bind_conn_to_session(struct xdr_stream *xdr,
 	*p = (args->use_conn_in_rdma_mode) ? cpu_to_be32(1) : cpu_to_be32(0);
 }
 
-static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
+static void encode_op_map(struct xdr_stream *xdr, const struct nfs4_op_map *op_map)
 {
 	unsigned int i;
 	encode_uint32(xdr, NFS4_OP_MAP_NUM_WORDS);
@@ -1757,7 +1758,7 @@ static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
 }
 
 static void encode_exchange_id(struct xdr_stream *xdr,
-			       struct nfs41_exchange_id_args *args,
+			       const struct nfs41_exchange_id_args *args,
 			       struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1809,7 +1810,7 @@ static void encode_exchange_id(struct xdr_stream *xdr,
 }
 
 static void encode_create_session(struct xdr_stream *xdr,
-				  struct nfs41_create_session_args *args,
+				  const struct nfs41_create_session_args *args,
 				  struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1862,7 +1863,7 @@ static void encode_create_session(struct xdr_stream *xdr,
 }
 
 static void encode_destroy_session(struct xdr_stream *xdr,
-				   struct nfs4_session *session,
+				   const struct nfs4_session *session,
 				   struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr);
@@ -1878,7 +1879,7 @@ static void encode_destroy_clientid(struct xdr_stream *xdr,
 }
 
 static void encode_reclaim_complete(struct xdr_stream *xdr,
-				    struct nfs41_reclaim_complete_args *args,
+				    const struct nfs41_reclaim_complete_args *args,
 				    struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
@@ -1974,7 +1975,7 @@ encode_layoutget(struct xdr_stream *xdr,
 static int
 encode_layoutcommit(struct xdr_stream *xdr,
 		    struct inode *inode,
-		    struct nfs4_layoutcommit_args *args,
+		    const struct nfs4_layoutcommit_args *args,
 		    struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -2044,7 +2045,7 @@ encode_secinfo_no_name(struct xdr_stream *xdr,
 }
 
 static void encode_test_stateid(struct xdr_stream *xdr,
-				struct nfs41_test_stateid_args *args,
+				const struct nfs41_test_stateid_args *args,
 				struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
@@ -2053,7 +2054,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
 }
 
 static void encode_free_stateid(struct xdr_stream *xdr,
-				struct nfs41_free_stateid_args *args,
+				const struct nfs41_free_stateid_args *args,
 				struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
@@ -2086,8 +2087,9 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
  * Encode an ACCESS request
  */
 static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_accessargs *args)
+				const void *data)
 {
+	const struct nfs4_accessargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2104,8 +2106,9 @@ static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode LOOKUP request
  */
 static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_lookup_arg *args)
+				const void *data)
 {
+	const struct nfs4_lookup_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2124,8 +2127,9 @@ static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs4_lookup_root_arg *args)
+				     const void *data)
 {
+	const struct nfs4_lookup_root_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2142,8 +2146,9 @@ static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
  * Encode REMOVE request
  */
 static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs_removeargs *args)
+				const void *data)
 {
+	const struct nfs_removeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2159,8 +2164,9 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode RENAME request
  */
 static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs_renameargs *args)
+				const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2178,8 +2184,9 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode LINK request
  */
 static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct nfs4_link_arg *args)
+			      const void *data)
 {
+	const struct nfs4_link_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2199,8 +2206,9 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode CREATE request
  */
 static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_create_arg *args)
+				const void *data)
 {
+	const struct nfs4_create_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2218,8 +2226,10 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode SYMLINK request
  */
 static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_create_arg *args)
+				 const void *data)
 {
+	const struct nfs4_create_arg *args = data;
+
 	nfs4_xdr_enc_create(req, xdr, args);
 }
 
@@ -2227,8 +2237,9 @@ static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode GETATTR request
  */
 static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_getattr_arg *args)
+				 const void *data)
 {
+	const struct nfs4_getattr_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2244,8 +2255,9 @@ static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a CLOSE request
  */
 static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_closeargs *args)
+			       const void *data)
 {
+	const struct nfs_closeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2265,8 +2277,9 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode an OPEN request
  */
 static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_openargs *args)
+			      const void *data)
 {
+	const struct nfs_openargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2287,8 +2300,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs_open_confirmargs *args)
+				      const void *data)
 {
+	const struct nfs_open_confirmargs *args = data;
 	struct compound_hdr hdr = {
 		.nops   = 0,
 	};
@@ -2304,8 +2318,9 @@ static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_openargs *args)
+				     const void *data)
 {
+	const struct nfs_openargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2325,8 +2340,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs_closeargs *args)
+					const void *data)
 {
+	const struct nfs_closeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2344,8 +2360,9 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
  * Encode a LOCK request
  */
 static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_lock_args *args)
+			      const void *data)
 {
+	const struct nfs_lock_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2361,8 +2378,9 @@ static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a LOCKT request
  */
 static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_lockt_args *args)
+			       const void *data)
 {
+	const struct nfs_lockt_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2378,8 +2396,9 @@ static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a LOCKU request
  */
 static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_locku_args *args)
+			       const void *data)
 {
+	const struct nfs_locku_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2393,8 +2412,9 @@ static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
 					   struct xdr_stream *xdr,
-					struct nfs_release_lockowner_args *args)
+					   const void *data)
 {
+	const struct nfs_release_lockowner_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = 0,
 	};
@@ -2408,8 +2428,9 @@ static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
  * Encode a READLINK request
  */
 static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  const struct nfs4_readlink *args)
+				  const void *data)
 {
+	const struct nfs4_readlink *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2428,8 +2449,9 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READDIR request
  */
 static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_readdir_arg *args)
+				 const void *data)
 {
+	const struct nfs4_readdir_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2451,8 +2473,9 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READ request
  */
 static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_pgio_args *args)
+			      const void *data)
 {
+	const struct nfs_pgio_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2472,8 +2495,9 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode an SETATTR request
  */
 static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_setattrargs *args)
+				 const void *data)
 {
+	const struct nfs_setattrargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2490,8 +2514,9 @@ static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a GETACL request
  */
 static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_getaclargs *args)
+				const void *data)
 {
+	const struct nfs_getaclargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2513,8 +2538,9 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a WRITE request
  */
 static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_pgio_args *args)
+			       const void *data)
 {
+	const struct nfs_pgio_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2533,8 +2559,9 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
  *  a COMMIT request
  */
 static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_commitargs *args)
+				const void *data)
 {
+	const struct nfs_commitargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2550,8 +2577,9 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
  * FSINFO request
  */
 static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs4_fsinfo_arg *args)
+				const void *data)
 {
+	const struct nfs4_fsinfo_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2567,8 +2595,9 @@ static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
  * a PATHCONF request
  */
 static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  const struct nfs4_pathconf_arg *args)
+				  const void *data)
 {
+	const struct nfs4_pathconf_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2585,8 +2614,9 @@ static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
  * a STATFS request
  */
 static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_statfs_arg *args)
+				const void *data)
 {
+	const struct nfs4_statfs_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2604,8 +2634,9 @@ static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_server_caps_arg *args)
+				     const void *data)
 {
+	const struct nfs4_server_caps_arg *args = data;
 	const u32 *bitmask = args->bitmask;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2622,8 +2653,10 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
  * a RENEW request
  */
 static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_client *clp)
+			       const void *data)
+
 {
+	const struct nfs_client *clp = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2638,8 +2671,9 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_setclientid *sc)
+				     const void *data)
 {
+	const struct nfs4_setclientid *sc = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2654,8 +2688,9 @@ static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
 					     struct xdr_stream *xdr,
-					     struct nfs4_setclientid_res *arg)
+					     const void *data)
 {
+	const struct nfs4_setclientid_res *arg = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2670,8 +2705,9 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs4_delegreturnargs *args)
+				     const void *data)
 {
+	const struct nfs4_delegreturnargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2692,8 +2728,9 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_fs_locations_arg *args)
+				      const void *data)
 {
+	const struct nfs4_fs_locations_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2715,8 +2752,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 	}
 
 	/* Set up reply kvec to capture returned fs_locations array. */
-	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
-			0, PAGE_SIZE);
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
+			 (struct page **)&args->page, 0, PAGE_SIZE);
 	encode_nops(&hdr);
 }
 
@@ -2725,8 +2762,9 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nfs4_secinfo_arg *args)
+				const void *data)
 {
+	const struct nfs4_secinfo_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2743,8 +2781,9 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_fsid_present_arg *args)
+				      const void *data)
 {
+	const struct nfs4_fsid_present_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2764,8 +2803,9 @@ static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nfs41_bind_conn_to_session_args *args)
+				const void *data)
 {
+	const struct nfs41_bind_conn_to_session_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2780,8 +2820,9 @@ static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs41_exchange_id_args *args)
+				     const void *data)
 {
+	const struct nfs41_exchange_id_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2796,8 +2837,9 @@ static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs41_create_session_args *args)
+					const void *data)
 {
+	const struct nfs41_create_session_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2812,8 +2854,9 @@ static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
 					 struct xdr_stream *xdr,
-					 struct nfs4_session *session)
+					 const void *data)
 {
+	const struct nfs4_session *session = data;
 	struct compound_hdr hdr = {
 		.minorversion = session->clp->cl_mvops->minor_version,
 	};
@@ -2828,8 +2871,9 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
 					 struct xdr_stream *xdr,
-					 struct nfs_client *clp)
+					 const void *data)
 {
+	const struct nfs_client *clp = data;
 	struct compound_hdr hdr = {
 		.minorversion = clp->cl_mvops->minor_version,
 	};
@@ -2843,8 +2887,9 @@ static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
  * a SEQUENCE request
  */
 static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs4_sequence_args *args)
+				  const void *data)
 {
+	const struct nfs4_sequence_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(args),
 	};
@@ -2859,8 +2904,9 @@ static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs4_get_lease_time_args *args)
+					const void *data)
 {
+	const struct nfs4_get_lease_time_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
 	};
@@ -2878,8 +2924,9 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
 					  struct xdr_stream *xdr,
-				struct nfs41_reclaim_complete_args *args)
+					  const void *data)
 {
+	const struct nfs41_reclaim_complete_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args)
 	};
@@ -2895,8 +2942,9 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
 				       struct xdr_stream *xdr,
-				       struct nfs4_getdeviceinfo_args *args)
+				       const void *data)
 {
+	const struct nfs4_getdeviceinfo_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2919,8 +2967,9 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs4_layoutget_args *args)
+				   const void *data)
 {
+	const struct nfs4_layoutget_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2941,8 +2990,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_layoutcommit_args *args)
+				      const void *priv)
 {
+	const struct nfs4_layoutcommit_args *args = priv;
 	struct nfs4_layoutcommit_data *data =
 		container_of(args, struct nfs4_layoutcommit_data, args);
 	struct compound_hdr hdr = {
@@ -2962,8 +3012,9 @@ static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_layoutreturn_args *args)
+				      const void *data)
 {
+	const struct nfs4_layoutreturn_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2978,10 +3029,11 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
 /*
  * Encode SECINFO_NO_NAME request
  */
-static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
+static void nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs41_secinfo_no_name_args *args)
+					const void *data)
 {
+	const struct nfs41_secinfo_no_name_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2991,7 +3043,6 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
 	encode_putrootfh(xdr, &hdr);
 	encode_secinfo_no_name(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
@@ -2999,8 +3050,9 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs41_test_stateid_args *args)
+				      const void *data)
 {
+	const struct nfs41_test_stateid_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -3016,8 +3068,9 @@ static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs41_free_stateid_args *args)
+				     const void *data)
 {
+	const struct nfs41_free_stateid_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -6364,8 +6417,9 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Encode an SETACL request
  */
 static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_setaclargs *args)
+				const void *data)
 {
+	const struct nfs_setaclargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -7484,7 +7538,7 @@ nfs4_stat_to_errno(int stat)
 #define PROC(proc, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
-	.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
+	.p_encode = nfs4_xdr_##argtype,				\
 	.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
-- 
GitLab


From 843efb7d7d98364645a7f23eaa13a0649075815f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:47:53 +0200
Subject: [PATCH 1431/1958] nfsd: fix encoder callback prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfsd/nfs4callback.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 0274db6e65d0d..bede418237ee7 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -468,7 +468,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
  * NB: Without this zero space reservation, callbacks over krb5p fail
  */
 static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 void *__unused)
+				 const void *__unused)
 {
 	xdr_reserve_space(xdr, 0);
 }
@@ -477,8 +477,9 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
  * 20.2. Operation 4: CB_RECALL - Recall a Delegation
  */
 static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
-				   const struct nfsd4_callback *cb)
+				   const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfs4_delegation *dp = cb_to_delegation(cb);
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = cb->cb_clp->cl_cb_ident,
@@ -585,8 +586,9 @@ static void encode_cb_layout4args(struct xdr_stream *xdr,
 
 static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfsd4_callback *cb)
+				   const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfs4_layout_stateid *ls =
 		container_of(cb, struct nfs4_layout_stateid, ls_recall);
 	struct nfs4_cb_compound_hdr hdr = {
@@ -631,8 +633,9 @@ static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so
 
 static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					const struct nfsd4_callback *cb)
+					const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfsd4_blocked_lock *nbl =
 		container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
 	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
@@ -682,7 +685,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 #define PROC(proc, call, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
-	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
+	.p_encode  = nfs4_xdr_enc_##argtype,		\
 	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
-- 
GitLab


From df179381223f299b6c16ae134b5853aeee94b09e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:54:06 +0200
Subject: [PATCH 1432/1958] sunrpc/auth_gss: nfsd: fix encoder callback
 prototypes

Declare the p_encode callbacks with the proper prototype instead of
casting to kxdreproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  2 +-
 net/sunrpc/auth_gss/gss_rpc_xdr.c    | 11 ++++++-----
 net/sunrpc/auth_gss/gss_rpc_xdr.h    |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index f0c6a8c78a567..45ab924da726c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -55,7 +55,7 @@ enum {
 #define PROC(proc, name)				\
 [GSSX_##proc] = {					\
 	.p_proc   = GSSX_##proc,			\
-	.p_encode = (kxdreproc_t)gssx_enc_##name,	\
+	.p_encode = gssx_enc_##name,	\
 	.p_decode = (kxdrdproc_t)gssx_dec_##name,	\
 	.p_arglen = GSSX_ARG_##name##_sz,		\
 	.p_replen = GSSX_RES_##name##_sz, 		\
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 25d9a9cf7b66b..5e54f47430921 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -44,7 +44,7 @@ static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
 }
 
 static int gssx_enc_buffer(struct xdr_stream *xdr,
-			   gssx_buffer *buf)
+			   const gssx_buffer *buf)
 {
 	__be32 *p;
 
@@ -56,7 +56,7 @@ static int gssx_enc_buffer(struct xdr_stream *xdr,
 }
 
 static int gssx_enc_in_token(struct xdr_stream *xdr,
-			     struct gssp_in_token *in)
+			     const struct gssp_in_token *in)
 {
 	__be32 *p;
 
@@ -130,7 +130,7 @@ static int gssx_dec_option(struct xdr_stream *xdr,
 }
 
 static int dummy_enc_opt_array(struct xdr_stream *xdr,
-				struct gssx_option_array *oa)
+				const struct gssx_option_array *oa)
 {
 	__be32 *p;
 
@@ -348,7 +348,7 @@ static int gssx_dec_status(struct xdr_stream *xdr,
 }
 
 static int gssx_enc_call_ctx(struct xdr_stream *xdr,
-			     struct gssx_call_ctx *ctx)
+			     const struct gssx_call_ctx *ctx)
 {
 	struct gssx_option opt;
 	__be32 *p;
@@ -733,8 +733,9 @@ static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
 
 void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct gssx_arg_accept_sec_context *arg)
+				 const void *data)
 {
+	const struct gssx_arg_accept_sec_context *arg = data;
 	int err;
 
 	err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 9d88c6239f014..87cd719ca0ad7 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -179,7 +179,7 @@ struct gssx_res_accept_sec_context {
 #define gssx_dec_init_sec_context NULL
 void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct gssx_arg_accept_sec_context *args);
+				 const void *data);
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
 				struct gssx_res_accept_sec_context *res);
-- 
GitLab


From 993328e2b31fedc35276a4828039ad7af6d519cb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 14:58:11 +0200
Subject: [PATCH 1433/1958] sunrpc: properly type argument to kxdrdproc_t

Pass struct rpc_request as the first argument instead of an untyped blob.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/xdr.h | 3 ++-
 net/sunrpc/clnt.c          | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 290f189de2009..ed0fbf0d8d0f5 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -226,7 +226,8 @@ struct xdr_stream {
  */
 typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		const void *obj);
-typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int	(*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9fee20dc0c809..964d5c4a1b609 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2481,7 +2481,8 @@ static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 {
 }
 
-static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj)
 {
 	return 0;
 }
-- 
GitLab


From 555966beff70ed612728521f0c12298a5127b7e5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:00:29 +0200
Subject: [PATCH 1434/1958] sunrpc: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 net/sunrpc/rpcb_clnt.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index d0269a39afdfa..f67b9e2897b49 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -861,8 +861,9 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    struct rpcbind_args *rpcb)
+			    void *data)
 {
+	struct rpcbind_args *rpcb = data;
 	unsigned long port;
 	__be32 *p;
 
@@ -883,8 +884,9 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
-			unsigned int *boolp)
+			void *data)
 {
+	unsigned int *boolp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4);
@@ -939,8 +941,9 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    struct rpcbind_args *rpcb)
+			    void *data)
 {
+	struct rpcbind_args *rpcb = data;
 	struct sockaddr_storage address;
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	__be32 *p;
@@ -995,7 +998,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1005,7 +1008,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1015,7 +1018,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
 		.p_encode	= rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
+		.p_decode	= rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
 		.p_statidx	= RPCBPROC_GETPORT,
@@ -1028,7 +1031,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1038,7 +1041,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1048,7 +1051,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
+		.p_decode	= rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -1061,7 +1064,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1071,7 +1074,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1081,7 +1084,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
+		.p_decode	= rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
-- 
GitLab


From c56c620b3e48feb5a6444707a05f9baeae1af9ef Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:03:02 +0200
Subject: [PATCH 1435/1958] sunrpc/auth_gss: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +-
 net/sunrpc/auth_gss/gss_rpc_xdr.c    | 3 ++-
 net/sunrpc/auth_gss/gss_rpc_xdr.h    | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 45ab924da726c..a80b8e6074781 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -56,7 +56,7 @@ enum {
 [GSSX_##proc] = {					\
 	.p_proc   = GSSX_##proc,			\
 	.p_encode = gssx_enc_##name,	\
-	.p_decode = (kxdrdproc_t)gssx_dec_##name,	\
+	.p_decode = gssx_dec_##name,	\
 	.p_arglen = GSSX_ARG_##name##_sz,		\
 	.p_replen = GSSX_RES_##name##_sz, 		\
 	.p_statidx = GSSX_##proc,			\
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 5e54f47430921..c4778cae58ef1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -790,8 +790,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct gssx_res_accept_sec_context *res)
+				void *data)
 {
+	struct gssx_res_accept_sec_context *res = data;
 	u32 value_follows;
 	int err;
 	struct page *scratch;
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 87cd719ca0ad7..146c310329173 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -182,7 +182,7 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 const void *data);
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct gssx_res_accept_sec_context *res);
+				void *data);
 #define gssx_enc_release_handle NULL
 #define gssx_dec_release_handle NULL
 #define gssx_enc_get_mic NULL
-- 
GitLab


From 5362a4ec830e0b5f4db59ad05fc32b10268abe91 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:04:45 +0200
Subject: [PATCH 1436/1958] nfsd: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfsd/nfs4callback.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index bede418237ee7..a2bedbd05b2bf 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -513,8 +513,9 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfsd4_callback *cb)
+				  void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -604,8 +605,9 @@ static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
 
 static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfsd4_callback *cb)
+				  void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -662,8 +664,9 @@ static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
 
 static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct nfsd4_callback *cb)
+					void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -686,7 +689,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
 	.p_encode  = nfs4_xdr_enc_##argtype,		\
-	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
+	.p_decode  = nfs4_xdr_dec_##restype,				\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
 	.p_statidx = NFSPROC4_CB_##call,				\
-- 
GitLab


From 04000564c1fc72770e4946cc3adb3ea9cd544bb2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:06:20 +0200
Subject: [PATCH 1437/1958] lockd: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c |  8 +++++---
 fs/lockd/clntxdr.c  |  8 +++++---
 fs/lockd/mon.c      | 10 ++++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 3cbad662120a5..f0ab7a99dd236 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -533,8 +533,9 @@ static int decode_nlm4_testrply(struct xdr_stream *xdr,
 
 static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nlm_res *result)
+				void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -553,8 +554,9 @@ static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
  */
 static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 			    struct xdr_stream *xdr,
-			    struct nlm_res *result)
+			    void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -575,7 +577,7 @@ static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = nlm4_xdr_enc_##argtype,				\
-	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
+	.p_decode    = nlm4_xdr_dec_##restype,				\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 825c0fde8c80b..17e7f08b3a22f 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -531,8 +531,9 @@ static int decode_nlm_testrply(struct xdr_stream *xdr,
 
 static int nlm_xdr_dec_testres(struct rpc_rqst *req,
 			       struct xdr_stream *xdr,
-			       struct nlm_res *result)
+			       void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -551,8 +552,9 @@ static int nlm_xdr_dec_testres(struct rpc_rqst *req,
  */
 static int nlm_xdr_dec_res(struct rpc_rqst *req,
 			   struct xdr_stream *xdr,
-			   struct nlm_res *result)
+			   void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -573,7 +575,7 @@ static int nlm_xdr_dec_res(struct rpc_rqst *req,
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = nlm_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
+	.p_decode    = nlm_xdr_dec_##restype,				\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 8043fd4b8a5c2..80630f0347e18 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -490,8 +490,9 @@ static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nsm_res *resp)
+				void *data)
 {
+	struct nsm_res *resp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4 + 4);
@@ -507,8 +508,9 @@ static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
 
 static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 			    struct xdr_stream *xdr,
-			    struct nsm_res *resp)
+			    void *data)
 {
+	struct nsm_res *resp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4);
@@ -533,7 +535,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= nsm_xdr_enc_mon,
-		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
+		.p_decode	= nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
 		.p_statidx	= NSMPROC_MON,
@@ -542,7 +544,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
 		.p_encode	= nsm_xdr_enc_unmon,
-		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
+		.p_decode	= nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
 		.p_statidx	= NSMPROC_UNMON,
-- 
GitLab


From fc016483eb0b2a19ef1e84ef8f8753c8ec9ac7f8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 15:09:02 +0200
Subject: [PATCH 1438/1958] nfs: fix decoder callback prototypes

Declare the p_decode callbacks with the proper prototype instead of
casting to kxdrdproc_t and losing all type safety.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/mount_clnt.c |  10 ++--
 fs/nfs/nfs2xdr.c    |  15 +++--
 fs/nfs/nfs3xdr.c    |  55 +++++++++++-------
 fs/nfs/nfs42xdr.c   |  18 ++++--
 fs/nfs/nfs4xdr.c    | 137 +++++++++++++++++++++++++++++---------------
 5 files changed, 151 insertions(+), 84 deletions(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index f435d640d5527..806657d650745 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -357,8 +357,9 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 
 static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct mountres *res)
+				void *data)
 {
+	struct mountres *res = data;
 	int status;
 
 	status = decode_status(xdr, res);
@@ -449,8 +450,9 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 
 static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct mountres *res)
+				 void *data)
 {
+	struct mountres *res = data;
 	int status;
 
 	status = decode_fhs_status(xdr, res);
@@ -468,7 +470,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
+		.p_decode	= mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_statidx	= MOUNTPROC_MNT,
@@ -487,7 +489,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
+		.p_decode	= mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 8ecd58597228d..a299648ea3215 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -832,13 +832,13 @@ static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_fattr *result)
+				 void *result)
 {
 	return decode_attrstat(xdr, result, NULL);
 }
 
 static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_diropok *result)
+				 void *result)
 {
 	return decode_diropres(xdr, result);
 }
@@ -883,8 +883,9 @@ static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
  *	};
  */
 static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_pgio_res *result)
+				void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -905,8 +906,10 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_pgio_res *result)
+				 void *data)
 {
+	struct nfs_pgio_res *result = data;
+
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
 	return decode_attrstat(xdr, result->fattr, &result->op_status);
@@ -1057,7 +1060,7 @@ static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
 }
 
 static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs2_fsstat *result)
+				  void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1142,7 +1145,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
 	.p_encode   =  nfs2_xdr_enc_##argtype,				\
-	.p_decode   =  (kxdrdproc_t)nfs2_xdr_dec_##restype,		\
+	.p_decode   =  nfs2_xdr_dec_##restype,				\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
 	.p_timer    =  timer,						\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 773150678633f..cc272eb8be3e0 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1419,7 +1419,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs_fattr *result)
+				    void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1456,7 +1456,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs_fattr *result)
+				    void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1497,8 +1497,9 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_diropres *result)
+				   void *data)
 {
+	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1544,8 +1545,9 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_accessres *result)
+				   void *data)
 {
+	struct nfs3_accessres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1585,7 +1587,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_fattr *result)
+				     void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1663,8 +1665,9 @@ static int decode_read3resok(struct xdr_stream *xdr,
 }
 
 static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_pgio_res *result)
+				 void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1736,8 +1739,9 @@ static int decode_write3resok(struct xdr_stream *xdr,
 }
 
 static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs_pgio_res *result)
+				  void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1801,8 +1805,9 @@ static int decode_create3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_diropres *result)
+				   void *data)
 {
+	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1841,8 +1846,9 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_removeres *result)
+				   void *data)
 {
+	struct nfs_removeres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1882,8 +1888,9 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_renameres *result)
+				   void *data)
 {
+	struct nfs_renameres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1925,8 +1932,9 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
  *	};
  */
 static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs3_linkres *result)
+				 void *data)
 {
+	struct nfs3_linkres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2109,8 +2117,9 @@ static int decode_readdir3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs3_readdirres *result)
+				    void *data)
 {
+	struct nfs3_readdirres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2177,8 +2186,9 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fsstat *result)
+				   void *data)
 {
+	struct nfs_fsstat *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2253,8 +2263,9 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fsinfo *result)
+				   void *data)
 {
+	struct nfs_fsinfo *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2316,8 +2327,9 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_pathconf *result)
+				     void *data)
 {
+	struct nfs_pathconf *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2357,8 +2369,9 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_commitres *result)
+				   void *data)
 {
+	struct nfs_commitres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2426,7 +2439,7 @@ static inline int decode_getacl3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_getaclres *result)
+				   void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -2445,7 +2458,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 
 static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fattr *result)
+				   void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -2533,7 +2546,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
 	.p_encode    = nfs3_xdr_enc_##argtype##3args,			\
-	.p_decode    = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res,	\
+	.p_decode    = nfs3_xdr_dec_##restype##3res,			\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
 	.p_timer     = timer,						\
@@ -2576,7 +2589,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = nfs3_xdr_enc_getacl3args,
-		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
+		.p_decode = nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
 		.p_timer = 1,
@@ -2585,7 +2598,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
 		.p_encode = nfs3_xdr_enc_setacl3args,
-		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
+		.p_decode = nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
 		.p_timer = 0,
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0a1bd60a1f8e8..5ee1b0f0d9044 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -454,8 +454,9 @@ static int decode_clone(struct xdr_stream *xdr)
  */
 static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs42_falloc_res *res)
+				 void *data)
 {
+	struct nfs42_falloc_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -481,8 +482,9 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
 			     struct xdr_stream *xdr,
-			     struct nfs42_copy_res *res)
+			     void *data)
 {
+	struct nfs42_copy_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -514,8 +516,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
 				   struct xdr_stream *xdr,
-				   struct nfs42_falloc_res *res)
+				   void *data)
 {
+	struct nfs42_falloc_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -541,8 +544,9 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
 			     struct xdr_stream *xdr,
-			     struct nfs42_seek_res *res)
+			     void *data)
 {
+	struct nfs42_seek_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -565,8 +569,9 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs42_layoutstat_res *res)
+				    void *data)
 {
+	struct nfs42_layoutstat_res *res = data;
 	struct compound_hdr hdr;
 	int status, i;
 
@@ -595,8 +600,9 @@ static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
 			      struct xdr_stream *xdr,
-			      struct nfs42_clone_res *res)
+			      void *data)
 {
+	struct nfs42_clone_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c5036ef770f9a..797f3ce752861 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6154,8 +6154,9 @@ int decode_layoutreturn(struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs_closeres *res)
+				       void *data)
 {
+	struct nfs_closeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6183,8 +6184,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
  * Decode ACCESS response
  */
 static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_accessres *res)
+			       void *data)
 {
+	struct nfs4_accessres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6209,8 +6211,9 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOOKUP response
  */
 static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_lookup_res *res)
+			       void *data)
 {
+	struct nfs4_lookup_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6239,8 +6242,9 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs4_lookup_res *res)
+				    void *data)
 {
+	struct nfs4_lookup_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6265,8 +6269,9 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
  * Decode REMOVE response
  */
 static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_removeres *res)
+			       void *data)
 {
+	struct nfs_removeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6288,8 +6293,9 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode RENAME response
  */
 static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_renameres *res)
+			       void *data)
 {
+	struct nfs_renameres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6317,8 +6323,9 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LINK response
  */
 static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs4_link_res *res)
+			     void *data)
 {
+	struct nfs4_link_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6356,8 +6363,9 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode CREATE response
  */
 static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_create_res *res)
+			       void *data)
 {
+	struct nfs4_create_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6385,7 +6393,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode SYMLINK response
  */
 static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_create_res *res)
+				void *res)
 {
 	return nfs4_xdr_dec_create(rqstp, xdr, res);
 }
@@ -6394,8 +6402,9 @@ static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode GETATTR response
  */
 static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_getattr_res *res)
+				void *data)
 {
+	struct nfs4_getattr_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6436,8 +6445,9 @@ static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int
 nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-		    struct nfs_setaclres *res)
+		    void *data)
 {
+	struct nfs_setaclres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6460,8 +6470,9 @@ nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int
 nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-		    struct nfs_getaclres *res)
+		    void *data)
 {
+	struct nfs_getaclres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6488,8 +6499,9 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode CLOSE response
  */
 static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_closeres *res)
+			      void *data)
 {
+	struct nfs_closeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6522,8 +6534,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode OPEN response
  */
 static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_openres *res)
+			     void *data)
 {
+	struct nfs_openres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6554,8 +6567,9 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs_open_confirmres *res)
+				     void *data)
 {
+	struct nfs_open_confirmres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6575,8 +6589,9 @@ static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs_openres *res)
+				    void *data)
 {
+	struct nfs_openres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6604,8 +6619,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nfs_setattrres *res)
+				void *data)
 {
+	struct nfs_setattrres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6630,8 +6646,9 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
  * Decode LOCK response
  */
 static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_lock_res *res)
+			     void *data)
 {
+	struct nfs_lock_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6653,8 +6670,9 @@ static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOCKT response
  */
 static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_lockt_res *res)
+			      void *data)
 {
+	struct nfs_lockt_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6676,8 +6694,9 @@ static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOCKU response
  */
 static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_locku_res *res)
+			      void *data)
 {
+	struct nfs_locku_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6712,8 +6731,9 @@ static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs4_readlink_res *res)
+				 void *data)
 {
+	struct nfs4_readlink_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6735,8 +6755,9 @@ static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
  * Decode READDIR response
  */
 static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_readdir_res *res)
+				void *data)
 {
+	struct nfs4_readdir_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6758,8 +6779,9 @@ static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode Read response
  */
 static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_pgio_res *res)
+			     void *data)
 {
+	struct nfs_pgio_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6784,8 +6806,9 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode WRITE response
  */
 static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_pgio_res *res)
+			      void *data)
 {
+	struct nfs_pgio_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6814,8 +6837,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode COMMIT response
  */
 static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_commitres *res)
+			       void *data)
 {
+	struct nfs_commitres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6838,8 +6862,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode FSINFO response
  */
 static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs4_fsinfo_res *res)
+			       void *data)
 {
+	struct nfs4_fsinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6857,8 +6882,9 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode PATHCONF response
  */
 static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs4_pathconf_res *res)
+				 void *data)
 {
+	struct nfs4_pathconf_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6876,8 +6902,9 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode STATFS response
  */
 static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs4_statfs_res *res)
+			       void *data)
 {
+	struct nfs4_statfs_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6896,8 +6923,9 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs4_server_caps_res *res)
+				    void *data)
 {
+	struct nfs4_server_caps_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6935,8 +6963,9 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs4_setclientid_res *res)
+				    void *data)
 {
+	struct nfs4_setclientid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6950,7 +6979,8 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
  * Decode SETCLIENTID_CONFIRM response
  */
 static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
-					    struct xdr_stream *xdr)
+					    struct xdr_stream *xdr,
+					    void *data)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -6966,8 +6996,9 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
  */
 static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs4_delegreturnres *res)
+				    void *data)
 {
+	struct nfs4_delegreturnres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7001,8 +7032,9 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_fs_locations_res *res)
+				     void *data)
 {
+	struct nfs4_fs_locations_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7044,8 +7076,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
  */
 static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nfs4_secinfo_res *res)
+				void *data)
 {
+	struct nfs4_secinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7068,8 +7101,9 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_fsid_present_res *res)
+				     void *data)
 {
+	struct nfs4_fsid_present_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7129,7 +7163,7 @@ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs41_create_session_res *res)
+				       void *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -7177,7 +7211,7 @@ static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs4_sequence_res *res)
+				 void *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -7193,8 +7227,9 @@ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs4_get_lease_time_res *res)
+				       void *data)
 {
+	struct nfs4_get_lease_time_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7213,8 +7248,9 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
 					 struct xdr_stream *xdr,
-					 struct nfs41_reclaim_complete_res *res)
+					 void *data)
 {
+	struct nfs41_reclaim_complete_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7231,8 +7267,9 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
 				      struct xdr_stream *xdr,
-				      struct nfs4_getdeviceinfo_res *res)
+				      void *data)
 {
+	struct nfs4_getdeviceinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7252,8 +7289,9 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfs4_layoutget_res *res)
+				  void *data)
 {
+	struct nfs4_layoutget_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7276,8 +7314,9 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_layoutreturn_res *res)
+				     void *data)
 {
+	struct nfs4_layoutreturn_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7300,8 +7339,9 @@ static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_layoutcommit_res *res)
+				     void *data)
 {
+	struct nfs4_layoutcommit_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7327,8 +7367,9 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct nfs4_secinfo_res *res)
+					void *data)
 {
+	struct nfs4_secinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7351,8 +7392,9 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs41_test_stateid_res *res)
+				     void *data)
 {
+	struct nfs41_test_stateid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7372,8 +7414,9 @@ static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs41_free_stateid_res *res)
+				     void *data)
 {
+	struct nfs41_free_stateid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7539,7 +7582,7 @@ nfs4_stat_to_errno(int stat)
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
 	.p_encode = nfs4_xdr_##argtype,				\
-	.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
+	.p_decode = nfs4_xdr_##restype,				\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
 	.p_statidx = NFSPROC4_CLNT_##proc,			\
-- 
GitLab


From 947c6e431d1feb0bb62397cbb03d6e7292a35425 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 11 May 2017 09:22:18 +0200
Subject: [PATCH 1439/1958] nfs: don't cast callback decode/proc/encode
 routines

Instead declare all functions with the proper methods signature.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback.h      | 27 +++++--------
 fs/nfs/callback_proc.c | 33 ++++++++++------
 fs/nfs/callback_xdr.c  | 86 ++++++++++++++++++++++--------------------
 3 files changed, 77 insertions(+), 69 deletions(-)

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index c701c308fac52..3dc54d7cb19c0 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -114,8 +114,7 @@ struct cb_sequenceres {
 	uint32_t			csr_target_highestslotid;
 };
 
-extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-				       struct cb_sequenceres *res,
+extern __be32 nfs4_callback_sequence(void *argp, void *resp,
 				       struct cb_process_state *cps);
 
 #define RCA4_TYPE_MASK_RDATA_DLG	0
@@ -134,15 +133,13 @@ struct cb_recallanyargs {
 	uint32_t	craa_type_mask;
 };
 
-extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
-					void *dummy,
+extern __be32 nfs4_callback_recallany(void *argp, void *resp,
 					struct cb_process_state *cps);
 
 struct cb_recallslotargs {
 	uint32_t	crsa_target_highest_slotid;
 };
-extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
-					 void *dummy,
+extern __be32 nfs4_callback_recallslot(void *argp, void *resp,
 					 struct cb_process_state *cps);
 
 struct cb_layoutrecallargs {
@@ -159,9 +156,8 @@ struct cb_layoutrecallargs {
 	};
 };
 
-extern __be32 nfs4_callback_layoutrecall(
-	struct cb_layoutrecallargs *args,
-	void *dummy, struct cb_process_state *cps);
+extern __be32 nfs4_callback_layoutrecall(void *argp, void *resp,
+		struct cb_process_state *cps);
 
 struct cb_devicenotifyitem {
 	uint32_t		cbd_notify_type;
@@ -175,9 +171,8 @@ struct cb_devicenotifyargs {
 	struct cb_devicenotifyitem	 *devs;
 };
 
-extern __be32 nfs4_callback_devicenotify(
-	struct cb_devicenotifyargs *args,
-	void *dummy, struct cb_process_state *cps);
+extern __be32 nfs4_callback_devicenotify(void *argp, void *resp,
+		struct cb_process_state *cps);
 
 struct cb_notify_lock_args {
 	struct nfs_fh			cbnl_fh;
@@ -185,15 +180,13 @@ struct cb_notify_lock_args {
 	bool				cbnl_valid;
 };
 
-extern __be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args,
-					 void *dummy,
+extern __be32 nfs4_callback_notify_lock(void *argp, void *resp,
 					 struct cb_process_state *cps);
 #endif /* CONFIG_NFS_V4_1 */
 extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
-				    struct cb_getattrres *res,
+extern __be32 nfs4_callback_getattr(void *argp, void *resp,
 				    struct cb_process_state *cps);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+extern __be32 nfs4_callback_recall(void *argp, void *resp,
 				   struct cb_process_state *cps);
 #if IS_ENABLED(CONFIG_NFS_V4)
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 52479f180ea14..5427cdf04c5a1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -19,10 +19,11 @@
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
-			     struct cb_getattrres *res,
+__be32 nfs4_callback_getattr(void *argp, void *resp,
 			     struct cb_process_state *cps)
 {
+	struct cb_getattrargs *args = argp;
+	struct cb_getattrres *res = resp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
@@ -68,9 +69,10 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
 	return res->status;
 }
 
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+__be32 nfs4_callback_recall(void *argp, void *resp,
 			    struct cb_process_state *cps)
 {
+	struct cb_recallargs *args = argp;
 	struct inode *inode;
 	__be32 res;
 	
@@ -324,9 +326,10 @@ static u32 do_callback_layoutrecall(struct nfs_client *clp,
 	return initiate_bulk_draining(clp, args);
 }
 
-__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
-				  void *dummy, struct cb_process_state *cps)
+__be32 nfs4_callback_layoutrecall(void *argp, void *resp,
+				  struct cb_process_state *cps)
 {
+	struct cb_layoutrecallargs *args = argp;
 	u32 res = NFS4ERR_OP_NOT_IN_SESSION;
 
 	if (cps->clp)
@@ -345,9 +348,10 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp)
 	do_callback_layoutrecall(clp, &args);
 }
 
-__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
-				  void *dummy, struct cb_process_state *cps)
+__be32 nfs4_callback_devicenotify(void *argp, void *resp,
+				  struct cb_process_state *cps)
 {
+	struct cb_devicenotifyargs *args = argp;
 	int i;
 	__be32 res = 0;
 	struct nfs_client *clp = cps->clp;
@@ -469,10 +473,11 @@ static bool referring_call_exists(struct nfs_client *clp,
 	return status;
 }
 
-__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-			      struct cb_sequenceres *res,
+__be32 nfs4_callback_sequence(void *argp, void *resp,
 			      struct cb_process_state *cps)
 {
+	struct cb_sequenceargs *args = argp;
+	struct cb_sequenceres *res = resp;
 	struct nfs4_slot_table *tbl;
 	struct nfs4_slot *slot;
 	struct nfs_client *clp;
@@ -571,9 +576,10 @@ validate_bitmap_values(unsigned long mask)
 	return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
 }
 
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+__be32 nfs4_callback_recallany(void *argp, void *resp,
 			       struct cb_process_state *cps)
 {
+	struct cb_recallanyargs *args = argp;
 	__be32 status;
 	fmode_t flags = 0;
 
@@ -606,9 +612,10 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
 }
 
 /* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+__be32 nfs4_callback_recallslot(void *argp, void *resp,
 				struct cb_process_state *cps)
 {
+	struct cb_recallslotargs *args = argp;
 	struct nfs4_slot_table *fc_tbl;
 	__be32 status;
 
@@ -631,9 +638,11 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
 	return status;
 }
 
-__be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, void *dummy,
+__be32 nfs4_callback_notify_lock(void *argp, void *resp,
 				 struct cb_process_state *cps)
 {
+	struct cb_notify_lock_args *args = argp;
+
 	if (!cps->clp) /* set in cb_sequence */
 		return htonl(NFS4ERR_OP_NOT_IN_SESSION);
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c14758e08d738..287c02202b258 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -43,16 +43,11 @@
 /* Internal error code */
 #define NFS4ERR_RESOURCE_HDR	11050
 
-typedef __be32 (*callback_process_op_t)(void *, void *,
-					struct cb_process_state *);
-typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
-typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
-
-
 struct callback_op {
-	callback_process_op_t process_op;
-	callback_decode_arg_t decode_args;
-	callback_encode_res_t encode_res;
+	__be32 (*process_op)(void *, void *, struct cb_process_state *);
+	__be32 (*decode_args)(struct svc_rqst *, struct xdr_stream *, void *);
+	__be32 (*encode_res)(struct svc_rqst *, struct xdr_stream *,
+			const void *);
 	long res_maxsize;
 };
 
@@ -184,8 +179,10 @@ static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
 	return 0;
 }
 
-static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+static __be32 decode_getattr_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_getattrargs *args = argp;
 	__be32 status;
 
 	status = decode_fh(xdr, &args->fh);
@@ -194,8 +191,10 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
 	return decode_bitmap(xdr, args->bitmap);
 }
 
-static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+static __be32 decode_recall_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_recallargs *args = argp;
 	__be32 *p;
 	__be32 status;
 
@@ -217,9 +216,9 @@ static __be32 decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *statei
 }
 
 static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
-				       struct xdr_stream *xdr,
-				       struct cb_layoutrecallargs *args)
+				       struct xdr_stream *xdr, void *argp)
 {
+	struct cb_layoutrecallargs *args = argp;
 	__be32 *p;
 	__be32 status = 0;
 	uint32_t iomode;
@@ -262,8 +261,9 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 static
 __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct cb_devicenotifyargs *args)
+				void *argp)
 {
+	struct cb_devicenotifyargs *args = argp;
 	__be32 *p;
 	__be32 status = 0;
 	u32 tmp;
@@ -403,8 +403,9 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
 
 static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct cb_sequenceargs *args)
+					void *argp)
 {
+	struct cb_sequenceargs *args = argp;
 	__be32 *p;
 	int i;
 	__be32 status;
@@ -450,8 +451,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 
 static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr,
-				      struct cb_recallanyargs *args)
+				      void *argp)
 {
+	struct cb_recallanyargs *args = argp;
 	uint32_t bitmap[2];
 	__be32 *p, status;
 
@@ -469,8 +471,9 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 
 static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct cb_recallslotargs *args)
+					void *argp)
 {
+	struct cb_recallslotargs *args = argp;
 	__be32 *p;
 
 	p = read_buf(xdr, 4);
@@ -510,8 +513,10 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg
 	return 0;
 }
 
-static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_notify_lock_args *args)
+static __be32 decode_notify_lock_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_notify_lock_args *args = argp;
 	__be32 status;
 
 	status = decode_fh(xdr, &args->cbnl_fh);
@@ -641,8 +646,10 @@ static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res)
 	return 0;
 }
 
-static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *resp)
 {
+	const struct cb_getattrres *res = resp;
 	__be32 *savep = NULL;
 	__be32 status = res->status;
 	
@@ -683,8 +690,9 @@ static __be32 encode_sessionid(struct xdr_stream *xdr,
 
 static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       const struct cb_sequenceres *res)
+				       const void *resp)
 {
+	const struct cb_sequenceres *res = resp;
 	__be32 *p;
 	__be32 status = res->csr_status;
 
@@ -938,48 +946,46 @@ static struct callback_op callback_ops[] = {
 		.res_maxsize = CB_OP_HDR_RES_MAXSZ,
 	},
 	[OP_CB_GETATTR] = {
-		.process_op = (callback_process_op_t)nfs4_callback_getattr,
-		.decode_args = (callback_decode_arg_t)decode_getattr_args,
-		.encode_res = (callback_encode_res_t)encode_getattr_res,
+		.process_op = nfs4_callback_getattr,
+		.decode_args = decode_getattr_args,
+		.encode_res = encode_getattr_res,
 		.res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
 	},
 	[OP_CB_RECALL] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recall,
-		.decode_args = (callback_decode_arg_t)decode_recall_args,
+		.process_op = nfs4_callback_recall,
+		.decode_args = decode_recall_args,
 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
 	},
 #if defined(CONFIG_NFS_V4_1)
 	[OP_CB_LAYOUTRECALL] = {
-		.process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
-		.decode_args =
-			(callback_decode_arg_t)decode_layoutrecall_args,
+		.process_op = nfs4_callback_layoutrecall,
+		.decode_args = decode_layoutrecall_args,
 		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
 	},
 	[OP_CB_NOTIFY_DEVICEID] = {
-		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
-		.decode_args =
-			(callback_decode_arg_t)decode_devicenotify_args,
+		.process_op = nfs4_callback_devicenotify,
+		.decode_args = decode_devicenotify_args,
 		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
 	},
 	[OP_CB_SEQUENCE] = {
-		.process_op = (callback_process_op_t)nfs4_callback_sequence,
-		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
-		.encode_res = (callback_encode_res_t)encode_cb_sequence_res,
+		.process_op = nfs4_callback_sequence,
+		.decode_args = decode_cb_sequence_args,
+		.encode_res = encode_cb_sequence_res,
 		.res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ,
 	},
 	[OP_CB_RECALL_ANY] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recallany,
-		.decode_args = (callback_decode_arg_t)decode_recallany_args,
+		.process_op = nfs4_callback_recallany,
+		.decode_args = decode_recallany_args,
 		.res_maxsize = CB_OP_RECALLANY_RES_MAXSZ,
 	},
 	[OP_CB_RECALL_SLOT] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recallslot,
-		.decode_args = (callback_decode_arg_t)decode_recallslot_args,
+		.process_op = nfs4_callback_recallslot,
+		.decode_args = decode_recallslot_args,
 		.res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ,
 	},
 	[OP_CB_NOTIFY_LOCK] = {
-		.process_op = (callback_process_op_t)nfs4_callback_notify_lock,
-		.decode_args = (callback_decode_arg_t)decode_notify_lock_args,
+		.process_op = nfs4_callback_notify_lock,
+		.decode_args = decode_notify_lock_args,
 		.res_maxsize = CB_OP_NOTIFY_LOCK_RES_MAXSZ,
 	},
 #endif /* CONFIG_NFS_V4_1 */
-- 
GitLab


From e91ff8e3c4cf0e4227fddabec87683401fc657ac Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:32:18 +0200
Subject: [PATCH 1440/1958] lockd: fix some weird indentation

Remove double indentation of a few struct rpc_version and
struct rpc_program instance.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clntxdr.c | 22 +++++++++++-----------
 fs/lockd/mon.c     | 16 ++++++++--------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 17e7f08b3a22f..bd8a976785aea 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -601,15 +601,15 @@ static struct rpc_procinfo	nlm_procedures[] = {
 };
 
 static const struct rpc_version	nlm_version1 = {
-		.number		= 1,
-		.nrprocs	= ARRAY_SIZE(nlm_procedures),
-		.procs		= nlm_procedures,
+	.number		= 1,
+	.nrprocs	= ARRAY_SIZE(nlm_procedures),
+	.procs		= nlm_procedures,
 };
 
 static const struct rpc_version	nlm_version3 = {
-		.number		= 3,
-		.nrprocs	= ARRAY_SIZE(nlm_procedures),
-		.procs		= nlm_procedures,
+	.number		= 3,
+	.nrprocs	= ARRAY_SIZE(nlm_procedures),
+	.procs		= nlm_procedures,
 };
 
 static const struct rpc_version	*nlm_versions[] = {
@@ -623,9 +623,9 @@ static const struct rpc_version	*nlm_versions[] = {
 static struct rpc_stat		nlm_rpc_stats;
 
 const struct rpc_program	nlm_program = {
-		.name		= "lockd",
-		.number		= NLM_PROGRAM,
-		.nrvers		= ARRAY_SIZE(nlm_versions),
-		.version	= nlm_versions,
-		.stats		= &nlm_rpc_stats,
+	.name		= "lockd",
+	.number		= NLM_PROGRAM,
+	.nrvers		= ARRAY_SIZE(nlm_versions),
+	.version	= nlm_versions,
+	.stats		= &nlm_rpc_stats,
 };
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 80630f0347e18..62424e929a7fd 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -553,9 +553,9 @@ static struct rpc_procinfo	nsm_procedures[] = {
 };
 
 static const struct rpc_version nsm_version1 = {
-		.number		= 1,
-		.nrprocs	= ARRAY_SIZE(nsm_procedures),
-		.procs		= nsm_procedures
+	.number		= 1,
+	.nrprocs	= ARRAY_SIZE(nsm_procedures),
+	.procs		= nsm_procedures
 };
 
 static const struct rpc_version *nsm_version[] = {
@@ -565,9 +565,9 @@ static const struct rpc_version *nsm_version[] = {
 static struct rpc_stat		nsm_stats;
 
 static const struct rpc_program nsm_program = {
-		.name		= "statd",
-		.number		= NSM_PROGRAM,
-		.nrvers		= ARRAY_SIZE(nsm_version),
-		.version	= nsm_version,
-		.stats		= &nsm_stats
+	.name		= "statd",
+	.number		= NSM_PROGRAM,
+	.nrvers		= ARRAY_SIZE(nsm_version),
+	.version	= nsm_version,
+	.stats		= &nsm_stats
 };
-- 
GitLab


From c551858a884b6d81def3d1528a9002ba97f5d4ad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:27:10 +0200
Subject: [PATCH 1441/1958] sunrpc: move p_count out of struct rpc_procinfo

p_count is the only writeable memeber of struct rpc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct rpc_procinfo, and into a
separate writable array that is pointed to by struct rpc_version and
indexed by p_statidx.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/clnt4xdr.c                  |  2 ++
 fs/lockd/clntxdr.c                   |  4 ++++
 fs/lockd/mon.c                       |  4 +++-
 fs/nfs/mount_clnt.c                  |  5 ++++-
 fs/nfs/nfs2xdr.c                     |  4 +++-
 fs/nfs/nfs3xdr.c                     |  6 +++++-
 fs/nfs/nfs4xdr.c                     |  4 +++-
 fs/nfsd/nfs4callback.c               |  4 +++-
 include/linux/sunrpc/clnt.h          |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  3 ++-
 net/sunrpc/clnt.c                    |  6 ++++--
 net/sunrpc/rpcb_clnt.c               | 12 +++++++++---
 net/sunrpc/stats.c                   |  3 +--
 13 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f0ab7a99dd236..7c255d1d7c644 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -602,8 +602,10 @@ static struct rpc_procinfo	nlm4_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version4_counts[ARRAY_SIZE(nlm4_procedures)];
 const struct rpc_version nlm_version4 = {
 	.number		= 4,
 	.nrprocs	= ARRAY_SIZE(nlm4_procedures),
 	.procs		= nlm4_procedures,
+	.counts		= nlm_version4_counts,
 };
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index bd8a976785aea..39500c5743a56 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -600,16 +600,20 @@ static struct rpc_procinfo	nlm_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version1_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version1_counts,
 };
 
+static unsigned int nlm_version3_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(nlm_procedures),
 	.procs		= nlm_procedures,
+	.counts		= nlm_version3_counts,
 };
 
 static const struct rpc_version	*nlm_versions[] = {
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 62424e929a7fd..fe4ec82764fe1 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -552,10 +552,12 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 };
 
+static unsigned int nsm_version1_counts[ARRAY_SIZE(nsm_procedures)];
 static const struct rpc_version nsm_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(nsm_procedures),
-	.procs		= nsm_procedures
+	.procs		= nsm_procedures,
+	.counts		= nsm_version1_counts,
 };
 
 static const struct rpc_version *nsm_version[] = {
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 806657d650745..d25914aa8bf91 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -504,17 +504,20 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 };
 
-
+static unsigned int mnt_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(mnt_procedures),
 	.procs		= mnt_procedures,
+	.counts		= mnt_counts,
 };
 
+static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
 	.procs		= mnt3_procedures,
+	.counts		= mnt3_counts,
 };
 
 static const struct rpc_version *mnt_version[] = {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a299648ea3215..16b4526299c11 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1170,8 +1170,10 @@ struct rpc_procinfo	nfs_procedures[] = {
 	PROC(STATFS,	fhandle,	statfsres,	0),
 };
 
+static unsigned int nfs_version2_counts[ARRAY_SIZE(nfs_procedures)];
 const struct rpc_version nfs_version2 = {
 	.number			= 2,
 	.nrprocs		= ARRAY_SIZE(nfs_procedures),
-	.procs			= nfs_procedures
+	.procs			= nfs_procedures,
+	.counts			= nfs_version2_counts,
 };
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index cc272eb8be3e0..a017ec5c7a9d6 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2578,10 +2578,12 @@ struct rpc_procinfo	nfs3_procedures[] = {
 	PROC(COMMIT,		commit,		commit,		5),
 };
 
+static unsigned int nfs_version3_counts[ARRAY_SIZE(nfs3_procedures)];
 const struct rpc_version nfs_version3 = {
 	.number			= 3,
 	.nrprocs		= ARRAY_SIZE(nfs3_procedures),
-	.procs			= nfs3_procedures
+	.procs			= nfs3_procedures,
+	.counts			= nfs_version3_counts,
 };
 
 #ifdef CONFIG_NFS_V3_ACL
@@ -2606,10 +2608,12 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 };
 
+static unsigned int nfs3_acl_counts[ARRAY_SIZE(nfs3_acl_procedures)];
 const struct rpc_version nfsacl_version3 = {
 	.number			= 3,
 	.nrprocs		= sizeof(nfs3_acl_procedures)/
 				  sizeof(nfs3_acl_procedures[0]),
 	.procs			= nfs3_acl_procedures,
+	.counts			= nfs3_acl_counts,
 };
 #endif  /* CONFIG_NFS_V3_ACL */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 797f3ce752861..40cf5529e65ff 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7661,10 +7661,12 @@ struct rpc_procinfo	nfs4_procedures[] = {
 #endif /* CONFIG_NFS_V4_2 */
 };
 
+static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
 const struct rpc_version nfs_version4 = {
 	.number			= 4,
 	.nrprocs		= ARRAY_SIZE(nfs4_procedures),
-	.procs			= nfs4_procedures
+	.procs			= nfs4_procedures,
+	.counts			= nfs_version4_counts,
 };
 
 /*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a2bedbd05b2bf..afa961fe073c3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -705,6 +705,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 };
 
+static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
 static struct rpc_version nfs_cb_version4 = {
 /*
  * Note on the callback rpc program version number: despite language in rfc
@@ -715,7 +716,8 @@ static struct rpc_version nfs_cb_version4 = {
  */
 	.number			= 1,
 	.nrprocs		= ARRAY_SIZE(nfs4_cb_procedures),
-	.procs			= nfs4_cb_procedures
+	.procs			= nfs4_cb_procedures,
+	.counts			= nfs4_cb_counts,
 };
 
 static const struct rpc_version *nfs_cb_version[] = {
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6095ecba0ddee..c75ba37151fee 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -88,6 +88,7 @@ struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
 	struct rpc_procinfo *	procs;		/* procedure array */
+	unsigned int		*counts;	/* call counts */
 };
 
 /*
@@ -99,7 +100,6 @@ struct rpc_procinfo {
 	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
-	unsigned int		p_count;	/* call count */
 	unsigned int		p_timer;	/* Which RTT timer to use */
 	u32			p_statidx;	/* Which procedure to account */
 	const char *		p_name;		/* name of procedure */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index a80b8e6074781..f8729b6476052 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -364,11 +364,12 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data)
 /*
  * Initialization stuff
  */
-
+static unsigned int gssp_version1_counts[ARRAY_SIZE(gssp_procedures)];
 static const struct rpc_version gssp_version1 = {
 	.number		= GSSPROXY_VERS_1,
 	.nrprocs	= ARRAY_SIZE(gssp_procedures),
 	.procs		= gssp_procedures,
+	.counts		= gssp_version1_counts,
 };
 
 static const struct rpc_version *gssp_version[] = {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 964d5c4a1b609..f2d1f971247b1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1517,14 +1517,16 @@ static void
 call_start(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	int idx = task->tk_msg.rpc_proc->p_statidx;
 
 	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
 			clnt->cl_program->name, clnt->cl_vers,
 			rpc_proc_name(task),
 			(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
-	/* Increment call count */
-	task->tk_msg.rpc_proc->p_count++;
+	/* Increment call count (version might not be valid for ping) */
+	if (clnt->cl_program->version[clnt->cl_vers])
+		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index f67b9e2897b49..9d47b9d3bbeed 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -1117,22 +1117,28 @@ static const struct rpcb_info rpcb_next_version6[] = {
 	},
 };
 
+static unsigned int rpcb_version2_counts[ARRAY_SIZE(rpcb_procedures2)];
 static const struct rpc_version rpcb_version2 = {
 	.number		= RPCBVERS_2,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures2),
-	.procs		= rpcb_procedures2
+	.procs		= rpcb_procedures2,
+	.counts		= rpcb_version2_counts,
 };
 
+static unsigned int rpcb_version3_counts[ARRAY_SIZE(rpcb_procedures3)];
 static const struct rpc_version rpcb_version3 = {
 	.number		= RPCBVERS_3,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures3),
-	.procs		= rpcb_procedures3
+	.procs		= rpcb_procedures3,
+	.counts		= rpcb_version3_counts,
 };
 
+static unsigned int rpcb_version4_counts[ARRAY_SIZE(rpcb_procedures4)];
 static const struct rpc_version rpcb_version4 = {
 	.number		= RPCBVERS_4,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures4),
-	.procs		= rpcb_procedures4
+	.procs		= rpcb_procedures4,
+	.counts		= rpcb_version4_counts,
 };
 
 static const struct rpc_version *rpcb_version[] = {
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index caeb01ad2b5ae..91c84d18bf9a5 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -55,8 +55,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
 		seq_printf(seq, "proc%u %u",
 					vers->number, vers->nrprocs);
 		for (j = 0; j < vers->nrprocs; j++)
-			seq_printf(seq, " %u",
-					vers->procs[j].p_count);
+			seq_printf(seq, " %u", vers->counts[j]);
 		seq_putc(seq, '\n');
 	}
 	return 0;
-- 
GitLab


From 9ae7d8ff2948c4514b9a6bfef7f1a548ea6ab190 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:51:24 +0200
Subject: [PATCH 1442/1958] nfs: use ARRAY_SIZE() in the nfsacl_version3
 declaration

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfs/nfs3xdr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a017ec5c7a9d6..85ff1187e637f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2611,8 +2611,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 static unsigned int nfs3_acl_counts[ARRAY_SIZE(nfs3_acl_procedures)];
 const struct rpc_version nfsacl_version3 = {
 	.number			= 3,
-	.nrprocs		= sizeof(nfs3_acl_procedures)/
-				  sizeof(nfs3_acl_procedures[0]),
+	.nrprocs		= ARRAY_SIZE(nfs3_acl_procedures),
 	.procs			= nfs3_acl_procedures,
 	.counts			= nfs3_acl_counts,
 };
-- 
GitLab


From 511e936bf2b3e8be2a3160ace3d86be07962a7a8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:36:49 +0200
Subject: [PATCH 1443/1958] sunrpc: mark all struct rpc_procinfo instances as
 const

struct rpc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/clnt4xdr.c                  |  2 +-
 fs/lockd/clntxdr.c                   |  2 +-
 fs/lockd/mon.c                       |  2 +-
 fs/nfs/internal.h                    |  6 +++---
 fs/nfs/mount_clnt.c                  |  4 ++--
 fs/nfs/nfs2xdr.c                     |  2 +-
 fs/nfs/nfs3xdr.c                     |  4 ++--
 fs/nfs/nfs4_fs.h                     |  2 +-
 fs/nfs/nfs4xdr.c                     |  2 +-
 fs/nfsd/nfs4callback.c               |  2 +-
 include/linux/sunrpc/clnt.h          |  4 ++--
 include/linux/sunrpc/sched.h         |  2 +-
 net/sunrpc/auth_gss/gss_rpc_upcall.c |  2 +-
 net/sunrpc/clnt.c                    |  4 ++--
 net/sunrpc/rpcb_clnt.c               | 19 ++++++++++---------
 net/sunrpc/stats.c                   |  2 +-
 16 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 7c255d1d7c644..c349fc0f9b804 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -584,7 +584,7 @@ static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm4_procedures[] = {
+static const struct rpc_procinfo nlm4_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 39500c5743a56..3b4724a6c4eeb 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -582,7 +582,7 @@ static int nlm_xdr_dec_res(struct rpc_rqst *req,
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm_procedures[] = {
+static const struct rpc_procinfo nlm_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index fe4ec82764fe1..9d8166c39c549 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -531,7 +531,7 @@ static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 #define SM_monres_sz	2
 #define SM_unmonres_sz	1
 
-static struct rpc_procinfo	nsm_procedures[] = {
+static const struct rpc_procinfo nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= nsm_xdr_enc_mon,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3e24392f2caa1..dc2a29a7d48b0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -270,12 +270,12 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 }
 
 /* nfs2xdr.c */
-extern struct rpc_procinfo nfs_procedures[];
+extern const struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
 /* nfs3xdr.c */
-extern struct rpc_procinfo nfs3_procedures[];
+extern const struct rpc_procinfo nfs3_procedures[];
 extern int nfs3_decode_dirent(struct xdr_stream *,
 				struct nfs_entry *, int);
 
@@ -292,7 +292,7 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
 
 /* nfs4proc.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 #endif
 
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d25914aa8bf91..3efe946672beb 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -466,7 +466,7 @@ static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 	return decode_auth_flavors(xdr, res);
 }
 
-static struct rpc_procinfo mnt_procedures[] = {
+static const struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
@@ -485,7 +485,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 };
 
-static struct rpc_procinfo mnt3_procedures[] = {
+static const struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= mnt_xdr_enc_dirpath,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 16b4526299c11..c8a7e98c1371a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -1152,7 +1152,7 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 	.p_statidx  =  NFSPROC_##proc,					\
 	.p_name     =  #proc,						\
 	}
-struct rpc_procinfo	nfs_procedures[] = {
+const struct rpc_procinfo nfs_procedures[] = {
 	PROC(GETATTR,	fhandle,	attrstat,	1),
 	PROC(SETATTR,	sattrargs,	attrstat,	0),
 	PROC(LOOKUP,	diropargs,	diropres,	2),
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 85ff1187e637f..670eddb3ae369 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2554,7 +2554,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 	.p_name      = #proc,						\
 	}
 
-struct rpc_procinfo	nfs3_procedures[] = {
+const struct rpc_procinfo nfs3_procedures[] = {
 	PROC(GETATTR,		getattr,	getattr,	1),
 	PROC(SETATTR,		setattr,	setattr,	0),
 	PROC(LOOKUP,		lookup,		lookup,		2),
@@ -2587,7 +2587,7 @@ const struct rpc_version nfs_version3 = {
 };
 
 #ifdef CONFIG_NFS_V3_ACL
-static struct rpc_procinfo	nfs3_acl_procedures[] = {
+static const struct rpc_procinfo nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = nfs3_xdr_enc_getacl3args,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index af285cc27ccf8..9b0cf3872722b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -493,7 +493,7 @@ static inline void nfs4_unregister_sysctl(void)
 #endif
 
 /* nfs4xdr.c */
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 40cf5529e65ff..0f1f290c97cd0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7594,7 +7594,7 @@ nfs4_stat_to_errno(int stat)
 	.p_name = #proc,	\
 }
 
-struct rpc_procinfo	nfs4_procedures[] = {
+const struct rpc_procinfo nfs4_procedures[] = {
 	PROC(READ,		enc_read,		dec_read),
 	PROC(WRITE,		enc_write,		dec_write),
 	PROC(COMMIT,		enc_commit,		dec_commit),
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index afa961fe073c3..ac10f78c0fb36 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -696,7 +696,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 	.p_name    = #proc,						\
 }
 
-static struct rpc_procinfo nfs4_cb_procedures[] = {
+static const struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
 	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
 #ifdef CONFIG_NFSD_PNFS
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c75ba37151fee..55ef67bea06b9 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -39,7 +39,7 @@ struct rpc_clnt {
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
-	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
+	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
 				cl_vers,	/* RPC version number */
 				cl_maxproc;	/* max procedure number */
@@ -87,7 +87,7 @@ struct rpc_program {
 struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
-	struct rpc_procinfo *	procs;		/* procedure array */
+	const struct rpc_procinfo *procs;	/* procedure array */
 	unsigned int		*counts;	/* call counts */
 };
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c797ec4..ed60253abd0a3 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -22,7 +22,7 @@
  */
 struct rpc_procinfo;
 struct rpc_message {
-	struct rpc_procinfo *	rpc_proc;	/* Procedure information */
+	const struct rpc_procinfo *rpc_proc;	/* Procedure information */
 	void *			rpc_argp;	/* Arguments */
 	void *			rpc_resp;	/* Result */
 	struct rpc_cred *	rpc_cred;	/* Credentials */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index f8729b6476052..46b295e4f2b82 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -63,7 +63,7 @@ enum {
 	.p_name   = #proc,				\
 }
 
-static struct rpc_procinfo gssp_procedures[] = {
+static const struct rpc_procinfo gssp_procedures[] = {
 	PROC(INDICATE_MECHS, indicate_mechs),
         PROC(GET_CALL_CONTEXT, get_call_context),
         PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f2d1f971247b1..2e49d1f892b79 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1674,7 +1674,7 @@ call_allocate(struct rpc_task *task)
 	unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
-	struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
 	int status;
 
 	dprint_status(task);
@@ -2489,7 +2489,7 @@ static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	return 0;
 }
 
-static struct rpc_procinfo rpcproc_null = {
+static const struct rpc_procinfo rpcproc_null = {
 	.p_encode = rpcproc_encode_null,
 	.p_decode = rpcproc_decode_null,
 };
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 9d47b9d3bbeed..ea0676f199c85 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -128,13 +128,13 @@ struct rpcbind_args {
 	int			r_status;
 };
 
-static struct rpc_procinfo rpcb_procedures2[];
-static struct rpc_procinfo rpcb_procedures3[];
-static struct rpc_procinfo rpcb_procedures4[];
+static const struct rpc_procinfo rpcb_procedures2[];
+static const struct rpc_procinfo rpcb_procedures3[];
+static const struct rpc_procinfo rpcb_procedures4[];
 
 struct rpcb_info {
 	u32			rpc_vers;
-	struct rpc_procinfo *	rpc_proc;
+	const struct rpc_procinfo *rpc_proc;
 };
 
 static const struct rpcb_info rpcb_next_version[];
@@ -620,7 +620,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version,
 	return -EAFNOSUPPORT;
 }
 
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt,
+		struct rpcbind_args *map, const struct rpc_procinfo *proc)
 {
 	struct rpc_message msg = {
 		.rpc_proc = proc,
@@ -671,7 +672,7 @@ static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt;
-	struct rpc_procinfo *proc;
+	const struct rpc_procinfo *proc;
 	u32 bind_version;
 	struct rpc_xprt *xprt;
 	struct rpc_clnt	*rpcb_clnt;
@@ -994,7 +995,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * since the Linux kernel RPC code requires only these.
  */
 
-static struct rpc_procinfo rpcb_procedures2[] = {
+static const struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_mapping,
@@ -1027,7 +1028,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures3[] = {
+static const struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
@@ -1060,7 +1061,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures4[] = {
+static const struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= rpcb_enc_getaddr,
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 91c84d18bf9a5..8b6c35ae1d571 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -191,7 +191,7 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
 EXPORT_SYMBOL_GPL(rpc_count_iostats);
 
 static void _print_name(struct seq_file *seq, unsigned int op,
-			struct rpc_procinfo *procs)
+			const struct rpc_procinfo *procs)
 {
 	if (procs[op].p_name)
 		seq_printf(seq, "\t%12s: ", procs[op].p_name);
-- 
GitLab


From 39d43f75c6a1c214e035c143628cd8a985484bad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 15:58:06 +0200
Subject: [PATCH 1444/1958] nfsd4: const-ify nfs_cb_version4

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4callback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ac10f78c0fb36..b45083c0f9ae8 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -706,7 +706,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
 };
 
 static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
-static struct rpc_version nfs_cb_version4 = {
+static const struct rpc_version nfs_cb_version4 = {
 /*
  * Note on the callback rpc program version number: despite language in rfc
  * 5661 section 18.36.3 requiring servers to use 4 in this field, the
-- 
GitLab


From ec7e8caec9cf4f3cef5245220d0129380355d938 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 17:59:13 +0200
Subject: [PATCH 1445/1958] nfsd: use named initializers in PROC()

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs2acl.c  | 22 +++++++++++-----------
 fs/nfsd/nfs3acl.c  | 22 +++++++++++-----------
 fs/nfsd/nfs4proc.c | 10 +++++-----
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 838f90f3f890a..12933d07204c4 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -345,17 +345,17 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
 #define nfsd3_voidres		nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsacld_proc_##name,		\
-   (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
-   (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
-   (kxdrproc_t) nfsaclsvc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
+#define PROC(name, argt, rest, relt, cache, respsize)			\
+{									\
+	.pc_func	= (svc_procfunc) nfsacld_proc_##name,		\
+	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
+	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
+	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
+	.pc_cachetype	= cache,					\
+	.pc_xdrressize	= respsize,					\
+}
 
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index dcb5f79076c0c..db988a229b3a3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -237,17 +237,17 @@ static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
 #define nfsd3_voidres			nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd3_proc_##name,		\
-   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
-   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-   (kxdrproc_t) nfs3svc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
+#define PROC(name, argt, rest, relt, cache, respsize)			\
+{									\
+	.pc_func	= (svc_procfunc) nfsd3_proc_##name,		\
+	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
+	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
+	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
+	.pc_cachetype	= cache,					\
+	.pc_xdrressize	= respsize,					\
+}
 
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index dadb3bf305b22..0833686ccdd3c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1673,10 +1673,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
  * COMPOUND call.
  */
 static __be32
-nfsd4_proc_compound(struct svc_rqst *rqstp,
-		    struct nfsd4_compoundargs *args,
-		    struct nfsd4_compoundres *resp)
+nfsd4_proc_compound(struct svc_rqst *rqstp, void *arg, void *res)
 {
+	struct nfsd4_compoundargs *args = arg;
+	struct nfsd4_compoundres *resp = res;
 	struct nfsd4_op	*op;
 	struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
@@ -2517,7 +2517,7 @@ struct nfsd4_voidargs { int dummy; };
 
 static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
-		.pc_func = (svc_procfunc) nfsd4_proc_null,
+		.pc_func = nfsd4_proc_null,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd4_voidargs),
 		.pc_ressize = sizeof(struct nfsd4_voidres),
@@ -2525,7 +2525,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 		.pc_xdrressize = 1,
 	},
 	[NFSPROC4_COMPOUND] = {
-		.pc_func = (svc_procfunc) nfsd4_proc_compound,
+		.pc_func = nfsd4_proc_compound,
 		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
-- 
GitLab


From 36ba89c2a40fe138898b394106205a3853ef2a4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 18:03:06 +0200
Subject: [PATCH 1446/1958] nfsd: remove the unused PROC() macro in nfs3proc.c

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs3proc.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 045c9081eabeb..24e0351907707 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -647,18 +647,6 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
 #define nfsd3_voidres			nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd3_proc_##name,		\
-   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
-   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-   (kxdrproc_t) nfs3svc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
-
 #define ST 1		/* status*/
 #define FH 17		/* filehandle with length */
 #define AT 21		/* attributes */
-- 
GitLab


From 1c8a5409f3c4748ff42c1721d9578dd03091f378 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 17:35:49 +0200
Subject: [PATCH 1447/1958] sunrpc: properly type pc_func callbacks

Drop the argp and resp arguments as they can trivially be derived from
the rqstp argument.  With that all functions now have the same prototype,
and we can remove the unsafe casting to svc_procfunc as well as the
svc_procfunc typedef itself.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        | 118 ++++++++++++++++++-----------
 fs/lockd/svcproc.c         | 118 ++++++++++++++++++-----------
 fs/nfs/callback_xdr.c      |   7 +-
 fs/nfsd/nfs2acl.c          |  25 +++---
 fs/nfsd/nfs3acl.c          |  15 ++--
 fs/nfsd/nfs3proc.c         | 151 +++++++++++++++++++++----------------
 fs/nfsd/nfs4proc.c         |   9 ++-
 fs/nfsd/nfsproc.c          | 104 +++++++++++++------------
 fs/nfsd/nfssvc.c           |   2 +-
 include/linux/sunrpc/svc.h |   4 +-
 net/sunrpc/svc.c           |   2 +-
 11 files changed, 328 insertions(+), 227 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 09c576f26c7b7..3e4cba029d3d6 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -62,7 +62,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NULL: Test for presence of service
  */
 static __be32
-nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlm4svc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -72,9 +72,9 @@ nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -99,9 +99,15 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlm4svc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlm4svc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -141,9 +147,15 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlm4svc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -170,13 +182,19 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -203,14 +221,21 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -219,6 +244,12 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlm4svc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlm4svc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -243,9 +274,10 @@ static const struct rpc_call_ops nlm4svc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *,  struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -261,7 +293,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -273,48 +305,44 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, __nlm4svc_proc_test);
 }
 
-static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, __nlm4svc_proc_lock);
 }
 
-static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, __nlm4svc_proc_cancel);
 }
 
-static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlm4svc_proc_unlock);
 }
 
-static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, __nlm4svc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlm4svc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -345,9 +373,10 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -378,22 +407,23 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlm4svc_proc_lock(rqstp, argp, resp);
+	return nlm4svc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlm4svc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -409,9 +439,10 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlm4svc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -429,9 +460,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlm4svc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
         if (!nlmsvc_ops)
                 return rpc_success;
 
@@ -463,7 +495,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlm4svc_proc_##name,	\
+ { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index fb26b9f522e74..3add50661fab6 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -92,7 +92,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NULL: Test for presence of service
  */
 static __be32
-nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlmsvc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -102,9 +102,9 @@ nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -130,9 +130,15 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlmsvc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlmsvc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -172,9 +178,15 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlmsvc_proc_lock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -202,13 +214,19 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_cancel(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_cancel(rqstp, rqstp->rq_resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -236,14 +254,21 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_unlock(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_unlock(rqstp, rqstp->rq_resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -252,6 +277,12 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+static __be32
+nlmsvc_proc_granted(struct svc_rqst *rqstp)
+{
+	return __nlmsvc_proc_granted(rqstp, rqstp->rq_resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -284,9 +315,10 @@ static const struct rpc_call_ops nlmsvc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *, struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -302,7 +334,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -314,50 +346,46 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, __nlmsvc_proc_test);
 }
 
-static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, __nlmsvc_proc_lock);
 }
 
-static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, __nlmsvc_proc_cancel);
 }
 
 static __be32
-nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlmsvc_proc_unlock);
 }
 
 static __be32
-nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, __nlmsvc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlmsvc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -388,9 +416,10 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -421,22 +450,23 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlmsvc_proc_lock(rqstp, argp, resp);
+	return nlmsvc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlmsvc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -452,9 +482,10 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlmsvc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -472,9 +503,10 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
 	if (!nlmsvc_ops)
 		return rpc_success;
 
@@ -505,7 +537,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlmsvc_proc_##name,	\
+ { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 287c02202b258..5a14bdaa5986d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -53,7 +53,7 @@ struct callback_op {
 
 static struct callback_op callback_ops[];
 
-static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 {
 	return htonl(NFS4_OK);
 }
@@ -880,7 +880,7 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
 /*
  * Decode, process and encode a COMPOUND
  */
-static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 {
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
@@ -916,7 +916,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	while (status == 0 && nops != hdr_arg.nops) {
 		status = process_op(nops, rqstp, &xdr_in,
-				    argp, &xdr_out, resp, &cps);
+				    rqstp->rq_argp, &xdr_out, rqstp->rq_resp,
+				    &cps);
 		nops++;
 	}
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12933d07204c4..4b7f84fa1fa50 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -19,7 +19,7 @@
  * NULL call.
  */
 static __be32
-nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsacld_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -27,9 +27,10 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -87,10 +88,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -141,9 +142,10 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 /*
  * Check file attributes
  */
-static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
-		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -158,9 +160,10 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 /*
  * Check file access
  */
-static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-		struct nfsd3_accessres *resp)
+static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
@@ -347,7 +350,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsacld_proc_##name,		\
+	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index db988a229b3a3..5e42004035e0a 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -18,7 +18,7 @@
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -26,9 +26,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -80,10 +81,10 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd3_attrstat *resp)
+static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -239,7 +240,7 @@ struct nfsd3_voidargs { int dummy; };
 
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
-	.pc_func	= (svc_procfunc) nfsd3_proc_##name,		\
+	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 24e0351907707..4a2bae07cfbff 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -31,7 +31,7 @@ static int	nfs3_ftypes[] = {
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -40,9 +40,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * Get a file's attributes
  */
 static __be32
-nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					   struct nfsd3_attrstat *resp)
+nfsd3_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
@@ -63,9 +64,10 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * Set a file's attributes
  */
 static __be32
-nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
-					   struct nfsd3_attrstat  *resp)
+nfsd3_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd3_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SETATTR(3)  %s\n",
@@ -81,9 +83,10 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
  * Look up a path name component
  */
 static __be32
-nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_diropres  *resp)
+nfsd3_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP(3)   %s %.*s\n",
@@ -105,9 +108,10 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Check file access
  */
 static __be32
-nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-					  struct nfsd3_accessres *resp)
+nfsd3_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: ACCESS(3)   %s 0x%x\n",
@@ -124,9 +128,10 @@ nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
-					   struct nfsd3_readlinkres *resp)
+nfsd3_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
@@ -142,9 +147,10 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
  * Read a portion of a file.
  */
 static __be32
-nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
-				        struct nfsd3_readres  *resp)
+nfsd3_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readargs *argp = rqstp->rq_argp;
+	struct nfsd3_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 	unsigned long cnt = min(argp->count, max_blocksize);
@@ -179,9 +185,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
  * Write data to a file
  */
 static __be32
-nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
-					 struct nfsd3_writeres  *resp)
+nfsd3_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd3_writeargs *argp = rqstp->rq_argp;
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -206,9 +213,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
  * first reports about SunOS compatibility problems start to pour in...
  */
 static __be32
-nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					  struct nfsd3_diropres   *resp)
+nfsd3_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp, *newfhp = NULL;
 	struct iattr	*attr;
 	__be32		nfserr;
@@ -243,9 +251,10 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
  * Make directory. This operation is not idempotent.
  */
 static __be32
-nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					 struct nfsd3_diropres   *resp)
+nfsd3_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
@@ -263,9 +272,10 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 }
 
 static __be32
-nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
-					   struct nfsd3_diropres    *resp)
+nfsd3_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
@@ -284,9 +294,10 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
  * Make socket/fifo/device.
  */
 static __be32
-nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
-					 struct nfsd3_diropres  *resp)
+nfsd3_proc_mknod(struct svc_rqst *rqstp)
 {
+	struct nfsd3_mknodargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int type;
 	dev_t	rdev = 0;
@@ -321,9 +332,10 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
  * Remove file/fifo/socket etc.
  */
 static __be32
-nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_attrstat  *resp)
+nfsd3_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE(3)   %s %.*s\n",
@@ -342,9 +354,10 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					 struct nfsd3_attrstat  *resp)
+nfsd3_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR(3)    %s %.*s\n",
@@ -359,9 +372,10 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 }
 
 static __be32
-nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
-					  struct nfsd3_renameres  *resp)
+nfsd3_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd3_renameargs *argp = rqstp->rq_argp;
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME(3)   %s %.*s ->\n",
@@ -381,9 +395,10 @@ nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 }
 
 static __be32
-nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
-					struct nfsd3_linkres  *resp)
+nfsd3_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd3_linkargs *argp = rqstp->rq_argp;
+	struct nfsd3_linkres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK(3)     %s ->\n",
@@ -404,9 +419,10 @@ nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					   struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32		nfserr;
 	int		count;
 
@@ -440,9 +456,10 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * For now, we choose to ignore the dircount parameter.
  */
 static __be32
-nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					       struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
@@ -507,9 +524,10 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * Get file system stats
  */
 static __be32
-nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsstatres *resp)
+nfsd3_proc_fsstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: FSSTAT(3)   %s\n",
@@ -524,9 +542,10 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get file system info
  */
 static __be32
-nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsinfores *resp)
+nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 
@@ -567,9 +586,10 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get pathconf info for the specified file
  */
 static __be32
-nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
-					     struct nfsd3_pathconfres *resp)
+nfsd3_proc_pathconf(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: PATHCONF(3) %s\n",
@@ -610,9 +630,10 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
  * Commit a file (range) to stable storage.
  */
 static __be32
-nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
-					   struct nfsd3_commitres  *resp)
+nfsd3_proc_commit(struct svc_rqst *rqstp)
 {
+	struct nfsd3_commitargs *argp = rqstp->rq_argp;
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
@@ -655,7 +676,7 @@ struct nfsd3_voidargs { int dummy; };
 
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_null,
+		.pc_func = nfsd3_proc_null,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
@@ -663,7 +684,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST,
 	},
 	[NFS3PROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_getattr,
+		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -673,7 +694,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFS3PROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_setattr,
+		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -683,7 +704,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_lookup,
+		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -693,7 +714,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+FH+pAT+pAT,
 	},
 	[NFS3PROC_ACCESS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_access,
+		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -703,7 +724,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1,
 	},
 	[NFS3PROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readlink,
+		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -713,7 +734,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
 	},
 	[NFS3PROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_read,
+		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -723,7 +744,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
 	},
 	[NFS3PROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_write,
+		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -733,7 +754,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+4,
 	},
 	[NFS3PROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_create,
+		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -743,7 +764,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mkdir,
+		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -753,7 +774,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_symlink,
+		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -763,7 +784,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKNOD] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mknod,
+		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -773,7 +794,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_remove,
+		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -783,7 +804,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rmdir,
+		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -793,7 +814,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rename,
+		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -803,7 +824,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+WC+WC,
 	},
 	[NFS3PROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_link,
+		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
@@ -813,7 +834,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+WC,
 	},
 	[NFS3PROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdir,
+		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -822,7 +843,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_READDIRPLUS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
+		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
@@ -831,7 +852,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_FSSTAT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsstat,
+		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -840,7 +861,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+2*6+1,
 	},
 	[NFS3PROC_FSINFO] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
+		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -849,7 +870,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+12,
 	},
 	[NFS3PROC_PATHCONF] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_pathconf,
+		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -858,7 +879,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_xdrressize = ST+pAT+6,
 	},
 	[NFS3PROC_COMMIT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_commit,
+		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0833686ccdd3c..43b2e5ab44307 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1510,7 +1510,7 @@ nfsd4_layoutreturn(struct svc_rqst *rqstp,
  * NULL call.
  */
 static __be32
-nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd4_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -1524,6 +1524,7 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
+
 typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
 typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
 
@@ -1673,10 +1674,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
  * COMPOUND call.
  */
 static __be32
-nfsd4_proc_compound(struct svc_rqst *rqstp, void *arg, void *res)
+nfsd4_proc_compound(struct svc_rqst *rqstp)
 {
-	struct nfsd4_compoundargs *args = arg;
-	struct nfsd4_compoundres *resp = res;
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_op	*op;
 	struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 03a7e9da4da02..448505b939db0 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -17,7 +17,7 @@ typedef struct svc_buf	svc_buf;
 
 
 static __be32
-nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -39,9 +39,10 @@ nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					  struct nfsd_attrstat *resp)
+nfsd_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -56,9 +57,10 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
-					  struct nfsd_attrstat  *resp)
+nfsd_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct iattr *iap = &argp->attrs;
 	struct svc_fh *fhp;
 	__be32 nfserr;
@@ -122,9 +124,10 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 struct nfsd_diropres  *resp)
+nfsd_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP   %s %.*s\n",
@@ -142,9 +145,10 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
-					   struct nfsd_readlinkres *resp)
+nfsd_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
@@ -162,9 +166,10 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
-				       struct nfsd_readres  *resp)
+nfsd_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd_readargs *argp = rqstp->rq_argp;
+	struct nfsd_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READ    %s %d bytes at %d\n",
@@ -200,9 +205,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
-					struct nfsd_attrstat  *resp)
+nfsd_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd_writeargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -222,9 +228,10 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
  * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
  */
 static __be32
-nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					 struct nfsd_diropres   *resp)
+nfsd_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp = &argp->fh;
 	svc_fh		*newfhp = &resp->fh;
 	struct iattr	*attr = &argp->attrs;
@@ -377,9 +384,9 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 }
 
 static __be32
-nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 void		       *resp)
+nfsd_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE   %s %.*s\n", SVCFH_fmt(&argp->fh),
@@ -392,9 +399,9 @@ nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 }
 
 static __be32
-nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
-				  	 void		        *resp)
+nfsd_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd_renameargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME   %s %.*s -> \n",
@@ -410,9 +417,9 @@ nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 }
 
 static __be32
-nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
-				void			    *resp)
+nfsd_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd_linkargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK     %s ->\n",
@@ -430,9 +437,9 @@ nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 }
 
 static __be32
-nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
-				          void			  *resp)
+nfsd_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_symlinkargs *argp = rqstp->rq_argp;
 	struct svc_fh	newfh;
 	__be32		nfserr;
 
@@ -460,9 +467,10 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					struct nfsd_diropres   *resp)
+nfsd_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -484,9 +492,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-				 	void		      *resp)
+nfsd_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -500,9 +508,10 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
-					  struct nfsd_readdirres  *resp)
+nfsd_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
 	int		count;
 	__be32		nfserr;
 	loff_t		offset;
@@ -540,9 +549,10 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
  * Get file system info
  */
 static __be32
-nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
-					  struct nfsd_statfsres *resp)
+nfsd_proc_statfs(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: STATFS   %s\n", SVCFH_fmt(&argp->fh));
@@ -565,7 +575,7 @@ struct nfsd_void { int dummy; };
 
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd_proc_null,
+		.pc_func = nfsd_proc_null,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
@@ -574,7 +584,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_getattr,
+		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -584,7 +594,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_setattr,
+		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -602,7 +612,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd_proc_lookup,
+		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -612,7 +622,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readlink,
+		.pc_func = nfsd_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
@@ -621,7 +631,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
 	},
 	[NFSPROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd_proc_read,
+		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -639,7 +649,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_write,
+		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -649,7 +659,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_create,
+		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -659,7 +669,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_remove,
+		.pc_func = nfsd_proc_remove,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -668,7 +678,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rename,
+		.pc_func = nfsd_proc_rename,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
@@ -677,7 +687,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_link,
+		.pc_func = nfsd_proc_link,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
@@ -686,7 +696,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_symlink,
+		.pc_func = nfsd_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
@@ -695,7 +705,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_mkdir,
+		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
@@ -705,7 +715,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rmdir,
+		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -714,7 +724,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readdir,
+		.pc_func = nfsd_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
@@ -722,7 +732,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFSPROC_STATFS] = {
-		.pc_func = (svc_procfunc) nfsd_proc_statfs,
+		.pc_func = nfsd_proc_statfs,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 59979f0bbd4bf..d64895fd8d25b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -827,7 +827,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
 	/* Now call the procedure handler, and encode NFS status. */
-	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+	nfserr = proc->pc_func(rqstp);
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
 	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 		dprintk("nfsd: Dropping request; may be revisited later\n");
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 11cef5a7bc87a..3b58c55614c75 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -419,9 +419,9 @@ struct svc_version {
 /*
  * RPC procedure info
  */
-typedef __be32	(*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
 struct svc_procedure {
-	svc_procfunc		pc_func;	/* process the request */
+	/* process the request: */
+	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	kxdrproc_t		pc_release;	/* XDR free result */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc0f5a0ecbdce..95335455ad385 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1281,7 +1281,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
 			goto err_garbage;
 
-		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+		*statp = procp->pc_func(rqstp);
 
 		/* Encode reply */
 		if (*statp == rpc_drop_reply ||
-- 
GitLab


From 1150ded804c2be6d02efef3e39e39e570c9cea21 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 18:48:24 +0200
Subject: [PATCH 1448/1958] sunrpc: properly type pc_release callbacks

Drop the p and resp arguments as they are always NULL or can trivially
be derived from the rqstp argument.  With that all functions now have the
same prototype, and we can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs2acl.c          | 22 +++++++++++-----------
 fs/nfsd/nfs3acl.c          |  8 ++++----
 fs/nfsd/nfs3proc.c         | 36 ++++++++++++++++++------------------
 fs/nfsd/nfs3xdr.c          | 16 ++++++++--------
 fs/nfsd/nfs4xdr.c          |  4 +---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfsxdr.c           |  8 ++++----
 fs/nfsd/xdr.h              |  2 +-
 fs/nfsd/xdr3.h             |  6 ++----
 fs/nfsd/xdr4.h             |  2 +-
 include/linux/sunrpc/svc.h |  3 ++-
 net/sunrpc/svc.c           |  8 ++++----
 12 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4b7f84fa1fa50..302441027f508 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -318,27 +318,27 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
-static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
-               struct nfsd3_accessres *resp)
+static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
 {
-       fh_put(&resp->fh);
-       return 1;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+	fh_put(&resp->fh);
 }
 
 #define nfsaclsvc_decode_voidargs	NULL
@@ -353,7 +353,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfsaclsvc_release_##relt,	\
+	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5e42004035e0a..56cdff4e954c5 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -223,13 +223,13 @@ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
 #define nfs3svc_decode_voidargs		NULL
@@ -243,7 +243,7 @@ struct nfsd3_voidargs { int dummy; };
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-	.pc_release	= (kxdrproc_t) nfs3svc_release_##relt,		\
+	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
 	.pc_cachetype	= cache,					\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 4a2bae07cfbff..f0cccc0768ceb 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -687,7 +687,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
 		.pc_cachetype = RC_NOCACHE,
@@ -727,7 +727,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -737,7 +737,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -747,7 +747,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -757,7 +757,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -767,7 +767,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -777,7 +777,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -787,7 +787,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -797,7 +797,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -807,7 +807,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -817,7 +817,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -827,7 +827,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -837,7 +837,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -846,7 +846,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -882,7 +882,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 452334694a5d1..c3d3ef28347c7 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1103,19 +1103,19 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-int
-nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+void
+nfs3svc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-int
-nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fhandle_pair *resp)
+void
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp)
 {
+	struct nfsd3_fhandle_pair *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh1);
 	fh_put(&resp->fh2);
-	return 1;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 26780d53a6f94..5aa847bdfc630 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4543,9 +4543,8 @@ nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
         return xdr_ressize_check(rqstp, p);
 }
 
-int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
+void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 {
-	struct svc_rqst *rqstp = rq;
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
 	if (args->ops != args->iops) {
@@ -4559,7 +4558,6 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
 		args->to_free = tb->next;
 		kfree(tb);
 	}
-	return 1;
 }
 
 int
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 448505b939db0..dc32e0f8480d6 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -587,7 +587,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
@@ -597,7 +597,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -615,7 +615,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_NOCACHE,
@@ -634,7 +634,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
 		.pc_cachetype = RC_NOCACHE,
@@ -652,7 +652,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
@@ -662,7 +662,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
@@ -708,7 +708,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index de07ff6257778..2facfc6ac8f3f 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -543,10 +543,10 @@ nfssvc_encode_entry(void *ccdv, const char *name,
 /*
  * XDR release functions
  */
-int
-nfssvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_fhandle *resp)
+void
+nfssvc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 4f0481d638048..2c21fa843fbf6 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -164,7 +164,7 @@ int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
 
-int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+void nfssvc_release_fhandle(struct svc_rqst *);
 
 /* Helper functions for NFSv2 ACL code */
 __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 335e04aaf7db1..23fe456a223be 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -330,10 +330,8 @@ int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
 int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
 				struct nfsd3_commitres *);
 
-int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
-				struct nfsd3_fhandle_pair *);
+void nfs3svc_release_fhandle(struct svc_rqst *);
+void nfs3svc_release_fhandle2(struct svc_rqst *);
 int nfs3svc_encode_entry(void *, const char *name,
 				int namlen, loff_t offset, u64 ino,
 				unsigned int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 8fda4abdf3b12..a158579d55a2b 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -743,7 +743,7 @@ extern __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *,
 		struct nfsd4_release_lockowner *rlockowner);
-extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp);
+extern void nfsd4_release_compoundargs(struct svc_rqst *rqstp);
 extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
 extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3b58c55614c75..c73194e9c2bdb 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -424,7 +424,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	kxdrproc_t		pc_decode;	/* XDR decode args */
 	kxdrproc_t		pc_encode;	/* XDR encode result */
-	kxdrproc_t		pc_release;	/* XDR free result */
+	/* XDR free result: */
+	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
 	unsigned int		pc_count;	/* call count */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 95335455ad385..4611cb7adc041 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1287,12 +1287,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (*statp == rpc_drop_reply ||
 		    test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 		if (*statp == rpc_autherr_badcred) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
 		if (*statp == rpc_success &&
@@ -1307,7 +1307,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (!versp->vs_dispatch(rqstp, statp)) {
 			/* Release reply info */
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 	}
@@ -1318,7 +1318,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Release reply info */
 	if (procp->pc_release)
-		procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+		procp->pc_release(rqstp);
 
 	if (procp->pc_encode == NULL)
 		goto dropit;
-- 
GitLab


From cc6acc20a606f9ca65a6338af8204b7ae9e67b1a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:01:48 +0200
Subject: [PATCH 1449/1958] sunrpc: properly type pc_decode callbacks

Drop the argp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 29 ++++++++++-----
 fs/lockd/xdr4.c            | 29 ++++++++++-----
 fs/nfs/callback_xdr.c      |  4 +--
 fs/nfsd/nfs2acl.c          | 21 ++++++-----
 fs/nfsd/nfs3acl.c          | 11 +++---
 fs/nfsd/nfs3proc.c         | 42 +++++++++++-----------
 fs/nfsd/nfs3xdr.c          | 74 ++++++++++++++++++++++----------------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfs4xdr.c          |  4 ++-
 fs/nfsd/nfsproc.c          | 36 +++++++++----------
 fs/nfsd/nfssvc.c           |  5 ++-
 fs/nfsd/nfsxdr.c           | 53 ++++++++++++++++-----------
 fs/nfsd/xdr.h              | 34 +++++++-----------
 fs/nfsd/xdr3.h             | 47 +++++++++---------------
 fs/nfsd/xdr4.h             |  3 +-
 include/linux/lockd/xdr.h  | 18 +++++-----
 include/linux/lockd/xdr4.h | 18 +++++-----
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  9 +++--
 21 files changed, 237 insertions(+), 209 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 3e4cba029d3d6..804744f7528c0 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -496,7 +496,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
-   .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
+   .pc_decode	= nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3add50661fab6..204a698f7d410 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -538,7 +538,7 @@ struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
-   .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
+   .pc_decode	= nlmsvc_decode_##xargt,		\
    .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5b651daad5183..b57af63fba56b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -182,8 +182,9 @@ nlm_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -207,8 +208,9 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -227,8 +229,9 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -243,8 +246,10 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm_decode_lock(p, &argp->lock)))
 		return 0;
@@ -253,8 +258,9 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -293,8 +299,9 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -305,8 +312,10 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -316,8 +325,10 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 }
 
 int
-nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -325,7 +336,7 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index dfa4789cd4605..46e18598a15c4 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -179,8 +179,9 @@ nlm4_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -204,8 +205,9 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -224,8 +226,9 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -240,8 +243,10 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm4_decode_lock(p, &argp->lock)))
 		return 0;
@@ -250,8 +255,9 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -290,8 +296,9 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -302,8 +309,10 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -313,8 +322,10 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp
 }
 
 int
-nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -322,7 +333,7 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 5a14bdaa5986d..23ecbf7a40c17 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -58,7 +58,7 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 	return htonl(NFS4_OK);
 }
 
-static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
@@ -998,7 +998,7 @@ static struct callback_op callback_ops[] = {
 static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
-		.pc_decode = (kxdrproc_t)nfs4_decode_void,
+		.pc_decode = nfs4_decode_void,
 		.pc_encode = (kxdrproc_t)nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 302441027f508..bcfdaa83ee6c3 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -182,9 +182,10 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 /*
  * XDR decode functions
  */
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *argp)
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -194,9 +195,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *argp)
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -220,18 +221,20 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_fhandle *argp)
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessargs *argp)
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -351,7 +354,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
 	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 56cdff4e954c5..4e68d6b5f4095 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -124,9 +124,10 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 /*
  * XDR decode functions
  */
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *args)
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *args = rqstp->rq_argp;
+
 	p = nfs3svc_decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -136,9 +137,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *args)
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -241,7 +242,7 @@ struct nfsd3_voidargs { int dummy; };
 #define PROC(name, argt, rest, relt, cache, respsize)			\
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
-	.pc_decode	= (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+	.pc_decode	= nfs3svc_decode_##argt##args,			\
 	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f0cccc0768ceb..ed83e8a9e7b47 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -685,7 +685,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
@@ -695,7 +695,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
+		.pc_decode = nfs3svc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
@@ -705,7 +705,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -715,7 +715,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
+		.pc_decode = nfs3svc_decode_accessargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
+		.pc_decode = nfs3svc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
@@ -735,7 +735,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
+		.pc_decode = nfs3svc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
@@ -745,7 +745,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
+		.pc_decode = nfs3svc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
@@ -755,7 +755,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
+		.pc_decode = nfs3svc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
@@ -765,7 +765,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
+		.pc_decode = nfs3svc_decode_mkdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
@@ -775,7 +775,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
+		.pc_decode = nfs3svc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
@@ -785,7 +785,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
+		.pc_decode = nfs3svc_decode_mknodargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
@@ -795,7 +795,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -805,7 +805,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_decode = nfs3svc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
@@ -815,7 +815,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
+		.pc_decode = nfs3svc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
@@ -825,7 +825,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
+		.pc_decode = nfs3svc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
@@ -835,7 +835,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
+		.pc_decode = nfs3svc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
@@ -844,7 +844,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
+		.pc_decode = nfs3svc_decode_readdirplusargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
@@ -853,7 +853,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
@@ -862,7 +862,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
@@ -871,7 +871,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
@@ -880,7 +880,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
+		.pc_decode = nfs3svc_decode_commitargs,
 		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index c3d3ef28347c7..c56089ac2819b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -273,8 +273,10 @@ void fill_post_wcc(struct svc_fh *fhp)
  * XDR decode functions
  */
 int
-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -282,9 +284,10 @@ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *a
 }
 
 int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_sattrargs *args)
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -300,9 +303,10 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropargs *args)
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -311,9 +315,10 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessargs *args)
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -323,9 +328,9 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readargs *args)
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
@@ -353,9 +358,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeargs *args)
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	unsigned int len, v, hdr, dlen;
 	u32 max_blocksize = svc_max_payload(rqstp);
 	struct kvec *head = rqstp->rq_arg.head;
@@ -413,9 +418,10 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -435,10 +441,12 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 
 	return xdr_argsize_check(rqstp, p);
 }
+
 int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh)) ||
 	    !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -448,9 +456,9 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_symlinkargs *args)
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
 	unsigned int len, avail;
 	char *old, *new;
 	struct kvec *vec;
@@ -500,9 +508,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_mknodargs *args)
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_mknodargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -522,9 +531,10 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameargs *args)
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -535,9 +545,10 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkargs *args)
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -547,9 +558,10 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkargs *args)
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -559,9 +571,9 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -576,9 +588,9 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
@@ -602,9 +614,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitargs *args)
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 43b2e5ab44307..19c7d4b989c49 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2527,7 +2527,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
-		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
+		.pc_decode = nfs4svc_decode_compoundargs,
 		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5aa847bdfc630..3a7e117bd11eb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4561,8 +4561,10 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 }
 
 int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
 	if (rqstp->rq_arg.head[0].iov_len % 4) {
 		/* client is nuts */
 		dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index dc32e0f8480d6..d351d0ef6d348 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -576,7 +576,7 @@ struct nfsd_void { int dummy; };
 static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -585,7 +585,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
@@ -595,7 +595,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
+		.pc_decode = nfssvc_decode_sattrargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
@@ -604,7 +604,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_ROOT] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -613,7 +613,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
@@ -623,7 +623,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
+		.pc_decode = nfssvc_decode_readlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
@@ -632,7 +632,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
+		.pc_decode = nfssvc_decode_readargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
@@ -641,7 +641,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 		.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
 	},
 	[NFSPROC_WRITECACHE] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_decode = nfssvc_decode_void,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -650,7 +650,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
+		.pc_decode = nfssvc_decode_writeargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
@@ -660,7 +660,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -670,7 +670,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -679,7 +679,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
+		.pc_decode = nfssvc_decode_renameargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -688,7 +688,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
+		.pc_decode = nfssvc_decode_linkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -697,7 +697,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
+		.pc_decode = nfssvc_decode_symlinkargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_decode = nfssvc_decode_createargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_decode = nfssvc_decode_diropargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
@@ -725,7 +725,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
+		.pc_decode = nfssvc_decode_readdirargs,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
@@ -733,7 +733,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d64895fd8d25b..3e00499d7ad7e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -801,9 +801,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
 	/* Decode arguments */
-	xdr = proc->pc_decode;
-	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
-			rqstp->rq_argp)) {
+	if (proc->pc_decode &&
+	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
 		dprintk("nfsd: failed to decode arguments!\n");
 		*statp = rpc_garbage_args;
 		return 1;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 2facfc6ac8f3f..19cf04ebf3884 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -206,14 +206,16 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
  * XDR decode functions
  */
 int
-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -221,9 +223,10 @@ nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *ar
 }
 
 int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_sattrargs *args)
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -233,9 +236,10 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropargs *args)
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -244,9 +248,9 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readargs *args)
+nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	p = decode_fh(p, &args->fh);
@@ -276,9 +280,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_writeargs *args)
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_writeargs *args = rqstp->rq_argp;
 	unsigned int len, hdr, dlen;
 	struct kvec *head = rqstp->rq_arg.head;
 	int v;
@@ -332,9 +336,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_createargs *args)
+nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_createargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->fh))
 	    || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -344,9 +349,10 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_renameargs *args)
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -357,8 +363,10 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -368,9 +376,10 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 }
 
 int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_linkargs *args)
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -380,9 +389,10 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_symlinkargs *args)
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_symlinkargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->ffh))
 	    || !(p = decode_filename(p, &args->fname, &args->flen))
 	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
@@ -393,9 +403,10 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirargs *args)
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 2c21fa843fbf6..8eeb752cf6f8a 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -131,28 +131,18 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd_sattrargs *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd_diropargs *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readargs *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd_writeargs *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd_createargs *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd_renameargs *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readlinkargs *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_linkargs *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_symlinkargs *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readdirargs *);
+int nfssvc_decode_void(struct svc_rqst *, __be32 *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
 int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
 int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 23fe456a223be..f79be4c42e4a5 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -269,37 +269,22 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_sattrargs *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropargs *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessargs *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readargs *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_writeargs *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_mknodargs *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameargs *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkargs *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkargs *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_symlinkargs *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitargs *);
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
 int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
 int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
 				struct nfsd3_attrstat *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index a158579d55a2b..2a53c12338842 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -683,8 +683,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
 int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundargs *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
 int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
 		struct nfsd4_compoundres *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d39ed1cc5fbf4..0416600844ceb 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -95,19 +95,19 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index e58c88b52ce13..951bbe31fdb83 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -23,19 +23,19 @@
 
 
-int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
+int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index c73194e9c2bdb..d8703a5ab81e6 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -422,7 +422,8 @@ struct svc_version {
 struct svc_procedure {
 	/* process the request: */
 	__be32			(*pc_func)(struct svc_rqst *);
-	kxdrproc_t		pc_decode;	/* XDR decode args */
+	/* XDR decode args: */
+	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
 	kxdrproc_t		pc_encode;	/* XDR encode result */
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4611cb7adc041..18024c1b9b7b3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1276,9 +1276,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Call the function that processes the request. */
 	if (!versp->vs_dispatch) {
-		/* Decode arguments */
-		xdr = procp->pc_decode;
-		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
+		/*
+		 * Decode arguments
+		 * XXX: why do we ignore the return value?
+		 */
+		if (procp->pc_decode &&
+		    !procp->pc_decode(rqstp, argv->iov_base))
 			goto err_garbage;
 
 		*statp = procp->pc_func(rqstp);
-- 
GitLab


From d16d1867215663907f3212590d1a9d32398a0f47 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:42:02 +0200
Subject: [PATCH 1450/1958] sunrpc: properly type pc_encode callbacks

Drop the resp argument as it can trivially be derived from the rqstp
argument.  With that all functions now have the same prototype, and we
can remove the unsafe casting to kxdrproc_t.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc4proc.c        |  2 +-
 fs/lockd/svcproc.c         |  2 +-
 fs/lockd/xdr.c             | 14 +++++---
 fs/lockd/xdr4.c            | 14 +++++---
 fs/nfs/callback_xdr.c      |  6 ++--
 fs/nfsd/nfs2acl.c          | 18 +++++-----
 fs/nfsd/nfs3acl.c          | 11 +++---
 fs/nfsd/nfs3proc.c         | 44 +++++++++++------------
 fs/nfsd/nfs3xdr.c          | 74 ++++++++++++++++++++++----------------
 fs/nfsd/nfs4proc.c         |  4 +--
 fs/nfsd/nfs4xdr.c          |  5 +--
 fs/nfsd/nfsproc.c          | 36 +++++++++----------
 fs/nfsd/nfssvc.c           |  5 +--
 fs/nfsd/nfsxdr.c           | 31 +++++++++-------
 fs/nfsd/xdr.h              | 14 ++++----
 fs/nfsd/xdr3.h             | 45 +++++++++--------------
 fs/nfsd/xdr4.h             |  5 ++-
 include/linux/lockd/xdr.h  |  8 ++---
 include/linux/lockd/xdr4.h |  8 ++---
 include/linux/sunrpc/svc.h |  3 +-
 net/sunrpc/svc.c           |  6 ++--
 21 files changed, 185 insertions(+), 170 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 804744f7528c0..fed0161557917 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -497,7 +497,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlm4svc_proc_##name,	\
    .pc_decode	= nlm4svc_decode_##xargt,	\
-   .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
+   .pc_encode	= nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 204a698f7d410..14648b051eba0 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -539,7 +539,7 @@ struct nlm_void			{ int dummy; };
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
  { .pc_func	= nlmsvc_proc_##name,			\
    .pc_decode	= nlmsvc_decode_##xargt,		\
-   .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
+   .pc_encode	= nlmsvc_encode_##xrest,		\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b57af63fba56b..442bbd0b0b293 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -200,8 +200,10 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -280,8 +282,10 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -290,8 +294,10 @@ nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -342,7 +348,7 @@ nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 46e18598a15c4..2a0cd5679c49d 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -197,8 +197,10 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
@@ -277,8 +279,10 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -287,8 +291,10 @@ nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -339,7 +345,7 @@ nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23ecbf7a40c17..acf75dc63e14e 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -63,7 +63,7 @@ static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
@@ -999,12 +999,12 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
 	[CB_COMPOUND] = {
 		.pc_func = nfs4_callback_compound,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_argsize = 256,
 		.pc_ressize = 256,
 		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index bcfdaa83ee6c3..fc6b179c8fff7 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -251,15 +251,15 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
  * There must be an encoding function for void results so svc_process
  * will work properly.
  */
-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode;
 	struct kvec *head = rqstp->rq_res.head;
@@ -302,17 +302,19 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessres *resp)
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->access);
 	return xdr_ressize_check(rqstp, p);
@@ -355,7 +357,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsacld_proc_##name,				\
 	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
-	.pc_encode	= (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+	.pc_encode	= nfsaclsvc_encode_##rest##res,			\
 	.pc_release	= nfsaclsvc_release_##relt,	\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 4e68d6b5f4095..9437b758cbfdb 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -168,9 +168,9 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
  */
 
 /* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
@@ -213,9 +213,10 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 /* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_attrstat *resp)
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
 
 	return xdr_ressize_check(rqstp, p);
@@ -243,7 +244,7 @@ struct nfsd3_voidargs { int dummy; };
 {									\
 	.pc_func	= nfsd3_proc_##name,				\
 	.pc_decode	= nfs3svc_decode_##argt##args,			\
-	.pc_encode	= (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+	.pc_encode	= nfs3svc_encode_##rest##res,			\
 	.pc_release	= nfs3svc_release_##relt,			\
 	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
 	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index ed83e8a9e7b47..17c90c41a3a68 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -677,7 +677,7 @@ struct nfsd3_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
+		.pc_encode = nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -686,7 +686,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
+		.pc_encode = nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
@@ -696,7 +696,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
 		.pc_decode = nfs3svc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -706,7 +706,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
+		.pc_encode = nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
@@ -716,7 +716,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
 		.pc_decode = nfs3svc_decode_accessargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
+		.pc_encode = nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
 		.pc_decode = nfs3svc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
+		.pc_encode = nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
@@ -736,7 +736,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
 		.pc_decode = nfs3svc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
+		.pc_encode = nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
@@ -746,7 +746,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
 		.pc_decode = nfs3svc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
+		.pc_encode = nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
@@ -756,7 +756,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
 		.pc_decode = nfs3svc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -766,7 +766,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
 		.pc_decode = nfs3svc_decode_mkdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -776,7 +776,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
 		.pc_decode = nfs3svc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -786,7 +786,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
 		.pc_decode = nfs3svc_decode_mknodargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
@@ -796,7 +796,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -806,7 +806,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
@@ -816,7 +816,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
 		.pc_decode = nfs3svc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
+		.pc_encode = nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
@@ -826,7 +826,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
 		.pc_decode = nfs3svc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
+		.pc_encode = nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
@@ -836,7 +836,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
 		.pc_decode = nfs3svc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -845,7 +845,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
 		.pc_decode = nfs3svc_decode_readdirplusargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
@@ -854,7 +854,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
+		.pc_encode = nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
 		.pc_cachetype = RC_NOCACHE,
@@ -863,7 +863,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
+		.pc_encode = nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
 		.pc_cachetype = RC_NOCACHE,
@@ -872,7 +872,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
+		.pc_encode = nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
 		.pc_cachetype = RC_NOCACHE,
@@ -881,7 +881,7 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
 		.pc_decode = nfs3svc_decode_commitargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
+		.pc_encode = nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index c56089ac2819b..b8838d3023ff2 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -634,16 +634,17 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
  * will work properly.
  */
 int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETATTR */
 int
-nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		lease_get_mtime(d_inode(resp->fh.fh_dentry),
 				&resp->stat.mtime);
@@ -654,18 +655,20 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
 
 /* SETATTR, REMOVE, RMDIR */
 int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* LOOKUP */
 int
-nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		p = encode_fh(p, &resp->fh);
 		p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -676,9 +679,10 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
 
 /* ACCESS */
 int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessres *resp)
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0)
 		*p++ = htonl(resp->access);
@@ -687,9 +691,10 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READLINK */
 int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkres *resp)
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->len);
@@ -708,9 +713,10 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READ */
 int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readres *resp)
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
@@ -732,9 +738,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 
 /* WRITE */
 int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeres *resp)
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -749,9 +755,10 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
 int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		*p++ = xdr_one;
 		p = encode_fh(p, &resp->fh);
@@ -763,9 +770,10 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
 
 /* RENAME */
 int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameres *resp)
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->ffh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -773,9 +781,10 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
 
 /* LINK */
 int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkres *resp)
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -783,9 +792,10 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READDIR */
 int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirres *resp)
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 
 	if (resp->status == 0) {
@@ -1033,9 +1043,9 @@ nfs3svc_encode_entry_plus(void *cd, const char *name,
 
 /* FSSTAT */
 int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsstatres *resp)
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	struct kstatfs	*s = &resp->stats;
 	u64		bs = s->f_bsize;
 
@@ -1055,9 +1065,10 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
 
 /* FSINFO */
 int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsinfores *resp)
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1079,9 +1090,10 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
 
 /* PATHCONF */
 int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_pathconfres *resp)
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1098,9 +1110,9 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
 
 /* COMMIT */
 int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitres *resp)
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 19c7d4b989c49..726d28376f7b5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2519,7 +2519,7 @@ struct nfsd4_voidargs { int dummy; };
 static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
+		.pc_encode = nfs4svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd4_voidargs),
 		.pc_ressize = sizeof(struct nfsd4_voidres),
 		.pc_cachetype = RC_NOCACHE,
@@ -2528,7 +2528,7 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
 		.pc_decode = nfs4svc_decode_compoundargs,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
+		.pc_encode = nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
 		.pc_release = nfsd4_release_compoundargs,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3a7e117bd11eb..54e212e3541eb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4538,7 +4538,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 }
 
 int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
         return xdr_ressize_check(rqstp, p);
 }
@@ -4584,11 +4584,12 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp)
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
 {
 	/*
 	 * All that remains is to write the tag and operation count...
 	 */
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_buf *buf = resp->xdr.buf;
 
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index d351d0ef6d348..0ef88d0e67d9d 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -577,7 +577,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -586,7 +586,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -596,7 +596,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = nfssvc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -605,7 +605,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_ROOT] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -614,7 +614,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -624,7 +624,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
 		.pc_decode = nfssvc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
+		.pc_encode = nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
@@ -633,7 +633,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
 		.pc_decode = nfssvc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
+		.pc_encode = nfssvc_encode_readres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
@@ -642,7 +642,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	},
 	[NFSPROC_WRITECACHE] = {
 		.pc_decode = nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
@@ -651,7 +651,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = nfssvc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
@@ -661,7 +661,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -671,7 +671,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -680,7 +680,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
 		.pc_decode = nfssvc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -689,7 +689,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
 		.pc_decode = nfssvc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -698,7 +698,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
 		.pc_decode = nfssvc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -707,7 +707,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
 		.pc_decode = nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
@@ -717,7 +717,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
@@ -726,7 +726,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
 		.pc_decode = nfssvc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
+		.pc_encode = nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
 		.pc_cachetype = RC_NOCACHE,
@@ -734,7 +734,7 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
 		.pc_decode = nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
+		.pc_encode = nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
 		.pc_cachetype = RC_NOCACHE,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 3e00499d7ad7e..5552336641246 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -782,7 +782,6 @@ int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	struct svc_procedure	*proc;
-	kxdrproc_t		xdr;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
@@ -841,9 +840,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 * For NFSv2, additional info is never returned in case of an error.
 	 */
 	if (!(nfserr && rqstp->rq_vers == 2)) {
-		xdr = proc->pc_encode;
-		if (xdr && !xdr(rqstp, nfserrp,
-				rqstp->rq_resp)) {
+		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
 			/* Failed to encode result. Release cache entry */
 			dprintk("nfsd: failed to encode result!\n");
 			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 19cf04ebf3884..e4da2717982d0 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -422,32 +422,35 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
  * XDR encode functions
  */
 int
-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_attrstat *resp)
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropres *resp)
+nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropres *resp = rqstp->rq_resp;
+
 	p = encode_fh(p, &resp->fh);
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readlinkres *resp)
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
+
 	*p++ = htonl(resp->len);
 	xdr_ressize_check(rqstp, p);
 	rqstp->rq_res.page_len = resp->len;
@@ -461,9 +464,10 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readres *resp)
+nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readres *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->count);
 	xdr_ressize_check(rqstp, p);
@@ -480,9 +484,10 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirres *resp)
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
+
 	xdr_ressize_check(rqstp, p);
 	p = resp->buffer;
 	*p++ = 0;			/* no more entries */
@@ -493,9 +498,9 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_statfsres *resp)
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	struct kstatfs	*stat = &resp->stats;
 
 	*p++ = htonl(NFSSVC_MAXBLKSIZE_V2);	/* max transfer size */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 8eeb752cf6f8a..457ce45e5084f 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -143,13 +143,13 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
 
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index f79be4c42e4a5..80d7da620e912 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -285,35 +285,22 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
 int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessres *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkres *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameres *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkres *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirres *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsstatres *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsinfores *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
-				struct nfsd3_pathconfres *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitres *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
 
 void nfs3svc_release_fhandle(struct svc_rqst *);
 void nfs3svc_release_fhandle2(struct svc_rqst *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2a53c12338842..eb7f9239304ff 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -682,10 +682,9 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
 int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundres *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 0416600844ceb..7acbecc21a401 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -96,16 +96,16 @@ struct nlm_reboot {
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
 int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 951bbe31fdb83..bf16456092254 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -24,16 +24,16 @@
 
 
 int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
+int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
+int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
 int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d8703a5ab81e6..bd9e313c444af 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -424,7 +424,8 @@ struct svc_procedure {
 	__be32			(*pc_func)(struct svc_rqst *);
 	/* XDR decode args: */
 	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
-	kxdrproc_t		pc_encode;	/* XDR encode result */
+	/* XDR encode result: */
+	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 18024c1b9b7b3..aa643a29fdc6f 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1154,7 +1154,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	struct svc_version	*versp = NULL;	/* compiler food */
 	struct svc_procedure	*procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
-	kxdrproc_t		xdr;
 	__be32			*statp;
 	u32			prog, vers, proc;
 	__be32			auth_stat, rpc_stat;
@@ -1298,9 +1297,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
-		if (*statp == rpc_success &&
-		    (xdr = procp->pc_encode) &&
-		    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+		if (*statp == rpc_success && procp->pc_encode &&
+		    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
 			dprintk("svc: failed to encode reply\n");
 			/* serv->sv_stats->rpcsystemerr++; */
 			*statp = rpc_system_err;
-- 
GitLab


From 408b3d46ae06e1d219f31cbe629789a5e5c862aa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 19:56:10 +0200
Subject: [PATCH 1451/1958] sunrpc: remove kxdrproc_t

Remove the now unused typedef.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/sunrpc/xdr.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index ed0fbf0d8d0f5..261b48a2701d2 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -34,13 +34,6 @@ struct xdr_netobj {
 	u8 *			data;
 };
 
-/*
- * This is the legacy generic XDR function. rqstp is either a rpc_rqst
- * (client side) or svc_rqst pointer (server side).
- * Encode functions always assume there's enough room in the buffer.
- */
-typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
-
 /*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
-- 
GitLab


From 6c9600a71d9c00a49e6a915b228f3f0b542a6067 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:03:15 +0200
Subject: [PATCH 1452/1958] nfsd4: properly type op_set_currentstateid
 callbacks

Given the args union in struct nfsd4_op a name, and pass it to the
op_set_currentstateid callbacks instead of using unsafe function
pointer casts.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/current_stateid.h | 12 ++++++++----
 fs/nfsd/nfs4proc.c        | 12 ++++++------
 fs/nfsd/nfs4state.c       | 20 ++++++++++++--------
 fs/nfsd/xdr4.h            |  2 +-
 4 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h
index 4123551208d88..3c80a7b5302da 100644
--- a/fs/nfsd/current_stateid.h
+++ b/fs/nfsd/current_stateid.h
@@ -8,10 +8,14 @@ extern void clear_current_stateid(struct nfsd4_compound_state *cstate);
 /*
  * functions to set current state id
  */
-extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
-extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *);
-extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *);
-extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
+extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_openstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_closestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
 
 /*
  * functions to consume current state id
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 726d28376f7b5..91cee9e2b4f39 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1525,7 +1525,6 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
 
-typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
 typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
 
 enum nfsd4_op_flags {
@@ -1565,7 +1564,8 @@ struct nfsd4_operation {
 	/* Try to get response size before operation */
 	nfsd4op_rsize op_rsize_bop;
 	stateid_getter op_get_currentstateid;
-	stateid_setter op_set_currentstateid;
+	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 };
 
 static struct nfsd4_operation nfsd4_ops[];
@@ -2104,7 +2104,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_CLOSE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid,
+		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
 	[OP_COMMIT] = {
 		.op_func = (nfsd4op_func)nfsd4_commit,
@@ -2148,7 +2148,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCK",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid,
+		.op_set_currentstateid = nfsd4_set_lockstateid,
 	},
 	[OP_LOCKT] = {
 		.op_func = (nfsd4op_func)nfsd4_lockt,
@@ -2184,7 +2184,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid,
+		.op_set_currentstateid = nfsd4_set_openstateid,
 	},
 	[OP_OPEN_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_open_confirm,
@@ -2198,7 +2198,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_OPEN_DOWNGRADE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid,
+		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
 	[OP_PUTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putfh,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 22002fb75a182..f9552357923e8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7103,27 +7103,31 @@ clear_current_stateid(struct nfsd4_compound_state *cstate)
  * functions to set current state id
  */
 void
-nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &odp->od_stateid);
+	put_stateid(cstate, &u->open_downgrade.od_stateid);
 }
 
 void
-nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+nfsd4_set_openstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &open->op_stateid);
+	put_stateid(cstate, &u->open.op_stateid);
 }
 
 void
-nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+nfsd4_set_closestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &close->cl_stateid);
+	put_stateid(cstate, &u->close.cl_stateid);
 }
 
 void
-nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
+nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &lock->lk_resp_stateid);
+	put_stateid(cstate, &u->lock.lk_resp_stateid);
 }
 
 /*
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index eb7f9239304ff..b625f4aa1061e 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -539,7 +539,7 @@ struct nfsd4_seek {
 struct nfsd4_op {
 	int					opnum;
 	__be32					status;
-	union {
+	union nfsd4_op_u {
 		struct nfsd4_access		access;
 		struct nfsd4_close		close;
 		struct nfsd4_commit		commit;
-- 
GitLab


From c2a1102aa2add2f6757586541fc9c22d9b71bdf3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:37:33 +0200
Subject: [PATCH 1453/1958] nfsd4: properly type op_get_currentstateid
 callbacks

Pass union nfsd4_op_u to the op_set_currentstateid callbacks instead of
using unsafe function pointer casts.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/current_stateid.h | 24 +++++++++++++++--------
 fs/nfsd/nfs4proc.c        | 21 ++++++++++----------
 fs/nfsd/nfs4state.c       | 40 +++++++++++++++++++++++----------------
 3 files changed, 50 insertions(+), 35 deletions(-)

diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h
index 3c80a7b5302da..34075cee573a0 100644
--- a/fs/nfsd/current_stateid.h
+++ b/fs/nfsd/current_stateid.h
@@ -20,13 +20,21 @@ extern void nfsd4_set_closestateid(struct nfsd4_compound_state *,
 /*
  * functions to consume current state id
  */
-extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
-extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *);
-extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *);
-extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *);
-extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
-extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *);
-extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *);
-extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *);
+extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_freestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_closestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_readstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_writestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
 
 #endif   /* _NFSD4_CURRENT_STATE_H */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 91cee9e2b4f39..c06ed1cdfca9a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1525,8 +1525,6 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
 typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
 
-typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
-
 enum nfsd4_op_flags {
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
 	ALLOWED_ON_ABSENT_FS = 1 << 1,	/* ops processed on absent fs */
@@ -1563,7 +1561,8 @@ struct nfsd4_operation {
 	char *op_name;
 	/* Try to get response size before operation */
 	nfsd4op_rsize op_rsize_bop;
-	stateid_getter op_get_currentstateid;
+	void (*op_get_currentstateid)(struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
 			union nfsd4_op_u *);
 };
@@ -2103,7 +2102,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CLOSE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid,
+		.op_get_currentstateid = nfsd4_get_closestateid,
 		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
 	[OP_COMMIT] = {
@@ -2123,7 +2122,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DELEGRETURN",
 		.op_rsize_bop = nfsd4_only_status_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid,
+		.op_get_currentstateid = nfsd4_get_delegreturnstateid,
 	},
 	[OP_GETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_getattr,
@@ -2160,7 +2159,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCKU",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid,
+		.op_get_currentstateid = nfsd4_get_lockustateid,
 	},
 	[OP_LOOKUP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookup,
@@ -2197,7 +2196,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_DOWNGRADE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid,
+		.op_get_currentstateid = nfsd4_get_opendowngradestateid,
 		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
 	[OP_PUTFH] = {
@@ -2225,7 +2224,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
 		.op_name = "OP_READ",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
+		.op_get_currentstateid = nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
 		.op_func = (nfsd4op_func)nfsd4_readdir,
@@ -2281,7 +2280,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_name = "OP_SETATTR",
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid,
+		.op_get_currentstateid = nfsd4_get_setattrstateid,
 	},
 	[OP_SETCLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid,
@@ -2307,7 +2306,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_WRITE",
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid,
+		.op_get_currentstateid = nfsd4_get_writestateid,
 	},
 	[OP_RELEASE_LOCKOWNER] = {
 		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
@@ -2387,7 +2386,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
+		.op_get_currentstateid = nfsd4_get_freestateid,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
 #ifdef CONFIG_NFSD_PNFS
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f9552357923e8..01ab21f8d34db 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7135,49 +7135,57 @@ nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate,
  */
 
 void
-nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &odp->od_stateid);
+	get_stateid(cstate, &u->open_downgrade.od_stateid);
 }
 
 void
-nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
+nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &drp->dr_stateid);
+	get_stateid(cstate, &u->delegreturn.dr_stateid);
 }
 
 void
-nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
+nfsd4_get_freestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &fsp->fr_stateid);
+	get_stateid(cstate, &u->free_stateid.fr_stateid);
 }
 
 void
-nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
+nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &setattr->sa_stateid);
+	get_stateid(cstate, &u->setattr.sa_stateid);
 }
 
 void
-nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+nfsd4_get_closestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &close->cl_stateid);
+	get_stateid(cstate, &u->close.cl_stateid);
 }
 
 void
-nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
+nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &locku->lu_stateid);
+	get_stateid(cstate, &u->locku.lu_stateid);
 }
 
 void
-nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
+nfsd4_get_readstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &read->rd_stateid);
+	get_stateid(cstate, &u->read.rd_stateid);
 }
 
 void
-nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
+nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &write->wr_stateid);
+	get_stateid(cstate, &u->write.wr_stateid);
 }
-- 
GitLab


From 62bbf8bbb2fa3aba16e29585b3396264c1a2b2f4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:42:10 +0200
Subject: [PATCH 1454/1958] nfsd4: remove nfsd4op_rsize

Except for a lot of unnecessary casts this typedef only has one user,
so remove the casts and expand it in struct nfsd4_operation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4proc.c | 109 ++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 55 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c06ed1cdfca9a..c1e31edf922d3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1523,7 +1523,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 
 typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
 			      void *);
-typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
 
 enum nfsd4_op_flags {
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
@@ -1560,7 +1559,7 @@ struct nfsd4_operation {
 	u32 op_flags;
 	char *op_name;
 	/* Try to get response size before operation */
-	nfsd4op_rsize op_rsize_bop;
+	u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
 	void (*op_get_currentstateid)(struct nfsd4_compound_state *,
 			union nfsd4_op_u *);
 	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
@@ -2095,13 +2094,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
 		.op_func = (nfsd4op_func)nfsd4_access,
 		.op_name = "OP_ACCESS",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
+		.op_rsize_bop = nfsd4_access_rsize,
 	},
 	[OP_CLOSE] = {
 		.op_func = (nfsd4op_func)nfsd4_close,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CLOSE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_closestateid,
 		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
@@ -2109,13 +2108,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_commit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_COMMIT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize,
+		.op_rsize_bop = nfsd4_commit_rsize,
 	},
 	[OP_CREATE] = {
 		.op_func = (nfsd4op_func)nfsd4_create,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID,
 		.op_name = "OP_CREATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
+		.op_rsize_bop = nfsd4_create_rsize,
 	},
 	[OP_DELEGRETURN] = {
 		.op_func = (nfsd4op_func)nfsd4_delegreturn,
@@ -2133,69 +2132,69 @@ static struct nfsd4_operation nfsd4_ops[] = {
 	[OP_GETFH] = {
 		.op_func = (nfsd4op_func)nfsd4_getfh,
 		.op_name = "OP_GETFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
+		.op_rsize_bop = nfsd4_getfh_rsize,
 	},
 	[OP_LINK] = {
 		.op_func = (nfsd4op_func)nfsd4_link,
 		.op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING
 				| OP_CACHEME,
 		.op_name = "OP_LINK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize,
+		.op_rsize_bop = nfsd4_link_rsize,
 	},
 	[OP_LOCK] = {
 		.op_func = (nfsd4op_func)nfsd4_lock,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
+		.op_rsize_bop = nfsd4_lock_rsize,
 		.op_set_currentstateid = nfsd4_set_lockstateid,
 	},
 	[OP_LOCKT] = {
 		.op_func = (nfsd4op_func)nfsd4_lockt,
 		.op_name = "OP_LOCKT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
+		.op_rsize_bop = nfsd4_lock_rsize,
 	},
 	[OP_LOCKU] = {
 		.op_func = (nfsd4op_func)nfsd4_locku,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCKU",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_lockustateid,
 	},
 	[OP_LOOKUP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookup,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUP",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_LOOKUPP] = {
 		.op_func = (nfsd4op_func)nfsd4_lookupp,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUPP",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_NVERIFY] = {
 		.op_func = (nfsd4op_func)nfsd4_nverify,
 		.op_name = "OP_NVERIFY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_OPEN] = {
 		.op_func = (nfsd4op_func)nfsd4_open,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
+		.op_rsize_bop = nfsd4_open_rsize,
 		.op_set_currentstateid = nfsd4_set_openstateid,
 	},
 	[OP_OPEN_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_open_confirm,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_CONFIRM",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 	},
 	[OP_OPEN_DOWNGRADE] = {
 		.op_func = (nfsd4op_func)nfsd4_open_downgrade,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_DOWNGRADE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_opendowngradestateid,
 		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
@@ -2204,56 +2203,56 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTPUBFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTPUBFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTROOTFH] = {
 		.op_func = (nfsd4op_func)nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTROOTFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_READ] = {
 		.op_func = (nfsd4op_func)nfsd4_read,
 		.op_name = "OP_READ",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
+		.op_rsize_bop = nfsd4_read_rsize,
 		.op_get_currentstateid = nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
 		.op_func = (nfsd4op_func)nfsd4_readdir,
 		.op_name = "OP_READDIR",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
+		.op_rsize_bop = nfsd4_readdir_rsize,
 	},
 	[OP_READLINK] = {
 		.op_func = (nfsd4op_func)nfsd4_readlink,
 		.op_name = "OP_READLINK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
+		.op_rsize_bop = nfsd4_readlink_rsize,
 	},
 	[OP_REMOVE] = {
 		.op_func = (nfsd4op_func)nfsd4_remove,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_REMOVE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize,
+		.op_rsize_bop = nfsd4_remove_rsize,
 	},
 	[OP_RENAME] = {
 		.op_func = (nfsd4op_func)nfsd4_rename,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_RENAME",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize,
+		.op_rsize_bop = nfsd4_rename_rsize,
 	},
 	[OP_RENEW] = {
 		.op_func = (nfsd4op_func)nfsd4_renew,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RENEW",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 
 	},
 	[OP_RESTOREFH] = {
@@ -2261,25 +2260,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RESTOREFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SAVEFH] = {
 		.op_func = (nfsd4op_func)nfsd4_savefh,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_SAVEFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO] = {
 		.op_func = (nfsd4op_func)nfsd4_secinfo,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
+		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_SETATTR] = {
 		.op_func = (nfsd4op_func)nfsd4_setattr,
 		.op_name = "OP_SETATTR",
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
+		.op_rsize_bop = nfsd4_setattr_rsize,
 		.op_get_currentstateid = nfsd4_get_setattrstateid,
 	},
 	[OP_SETCLIENTID] = {
@@ -2287,25 +2286,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize,
+		.op_rsize_bop = nfsd4_setclientid_rsize,
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
 		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID_CONFIRM",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_VERIFY] = {
 		.op_func = (nfsd4op_func)nfsd4_verify,
 		.op_name = "OP_VERIFY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_WRITE] = {
 		.op_func = (nfsd4op_func)nfsd4_write,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_WRITE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+		.op_rsize_bop = nfsd4_write_rsize,
 		.op_get_currentstateid = nfsd4_get_writestateid,
 	},
 	[OP_RELEASE_LOCKOWNER] = {
@@ -2313,7 +2312,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RELEASE_LOCKOWNER",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 
 	/* NFSv4.1 operations */
@@ -2322,97 +2321,97 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_EXCHANGE_ID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
+		.op_rsize_bop = nfsd4_exchange_id_rsize,
 	},
 	[OP_BACKCHANNEL_CTL] = {
 		.op_func = (nfsd4op_func)nfsd4_backchannel_ctl,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BACKCHANNEL_CTL",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_BIND_CONN_TO_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BIND_CONN_TO_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize,
+		.op_rsize_bop = nfsd4_bind_conn_to_session_rsize,
 	},
 	[OP_CREATE_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_create_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CREATE_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize,
+		.op_rsize_bop = nfsd4_create_session_rsize,
 	},
 	[OP_DESTROY_SESSION] = {
 		.op_func = (nfsd4op_func)nfsd4_destroy_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SEQUENCE] = {
 		.op_func = (nfsd4op_func)nfsd4_sequence,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
 		.op_name = "OP_SEQUENCE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize,
+		.op_rsize_bop = nfsd4_sequence_rsize,
 	},
 	[OP_DESTROY_CLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_destroy_clientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_CLIENTID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_RECLAIM_COMPLETE] = {
 		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RECLAIM_COMPLETE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO_NO_NAME] = {
 		.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO_NO_NAME",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
+		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_TEST_STATEID] = {
 		.op_func = (nfsd4op_func)nfsd4_test_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_TEST_STATEID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
+		.op_rsize_bop = nfsd4_test_stateid_rsize,
 	},
 	[OP_FREE_STATEID] = {
 		.op_func = (nfsd4op_func)nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
 		.op_get_currentstateid = nfsd4_get_freestateid,
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 #ifdef CONFIG_NFSD_PNFS
 	[OP_GETDEVICEINFO] = {
 		.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_GETDEVICEINFO",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
+		.op_rsize_bop = nfsd4_getdeviceinfo_rsize,
 	},
 	[OP_LAYOUTGET] = {
 		.op_func = (nfsd4op_func)nfsd4_layoutget,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTGET",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
+		.op_rsize_bop = nfsd4_layoutget_rsize,
 	},
 	[OP_LAYOUTCOMMIT] = {
 		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTCOMMIT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
+		.op_rsize_bop = nfsd4_layoutcommit_rsize,
 	},
 	[OP_LAYOUTRETURN] = {
 		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTRETURN",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
+		.op_rsize_bop = nfsd4_layoutreturn_rsize,
 	},
 #endif /* CONFIG_NFSD_PNFS */
 
@@ -2421,30 +2420,30 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_allocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_ALLOCATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_DEALLOCATE] = {
 		.op_func = (nfsd4op_func)nfsd4_deallocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_DEALLOCATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_CLONE] = {
 		.op_func = (nfsd4op_func)nfsd4_clone,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_CLONE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_COPY] = {
 		.op_func = (nfsd4op_func)nfsd4_copy,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_COPY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
+		.op_rsize_bop = nfsd4_copy_rsize,
 	},
 	[OP_SEEK] = {
 		.op_func = (nfsd4op_func)nfsd4_seek,
 		.op_name = "OP_SEEK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
+		.op_rsize_bop = nfsd4_seek_rsize,
 	},
 };
 
-- 
GitLab


From 72edc37a2c7a30a4bc64889a4eaa8bfd3d308a3a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 20:58:35 +0200
Subject: [PATCH 1455/1958] nfsd4: properly type op_func callbacks

Pass union nfsd4_op_u to the op_func callbacks instead of using unsafe
function pointer casts.

It also adds two missing structures to struct nfsd4_op.u to facilitate
this.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4proc.c  | 234 ++++++++++++++++++++++++--------------------
 fs/nfsd/nfs4state.c |  82 ++++++++++------
 fs/nfsd/xdr4.h      |  66 ++++++-------
 3 files changed, 209 insertions(+), 173 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c1e31edf922d3..403c1d14d99af 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -344,8 +344,9 @@ copy_clientid(clientid_t *clid, struct nfsd4_session *session)
 
 static __be32
 nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_open *open)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_open *open = &u->open;
 	__be32 status;
 	struct svc_fh *resfh = NULL;
 	struct net *net = SVC_NET(rqstp);
@@ -467,14 +468,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  */
 static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op)
 {
-	struct nfsd4_open *open = (struct nfsd4_open *)&op->u;
+	struct nfsd4_open *open = &op->u.open;
 
 	if (!seqid_mutating_err(ntohl(op->status)))
 		return op->status;
 	if (nfsd4_has_session(cstate))
 		return op->status;
 	open->op_xdr_error = op->status;
-	return nfsd4_open(rqstp, cstate, open);
+	return nfsd4_open(rqstp, cstate, &op->u);
 }
 
 /*
@@ -482,19 +483,21 @@ static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_stat
  */
 static __be32
 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct svc_fh **getfh)
+	    union nfsd4_op_u *u)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
 
-	*getfh = &cstate->current_fh;
+	u->getfh = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_putfh *putfh)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_putfh *putfh = &u->putfh;
+
 	fh_put(&cstate->current_fh);
 	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
 	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
@@ -504,7 +507,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		void *arg)
+		union nfsd4_op_u *u)
 {
 	__be32 status;
 
@@ -515,7 +518,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		void *arg)
+		union nfsd4_op_u *u)
 {
 	if (!cstate->save_fh.fh_dentry)
 		return nfserr_restorefh;
@@ -530,7 +533,7 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     void *arg)
+	     union nfsd4_op_u *u)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
@@ -548,8 +551,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  */
 static __be32
 nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_access *access)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_access *access = &u->access;
+
 	if (access->ac_req_access & ~NFS3_ACCESS_FULL)
 		return nfserr_inval;
 
@@ -574,8 +579,10 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
 static __be32
 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_commit *commit)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_commit *commit = &u->commit;
+
 	gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
 	return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
 			     commit->co_count);
@@ -583,8 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_create *create)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_create *create = &u->create;
 	struct svc_fh resfh;
 	__be32 status;
 	dev_t rdev;
@@ -670,8 +678,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_getattr *getattr)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_getattr *getattr = &u->getattr;
 	__be32 status;
 
 	status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
@@ -691,8 +700,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_link *link)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_link *link = &u->link;
 	__be32 status = nfserr_nofilehandle;
 
 	if (!cstate->save_fh.fh_dentry)
@@ -723,24 +733,25 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 
 static __be32
 nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      void *arg)
+	      union nfsd4_op_u *u)
 {
 	return nfsd4_do_lookupp(rqstp, &cstate->current_fh);
 }
 
 static __be32
 nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_lookup *lookup)
+	     union nfsd4_op_u *u)
 {
 	return nfsd_lookup(rqstp, &cstate->current_fh,
-			   lookup->lo_name, lookup->lo_len,
+			   u->lookup.lo_name, u->lookup.lo_len,
 			   &cstate->current_fh);
 }
 
 static __be32
 nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_read *read)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_read *read = &u->read;
 	__be32 status;
 
 	read->rd_filp = NULL;
@@ -775,8 +786,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_readdir *readdir)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_readdir *readdir = &u->readdir;
 	u64 cookie = readdir->rd_cookie;
 	static const nfs4_verifier zeroverf;
 
@@ -800,17 +812,18 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	       struct nfsd4_readlink *readlink)
+	       union nfsd4_op_u *u)
 {
-	readlink->rl_rqstp = rqstp;
-	readlink->rl_fhp = &cstate->current_fh;
+	u->readlink.rl_rqstp = rqstp;
+	u->readlink.rl_fhp = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_remove *remove)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_remove *remove = &u->remove;
 	__be32 status;
 
 	if (opens_in_grace(SVC_NET(rqstp)))
@@ -826,8 +839,9 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_rename *rename)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_rename *rename = &u->rename;
 	__be32 status = nfserr_nofilehandle;
 
 	if (!cstate->save_fh.fh_dentry)
@@ -847,8 +861,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_secinfo *secinfo)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_secinfo *secinfo = &u->secinfo;
 	struct svc_export *exp;
 	struct dentry *dentry;
 	__be32 err;
@@ -876,11 +891,11 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_secinfo_no_name *sin)
+		union nfsd4_op_u *u)
 {
 	__be32 err;
 
-	switch (sin->sin_style) {
+	switch (u->secinfo_no_name.sin_style) {
 	case NFS4_SECINFO_STYLE4_CURRENT_FH:
 		break;
 	case NFS4_SECINFO_STYLE4_PARENT:
@@ -892,15 +907,16 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
 		return nfserr_inval;
 	}
 
-	sin->sin_exp = exp_get(cstate->current_fh.fh_export);
+	u->secinfo_no_name.sin_exp = exp_get(cstate->current_fh.fh_export);
 	fh_put(&cstate->current_fh);
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_setattr *setattr)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_setattr *setattr = &u->setattr;
 	__be32 status = nfs_ok;
 	int err;
 
@@ -960,8 +976,9 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_write *write)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_write *write = &u->write;
 	stateid_t *stateid = &write->wr_stateid;
 	struct file *filp = NULL;
 	__be32 status = nfs_ok;
@@ -1034,8 +1051,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_clone *clone)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_clone *clone = &u->clone;
 	struct file *src, *dst;
 	__be32 status;
 
@@ -1055,8 +1073,9 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_copy *copy)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_copy *copy = &u->copy;
 	struct file *src, *dst;
 	__be32 status;
 	ssize_t bytes;
@@ -1111,23 +1130,24 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	       struct nfsd4_fallocate *fallocate)
+	       union nfsd4_op_u *u)
 {
-	return nfsd4_fallocate(rqstp, cstate, fallocate, 0);
+	return nfsd4_fallocate(rqstp, cstate, &u->allocate, 0);
 }
 
 static __be32
 nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		 struct nfsd4_fallocate *fallocate)
+		 union nfsd4_op_u *u)
 {
-	return nfsd4_fallocate(rqstp, cstate, fallocate,
+	return nfsd4_fallocate(rqstp, cstate, &u->deallocate,
 			       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
 }
 
 static __be32
 nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_seek *seek)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_seek *seek = &u->seek;
 	int whence;
 	__be32 status;
 	struct file *file;
@@ -1232,21 +1252,21 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_nverify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_verify *verify)
+	      union nfsd4_op_u *u)
 {
 	__be32 status;
 
-	status = _nfsd4_verify(rqstp, cstate, verify);
+	status = _nfsd4_verify(rqstp, cstate, &u->verify);
 	return status == nfserr_not_same ? nfs_ok : status;
 }
 
 static __be32
 nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_verify *verify)
+	     union nfsd4_op_u *u)
 {
 	__be32 status;
 
-	status = _nfsd4_verify(rqstp, cstate, verify);
+	status = _nfsd4_verify(rqstp, cstate, &u->nverify);
 	return status == nfserr_same ? nfs_ok : status;
 }
 
@@ -1271,9 +1291,9 @@ nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
 
 static __be32
 nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_getdeviceinfo *gdp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_getdeviceinfo *gdp = &u->getdeviceinfo;
 	const struct nfsd4_layout_ops *ops;
 	struct nfsd4_deviceid_map *map;
 	struct svc_export *exp;
@@ -1317,9 +1337,9 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutget(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutget *lgp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutget *lgp = &u->layoutget;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	const struct nfsd4_layout_ops *ops;
 	struct nfs4_layout_stateid *ls;
@@ -1397,9 +1417,9 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutcommit(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutcommit *lcp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
 	const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	const struct nfsd4_layout_ops *ops;
@@ -1461,9 +1481,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutreturn *lrp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	__be32 nfserr;
 
@@ -1521,9 +1541,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 		nfsdstats.nfs4_opcount[opnum]++;
 }
 
-typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
-			      void *);
-
 enum nfsd4_op_flags {
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
 	ALLOWED_ON_ABSENT_FS = 1 << 1,	/* ops processed on absent fs */
@@ -1555,7 +1572,8 @@ enum nfsd4_op_flags {
 };
 
 struct nfsd4_operation {
-	nfsd4op_func op_func;
+	__be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 	u32 op_flags;
 	char *op_name;
 	/* Try to get response size before operation */
@@ -2092,12 +2110,12 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 
 static struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
-		.op_func = (nfsd4op_func)nfsd4_access,
+		.op_func = nfsd4_access,
 		.op_name = "OP_ACCESS",
 		.op_rsize_bop = nfsd4_access_rsize,
 	},
 	[OP_CLOSE] = {
-		.op_func = (nfsd4op_func)nfsd4_close,
+		.op_func = nfsd4_close,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CLOSE",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
@@ -2105,93 +2123,93 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
 	[OP_COMMIT] = {
-		.op_func = (nfsd4op_func)nfsd4_commit,
+		.op_func = nfsd4_commit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_COMMIT",
 		.op_rsize_bop = nfsd4_commit_rsize,
 	},
 	[OP_CREATE] = {
-		.op_func = (nfsd4op_func)nfsd4_create,
+		.op_func = nfsd4_create,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID,
 		.op_name = "OP_CREATE",
 		.op_rsize_bop = nfsd4_create_rsize,
 	},
 	[OP_DELEGRETURN] = {
-		.op_func = (nfsd4op_func)nfsd4_delegreturn,
+		.op_func = nfsd4_delegreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DELEGRETURN",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 		.op_get_currentstateid = nfsd4_get_delegreturnstateid,
 	},
 	[OP_GETATTR] = {
-		.op_func = (nfsd4op_func)nfsd4_getattr,
+		.op_func = nfsd4_getattr,
 		.op_flags = ALLOWED_ON_ABSENT_FS,
 		.op_rsize_bop = nfsd4_getattr_rsize,
 		.op_name = "OP_GETATTR",
 	},
 	[OP_GETFH] = {
-		.op_func = (nfsd4op_func)nfsd4_getfh,
+		.op_func = nfsd4_getfh,
 		.op_name = "OP_GETFH",
 		.op_rsize_bop = nfsd4_getfh_rsize,
 	},
 	[OP_LINK] = {
-		.op_func = (nfsd4op_func)nfsd4_link,
+		.op_func = nfsd4_link,
 		.op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING
 				| OP_CACHEME,
 		.op_name = "OP_LINK",
 		.op_rsize_bop = nfsd4_link_rsize,
 	},
 	[OP_LOCK] = {
-		.op_func = (nfsd4op_func)nfsd4_lock,
+		.op_func = nfsd4_lock,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCK",
 		.op_rsize_bop = nfsd4_lock_rsize,
 		.op_set_currentstateid = nfsd4_set_lockstateid,
 	},
 	[OP_LOCKT] = {
-		.op_func = (nfsd4op_func)nfsd4_lockt,
+		.op_func = nfsd4_lockt,
 		.op_name = "OP_LOCKT",
 		.op_rsize_bop = nfsd4_lock_rsize,
 	},
 	[OP_LOCKU] = {
-		.op_func = (nfsd4op_func)nfsd4_locku,
+		.op_func = nfsd4_locku,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCKU",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
 		.op_get_currentstateid = nfsd4_get_lockustateid,
 	},
 	[OP_LOOKUP] = {
-		.op_func = (nfsd4op_func)nfsd4_lookup,
+		.op_func = nfsd4_lookup,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUP",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_LOOKUPP] = {
-		.op_func = (nfsd4op_func)nfsd4_lookupp,
+		.op_func = nfsd4_lookupp,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUPP",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_NVERIFY] = {
-		.op_func = (nfsd4op_func)nfsd4_nverify,
+		.op_func = nfsd4_nverify,
 		.op_name = "OP_NVERIFY",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_OPEN] = {
-		.op_func = (nfsd4op_func)nfsd4_open,
+		.op_func = nfsd4_open,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN",
 		.op_rsize_bop = nfsd4_open_rsize,
 		.op_set_currentstateid = nfsd4_set_openstateid,
 	},
 	[OP_OPEN_CONFIRM] = {
-		.op_func = (nfsd4op_func)nfsd4_open_confirm,
+		.op_func = nfsd4_open_confirm,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_CONFIRM",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
 	},
 	[OP_OPEN_DOWNGRADE] = {
-		.op_func = (nfsd4op_func)nfsd4_open_downgrade,
+		.op_func = nfsd4_open_downgrade,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_DOWNGRADE",
 		.op_rsize_bop = nfsd4_status_stateid_rsize,
@@ -2199,56 +2217,56 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
 	[OP_PUTFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putfh,
+		.op_func = nfsd4_putfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTPUBFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+		.op_func = nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTPUBFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTROOTFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+		.op_func = nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTROOTFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_READ] = {
-		.op_func = (nfsd4op_func)nfsd4_read,
+		.op_func = nfsd4_read,
 		.op_name = "OP_READ",
 		.op_rsize_bop = nfsd4_read_rsize,
 		.op_get_currentstateid = nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
-		.op_func = (nfsd4op_func)nfsd4_readdir,
+		.op_func = nfsd4_readdir,
 		.op_name = "OP_READDIR",
 		.op_rsize_bop = nfsd4_readdir_rsize,
 	},
 	[OP_READLINK] = {
-		.op_func = (nfsd4op_func)nfsd4_readlink,
+		.op_func = nfsd4_readlink,
 		.op_name = "OP_READLINK",
 		.op_rsize_bop = nfsd4_readlink_rsize,
 	},
 	[OP_REMOVE] = {
-		.op_func = (nfsd4op_func)nfsd4_remove,
+		.op_func = nfsd4_remove,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_REMOVE",
 		.op_rsize_bop = nfsd4_remove_rsize,
 	},
 	[OP_RENAME] = {
-		.op_func = (nfsd4op_func)nfsd4_rename,
+		.op_func = nfsd4_rename,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_RENAME",
 		.op_rsize_bop = nfsd4_rename_rsize,
 	},
 	[OP_RENEW] = {
-		.op_func = (nfsd4op_func)nfsd4_renew,
+		.op_func = nfsd4_renew,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RENEW",
@@ -2256,59 +2274,59 @@ static struct nfsd4_operation nfsd4_ops[] = {
 
 	},
 	[OP_RESTOREFH] = {
-		.op_func = (nfsd4op_func)nfsd4_restorefh,
+		.op_func = nfsd4_restorefh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RESTOREFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SAVEFH] = {
-		.op_func = (nfsd4op_func)nfsd4_savefh,
+		.op_func = nfsd4_savefh,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_SAVEFH",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO] = {
-		.op_func = (nfsd4op_func)nfsd4_secinfo,
+		.op_func = nfsd4_secinfo,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO",
 		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_SETATTR] = {
-		.op_func = (nfsd4op_func)nfsd4_setattr,
+		.op_func = nfsd4_setattr,
 		.op_name = "OP_SETATTR",
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_rsize_bop = nfsd4_setattr_rsize,
 		.op_get_currentstateid = nfsd4_get_setattrstateid,
 	},
 	[OP_SETCLIENTID] = {
-		.op_func = (nfsd4op_func)nfsd4_setclientid,
+		.op_func = nfsd4_setclientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID",
 		.op_rsize_bop = nfsd4_setclientid_rsize,
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
-		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
+		.op_func = nfsd4_setclientid_confirm,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID_CONFIRM",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_VERIFY] = {
-		.op_func = (nfsd4op_func)nfsd4_verify,
+		.op_func = nfsd4_verify,
 		.op_name = "OP_VERIFY",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_WRITE] = {
-		.op_func = (nfsd4op_func)nfsd4_write,
+		.op_func = nfsd4_write,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_WRITE",
 		.op_rsize_bop = nfsd4_write_rsize,
 		.op_get_currentstateid = nfsd4_get_writestateid,
 	},
 	[OP_RELEASE_LOCKOWNER] = {
-		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
+		.op_func = nfsd4_release_lockowner,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RELEASE_LOCKOWNER",
@@ -2317,72 +2335,72 @@ static struct nfsd4_operation nfsd4_ops[] = {
 
 	/* NFSv4.1 operations */
 	[OP_EXCHANGE_ID] = {
-		.op_func = (nfsd4op_func)nfsd4_exchange_id,
+		.op_func = nfsd4_exchange_id,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_EXCHANGE_ID",
 		.op_rsize_bop = nfsd4_exchange_id_rsize,
 	},
 	[OP_BACKCHANNEL_CTL] = {
-		.op_func = (nfsd4op_func)nfsd4_backchannel_ctl,
+		.op_func = nfsd4_backchannel_ctl,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BACKCHANNEL_CTL",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_BIND_CONN_TO_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
+		.op_func = nfsd4_bind_conn_to_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BIND_CONN_TO_SESSION",
 		.op_rsize_bop = nfsd4_bind_conn_to_session_rsize,
 	},
 	[OP_CREATE_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_create_session,
+		.op_func = nfsd4_create_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CREATE_SESSION",
 		.op_rsize_bop = nfsd4_create_session_rsize,
 	},
 	[OP_DESTROY_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_destroy_session,
+		.op_func = nfsd4_destroy_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_SESSION",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SEQUENCE] = {
-		.op_func = (nfsd4op_func)nfsd4_sequence,
+		.op_func = nfsd4_sequence,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
 		.op_name = "OP_SEQUENCE",
 		.op_rsize_bop = nfsd4_sequence_rsize,
 	},
 	[OP_DESTROY_CLIENTID] = {
-		.op_func = (nfsd4op_func)nfsd4_destroy_clientid,
+		.op_func = nfsd4_destroy_clientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_CLIENTID",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_RECLAIM_COMPLETE] = {
-		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
+		.op_func = nfsd4_reclaim_complete,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RECLAIM_COMPLETE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO_NO_NAME] = {
-		.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
+		.op_func = nfsd4_secinfo_no_name,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO_NO_NAME",
 		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_TEST_STATEID] = {
-		.op_func = (nfsd4op_func)nfsd4_test_stateid,
+		.op_func = nfsd4_test_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_TEST_STATEID",
 		.op_rsize_bop = nfsd4_test_stateid_rsize,
 	},
 	[OP_FREE_STATEID] = {
-		.op_func = (nfsd4op_func)nfsd4_free_stateid,
+		.op_func = nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
 		.op_get_currentstateid = nfsd4_get_freestateid,
@@ -2390,25 +2408,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
 	},
 #ifdef CONFIG_NFSD_PNFS
 	[OP_GETDEVICEINFO] = {
-		.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
+		.op_func = nfsd4_getdeviceinfo,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_GETDEVICEINFO",
 		.op_rsize_bop = nfsd4_getdeviceinfo_rsize,
 	},
 	[OP_LAYOUTGET] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutget,
+		.op_func = nfsd4_layoutget,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTGET",
 		.op_rsize_bop = nfsd4_layoutget_rsize,
 	},
 	[OP_LAYOUTCOMMIT] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
+		.op_func = nfsd4_layoutcommit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTCOMMIT",
 		.op_rsize_bop = nfsd4_layoutcommit_rsize,
 	},
 	[OP_LAYOUTRETURN] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
+		.op_func = nfsd4_layoutreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTRETURN",
 		.op_rsize_bop = nfsd4_layoutreturn_rsize,
@@ -2417,31 +2435,31 @@ static struct nfsd4_operation nfsd4_ops[] = {
 
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE] = {
-		.op_func = (nfsd4op_func)nfsd4_allocate,
+		.op_func = nfsd4_allocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_ALLOCATE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_DEALLOCATE] = {
-		.op_func = (nfsd4op_func)nfsd4_deallocate,
+		.op_func = nfsd4_deallocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_DEALLOCATE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_CLONE] = {
-		.op_func = (nfsd4op_func)nfsd4_clone,
+		.op_func = nfsd4_clone,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_CLONE",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_COPY] = {
-		.op_func = (nfsd4op_func)nfsd4_copy,
+		.op_func = nfsd4_copy,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_COPY",
 		.op_rsize_bop = nfsd4_copy_rsize,
 	},
 	[OP_SEEK] = {
-		.op_func = (nfsd4op_func)nfsd4_seek,
+		.op_func = nfsd4_seek,
 		.op_name = "OP_SEEK",
 		.op_rsize_bop = nfsd4_seek_rsize,
 	},
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 01ab21f8d34db..0c04f81aa63b2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2402,10 +2402,10 @@ static bool client_has_state(struct nfs4_client *clp)
 }
 
 __be32
-nfsd4_exchange_id(struct svc_rqst *rqstp,
-		  struct nfsd4_compound_state *cstate,
-		  struct nfsd4_exchange_id *exid)
+nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_exchange_id *exid = &u->exchange_id;
 	struct nfs4_client *conf, *new;
 	struct nfs4_client *unconf = NULL;
 	__be32 status;
@@ -2698,9 +2698,9 @@ static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
 
 __be32
 nfsd4_create_session(struct svc_rqst *rqstp,
-		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_create_session *cr_ses)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_create_session *cr_ses = &u->create_session;
 	struct sockaddr *sa = svc_addr(rqstp);
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *old = NULL;
@@ -2824,8 +2824,11 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
 	return nfserr_inval;
 }
 
-__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
+__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
 	struct nfsd4_session *session = cstate->session;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	__be32 status;
@@ -2845,8 +2848,9 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state
 
 __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_bind_conn_to_session *bcts)
+		     union nfsd4_op_u *u)
 {
+	struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
 	__be32 status;
 	struct nfsd4_conn *conn;
 	struct nfsd4_session *session;
@@ -2886,10 +2890,10 @@ static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4
 }
 
 __be32
-nfsd4_destroy_session(struct svc_rqst *r,
-		      struct nfsd4_compound_state *cstate,
-		      struct nfsd4_destroy_session *sessionid)
+nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_destroy_session *sessionid = &u->destroy_session;
 	struct nfsd4_session *ses;
 	__be32 status;
 	int ref_held_by_me = 0;
@@ -2983,10 +2987,10 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
 }
 
 __be32
-nfsd4_sequence(struct svc_rqst *rqstp,
-	       struct nfsd4_compound_state *cstate,
-	       struct nfsd4_sequence *seq)
+nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_sequence *seq = &u->sequence;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_stream *xdr = &resp->xdr;
 	struct nfsd4_session *session;
@@ -3120,8 +3124,11 @@ nfsd4_sequence_done(struct nfsd4_compoundres *resp)
 }
 
 __be32
-nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
+nfsd4_destroy_clientid(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *clp = NULL;
 	__be32 status = 0;
@@ -3161,8 +3168,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 }
 
 __be32
-nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
+nfsd4_reclaim_complete(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
 	__be32 status = 0;
 
 	if (rc->rca_one_fs) {
@@ -3199,8 +3208,9 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 
 __be32
 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		  struct nfsd4_setclientid *setclid)
+		  union nfsd4_op_u *u)
 {
+	struct nfsd4_setclientid *setclid = &u->setclientid;
 	struct xdr_netobj 	clname = setclid->se_name;
 	nfs4_verifier		clverifier = setclid->se_verf;
 	struct nfs4_client	*conf, *new;
@@ -3257,9 +3267,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-			 struct nfsd4_compound_state *cstate,
-			 struct nfsd4_setclientid_confirm *setclientid_confirm)
+			struct nfsd4_compound_state *cstate,
+			union nfsd4_op_u *u)
 {
+	struct nfsd4_setclientid_confirm *setclientid_confirm =
+			&u->setclientid_confirm;
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *old = NULL;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
@@ -4506,8 +4518,9 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    clientid_t *clid)
+	    union nfsd4_op_u *u)
 {
+	clientid_t *clid = &u->renew;
 	struct nfs4_client *clp;
 	__be32 status;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -4993,8 +5006,9 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
  */
 __be32
 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_test_stateid *test_stateid)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
 	struct nfsd4_test_stateid_id *stateid;
 	struct nfs4_client *cl = cstate->session->se_client;
 
@@ -5033,8 +5047,9 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
 
 __be32
 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_free_stateid *free_stateid)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
 	stateid_t *stateid = &free_stateid->fr_stateid;
 	struct nfs4_stid *s;
 	struct nfs4_delegation *dp;
@@ -5162,8 +5177,9 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
 
 __be32
 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_open_confirm *oc)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_open_confirm *oc = &u->open_confirm;
 	__be32 status;
 	struct nfs4_openowner *oo;
 	struct nfs4_ol_stateid *stp;
@@ -5230,9 +5246,9 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
 
 __be32
 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_open_downgrade *od)
+		     struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_open_downgrade *od = &u->open_downgrade;
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -5300,8 +5316,9 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
  */
 __be32
 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_close *close)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_close *close = &u->close;
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
 	struct net *net = SVC_NET(rqstp);
@@ -5330,8 +5347,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		  struct nfsd4_delegreturn *dr)
+		  union nfsd4_op_u *u)
 {
+	struct nfsd4_delegreturn *dr = &u->delegreturn;
 	struct nfs4_delegation *dp;
 	stateid_t *stateid = &dr->dr_stateid;
 	struct nfs4_stid *s;
@@ -5706,8 +5724,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
  */
 __be32
 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_lock *lock)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_lock *lock = &u->lock;
 	struct nfs4_openowner *open_sop = NULL;
 	struct nfs4_lockowner *lock_sop = NULL;
 	struct nfs4_ol_stateid *lock_stp = NULL;
@@ -5939,8 +5958,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
  */
 __be32
 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_lockt *lockt)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_lockt *lockt = &u->lockt;
 	struct file_lock *file_lock = NULL;
 	struct nfs4_lockowner *lo = NULL;
 	__be32 status;
@@ -6012,8 +6032,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_locku *locku)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_locku *locku = &u->locku;
 	struct nfs4_ol_stateid *stp;
 	struct file *filp = NULL;
 	struct file_lock *file_lock = NULL;
@@ -6119,8 +6140,9 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 			struct nfsd4_compound_state *cstate,
-			struct nfsd4_release_lockowner *rlockowner)
+			union nfsd4_op_u *u)
 {
+	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
 	clientid_t *clid = &rlockowner->rl_clientid;
 	struct nfs4_stateowner *sop;
 	struct nfs4_lockowner *lo = NULL;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index b625f4aa1061e..72c6ad136107c 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -577,6 +577,7 @@ struct nfsd4_op {
 		struct nfsd4_bind_conn_to_session bind_conn_to_session;
 		struct nfsd4_create_session	create_session;
 		struct nfsd4_destroy_session	destroy_session;
+		struct nfsd4_destroy_clientid	destroy_clientid;
 		struct nfsd4_sequence		sequence;
 		struct nfsd4_reclaim_complete	reclaim_complete;
 		struct nfsd4_test_stateid	test_stateid;
@@ -585,6 +586,7 @@ struct nfsd4_op {
 		struct nfsd4_layoutget		layoutget;
 		struct nfsd4_layoutcommit	layoutcommit;
 		struct nfsd4_layoutreturn	layoutreturn;
+		struct nfsd4_secinfo_no_name	secinfo_no_name;
 
 		/* NFSv4.2 */
 		struct nfsd4_fallocate		allocate;
@@ -693,27 +695,26 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
 		struct dentry *dentry,
 		u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
 extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid *setclid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid_confirm *setclientid_confirm);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
-extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
-extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *,
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *,
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_create_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_create_session *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_sequence(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_sequence *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp);
 extern __be32 nfsd4_destroy_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_destroy_session *);
-extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *);
-__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
+__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
 		struct nfsd4_open *open, struct nfsd_net *nn);
 extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
@@ -722,34 +723,29 @@ extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
 extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
 		struct nfsd4_open *open);
 extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
-extern __be32 nfsd4_close(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_close *close);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_open_downgrade *od);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
-		struct nfsd4_lock *lock);
-extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_lockt *lockt);
-extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_locku *locku);
+		union nfsd4_op_u *u);
+extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
+extern __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_release_lockowner *rlockowner);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern void nfsd4_release_compoundargs(struct svc_rqst *rqstp);
 extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
-extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
-			  struct nfsd4_compound_state *, clientid_t *clid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *);
 extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *);
 extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
 
 #endif
-- 
GitLab


From 0becc1181cdba562730be4d4b8a5fcb4368ef527 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:40:27 +0200
Subject: [PATCH 1456/1958] sunrpc: move pc_count out of struct svc_procinfo

pc_count is the only writeable memeber of struct svc_procinfo, which is
a good candidate to be const-ified as it contains function pointers.

This patch moves it into out out struct svc_procinfo, and into a
separate writable array that is pointed to by struct svc_version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc.c             |  6 ++++++
 fs/nfs/callback_xdr.c      |  4 ++++
 fs/nfsd/nfs2acl.c          |  2 ++
 fs/nfsd/nfs3acl.c          |  2 ++
 fs/nfsd/nfs3proc.c         |  2 ++
 fs/nfsd/nfs4proc.c         |  2 ++
 fs/nfsd/nfsproc.c          |  2 ++
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/stats.c         | 11 ++++++-----
 net/sunrpc/svc.c           |  2 +-
 10 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5d481e8a1b5d0..cc6abe6280bce 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -739,23 +739,29 @@ module_exit(exit_nlm);
 /*
  * Define NLM program and procedures
  */
+static unsigned int nlmsvc_version1_count[17];
 static struct svc_version	nlmsvc_version1 = {
 		.vs_vers	= 1,
 		.vs_nproc	= 17,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version1_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
+static unsigned int nlmsvc_version3_count[24];
 static struct svc_version	nlmsvc_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_count	= nlmsvc_version3_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
+static unsigned int nlmsvc_version4_count[24];
 static struct svc_version	nlmsvc_version4 = {
 		.vs_vers	= 4,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures4,
+		.vs_count	= nlmsvc_version4_count,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index acf75dc63e14e..ecd46b8c09851 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1011,20 +1011,24 @@ static struct svc_procedure nfs4_callback_procedures1[] = {
 	}
 };
 
+static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
 
+static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
 struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count4,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fc6b179c8fff7..026edfe73fd5b 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -378,10 +378,12 @@ static struct svc_procedure		nfsd_acl_procedures2[] = {
   PROC(access,	access,		access,		access,   RC_NOCACHE, ST+AT+1),
 };
 
+static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
 struct svc_version	nfsd_acl_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 5,
 		.vs_proc	= nfsd_acl_procedures2,
+		.vs_count	= nfsd_acl_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9437b758cbfdb..73c0970ccefb4 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -263,10 +263,12 @@ static struct svc_procedure		nfsd_acl_procedures3[] = {
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
 };
 
+static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
 struct svc_version	nfsd_acl_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 3,
 		.vs_proc	= nfsd_acl_procedures3,
+		.vs_count	= nfsd_acl_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 17c90c41a3a68..b5823802e2785 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -890,10 +890,12 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
 struct svc_version	nfsd_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 22,
 		.vs_proc	= nfsd_procedures3,
+		.vs_count	= nfsd_count3,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 403c1d14d99af..4de6a99507226 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2553,10 +2553,12 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 };
 
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
 struct svc_version	nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
+	.vs_count		= nfsd_count3,
 	.vs_dispatch		= nfsd_dispatch,
 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
 	.vs_rpcb_optnl		= true,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0ef88d0e67d9d..44b1575537337 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -743,10 +743,12 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 };
 
 
+static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
 struct svc_version	nfsd_version2 = {
 		.vs_vers	= 2,
 		.vs_nproc	= 18,
 		.vs_proc	= nfsd_procedures2,
+		.vs_count	= nfsd_count2,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index bd9e313c444af..bcd114f038ef1 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -398,6 +398,7 @@ struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
 	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
 	/* Don't register with rpcbind */
@@ -430,7 +431,6 @@ struct svc_procedure {
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
-	unsigned int		pc_count;	/* call count */
 	unsigned int		pc_cachetype;	/* cache info (NFS) */
 	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
 };
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 8b6c35ae1d571..1e671333c3d5b 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -77,9 +77,9 @@ static const struct file_operations rpc_proc_fops = {
 /*
  * Get RPC server stats
  */
-void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
+void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
+{
 	const struct svc_program *prog = statp->program;
-	const struct svc_procedure *proc;
 	const struct svc_version *vers;
 	unsigned int i, j;
 
@@ -98,11 +98,12 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
 			statp->rpcbadclnt);
 
 	for (i = 0; i < prog->pg_nvers; i++) {
-		if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
+		vers = prog->pg_vers[i];
+		if (!vers)
 			continue;
 		seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
-		for (j = 0; j < vers->vs_nproc; j++, proc++)
-			seq_printf(seq, " %u", proc->pc_count);
+		for (j = 0; j < vers->vs_nproc; j++)
+			seq_printf(seq, " %u", vers->vs_count[j]);
 		seq_putc(seq, '\n');
 	}
 }
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa643a29fdc6f..6452592194ac0 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1261,7 +1261,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	svc_putnl(resv, RPC_SUCCESS);
 
 	/* Bump per-procedure stats counter */
-	procp->pc_count++;
+	versp->vs_count[proc]++;
 
 	/* Initialize storage for argp and resp */
 	memset(rqstp->rq_argp, 0, procp->pc_argsize);
-- 
GitLab


From b9c744c19c441f306239ac3e60a2a95b40d698f8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:11:49 +0200
Subject: [PATCH 1457/1958] sunrpc: mark all struct svc_procinfo instances as
 const

struct svc_procinfo contains function pointers, and marking it as
constant avoids it being able to be used as an attach vector for
code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/lockd/svc4proc.c         | 2 +-
 fs/lockd/svcproc.c          | 2 +-
 fs/nfs/callback_xdr.c       | 2 +-
 fs/nfsd/nfs2acl.c           | 2 +-
 fs/nfsd/nfs3acl.c           | 2 +-
 fs/nfsd/nfs3proc.c          | 2 +-
 fs/nfsd/nfs4proc.c          | 2 +-
 fs/nfsd/nfsproc.c           | 2 +-
 fs/nfsd/nfssvc.c            | 4 ++--
 include/linux/lockd/lockd.h | 4 ++--
 include/linux/sunrpc/svc.h  | 4 ++--
 net/sunrpc/svc.c            | 2 +-
 12 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index fed0161557917..82925f17ec45f 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -507,7 +507,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)				/* netobj */
 #define	St	1					/* status */
 #define	Rg	4					/* range (offset + length) */
-struct svc_procedure		nlmsvc_procedures4[] = {
+const struct svc_procedure nlmsvc_procedures4[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 14648b051eba0..07915162581d6 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -551,7 +551,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)			/* Net Obj */
 #define	Rg	2				/* range - offset + size */
 
-struct svc_procedure		nlmsvc_procedures[] = {
+const struct svc_procedure nlmsvc_procedures[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ecd46b8c09851..ae249f27297f9 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -995,7 +995,7 @@ static struct callback_op callback_ops[] = {
 /*
  * Define NFS4 callback procedures
  */
-static struct svc_procedure nfs4_callback_procedures1[] = {
+static const struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
 		.pc_decode = nfs4_decode_void,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 026edfe73fd5b..c3f6b8a6b659c 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -370,7 +370,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures2[] = {
+static const struct svc_procedure nfsd_acl_procedures2[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		attrstat,	attrstat, RC_NOCACHE, ST+AT),
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 73c0970ccefb4..1a482ac9d4e95 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -257,7 +257,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures3[] = {
+static const struct svc_procedure nfsd_acl_procedures3[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b5823802e2785..96e0e6a2af510 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -674,7 +674,7 @@ struct nfsd3_voidargs { int dummy; };
 #define pAT (1+AT)	/* post attributes - conditional */
 #define WC (7+pAT)	/* WCC attributes */
 
-static struct svc_procedure		nfsd_procedures3[22] = {
+static const struct svc_procedure nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
 		.pc_encode = nfs3svc_encode_voidres,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4de6a99507226..68aa175b1cb32 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2532,7 +2532,7 @@ static const char *nfsd4_op_name(unsigned opnum)
 #define nfsd4_voidres			nfsd4_voidargs
 struct nfsd4_voidargs { int dummy; };
 
-static struct svc_procedure		nfsd_procedures4[2] = {
+static const struct svc_procedure nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
 		.pc_encode = nfs4svc_encode_voidres,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 44b1575537337..a68b686fda121 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -573,7 +573,7 @@ struct nfsd_void { int dummy; };
 #define FH 8		/* filehandle */
 #define	AT 18		/* attributes */
 
-static struct svc_procedure		nfsd_procedures2[18] = {
+static const struct svc_procedure nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
 		.pc_decode = nfssvc_decode_void,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 5552336641246..379b310c445d3 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -756,7 +756,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
  * problem, we enforce these assumptions here:
  */
 static bool nfs_request_too_big(struct svc_rqst *rqstp,
-				struct svc_procedure *proc)
+				const struct svc_procedure *proc)
 {
 	/*
 	 * The ACL code has more careful bounds-checking and is not
@@ -781,7 +781,7 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
 int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
-	struct svc_procedure	*proc;
+	const struct svc_procedure *proc;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 41f7b6a04d691..3eca677283666 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -192,9 +192,9 @@ struct nlm_block {
  * Global variables
  */
 extern const struct rpc_program	nlm_program;
-extern struct svc_procedure	nlmsvc_procedures[];
+extern const struct svc_procedure nlmsvc_procedures[];
 #ifdef CONFIG_LOCKD_V4
-extern struct svc_procedure	nlmsvc_procedures4[];
+extern const struct svc_procedure nlmsvc_procedures4[];
 #endif
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index bcd114f038ef1..992ea34197958 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -237,7 +237,7 @@ struct svc_rqst {
 
 	struct svc_serv *	rq_server;	/* RPC service definition */
 	struct svc_pool *	rq_pool;	/* thread pool */
-	struct svc_procedure *	rq_procinfo;	/* procedure info */
+	const struct svc_procedure *rq_procinfo;/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
@@ -397,7 +397,7 @@ struct svc_program {
 struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
-	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	const struct svc_procedure *vs_proc;	/* per-procedure info */
 	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6452592194ac0..049963d676a79 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1152,7 +1152,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
 	struct svc_version	*versp = NULL;	/* compiler food */
-	struct svc_procedure	*procp = NULL;
+	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
 	u32			prog, vers, proc;
-- 
GitLab


From aa8217d5dcb1db594d816794ef6ab434ebf3e127 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 May 2017 16:21:37 +0200
Subject: [PATCH 1458/1958] sunrpc: mark all struct svc_version instances as
 const

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/lockd/svc.c             | 38 +++++++++++++++++++-------------------
 fs/nfs/callback.c          |  2 +-
 fs/nfs/callback_xdr.c      |  4 ++--
 fs/nfs/internal.h          |  4 ++--
 fs/nfs/nfs4_fs.h           |  4 ++--
 fs/nfsd/nfs2acl.c          | 14 +++++++-------
 fs/nfsd/nfs3acl.c          | 14 +++++++-------
 fs/nfsd/nfs3proc.c         | 14 +++++++-------
 fs/nfsd/nfs4proc.c         |  2 +-
 fs/nfsd/nfsd.h             |  6 +++---
 fs/nfsd/nfsproc.c          | 14 +++++++-------
 fs/nfsd/nfssvc.c           |  8 ++++----
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/svc.c           |  4 ++--
 14 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index cc6abe6280bce..726b6cecf430d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -740,32 +740,32 @@ module_exit(exit_nlm);
  * Define NLM program and procedures
  */
 static unsigned int nlmsvc_version1_count[17];
-static struct svc_version	nlmsvc_version1 = {
-		.vs_vers	= 1,
-		.vs_nproc	= 17,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version1_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version1 = {
+	.vs_vers	= 1,
+	.vs_nproc	= 17,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version1_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 static unsigned int nlmsvc_version3_count[24];
-static struct svc_version	nlmsvc_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_count	= nlmsvc_version3_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version3_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
 static unsigned int nlmsvc_version4_count[24];
-static struct svc_version	nlmsvc_version4 = {
-		.vs_vers	= 4,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures4,
-		.vs_count	= nlmsvc_version4_count,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static const struct svc_version	nlmsvc_version4 = {
+	.vs_vers	= 4,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures4,
+	.vs_count	= nlmsvc_version4_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
-static struct svc_version *	nlmsvc_version[] = {
+static const struct svc_version *nlmsvc_version[] = {
 	[1] = &nlmsvc_version1,
 	[3] = &nlmsvc_version3,
 #ifdef CONFIG_LOCKD_V4
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 73a1f928226c0..34323877ec132 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -439,7 +439,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 /*
  * Define NFS4 callback program
  */
-static struct svc_version *nfs4_callback_version[] = {
+static const struct svc_version *nfs4_callback_version[] = {
 	[1] = &nfs4_callback_version1,
 	[4] = &nfs4_callback_version4,
 };
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ae249f27297f9..01a430e51daad 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1012,7 +1012,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
 };
 
 static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version1 = {
+const struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
@@ -1024,7 +1024,7 @@ struct svc_version nfs4_callback_version1 = {
 };
 
 static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
-struct svc_version nfs4_callback_version4 = {
+const struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dc2a29a7d48b0..1c0ce9c15e944 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -225,8 +225,8 @@ static inline void nfs_fs_proc_exit(void)
 #endif
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 struct nfs_pageio_descriptor;
 /* pagelist.c */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9b0cf3872722b..40bd05f05e743 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -498,8 +498,8 @@ extern const struct rpc_procinfo nfs4_procedures[];
 struct nfs4_mount_data;
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
 {
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index c3f6b8a6b659c..6276ec8608b06 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -379,11 +379,11 @@ static const struct svc_procedure nfsd_acl_procedures2[] = {
 };
 
 static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
-struct svc_version	nfsd_acl_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 5,
-		.vs_proc	= nfsd_acl_procedures2,
-		.vs_count	= nfsd_acl_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 5,
+	.vs_proc	= nfsd_acl_procedures2,
+	.vs_count	= nfsd_acl_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 1a482ac9d4e95..01976529f0424 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -264,12 +264,12 @@ static const struct svc_procedure nfsd_acl_procedures3[] = {
 };
 
 static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
-struct svc_version	nfsd_acl_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 3,
-		.vs_proc	= nfsd_acl_procedures3,
-		.vs_count	= nfsd_acl_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_acl_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 3,
+	.vs_proc	= nfsd_acl_procedures3,
+	.vs_count	= nfsd_acl_count3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
 
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 96e0e6a2af510..2cb56a0d6625f 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -891,11 +891,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
-struct svc_version	nfsd_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 22,
-		.vs_proc	= nfsd_procedures3,
-		.vs_count	= nfsd_count3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+const struct svc_version nfsd_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 22,
+	.vs_proc	= nfsd_procedures3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_count	= nfsd_count3,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 68aa175b1cb32..3cbd065c639f8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2554,7 +2554,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
 };
 
 static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
-struct svc_version	nfsd_version4 = {
+const struct svc_version nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d96606801d47a..b9c538ab7a59b 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -60,7 +60,7 @@ struct readdir_cd {
 
 
 extern struct svc_program	nfsd_program;
-extern struct svc_version	nfsd_version2, nfsd_version3,
+extern const struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
 extern struct mutex		nfsd_mutex;
 extern spinlock_t		nfsd_drc_lock;
@@ -86,12 +86,12 @@ void		nfsd_destroy(struct net *net);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
-extern struct svc_version nfsd_acl_version2;
+extern const struct svc_version nfsd_acl_version2;
 #else
 #define nfsd_acl_version2 NULL
 #endif
 #ifdef CONFIG_NFSD_V3_ACL
-extern struct svc_version nfsd_acl_version3;
+extern const struct svc_version nfsd_acl_version3;
 #else
 #define nfsd_acl_version3 NULL
 #endif
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a68b686fda121..5076ae2b82585 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -744,13 +744,13 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 
 
 static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
-struct svc_version	nfsd_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 18,
-		.vs_proc	= nfsd_procedures2,
-		.vs_count	= nfsd_count2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
+const struct svc_version nfsd_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 18,
+	.vs_proc	= nfsd_procedures2,
+	.vs_count	= nfsd_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 379b310c445d3..063ae7de2c122 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -68,14 +68,14 @@ unsigned long	nfsd_drc_mem_used;
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
-static struct svc_version *	nfsd_acl_version[] = {
+static const struct svc_version *nfsd_acl_version[] = {
 	[2] = &nfsd_acl_version2,
 	[3] = &nfsd_acl_version3,
 };
 
 #define NFSD_ACL_MINVERS            2
 #define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)
-static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
+static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
 
 static struct svc_program	nfsd_acl_program = {
 	.pg_prog		= NFS_ACL_PROGRAM,
@@ -92,7 +92,7 @@ static struct svc_stat	nfsd_acl_svcstats = {
 };
 #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
 
-static struct svc_version *	nfsd_version[] = {
+static const struct svc_version *nfsd_version[] = {
 	[2] = &nfsd_version2,
 #if defined(CONFIG_NFSD_V3)
 	[3] = &nfsd_version3,
@@ -104,7 +104,7 @@ static struct svc_version *	nfsd_version[] = {
 
 #define NFSD_MINVERS    	2
 #define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
-static struct svc_version *nfsd_versions[NFSD_NRVERS];
+static const struct svc_version *nfsd_versions[NFSD_NRVERS];
 
 struct svc_program		nfsd_program = {
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 992ea34197958..eec04982a7ea3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -384,7 +384,7 @@ struct svc_program {
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* highest version */
 	unsigned int		pg_nvers;	/* number of versions */
-	struct svc_version **	pg_vers;	/* version array */
+	const struct svc_version **pg_vers;	/* version array */
 	char *			pg_name;	/* service name */
 	char *			pg_class;	/* class name: services sharing authentication */
 	struct svc_stat *	pg_stats;	/* rpc statistics */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 049963d676a79..45b4f2d2e3bdf 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1008,7 +1008,7 @@ int svc_register(const struct svc_serv *serv, struct net *net,
 		 const unsigned short port)
 {
 	struct svc_program	*progp;
-	struct svc_version	*vers;
+	const struct svc_version *vers;
 	unsigned int		i;
 	int			error = 0;
 
@@ -1151,7 +1151,7 @@ static int
 svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
-	struct svc_version	*versp = NULL;	/* compiler food */
+	const struct svc_version *versp = NULL;	/* compiler food */
 	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	__be32			*statp;
-- 
GitLab


From 800222f80fedca63baf05d28028a0d18e9b8d7a5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 May 2017 23:46:47 +0200
Subject: [PATCH 1459/1958] nfsd4: const-ify nfsd4_ops

nfsd4_ops contains function pointers, and marking it as constant avoids
it being able to be used as an attach vector for code injections.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4proc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3cbd065c639f8..d27e75ad25e34 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1584,7 +1584,7 @@ struct nfsd4_operation {
 			union nfsd4_op_u *);
 };
 
-static struct nfsd4_operation nfsd4_ops[];
+static const struct nfsd4_operation nfsd4_ops[];
 
 static const char *nfsd4_op_name(unsigned opnum);
 
@@ -1621,7 +1621,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
 	return nfs_ok;
 }
 
-static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
+static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
 {
 	return &nfsd4_ops[op->opnum];
 }
@@ -1639,10 +1639,9 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
 	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
 	struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
 	struct nfsd4_op *next = &argp->ops[resp->opcnt];
-	struct nfsd4_operation *thisd;
-	struct nfsd4_operation *nextd;
+	const struct nfsd4_operation *thisd = OPDESC(this);
+	const struct nfsd4_operation *nextd;
 
-	thisd = OPDESC(this);
 	/*
 	 * Most ops check wronsec on our own; only the putfh-like ops
 	 * have special rules.
@@ -1695,7 +1694,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_op	*op;
-	struct nfsd4_operation *opdesc;
+	const struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct svc_fh *save_fh = &cstate->save_fh;
@@ -2108,7 +2107,7 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 	return (op_encode_hdr_size + 3) * sizeof(__be32);
 }
 
-static struct nfsd4_operation nfsd4_ops[] = {
+static const struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
 		.op_func = nfsd4_access,
 		.op_name = "OP_ACCESS",
-- 
GitLab


From 18fe6a23e3114a6737471570753b708ff293b9de Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 16 Jun 2017 12:06:59 -0400
Subject: [PATCH 1460/1958] NFS: Set FATTR4_WORD0_TYPE for . and .. entries

The current code worked okay for getdents(), but getdents64() expects
the d_type field to get filled out properly in the stat structure.
Setting this field fixes xfstests generic/401.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c08c46a3b8cde..735b3068a2d1b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -275,6 +275,7 @@ const u32 nfs4_fs_locations_bitmap[3] = {
 static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
 		struct nfs4_readdir_arg *readdir)
 {
+	unsigned int attrs = FATTR4_WORD0_FILEID | FATTR4_WORD0_TYPE;
 	__be32 *start, *p;
 
 	if (cookie > 2) {
@@ -305,8 +306,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 		memcpy(p, ".\0\0\0", 4);                        /* entry */
 		p++;
 		*p++ = xdr_one;                         /* bitmap length */
-		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
-		*p++ = htonl(8);              /* attribute buffer length */
+		*p++ = htonl(attrs);                           /* bitmap */
+		*p++ = htonl(12);             /* attribute buffer length */
+		*p++ = htonl(NF4DIR);
 		p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
 	}
 	
@@ -317,8 +319,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 	memcpy(p, "..\0\0", 4);                         /* entry */
 	p++;
 	*p++ = xdr_one;                         /* bitmap length */
-	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
-	*p++ = htonl(8);              /* attribute buffer length */
+	*p++ = htonl(attrs);                           /* bitmap */
+	*p++ = htonl(12);             /* attribute buffer length */
+	*p++ = htonl(NF4DIR);
 	p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
 
 	readdir->pgbase = (char *)p - (char *)start;
-- 
GitLab


From a7a3b1e971cd806b81ecea3a234d8dae9de0add0 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 20 Jun 2017 08:33:44 -0400
Subject: [PATCH 1461/1958] NFS: convert flags to bool

NFS uses some int, and unsigned int :1, and bool as flags in structs and
args.  Assert the preference for uniformly replacing these with the bool
type.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c            | 16 ++++++++--------
 fs/nfs/internal.h       |  6 +++---
 fs/nfs/nfs2xdr.c        |  2 +-
 fs/nfs/nfs3proc.c       |  2 +-
 fs/nfs/nfs3xdr.c        |  2 +-
 fs/nfs/nfs4proc.c       | 42 ++++++++++++++++++++---------------------
 fs/nfs/nfs4xdr.c        |  2 +-
 fs/nfs/proc.c           |  2 +-
 include/linux/nfs_xdr.h | 10 +++++-----
 9 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32ccd7754f8a2..9688e9bb13dc2 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -151,7 +151,7 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[0];
 };
 
-typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
@@ -165,8 +165,8 @@ typedef struct {
 	unsigned long	timestamp;
 	unsigned long	gencount;
 	unsigned int	cache_entry_index;
-	unsigned int	plus:1;
-	unsigned int	eof:1;
+	bool plus;
+	bool eof;
 } nfs_readdir_descriptor_t;
 
 /*
@@ -355,7 +355,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-			desc->plus = 0;
+			desc->plus = false;
 			goto again;
 		}
 		goto error;
@@ -557,7 +557,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 
 		count++;
 
-		if (desc->plus != 0)
+		if (desc->plus)
 			nfs_prime_dcache(file_dentry(desc->file), entry);
 
 		status = nfs_readdir_add_to_array(entry, page);
@@ -860,7 +860,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->ctx = ctx;
 	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
-	desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
+	desc->plus = nfs_use_readdirplus(inode, ctx);
 
 	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
@@ -885,8 +885,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			nfs_zap_caches(inode);
 			desc->page_index = 0;
-			desc->plus = 0;
-			desc->eof = 0;
+			desc->plus = false;
+			desc->eof = false;
 			continue;
 		}
 		if (res < 0)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1c0ce9c15e944..c5054edb01571 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -272,17 +272,17 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 /* nfs2xdr.c */
 extern const struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 
 /* nfs3xdr.c */
 extern const struct rpc_procinfo nfs3_procedures[];
 extern int nfs3_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 
 /* nfs4xdr.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
 extern int nfs4_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 #endif
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c8a7e98c1371a..fe68dabfbde61 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -939,7 +939,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
  *	};
  */
 int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	__be32 *p;
 	int error;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0c07b567118dc..df4a7d3ab9157 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -621,7 +621,7 @@ nfs3_proc_rmdir(struct inode *dir, const struct qstr *name)
  */
 static int
 nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		  u64 cookie, struct page **pages, unsigned int count, int plus)
+		  u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	__be32			*verf = NFS_I(dir)->cookieverf;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 670eddb3ae369..e82c9e5532249 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1991,7 +1991,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
  *	};
  */
 int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	struct nfs_entry old = *entry;
 	__be32 *p;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 735b3068a2d1b..e678fa8f69793 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1037,11 +1037,11 @@ struct nfs4_opendata {
 	struct nfs4_state *state;
 	struct iattr attrs;
 	unsigned long timestamp;
-	unsigned int rpc_done : 1;
-	unsigned int file_created : 1;
-	unsigned int is_recover : 1;
+	bool rpc_done;
+	bool file_created;
+	bool is_recover;
+	bool cancelled;
 	int rpc_status;
-	int cancelled;
 };
 
 static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
@@ -1965,7 +1965,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 		nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
 		renew_lease(data->o_res.server, data->timestamp);
-		data->rpc_done = 1;
+		data->rpc_done = true;
 	}
 }
 
@@ -1975,7 +1975,7 @@ static void nfs4_open_confirm_release(void *calldata)
 	struct nfs4_state *state = NULL;
 
 	/* If this request hasn't been cancelled, do nothing */
-	if (data->cancelled == 0)
+	if (!data->cancelled)
 		goto out_free;
 	/* In case of error, no cleanup! */
 	if (!data->rpc_done)
@@ -2018,7 +2018,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 
 	nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
 	kref_get(&data->kref);
-	data->rpc_done = 0;
+	data->rpc_done = false;
 	data->rpc_status = 0;
 	data->timestamp = jiffies;
 	if (data->is_recover)
@@ -2028,7 +2028,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 		return PTR_ERR(task);
 	status = rpc_wait_for_completion_task(task);
 	if (status != 0) {
-		data->cancelled = 1;
+		data->cancelled = true;
 		smp_wmb();
 	} else
 		status = data->rpc_status;
@@ -2127,7 +2127,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 		if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
 			nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	}
-	data->rpc_done = 1;
+	data->rpc_done = true;
 }
 
 static void nfs4_open_release(void *calldata)
@@ -2136,7 +2136,7 @@ static void nfs4_open_release(void *calldata)
 	struct nfs4_state *state = NULL;
 
 	/* If this request hasn't been cancelled, do nothing */
-	if (data->cancelled == 0)
+	if (!data->cancelled)
 		goto out_free;
 	/* In case of error, no cleanup! */
 	if (data->rpc_status != 0 || !data->rpc_done)
@@ -2182,20 +2182,20 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 
 	nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
 	kref_get(&data->kref);
-	data->rpc_done = 0;
+	data->rpc_done = false;
 	data->rpc_status = 0;
-	data->cancelled = 0;
-	data->is_recover = 0;
+	data->cancelled = false;
+	data->is_recover = false;
 	if (isrecover) {
 		nfs4_set_sequence_privileged(&o_arg->seq_args);
-		data->is_recover = 1;
+		data->is_recover = true;
 	}
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = rpc_wait_for_completion_task(task);
 	if (status != 0) {
-		data->cancelled = 1;
+		data->cancelled = true;
 		smp_wmb();
 	} else
 		status = data->rpc_status;
@@ -2290,9 +2290,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 
 	if (o_arg->open_flags & O_CREAT) {
 		if (o_arg->open_flags & O_EXCL)
-			data->file_created = 1;
+			data->file_created = true;
 		else if (o_res->cinfo.before != o_res->cinfo.after)
-			data->file_created = 1;
+			data->file_created = true;
 		if (data->file_created || dir->i_version != o_res->cinfo.after)
 			update_changeattr(dir, &o_res->cinfo,
 					o_res->f_attr->time_start);
@@ -4275,7 +4275,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 }
 
 static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, int plus)
+		u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	struct nfs4_readdir_arg args = {
@@ -4313,7 +4313,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 }
 
 static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, int plus)
+		u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct nfs4_exception exception = { };
 	int err;
@@ -6137,7 +6137,7 @@ static void nfs4_lock_release(void *calldata)
 
 	dprintk("%s: begin!\n", __func__);
 	nfs_free_seqid(data->arg.open_seqid);
-	if (data->cancelled != 0) {
+	if (data->cancelled) {
 		struct rpc_task *task;
 		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
 				data->arg.lock_seqid);
@@ -6220,7 +6220,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 			nfs4_handle_setlk_error(data->server, data->lsp,
 					data->arg.new_lock_owner, ret);
 	} else
-		data->cancelled = 1;
+		data->cancelled = true;
 	rpc_put_task(task);
 	dprintk("%s: done, ret = %d!\n", __func__, ret);
 	trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0f1f290c97cd0..495d493d3a35b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7447,7 +7447,7 @@ static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
  * on a directory already in our cache.
  */
 int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	unsigned int savep;
 	uint32_t bitmap[3] = {0};
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 9872cf676a50a..7962e49097c38 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -485,7 +485,7 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
  */
 static int
 nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		 u64 cookie, struct page **pages, unsigned int count, int plus)
+		 u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	struct nfs_readdirargs	arg = {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b28c83475ee8e..9b42bffbe07b0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -878,7 +878,7 @@ struct nfs3_readdirargs {
 	struct nfs_fh *		fh;
 	__u64			cookie;
 	__be32			verf[2];
-	int			plus;
+	bool			plus;
 	unsigned int            count;
 	struct page **		pages;
 };
@@ -909,7 +909,7 @@ struct nfs3_linkres {
 struct nfs3_readdirres {
 	struct nfs_fattr *	dir_attr;
 	__be32 *		verf;
-	int			plus;
+	bool			plus;
 };
 
 struct nfs3_getaclres {
@@ -1053,7 +1053,7 @@ struct nfs4_readdir_arg {
 	struct page **			pages;	/* zero-copy data */
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
-	int				plus;
+	bool				plus;
 };
 
 struct nfs4_readdir_res {
@@ -1585,7 +1585,7 @@ struct nfs_rpc_ops {
 	int	(*mkdir)   (struct inode *, struct dentry *, struct iattr *);
 	int	(*rmdir)   (struct inode *, const struct qstr *);
 	int	(*readdir) (struct dentry *, struct rpc_cred *,
-			    u64, struct page **, unsigned int, int);
+			    u64, struct page **, unsigned int, bool);
 	int	(*mknod)   (struct inode *, struct dentry *, struct iattr *,
 			    dev_t);
 	int	(*statfs)  (struct nfs_server *, struct nfs_fh *,
@@ -1595,7 +1595,7 @@ struct nfs_rpc_ops {
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
-	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
+	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool);
 	int	(*pgio_rpc_prepare)(struct rpc_task *,
 				    struct nfs_pgio_header *);
 	void	(*read_setup)(struct nfs_pgio_header *, struct rpc_message *);
-- 
GitLab


From 818a8dbe83fddff534b814a7d4e0c75b511dff2e Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 16 Jun 2017 11:13:00 -0400
Subject: [PATCH 1462/1958] NFS: nfs_rename() - revalidate directories on
 -ERESTARTSYS

An interrupted rename will leave the old dentry behind if the rename
succeeds.  Fix this by forcing a lookup the next time through
->d_revalidate.

A previous attempt at solving this problem took the approach to complete
the work of the rename asynchronously, however that approach was wrong
since it would allow the d_move() to occur after the directory's i_mutex
had been dropped by the original process.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c            |  6 +++++-
 fs/nfs/unlink.c         | 13 +++++++++++++
 include/linux/nfs_xdr.h |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9688e9bb13dc2..ab69ebb483507 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2056,7 +2056,11 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 
 	error = rpc_wait_for_completion_task(task);
-	if (error == 0)
+	if (error != 0) {
+		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
+		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
+		smp_wmb();
+	} else
 		error = task->tk_status;
 	rpc_put_task(task);
 out:
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 191aa577dd1f3..e3949d93085c8 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -288,6 +288,19 @@ static void nfs_async_rename_release(void *calldata)
 	if (d_really_is_positive(data->old_dentry))
 		nfs_mark_for_revalidate(d_inode(data->old_dentry));
 
+	/* The result of the rename is unknown. Play it safe by
+	 * forcing a new lookup */
+	if (data->cancelled) {
+		spin_lock(&data->old_dir->i_lock);
+		nfs_force_lookup_revalidate(data->old_dir);
+		spin_unlock(&data->old_dir->i_lock);
+		if (data->new_dir != data->old_dir) {
+			spin_lock(&data->new_dir->i_lock);
+			nfs_force_lookup_revalidate(data->new_dir);
+			spin_unlock(&data->new_dir->i_lock);
+		}
+	}
+
 	dput(data->old_dentry);
 	dput(data->new_dentry);
 	iput(data->old_dir);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9b42bffbe07b0..9463eeff9e3c8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1533,6 +1533,7 @@ struct nfs_renamedata {
 	struct nfs_fattr	new_fattr;
 	void (*complete)(struct rpc_task *, struct nfs_renamedata *);
 	long timeout;
+	bool cancelled;
 };
 
 struct nfs_access_entry;
-- 
GitLab


From 92ea011f7cbade821ebd56a1f70d20331c0320c8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 20 Jun 2017 19:35:39 -0400
Subject: [PATCH 1463/1958] SUNRPC: Make slot allocation more reliable

In xprt_alloc_slot(), the spin lock is only needed to provide atomicity
between the atomic_add_unless() failure and the call to xprt_add_backlog().
We do not actually need to hold it across the memory allocation itself.

By dropping the lock, we can use a more resilient GFP_NOFS allocation,
just as we now do in the rest of the RPC client code.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprt.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3e63c5e97ebe6..4654a99342697 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1047,13 +1047,15 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
 	return ret;
 }
 
-static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
+static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
 {
 	struct rpc_rqst *req = ERR_PTR(-EAGAIN);
 
 	if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
 		goto out;
-	req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
+	spin_unlock(&xprt->reserve_lock);
+	req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+	spin_lock(&xprt->reserve_lock);
 	if (req != NULL)
 		goto out;
 	atomic_dec(&xprt->num_reqs);
@@ -1081,7 +1083,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 		list_del(&req->rq_list);
 		goto out_init_req;
 	}
-	req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
+	req = xprt_dynamic_alloc_slot(xprt);
 	if (!IS_ERR(req))
 		goto out_init_req;
 	switch (PTR_ERR(req)) {
-- 
GitLab


From b5973a8c1ccf375c9ab9e2428e1185e3f799af06 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 20 Jun 2017 19:35:36 -0400
Subject: [PATCH 1464/1958] NFS: Remove unused fields in the page I/O
 structures

Remove the 'layout_private' fields that were only used by the pNFS OSD
layout driver.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        | 2 --
 include/linux/nfs_page.h | 1 -
 include/linux/nfs_xdr.h  | 1 -
 3 files changed, 4 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ad92b401326c6..de107d866297b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -51,7 +51,6 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = mirror->pg_count;
 	hdr->dreq = desc->pg_dreq;
-	hdr->layout_private = desc->pg_layout_private;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
 	if (hdr->completion_ops->init_hdr)
@@ -711,7 +710,6 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
 	desc->pg_dreq = NULL;
-	desc->pg_layout_private = NULL;
 	desc->pg_bsize = bsize;
 
 	desc->pg_mirror_count = 1;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 247cc3d3498f2..6138cf91346b8 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -94,7 +94,6 @@ struct nfs_pageio_descriptor {
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
 	struct nfs_direct_req	*pg_dreq;
-	void			*pg_layout_private;
 	unsigned int		pg_bsize;	/* default bsize for mirrors */
 
 	u32			pg_mirror_count;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9463eeff9e3c8..7f1e04941763b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1436,7 +1436,6 @@ struct nfs_pgio_header {
 	const struct nfs_pgio_completion_ops *completion_ops;
 	const struct nfs_rw_ops	*rw_ops;
 	struct nfs_direct_req	*dreq;
-	void			*layout_private;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
-- 
GitLab


From 919e3bd9a87593520a2c5dfda27bd3e6599852ed Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 20 Jun 2017 19:35:37 -0400
Subject: [PATCH 1465/1958] NFS: Ensure we commit after writeback is complete

If the page cache is being flushed, then we want to ensure that we
do start a commit once the pages are done being flushed.
If we just wait until all I/O is done to that file, we can end up
livelocking until the balance_dirty_pages() mechanism puts its
foot down and forces I/O to stop.
So instead we do more or less the same thing that O_DIRECT does,
and set up a counter to tell us when the flush is done,

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        |  3 +++
 fs/nfs/write.c           | 57 ++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_page.h |  1 +
 include/linux/nfs_xdr.h  |  2 ++
 4 files changed, 63 insertions(+)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index de107d866297b..83d2918f1b134 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -50,6 +50,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->cred = hdr->req->wb_context->cred;
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = mirror->pg_count;
+	hdr->io_completion = desc->pg_io_completion;
 	hdr->dreq = desc->pg_dreq;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
@@ -709,6 +710,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
+	desc->pg_io_completion = NULL;
 	desc->pg_dreq = NULL;
 	desc->pg_bsize = bsize;
 
@@ -1231,6 +1233,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
 {
 	LIST_HEAD(failed);
 
+	desc->pg_io_completion = hdr->io_completion;
 	desc->pg_dreq = hdr->dreq;
 	while (!list_empty(&hdr->pages)) {
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index db7ba542559e7..051197cb9195f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -40,6 +40,12 @@
 #define MIN_POOL_WRITE		(32)
 #define MIN_POOL_COMMIT		(4)
 
+struct nfs_io_completion {
+	void (*complete)(void *data);
+	void *data;
+	struct kref refcount;
+};
+
 /*
  * Local function declarations
  */
@@ -108,6 +114,39 @@ static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 	mempool_free(hdr, nfs_wdata_mempool);
 }
 
+static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
+{
+	return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
+}
+
+static void nfs_io_completion_init(struct nfs_io_completion *ioc,
+		void (*complete)(void *), void *data)
+{
+	ioc->complete = complete;
+	ioc->data = data;
+	kref_init(&ioc->refcount);
+}
+
+static void nfs_io_completion_release(struct kref *kref)
+{
+	struct nfs_io_completion *ioc = container_of(kref,
+			struct nfs_io_completion, refcount);
+	ioc->complete(ioc->data);
+	kfree(ioc);
+}
+
+static void nfs_io_completion_get(struct nfs_io_completion *ioc)
+{
+	if (ioc != NULL)
+		kref_get(&ioc->refcount);
+}
+
+static void nfs_io_completion_put(struct nfs_io_completion *ioc)
+{
+	if (ioc != NULL)
+		kref_put(&ioc->refcount, nfs_io_completion_release);
+}
+
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 {
 	ctx->error = error;
@@ -681,18 +720,29 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 	return ret;
 }
 
+static void nfs_io_completion_commit(void *inode)
+{
+	nfs_commit_inode(inode, 0);
+}
+
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
 	struct nfs_pageio_descriptor pgio;
+	struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
+	if (ioc)
+		nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+
 	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
 				&nfs_async_write_completion_ops);
+	pgio.pg_io_completion = ioc;
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
+	nfs_io_completion_put(ioc);
 
 	if (err < 0)
 		goto out_err;
@@ -940,6 +990,11 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 	return hdr->verf.committed != NFS_FILE_SYNC;
 }
 
+static void nfs_async_write_init(struct nfs_pgio_header *hdr)
+{
+	nfs_io_completion_get(hdr->io_completion);
+}
+
 static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_commit_info cinfo;
@@ -973,6 +1028,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 		nfs_release_request(req);
 	}
 out:
+	nfs_io_completion_put(hdr->io_completion);
 	hdr->release(hdr);
 }
 
@@ -1378,6 +1434,7 @@ static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
 }
 
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+	.init_hdr = nfs_async_write_init,
 	.error_cleanup = nfs_async_write_error,
 	.completion = nfs_write_completion,
 	.reschedule_io = nfs_async_write_reschedule_io,
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6138cf91346b8..abbee2d15dce9 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -93,6 +93,7 @@ struct nfs_pageio_descriptor {
 	const struct rpc_call_ops *pg_rpc_callops;
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
+	struct nfs_io_completion *pg_io_completion;
 	struct nfs_direct_req	*pg_dreq;
 	unsigned int		pg_bsize;	/* default bsize for mirrors */
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7f1e04941763b..89093341f0761 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1422,6 +1422,7 @@ enum {
 	NFS_IOHDR_STAT,
 };
 
+struct nfs_io_completion;
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
@@ -1435,6 +1436,7 @@ struct nfs_pgio_header {
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
 	const struct nfs_rw_ops	*rw_ops;
+	struct nfs_io_completion *io_completion;
 	struct nfs_direct_req	*dreq;
 	spinlock_t		lock;
 	/* fields protected by lock */
-- 
GitLab


From 1a4edf0f46b0cb87dd5020481d355cd5cc456c47 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 20 Jun 2017 19:35:38 -0400
Subject: [PATCH 1466/1958] NFS: Fix commit policy for non-blocking calls to
 nfs_write_inode()

Now that the writes will schedule a commit on their own, we don't
need nfs_write_inode() to schedule one if there are outstanding
writes, and we're being called in non-blocking mode.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 051197cb9195f..b1af5dee5e0a8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1941,7 +1941,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 		/* Don't commit yet if this is a non-blocking flush and there
 		 * are a lot of outstanding writes for this mapping.
 		 */
-		if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
+		if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
 			goto out_mark_dirty;
 
 		/* don't wait for the COMMIT response */
-- 
GitLab


From 2eb3aea7d9c43325a12df312adfc7fb25bbd636b Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 9 Jun 2017 11:03:23 -0400
Subject: [PATCH 1467/1958] NFS: Fix initialization of nfs_page_array->npages

Commit 8ef9b0b9e1c0 open-coded nfs_pgarray_set(), and left out the
initialization of the nfs_page_array's npages.  This mistake didn't show up
until testing with block layouts, and there shows that all pNFS reads
return -EIO.

Fixes: 8ef9b0b9e1c0 ("NFS: move nfs_pgarray_set() to open code")
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Cc: stable@vger.kernel.org # 4.12
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 83d2918f1b134..8a23e2b40b04e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -779,6 +779,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	gfp_t gfp_flags = GFP_KERNEL;
 
 	pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
+	pg_array->npages = pagecount;
 
 	if (pagecount <= ARRAY_SIZE(pg_array->page_array))
 		pg_array->pagevec = pg_array->page_array;
-- 
GitLab


From ce85bd29210f2cd84dc1f762c3992d8e6db822c2 Mon Sep 17 00:00:00 2001
From: Tuo Chen Peng <tpeng@nvidia.com>
Date: Tue, 6 Jun 2017 23:42:44 -0700
Subject: [PATCH 1468/1958] nfs: Fix fscache stat printing in nfs_show_stats()

nfs_show_stats() was incorrectly reading statistics for bytes when printing that
for fsc. It caused files like /proc/self/mountstats to report incorrect fsc
statistics for NFS mounts.

Signed-off-by: Tuo Chen Peng <tpeng@nvidia.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index eceb4eabb0649..b4176393f049f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -879,7 +879,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
 	if (nfss->options & NFS_OPTION_FSCACHE) {
 		seq_printf(m, "\n\tfsc:\t");
 		for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
-			seq_printf(m, "%Lu ", totals.bytes[i]);
+			seq_printf(m, "%Lu ", totals.fscache[i]);
 	}
 #endif
 	seq_printf(m, "\n");
-- 
GitLab


From 4cd1ec95bdec8aedb280ca0f244ededb76f747ab Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 Jun 2017 18:16:25 +0300
Subject: [PATCH 1469/1958] NFS: silence a uninitialized variable warning

Static checkers have gotten clever enough to complain that "id_long" is
uninitialized on the failure path.  It's harmless, but simple to fix.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4idmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 835c163f61af5..dd5d27da8c0cc 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -364,7 +364,8 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
 		ret = -EINVAL;
 	} else {
 		ret = kstrtol(id_str, 10, &id_long);
-		*id = (__u32)id_long;
+		if (!ret)
+			*id = (__u32)id_long;
 	}
 	return ret;
 }
-- 
GitLab


From a0bc01e0f1fa39702b5244b3bac699bea0d4f413 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Fri, 23 Jun 2017 10:26:58 -0400
Subject: [PATCH 1470/1958] PNFS fix EACCESS on commit to DS handling

Commit fabbbee0eb0f "PNFS fix fallback to MDS if got error on
commit to DS" moved the pnfs_set_lo_fail() to unhandled errors
which was not correct and lead to a kernel oops on umount.

Instead, fix the original EACCESS on commit to DS error by
getting the new layout and re-doing the IO.

Fixes: fabbbee0eb0f ("PNFS fix fallback to MDS if got error on commit to DS")
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Cc: stable@vger.kernel.org # v4.12
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/filelayout/filelayout.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1cf85d65b7489..3e486cd01cafd 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -172,6 +172,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 	case -NFS4ERR_RETRY_UNCACHED_REP:
 		break;
 	/* Invalidate Layout errors */
+	case -NFS4ERR_ACCESS:
 	case -NFS4ERR_PNFS_NO_LAYOUT:
 	case -ESTALE:           /* mapped NFS4ERR_STALE */
 	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
@@ -202,10 +203,10 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 			task->tk_status);
 		nfs4_mark_deviceid_unavailable(devid);
 		pnfs_error_mark_layout_for_return(inode, lseg);
+		pnfs_set_lo_fail(lseg);
 		rpc_wake_up(&tbl->slot_tbl_waitq);
 		/* fall through */
 	default:
-		pnfs_set_lo_fail(lseg);
 reset:
 		dprintk("%s Retry through MDS. Error %d\n", __func__,
 			task->tk_status);
-- 
GitLab


From 22368ff11d72eb06051f9b51abbc6842496c2d3a Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Fri, 23 Jun 2017 10:26:59 -0400
Subject: [PATCH 1471/1958] PNFS for stateid errors retry against MDS first

Upon receiving a stateid error such as BAD_STATEID, the client
should retry the operation against the MDS before deciding to
do stateid recovery.

Previously, the code would initiate state recovery and it could
lead to a race in a state manager that could chose an incorrect
recovery method which would lead to the EIO failure for the
application.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/filelayout/filelayout.c         | 28 ----------------------
 fs/nfs/flexfilelayout/flexfilelayout.c | 33 --------------------------
 2 files changed, 61 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 3e486cd01cafd..080fc6b278bd5 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 {
 	struct pnfs_layout_hdr *lo = lseg->pls_layout;
 	struct inode *inode = lo->plh_inode;
-	struct nfs_server *mds_server = NFS_SERVER(inode);
 	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
-	struct nfs_client *mds_client = mds_server->nfs_client;
 	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
 
 	if (task->tk_status >= 0)
 		return 0;
 
 	switch (task->tk_status) {
-	/* MDS state errors */
-	case -NFS4ERR_DELEG_REVOKED:
-	case -NFS4ERR_ADMIN_REVOKED:
-	case -NFS4ERR_BAD_STATEID:
-	case -NFS4ERR_OPENMODE:
-		if (state == NULL)
-			break;
-		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-			goto out_bad_stateid;
-		goto wait_on_recovery;
-	case -NFS4ERR_EXPIRED:
-		if (state != NULL) {
-			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-				goto out_bad_stateid;
-		}
-		nfs4_schedule_lease_recovery(mds_client);
-		goto wait_on_recovery;
 	/* DS session errors */
 	case -NFS4ERR_BADSESSION:
 	case -NFS4ERR_BADSLOT:
@@ -212,17 +193,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 			task->tk_status);
 		return -NFS4ERR_RESET_TO_MDS;
 	}
-out:
 	task->tk_status = 0;
 	return -EAGAIN;
-out_bad_stateid:
-	task->tk_status = -EIO;
-	return 0;
-wait_on_recovery:
-	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
-	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
-		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
-	goto out;
 }
 
 /* NFS_PROTO call done callback routines */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 23542dc44a25c..1f2ac3dd0fe5c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 {
 	struct pnfs_layout_hdr *lo = lseg->pls_layout;
 	struct inode *inode = lo->plh_inode;
-	struct nfs_server *mds_server = NFS_SERVER(inode);
-
 	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
-	struct nfs_client *mds_client = mds_server->nfs_client;
 	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
 
 	switch (task->tk_status) {
-	/* MDS state errors */
-	case -NFS4ERR_DELEG_REVOKED:
-	case -NFS4ERR_ADMIN_REVOKED:
-	case -NFS4ERR_BAD_STATEID:
-		if (state == NULL)
-			break;
-		nfs_remove_bad_delegation(state->inode, NULL);
-	case -NFS4ERR_OPENMODE:
-		if (state == NULL)
-			break;
-		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-			goto out_bad_stateid;
-		goto wait_on_recovery;
-	case -NFS4ERR_EXPIRED:
-		if (state != NULL) {
-			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-				goto out_bad_stateid;
-		}
-		nfs4_schedule_lease_recovery(mds_client);
-		goto wait_on_recovery;
-	/* DS session errors */
 	case -NFS4ERR_BADSESSION:
 	case -NFS4ERR_BADSLOT:
 	case -NFS4ERR_BAD_HIGH_SLOT:
@@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 			task->tk_status);
 		return -NFS4ERR_RESET_TO_MDS;
 	}
-out:
 	task->tk_status = 0;
 	return -EAGAIN;
-out_bad_stateid:
-	task->tk_status = -EIO;
-	return 0;
-wait_on_recovery:
-	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
-	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
-		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
-	goto out;
 }
 
 /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
-- 
GitLab


From cc89684c9a265828ce061037f1f79f4a68ccd3f7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 5 Jul 2017 12:22:20 +1000
Subject: [PATCH 1472/1958] NFS: only invalidate dentrys that are clearly
 invalid.

Since commit bafc9b754f75 ("vfs: More precise tests in d_invalidate")
in v3.18, a return of '0' from ->d_revalidate() will cause the dentry
to be invalidated even if it has filesystems mounted on or it or on a
descendant.  The mounted filesystem is unmounted.

This means we need to be careful not to return 0 unless the directory
referred to truly is invalid.  So -ESTALE or -ENOENT should invalidate
the directory.  Other errors such a -EPERM or -ERESTARTSYS should be
returned from ->d_revalidate() so they are propagated to the caller.

A particular problem can be demonstrated by:

1/ mount an NFS filesystem using NFSv3 on /mnt
2/ mount any other filesystem on /mnt/foo
3/ ls /mnt/foo
4/ turn off network, or otherwise make the server unable to respond
5/ ls /mnt/foo &
6/ cat /proc/$!/stack # note that nfs_lookup_revalidate is in the call stack
7/ kill -9 $! # this results in -ERESTARTSYS being returned
8/ observe that /mnt/foo has been unmounted.

This patch changes nfs_lookup_revalidate() to only treat
  -ESTALE from nfs_lookup_verify_inode() and
  -ESTALE or -ENOENT from ->lookup()
as indicating an invalid inode.  Other errors are returned.

Also nfs_check_inode_attributes() is changed to return -ESTALE rather
than -EIO.  This is consistent with the error returned in similar
circumstances from nfs_update_inode().

As this bug allows any user to unmount a filesystem mounted on an NFS
filesystem, this fix is suitable for stable kernels.

Fixes: bafc9b754f75 ("vfs: More precise tests in d_invalidate")
Cc: stable@vger.kernel.org (v3.18+)
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c   | 12 ++++++++----
 fs/nfs/inode.c |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ab69ebb483507..98b18aaf45c97 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1115,11 +1115,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	/* Force a full look up iff the parent directory has changed */
 	if (!nfs_is_exclusive_create(dir, flags) &&
 	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
-
-		if (nfs_lookup_verify_inode(inode, flags)) {
+		error = nfs_lookup_verify_inode(inode, flags);
+		if (error) {
 			if (flags & LOOKUP_RCU)
 				return -ECHILD;
-			goto out_zap_parent;
+			if (error == -ESTALE)
+				goto out_zap_parent;
+			goto out_error;
 		}
 		nfs_advise_use_readdirplus(dir);
 		goto out_valid;
@@ -1144,8 +1146,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
 	trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
-	if (error)
+	if (error == -ESTALE || error == -ENOENT)
 		goto out_bad;
+	if (error)
+		goto out_error;
 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
 		goto out_bad;
 	if ((error = nfs_refresh_inode(inode, fattr)) != 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1de93ba78dc95..8c465d3c7e057 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1315,9 +1315,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 		return 0;
 	/* Has the inode gone and changed behind our back? */
 	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
-		return -EIO;
+		return -ESTALE;
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
-		return -EIO;
+		return -ESTALE;
 
 	if (!nfs_file_has_buffered_writers(nfsi)) {
 		/* Verify a few of the more important attributes */
-- 
GitLab


From eaa2b82c3b3c938ab4635f8967d33f3e581da836 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 3 Jul 2017 15:27:26 +1000
Subject: [PATCH 1473/1958] NFS: guard against confused server in
 nfs_atomic_open()

A confused server could return a filehandle for an
NFSv4 OPEN request, which it previously returned for a directory.
So the inode returned by  ->open_context() in nfs_atomic_open()
could conceivably be a directory inode.

This has particular implications for the call to
nfs_file_set_open_context() in nfs_finish_open().
If that is called on a directory inode, then the nfs_open_context
that gets stored in the filp->private_data will be linked to
nfs_inode->open_files.

When the directory is closed, nfs_closedir() will (ultimately)
free the ->private_data, but not unlink it from nfs_inode->open_files
(because it doesn't expect an nfs_open_context there).

Subsequently the memory could get used for something else and eventually
if the ->open_files list is walked, the walker will fall off the end and
crash.

So: change nfs_finish_open() to only call nfs_file_set_open_context()
for regular-file inodes.

This failure mode has been seen in a production setting (unknown NFS
server implementation).  The kernel was v3.0 and the specific sequence
seen would not affect more recent kernels, but I think a risk is still
present, and caution is wise.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 98b18aaf45c97..90bc4025ca3cb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1431,8 +1431,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 	err = finish_open(file, dentry, do_open, opened);
 	if (err)
 		goto out;
-	nfs_file_set_open_context(file, ctx);
-
+	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
+		nfs_file_set_open_context(file, ctx);
+	else
+		err = -ESTALE;
 out:
 	return err;
 }
-- 
GitLab


From 26fde4dfcbdcbbac394bb35de0c0f842de6972b5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 3 Jul 2017 15:27:26 +1000
Subject: [PATCH 1474/1958] NFS: check for nfs_refresh_inode() errors in
 nfs_fhget()

If an NFS server returns a filehandle that we have previously
seen, and reports a different type, then nfs_refresh_inode()
will log a warning and return an error.

nfs_fhget() does not check for this error and may return an
inode with a different type than the one that the server
reported.

This is likely to cause confusion, and is one way that
->open_context() could return a directory inode as discussed
in the previous patch.

So if nfs_refresh_inode() returns and error, return that error
from nfs_fhget() to avoid the confusion propagating.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/inode.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8c465d3c7e057..7e7a894601b9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -525,8 +525,14 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		nfs_fscache_init_inode(inode);
 
 		unlock_new_inode(inode);
-	} else
-		nfs_refresh_inode(inode, fattr);
+	} else {
+		int err = nfs_refresh_inode(inode, fattr);
+		if (err < 0) {
+			iput(inode);
+			inode = ERR_PTR(err);
+			goto out_no_inode;
+		}
+	}
 	dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
 		inode->i_sb->s_id,
 		(unsigned long long)NFS_FILEID(inode),
-- 
GitLab


From 04d25b7d5d1b43fbaffe4ad4bb21288f0f305338 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:51:48 -0400
Subject: [PATCH 1475/1958] xprtrdma: On invalidation failure, remove MWs from
 rl_registered

Callers assume the ro_unmap_sync and ro_unmap_safe methods empty
the list of registered MRs. Ensure that all paths through
fmr_op_unmap_sync() remove MWs from that list.

Fixes: 9d6b04097882 ("xprtrdma: Place registered MWs on a ... ")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/fmr_ops.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 59e64025ed96b..21f3cd5e2d6b1 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -295,6 +295,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
 
 	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+		list_del_init(&mw->mw_list);
 		list_del_init(&mw->fmr.fm_mr->list);
 		fmr_op_recover_mr(mw);
 	}
-- 
GitLab


From 4b196dc6fee9ba838ebabf824e294a429c79b27d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:51:56 -0400
Subject: [PATCH 1476/1958] xprtrdma: Pre-mark remotely invalidated MRs

There are rare cases where an rpcrdma_req and its matched
rpcrdma_rep can be re-used, via rpcrdma_buffer_put, while the RPC
reply handler is still using that req. This is typically due to a
signal firing at just the wrong instant.

As part of closing this race window, avoid using the wrong
rpcrdma_rep to detect remotely invalidated MRs. Mark MRs as
invalidated while we are sure the rep is still OK to use.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=305
Fixes: 68791649a725 ('xprtrdma: Invalidate in the RPC reply ... ')
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/frwr_ops.c  |  4 +---
 net/sunrpc/xprtrdma/rpc_rdma.c  | 22 ++++++++++++++++++++--
 net/sunrpc/xprtrdma/verbs.c     |  1 +
 net/sunrpc/xprtrdma/xprt_rdma.h |  6 ++++++
 4 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index f81dd93176c07..31290cbedfe06 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -464,7 +464,6 @@ static void
 frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
 	struct ib_send_wr *first, **prev, *last, *bad_wr;
-	struct rpcrdma_rep *rep = req->rl_reply;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_frmr *f;
 	struct rpcrdma_mw *mw;
@@ -483,8 +482,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	list_for_each_entry(mw, &req->rl_registered, mw_list) {
 		mw->frmr.fr_state = FRMR_IS_INVALID;
 
-		if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
-		    (mw->mw_handle == rep->rr_inv_rkey))
+		if (mw->mw_flags & RPCRDMA_MW_F_RI)
 			continue;
 
 		f = &mw->frmr;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 694e9b13ecf07..2356a6305f252 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -928,6 +928,24 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 	return fixup_copy_count;
 }
 
+/* Caller must guarantee @rep remains stable during this call.
+ */
+static void
+rpcrdma_mark_remote_invalidation(struct list_head *mws,
+				 struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_mw *mw;
+
+	if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
+		return;
+
+	list_for_each_entry(mw, mws, mw_list)
+		if (mw->mw_handle == rep->rr_inv_rkey) {
+			mw->mw_flags = RPCRDMA_MW_F_RI;
+			break; /* only one invalidated MR per RPC */
+		}
+}
+
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /* By convention, backchannel calls arrive via rdma_msg type
  * messages, and never populate the chunk lists. This makes
@@ -1006,13 +1024,13 @@ rpcrdma_reply_handler(struct work_struct *work)
 	/* Sanity checking has passed. We are now committed
 	 * to complete this transaction.
 	 */
+	rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
 	list_del_init(&rqst->rq_list);
+	req->rl_reply = rep;
 	spin_unlock_bh(&xprt->transport_lock);
 	dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
 		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
-	/* from here on, the reply is no longer an orphan */
-	req->rl_reply = rep;
 	xprt->reestablish_timeout = 0;
 
 	if (headerp->rm_vers != rpcrdma_version)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3dbce9ac4327a..a8be66d806dcc 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1187,6 +1187,7 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 
 	if (!mw)
 		goto out_nomws;
+	mw->mw_flags = 0;
 	return mw;
 
 out_nomws:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1d66acf1a723e..2e027335fcbc7 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -271,6 +271,7 @@ struct rpcrdma_mw {
 	struct scatterlist	*mw_sg;
 	int			mw_nents;
 	enum dma_data_direction	mw_dir;
+	unsigned long		mw_flags;
 	union {
 		struct rpcrdma_fmr	fmr;
 		struct rpcrdma_frmr	frmr;
@@ -282,6 +283,11 @@ struct rpcrdma_mw {
 	struct list_head	mw_all;
 };
 
+/* mw_flags */
+enum {
+	RPCRDMA_MW_F_RI		= 1,
+};
+
 /*
  * struct rpcrdma_req -- structure central to the request/reply sequence.
  *
-- 
GitLab


From 451d26e151f0792601d10378a608c52304b6a357 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:04 -0400
Subject: [PATCH 1477/1958] xprtrdma: Pass only the list of registered MRs to
 ro_unmap_sync

There are rare cases where an rpcrdma_req can be re-used (via
rpcrdma_buffer_put) while the RPC reply handler is still running.
This is due to a signal firing at just the wrong instant.

Since commit 9d6b04097882 ("xprtrdma: Place registered MWs on a
per-req list"), rpcrdma_mws are self-contained; ie., they fully
describe an MR and scatterlist, and no part of that information is
stored in struct rpcrdma_req.

As part of closing the above race window, pass only the req's list
of registered MRs to ro_unmap_sync, rather than the rpcrdma_req
itself.

Some extra transport header sanity checking is removed. Since the
client depends on its own recollection of what memory had been
registered, there doesn't seem to be a way to abuse this change.

And, the check was not terribly effective. If the client had sent
Read chunks, the "list_empty" test is negative in both of the
removed cases, which are actually looking for Write or Reply
chunks.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=305
Fixes: 68791649a725 ('xprtrdma: Invalidate in the RPC reply ... ')
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/fmr_ops.c   | 16 +++++++++-------
 net/sunrpc/xprtrdma/frwr_ops.c  | 19 +++++++++----------
 net/sunrpc/xprtrdma/rpc_rdma.c  | 16 +++++++---------
 net/sunrpc/xprtrdma/xprt_rdma.h |  2 +-
 4 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 21f3cd5e2d6b1..5556ed99b4b7b 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -255,24 +255,26 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
  */
 static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 {
 	struct rpcrdma_mw *mw, *tmp;
 	LIST_HEAD(unmap_list);
 	int rc;
 
-	dprintk("RPC:       %s: req %p\n", __func__, req);
-
 	/* ORDER: Invalidate all of the req's MRs first
 	 *
 	 * ib_unmap_fmr() is slow, so use a single call instead
 	 * of one call per mapped FMR.
 	 */
-	list_for_each_entry(mw, &req->rl_registered, mw_list)
+	list_for_each_entry(mw, mws, mw_list) {
+		dprintk("RPC:       %s: unmapping fmr %p\n",
+			__func__, &mw->fmr);
 		list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+	}
 	r_xprt->rx_stats.local_inv_needed++;
 	rc = ib_unmap_fmr(&unmap_list);
 	if (rc)
@@ -281,7 +283,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	/* ORDER: Now DMA unmap all of the req's MRs, and return
 	 * them to the free MW list.
 	 */
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+	list_for_each_entry_safe(mw, tmp, mws, mw_list) {
 		list_del_init(&mw->mw_list);
 		list_del_init(&mw->fmr.fm_mr->list);
 		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
@@ -294,7 +296,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 out_reset:
 	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
 
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+	list_for_each_entry_safe(mw, tmp, mws, mw_list) {
 		list_del_init(&mw->mw_list);
 		list_del_init(&mw->fmr.fm_mr->list);
 		fmr_op_recover_mr(mw);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 31290cbedfe06..97f9f85fa5c1a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -458,10 +458,11 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
  */
 static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 {
 	struct ib_send_wr *first, **prev, *last, *bad_wr;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -469,9 +470,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	struct rpcrdma_mw *mw;
 	int count, rc;
 
-	dprintk("RPC:       %s: req %p\n", __func__, req);
-
-	/* ORDER: Invalidate all of the req's MRs first
+	/* ORDER: Invalidate all of the MRs first
 	 *
 	 * Chain the LOCAL_INV Work Requests and post them with
 	 * a single ib_post_send() call.
@@ -479,7 +478,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	f = NULL;
 	count = 0;
 	prev = &first;
-	list_for_each_entry(mw, &req->rl_registered, mw_list) {
+	list_for_each_entry(mw, mws, mw_list) {
 		mw->frmr.fr_state = FRMR_IS_INVALID;
 
 		if (mw->mw_flags & RPCRDMA_MW_F_RI)
@@ -528,12 +527,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 
 	wait_for_completion(&f->fr_linv_done);
 
-	/* ORDER: Now DMA unmap all of the req's MRs, and return
+	/* ORDER: Now DMA unmap all of the MRs, and return
 	 * them to the free MW list.
 	 */
 unmap:
-	while (!list_empty(&req->rl_registered)) {
-		mw = rpcrdma_pop_mw(&req->rl_registered);
+	while (!list_empty(mws)) {
+		mw = rpcrdma_pop_mw(mws);
 		dprintk("RPC:       %s: DMA unmapping frmr %p\n",
 			__func__, &mw->frmr);
 		ib_dma_unmap_sg(ia->ri_device,
@@ -549,7 +548,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	/* Find and reset the MRs in the LOCAL_INV WRs that did not
 	 * get posted. This is synchronous, and slow.
 	 */
-	list_for_each_entry(mw, &req->rl_registered, mw_list) {
+	list_for_each_entry(mw, mws, mw_list) {
 		f = &mw->frmr;
 		if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
 			__frwr_reset_mr(ia, mw);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2356a6305f252..c88132d02fb81 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -995,6 +995,7 @@ rpcrdma_reply_handler(struct work_struct *work)
 	__be32 *iptr;
 	int rdmalen, status, rmerr;
 	unsigned long cwnd;
+	struct list_head mws;
 
 	dprintk("RPC:       %s: incoming rep %p\n", __func__, rep);
 
@@ -1024,7 +1025,8 @@ rpcrdma_reply_handler(struct work_struct *work)
 	/* Sanity checking has passed. We are now committed
 	 * to complete this transaction.
 	 */
-	rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+	list_replace_init(&req->rl_registered, &mws);
+	rpcrdma_mark_remote_invalidation(&mws, rep);
 	list_del_init(&rqst->rq_list);
 	req->rl_reply = rep;
 	spin_unlock_bh(&xprt->transport_lock);
@@ -1042,12 +1044,9 @@ rpcrdma_reply_handler(struct work_struct *work)
 	case rdma_msg:
 		/* never expect read chunks */
 		/* never expect reply chunks (two ways to check) */
-		/* never expect write chunks without having offered RDMA */
 		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
 		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&
-		     headerp->rm_body.rm_chunks[2] != xdr_zero) ||
-		    (headerp->rm_body.rm_chunks[1] != xdr_zero &&
-		     list_empty(&req->rl_registered)))
+		     headerp->rm_body.rm_chunks[2] != xdr_zero))
 			goto badheader;
 		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
 			/* count any expected write chunks in read reply */
@@ -1084,8 +1083,7 @@ rpcrdma_reply_handler(struct work_struct *work)
 		/* never expect read or write chunks, always reply chunks */
 		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
 		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
-		    headerp->rm_body.rm_chunks[2] != xdr_one ||
-		    list_empty(&req->rl_registered))
+		    headerp->rm_body.rm_chunks[2] != xdr_one)
 			goto badheader;
 		iptr = (__be32 *)((unsigned char *)headerp +
 							RPCRDMA_HDRLEN_MIN);
@@ -1118,8 +1116,8 @@ rpcrdma_reply_handler(struct work_struct *work)
 	 * control: waking the next RPC waits until this RPC has
 	 * relinquished all its Send Queue entries.
 	 */
-	if (!list_empty(&req->rl_registered))
-		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
+	if (!list_empty(&mws))
+		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
 
 	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2e027335fcbc7..1c23117bf1b0d 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -467,7 +467,7 @@ struct rpcrdma_memreg_ops {
 				  struct rpcrdma_mr_seg *, int, bool,
 				  struct rpcrdma_mw **);
 	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
-					 struct rpcrdma_req *);
+					 struct list_head *);
 	void		(*ro_unmap_safe)(struct rpcrdma_xprt *,
 					 struct rpcrdma_req *, bool);
 	void		(*ro_recover_mr)(struct rpcrdma_mw *);
-- 
GitLab


From a80d66c9e0d1ac31fa3427340efa0bf79b338023 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:12 -0400
Subject: [PATCH 1478/1958] xprtrdma: Rename rpcrdma_req::rl_free

Clean up: I'm about to use the rl_free field for purposes other than
a free list. So use a more generic name.

This is a refactoring change only.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=305
Fixes: 68791649a725 ('xprtrdma: Invalidate in the RPC reply ... ')
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c     | 9 ++++-----
 net/sunrpc/xprtrdma/xprt_rdma.h | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index a8be66d806dcc..df72224604d2d 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -971,7 +971,6 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
 	if (req == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&req->rl_free);
 	spin_lock(&buffer->rb_reqslock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
 	spin_unlock(&buffer->rb_reqslock);
@@ -1055,7 +1054,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 			goto out;
 		}
 		req->rl_backchannel = false;
-		list_add(&req->rl_free, &buf->rb_send_bufs);
+		list_add(&req->rl_list, &buf->rb_send_bufs);
 	}
 
 	INIT_LIST_HEAD(&buf->rb_recv_bufs);
@@ -1084,8 +1083,8 @@ rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
 	struct rpcrdma_req *req;
 
 	req = list_first_entry(&buf->rb_send_bufs,
-			       struct rpcrdma_req, rl_free);
-	list_del(&req->rl_free);
+			       struct rpcrdma_req, rl_list);
+	list_del(&req->rl_list);
 	return req;
 }
 
@@ -1268,7 +1267,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
 
 	spin_lock(&buffers->rb_lock);
 	buffers->rb_send_count--;
-	list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
+	list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
 	if (rep) {
 		buffers->rb_recv_count--;
 		list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1c23117bf1b0d..ad918c840fc71 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -340,7 +340,7 @@ enum {
 
 struct rpcrdma_buffer;
 struct rpcrdma_req {
-	struct list_head	rl_free;
+	struct list_head	rl_list;
 	unsigned int		rl_mapped_sges;
 	unsigned int		rl_connect_cookie;
 	struct rpcrdma_buffer	*rl_buffer;
-- 
GitLab


From 431af645cf662652bc43c7a26f87cb40aedb01d9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:20 -0400
Subject: [PATCH 1479/1958] xprtrdma: Fix client lock-up after application
 signal fires

After a signal, the RPC client aborts synchronous RPCs running on
behalf of the signaled application.

The server is still executing those RPCs, and will write the results
back into the client's memory when it's done. By the time the server
writes the results, that memory is likely being used for other
purposes. Therefore xprtrdma has to immediately invalidate all
memory regions used by those aborted RPCs to prevent the server's
writes from clobbering that re-used memory.

With FMR memory registration, invalidation takes a relatively long
time. In fact, the invalidation is often still running when the
server tries to write the results into the memory regions that are
being invalidated.

This sets up a race between two processes:

1.  After the signal, xprt_rdma_free calls ro_unmap_safe.
2.  While ro_unmap_safe is still running, the server replies and
    rpcrdma_reply_handler runs, calling ro_unmap_sync.

Both processes invoke ib_unmap_fmr on the same FMR.

The mlx4 driver allows two ib_unmap_fmr calls on the same FMR at
the same time, but HCAs generally don't tolerate this. Sometimes
this can result in a system crash.

If the HCA happens to survive, rpcrdma_reply_handler continues. It
removes the rpc_rqst from rq_list and releases the transport_lock.
This enables xprt_rdma_free to run in another process, and the
rpc_rqst is released while rpcrdma_reply_handler is still waiting
for the ib_unmap_fmr call to finish.

But further down in rpcrdma_reply_handler, the transport_lock is
taken again, and "rqst" is dereferenced. If "rqst" has already been
released, this triggers a general protection fault. Since bottom-
halves are disabled, the system locks up.

Address both issues by reversing the order of the xprt_lookup_rqst
call and the ro_unmap_sync call. Introduce a separate lookup
mechanism for rpcrdma_req's to enable calling ro_unmap_sync before
xprt_lookup_rqst. Now the handler takes the transport_lock once
and holds it for the XID lookup and RPC completion.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=305
Fixes: 68791649a725 ('xprtrdma: Invalidate in the RPC reply ... ')
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  | 79 +++++++++++++++++++++------------
 net/sunrpc/xprtrdma/transport.c |  3 +-
 net/sunrpc/xprtrdma/verbs.c     |  3 +-
 net/sunrpc/xprtrdma/xprt_rdma.h | 30 +++++++++++++
 4 files changed, 84 insertions(+), 31 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c88132d02fb81..b6584ae8e2517 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -734,6 +734,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 		rpclen = 0;
 	}
 
+	req->rl_xid = rqst->rq_xid;
+	rpcrdma_insert_req(&r_xprt->rx_buf, req);
+
 	/* This implementation supports the following combinations
 	 * of chunk lists in one RPC-over-RDMA Call message:
 	 *
@@ -987,11 +990,12 @@ rpcrdma_reply_handler(struct work_struct *work)
 {
 	struct rpcrdma_rep *rep =
 			container_of(work, struct rpcrdma_rep, rr_work);
+	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	struct rpcrdma_msg *headerp;
 	struct rpcrdma_req *req;
 	struct rpc_rqst *rqst;
-	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
-	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	__be32 *iptr;
 	int rdmalen, status, rmerr;
 	unsigned long cwnd;
@@ -1013,28 +1017,45 @@ rpcrdma_reply_handler(struct work_struct *work)
 	/* Match incoming rpcrdma_rep to an rpcrdma_req to
 	 * get context for handling any incoming chunks.
 	 */
-	spin_lock_bh(&xprt->transport_lock);
-	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
-	if (!rqst)
+	spin_lock(&buf->rb_lock);
+	req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf,
+					headerp->rm_xid);
+	if (!req)
 		goto out_nomatch;
-
-	req = rpcr_to_rdmar(rqst);
 	if (req->rl_reply)
 		goto out_duplicate;
 
-	/* Sanity checking has passed. We are now committed
-	 * to complete this transaction.
-	 */
 	list_replace_init(&req->rl_registered, &mws);
 	rpcrdma_mark_remote_invalidation(&mws, rep);
-	list_del_init(&rqst->rq_list);
+
+	/* Avoid races with signals and duplicate replies
+	 * by marking this req as matched.
+	 */
 	req->rl_reply = rep;
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&buf->rb_lock);
+
 	dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
 		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
-	xprt->reestablish_timeout = 0;
+	/* Invalidate and unmap the data payloads before waking the
+	 * waiting application. This guarantees the memory regions
+	 * are properly fenced from the server before the application
+	 * accesses the data. It also ensures proper send flow control:
+	 * waking the next RPC waits until this RPC has relinquished
+	 * all its Send Queue entries.
+	 */
+	if (!list_empty(&mws))
+		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
 
+	/* Perform XID lookup, reconstruction of the RPC reply, and
+	 * RPC completion while holding the transport lock to ensure
+	 * the rep, rqst, and rq_task pointers remain stable.
+	 */
+	spin_lock_bh(&xprt->transport_lock);
+	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+	if (!rqst)
+		goto out_norqst;
+	xprt->reestablish_timeout = 0;
 	if (headerp->rm_vers != rpcrdma_version)
 		goto out_badversion;
 
@@ -1109,17 +1130,6 @@ rpcrdma_reply_handler(struct work_struct *work)
 	}
 
 out:
-	/* Invalidate and flush the data payloads before waking the
-	 * waiting application. This guarantees the memory region is
-	 * properly fenced from the server before the application
-	 * accesses the data. It also ensures proper send flow
-	 * control: waking the next RPC waits until this RPC has
-	 * relinquished all its Send Queue entries.
-	 */
-	if (!list_empty(&mws))
-		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
-
-	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
 	xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
 	if (xprt->cwnd > cwnd)
@@ -1128,7 +1138,7 @@ rpcrdma_reply_handler(struct work_struct *work)
 	xprt_complete_rqst(rqst->rq_task, status);
 	spin_unlock_bh(&xprt->transport_lock);
 	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
-			__func__, xprt, rqst, status);
+		__func__, xprt, rqst, status);
 	return;
 
 out_badstatus:
@@ -1177,26 +1187,37 @@ rpcrdma_reply_handler(struct work_struct *work)
 	r_xprt->rx_stats.bad_reply_count++;
 	goto out;
 
-/* If no pending RPC transaction was matched, post a replacement
- * receive buffer before returning.
+/* The req was still available, but by the time the transport_lock
+ * was acquired, the rqst and task had been released. Thus the RPC
+ * has already been terminated.
  */
+out_norqst:
+	spin_unlock_bh(&xprt->transport_lock);
+	rpcrdma_buffer_put(req);
+	dprintk("RPC:       %s: race, no rqst left for req %p\n",
+		__func__, req);
+	return;
+
 out_shortreply:
 	dprintk("RPC:       %s: short/invalid reply\n", __func__);
 	goto repost;
 
 out_nomatch:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&buf->rb_lock);
 	dprintk("RPC:       %s: no match for incoming xid 0x%08x len %d\n",
 		__func__, be32_to_cpu(headerp->rm_xid),
 		rep->rr_len);
 	goto repost;
 
 out_duplicate:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&buf->rb_lock);
 	dprintk("RPC:       %s: "
 		"duplicate reply %p to RPC request %p: xid 0x%08x\n",
 		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
+/* If no pending RPC transaction was matched, post a replacement
+ * receive buffer before returning.
+ */
 repost:
 	r_xprt->rx_stats.bad_reply_count++;
 	if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 62ecbccd9748e..d1c458e5ec4de 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -684,7 +684,8 @@ xprt_rdma_free(struct rpc_task *task)
 
 	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
 
-	if (unlikely(!list_empty(&req->rl_registered)))
+	rpcrdma_remove_req(&r_xprt->rx_buf, req);
+	if (!list_empty(&req->rl_registered))
 		ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
 	rpcrdma_unmap_sges(ia, req);
 	rpcrdma_buffer_put(req);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index df72224604d2d..a215a8759dc20 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1032,6 +1032,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 	spin_lock_init(&buf->rb_recovery_lock);
 	INIT_LIST_HEAD(&buf->rb_mws);
 	INIT_LIST_HEAD(&buf->rb_all);
+	INIT_LIST_HEAD(&buf->rb_pending);
 	INIT_LIST_HEAD(&buf->rb_stale_mrs);
 	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
 			  rpcrdma_mr_refresh_worker);
@@ -1084,7 +1085,7 @@ rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
 
 	req = list_first_entry(&buf->rb_send_bufs,
 			       struct rpcrdma_req, rl_list);
-	list_del(&req->rl_list);
+	list_del_init(&req->rl_list);
 	return req;
 }
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ad918c840fc71..b282d3f8cdd80 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -341,6 +341,7 @@ enum {
 struct rpcrdma_buffer;
 struct rpcrdma_req {
 	struct list_head	rl_list;
+	__be32			rl_xid;
 	unsigned int		rl_mapped_sges;
 	unsigned int		rl_connect_cookie;
 	struct rpcrdma_buffer	*rl_buffer;
@@ -402,6 +403,7 @@ struct rpcrdma_buffer {
 	int			rb_send_count, rb_recv_count;
 	struct list_head	rb_send_bufs;
 	struct list_head	rb_recv_bufs;
+	struct list_head	rb_pending;
 	u32			rb_max_requests;
 	atomic_t		rb_credits;	/* most recent credit grant */
 
@@ -550,6 +552,34 @@ void rpcrdma_destroy_req(struct rpcrdma_req *);
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
 
+static inline void
+rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+	spin_lock(&buffers->rb_lock);
+	if (list_empty(&req->rl_list))
+		list_add_tail(&req->rl_list, &buffers->rb_pending);
+	spin_unlock(&buffers->rb_lock);
+}
+
+static inline struct rpcrdma_req *
+rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid)
+{
+	struct rpcrdma_req *pos;
+
+	list_for_each_entry(pos, &buffers->rb_pending, rl_list)
+		if (pos->rl_xid == xid)
+			return pos;
+	return NULL;
+}
+
+static inline void
+rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+	spin_lock(&buffers->rb_lock);
+	list_del(&req->rl_list);
+	spin_unlock(&buffers->rb_lock);
+}
+
 struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
 void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
-- 
GitLab


From 8d75483a232aea9c3224b8146edf45b3bbb552fd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:28 -0400
Subject: [PATCH 1480/1958] xprtrdma: Fix FRWR invalidation error recovery

When ib_post_send() fails, all LOCAL_INV WRs past @bad_wr have to be
examined, and the MRs reset by hand.

I'm not sure how the existing code can work by comparing R_keys.
Restructure the logic so that instead it walks the chain of WRs,
starting from the first bad one.

Make sure to wait for completion if at least one WR was actually
posted. Otherwise, if the ib_post_send fails, we can end up
DMA-unmapping the MR while LOCAL_INV operations are in flight.

Commit 7a89f9c626e3 ("xprtrdma: Honor ->send_request API contract")
added the rdma_disconnect() call site. The disconnect actually
causes more problems than it solves, and SQ overruns happen only as
a result of software bugs. So remove it.

Fixes: d7a21c1bed54 ("xprtrdma: Reset MRs in frwr_op_unmap_sync()")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/frwr_ops.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 97f9f85fa5c1a..24631e0edadbb 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -521,12 +521,13 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	 * unless ri_id->qp is a valid pointer.
 	 */
 	r_xprt->rx_stats.local_inv_needed++;
+	bad_wr = NULL;
 	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
+	if (bad_wr != first)
+		wait_for_completion(&f->fr_linv_done);
 	if (rc)
 		goto reset_mrs;
 
-	wait_for_completion(&f->fr_linv_done);
-
 	/* ORDER: Now DMA unmap all of the MRs, and return
 	 * them to the free MW list.
 	 */
@@ -543,17 +544,19 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 
 reset_mrs:
 	pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
-	rdma_disconnect(ia->ri_id);
 
 	/* Find and reset the MRs in the LOCAL_INV WRs that did not
-	 * get posted. This is synchronous, and slow.
+	 * get posted.
 	 */
-	list_for_each_entry(mw, mws, mw_list) {
-		f = &mw->frmr;
-		if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
-			__frwr_reset_mr(ia, mw);
-			bad_wr = bad_wr->next;
-		}
+	rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
+	while (bad_wr) {
+		f = container_of(bad_wr, struct rpcrdma_frmr,
+				 fr_invwr);
+		mw = container_of(f, struct rpcrdma_mw, frmr);
+
+		__frwr_reset_mr(ia, mw);
+
+		bad_wr = bad_wr->next;
 	}
 	goto unmap;
 }
-- 
GitLab


From 1f541895dae956df0be14a3190f507b601ac3fcc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:36 -0400
Subject: [PATCH 1481/1958] xprtrdma: Don't defer MR recovery if ro_map fails

Deferred MR recovery does a DMA-unmapping of the MW. However, ro_map
invokes rpcrdma_defer_mr_recovery in some error cases where the MW
has not even been DMA-mapped yet.

Avoid a DMA-unmapping error replacing rpcrdma_defer_mr_recovery.

Also note that if ib_dma_map_sg is asked to map 0 nents, it will
return 0. So the extra "if (i == 0)" check is no longer needed.

Fixes: 42fe28f60763 ("xprtrdma: Do not leak an MW during a DMA ...")
Fixes: 505bbe64dd04 ("xprtrdma: Refactor MR recovery work queues")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/fmr_ops.c  | 18 +++++++++---------
 net/sunrpc/xprtrdma/frwr_ops.c | 19 ++++++++-----------
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 5556ed99b4b7b..2f4eacdde3f2b 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -213,13 +213,11 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;
 	}
-	mw->mw_nents = i;
 	mw->mw_dir = rpcrdma_data_dir(writing);
-	if (i == 0)
-		goto out_dmamap_err;
 
-	if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device,
-			   mw->mw_sg, mw->mw_nents, mw->mw_dir))
+	mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+				     mw->mw_sg, i, mw->mw_dir);
+	if (!mw->mw_nents)
 		goto out_dmamap_err;
 
 	for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
@@ -237,16 +235,18 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	return mw->mw_nents;
 
 out_dmamap_err:
-	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
-	       mw->mw_sg, mw->mw_nents);
-	rpcrdma_defer_mr_recovery(mw);
+	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
+	       mw->mw_sg, i);
+	rpcrdma_put_mw(r_xprt, mw);
 	return -EIO;
 
 out_maperr:
 	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
 	       len, (unsigned long long)dma_pages[0],
 	       pageoff, mw->mw_nents, rc);
-	rpcrdma_defer_mr_recovery(mw);
+	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+			mw->mw_sg, mw->mw_nents, mw->mw_dir);
+	rpcrdma_put_mw(r_xprt, mw);
 	return -EIO;
 }
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 24631e0edadbb..8f63d38e45a2a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -355,7 +355,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	struct ib_mr *mr;
 	struct ib_reg_wr *reg_wr;
 	struct ib_send_wr *bad_wr;
-	int rc, i, n, dma_nents;
+	int rc, i, n;
 	u8 key;
 
 	mw = NULL;
@@ -391,14 +391,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;
 	}
-	mw->mw_nents = i;
 	mw->mw_dir = rpcrdma_data_dir(writing);
-	if (i == 0)
-		goto out_dmamap_err;
 
-	dma_nents = ib_dma_map_sg(ia->ri_device,
-				  mw->mw_sg, mw->mw_nents, mw->mw_dir);
-	if (!dma_nents)
+	mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
+	if (!mw->mw_nents)
 		goto out_dmamap_err;
 
 	n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
@@ -436,13 +432,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	return mw->mw_nents;
 
 out_dmamap_err:
-	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
-	       mw->mw_sg, mw->mw_nents);
-	rpcrdma_defer_mr_recovery(mw);
+	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
+	       mw->mw_sg, i);
+	frmr->fr_state = FRMR_IS_INVALID;
+	rpcrdma_put_mw(r_xprt, mw);
 	return -EIO;
 
 out_mapmr_err:
-	pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
+	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
 	       frmr->fr_mr, n, mw->mw_nents);
 	rpcrdma_defer_mr_recovery(mw);
 	return -EIO;
-- 
GitLab


From 8dcbec6d20eb881ba368d0aebc3a8a678aebb1da Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:44 -0400
Subject: [PATCH 1482/1958] NFSv4.1: Handle EXCHGID4_FLAG_CONFIRMED_R during
 NFSv4.1 migration

Transparent State Migration copies a client's lease state from the
server where a filesystem used to reside to the server where it now
resides. When an NFSv4.1 client first contacts that destination
server, it uses EXCHANGE_ID to detect trunking relationships.

The lease that was copied there is returned to that client, but the
destination server sets EXCHGID4_FLAG_CONFIRMED_R when replying to
the client. This is because the lease was confirmed on the source
server (before it was copied).

Normally, when CONFIRMED_R is set, a client purges the lease and
creates a new one. However, that throws away the entire benefit of
Transparent State Migration.

Therefore, the client must not purge that lease when it is possible
that Transparent State Migration has occurred.

Reported-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4client.c       |  5 +++++
 fs/nfs/nfs4state.c        | 16 +++++++++++-----
 include/linux/nfs_fs_sb.h |  2 ++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 66776f0221113..50566acb54694 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -414,6 +414,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	if (clp != old)
 		clp->cl_preserve_clid = true;
 	nfs_put_client(clp);
+	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
 	return old;
 
 error:
@@ -852,6 +853,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 	if (server->options & NFS_OPTION_MIGRATION)
 		set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+	if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
+		set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
 
 	/* Allocate or find a client reference we can use */
 	clp = nfs_get_client(&cl_init);
@@ -1212,9 +1215,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 		return -EAFNOSUPPORT;
 
 	nfs_server_remove_lists(server);
+	set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
 				clp->cl_minorversion, net);
+	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	nfs_put_client(clp);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b34de036501bc..83f8e60bb1e3f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -352,11 +352,17 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
 	if (clp != *result)
 		return 0;
 
-	/* Purge state if the client id was established in a prior instance */
-	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
-		set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
-	else
-		set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+	/*
+	 * Purge state if the client id was established in a prior
+	 * instance and the client id could not have arrived on the
+	 * server via Transparent State Migration.
+	 */
+	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) {
+		if (!test_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags))
+			set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+		else
+			set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+	}
 	nfs4_schedule_state_manager(clp);
 	status = nfs_wait_client_init_complete(clp);
 	if (status < 0)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e418a1096662c..74c44665e6d33 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -42,6 +42,7 @@ struct nfs_client {
 #define NFS_CS_MIGRATION	2		/* - transparent state migr */
 #define NFS_CS_INFINITE_SLOTS	3		/* - don't limit TCP slots */
 #define NFS_CS_NO_RETRANS_TIMEOUT	4	/* - Disable retransmit timeouts */
+#define NFS_CS_TSM_POSSIBLE	5		/* - Maybe state migration */
 	struct sockaddr_storage	cl_addr;	/* server identifier */
 	size_t			cl_addrlen;
 	char *			cl_hostname;	/* hostname of server */
@@ -210,6 +211,7 @@ struct nfs_server {
 	unsigned long		mig_status;
 #define NFS_MIG_IN_TRANSITION		(1)
 #define NFS_MIG_FAILED			(2)
+#define NFS_MIG_TSM_POSSIBLE		(3)
 
 	void (*destroy)(struct nfs_server *);
 
-- 
GitLab


From 838edb94977b96f191800d7ce39b2505419aac4a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:52:52 -0400
Subject: [PATCH 1483/1958] NFSv4.1: Use seqid returned by EXCHANGE_ID after
 state migration

Transparent State Migration copies a client's lease state from the
server where a filesystem used to reside to the server where it now
resides. When an NFSv4.1 client first contacts that destination
server, it uses EXCHANGE_ID to detect trunking relationships.

The lease that was copied there is returned to that client, but the
destination server sets EXCHGID4_FLAG_CONFIRMED_R when replying to
the client. This is because the lease was confirmed on the source
server (before it was copied).

When CONFIRMED_R is set, the client throws away the sequence ID
returned by the server. During a Transparent State Migration, however
there's no other way for the client to know what sequence ID to use
with a lease that's been migrated.

Therefore, the client must save and use the contrived slot sequence
value returned by the destination server even when CONFIRMED_R is
set.

Note that some servers always return a seqid of 1 after a migration.

Reported-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e678fa8f69793..01dcd4c8dc4f0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7378,12 +7378,11 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data)
 	if (status == 0) {
 		clp->cl_clientid = cdata->res.clientid;
 		clp->cl_exchange_flags = cdata->res.flags;
+		clp->cl_seqid = cdata->res.seqid;
 		/* Client ID is not confirmed */
-		if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
+		if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R))
 			clear_bit(NFS4_SESSION_ESTABLISHED,
-			&clp->cl_session->session_state);
-			clp->cl_seqid = cdata->res.seqid;
-		}
+				  &clp->cl_session->session_state);
 
 		kfree(clp->cl_serverowner);
 		clp->cl_serverowner = cdata->res.server_owner;
-- 
GitLab


From 173b8f49b3af3d5102168793436dc17b94476b74 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:53:00 -0400
Subject: [PATCH 1484/1958] xprtrdma: Demote "connect" log messages

Some have complained about the log messages generated when xprtrdma
opens or closes a connection to a server. When an NFS mount is
mostly idle these can appear every few minutes as the client idles
out the connection and reconnects.

Connection and disconnection is a normal part of operation, and not
exceptional, so change these to dprintk's for now. At some point
all of these will be converted to tracepoints, but that's for
another day.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 44 +++++++------------------------------
 1 file changed, 8 insertions(+), 36 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index a215a8759dc20..e4171f2abe37d 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -243,8 +243,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
 #endif
-	struct ib_qp_attr *attr = &ia->ri_qp_attr;
-	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
 	int connstate = 0;
 
 	switch (event->event) {
@@ -267,7 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-		pr_info("rpcrdma: removing device for %pIS:%u\n",
+		pr_info("rpcrdma: removing device %s for %pIS:%u\n",
+			ia->ri_device->name,
 			sap, rpc_get_port(sap));
 #endif
 		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@@ -282,13 +281,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		return 1;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		connstate = 1;
-		ib_query_qp(ia->ri_id->qp, attr,
-			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
-			    iattr);
-		dprintk("RPC:       %s: %d responder resources"
-			" (%d initiator)\n",
-			__func__, attr->max_dest_rd_atomic,
-			attr->max_rd_atomic);
 		rpcrdma_update_connect_private(xprt, &event->param.conn);
 		goto connected;
 	case RDMA_CM_EVENT_CONNECT_ERROR:
@@ -298,11 +290,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		connstate = -ENETDOWN;
 		goto connected;
 	case RDMA_CM_EVENT_REJECTED:
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-		pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
-			sap, rpc_get_port(sap), ia->ri_device->name,
+		dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
+			sap, rpc_get_port(sap),
 			rdma_reject_msg(id, event->status));
-#endif
 		connstate = -ECONNREFUSED;
 		if (event->status == IB_CM_REJ_STALE_CONN)
 			connstate = -EAGAIN;
@@ -310,37 +300,19 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
 connected:
-		dprintk("RPC:       %s: %sconnected\n",
-					__func__, connstate > 0 ? "" : "dis");
 		atomic_set(&xprt->rx_buf.rb_credits, 1);
 		ep->rep_connected = connstate;
 		rpcrdma_conn_func(ep);
 		wake_up_all(&ep->rep_connect_wait);
 		/*FALLTHROUGH*/
 	default:
-		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
-			__func__, sap, rpc_get_port(sap), ep,
-			rdma_event_msg(event->event));
+		dprintk("RPC:       %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
+			__func__, sap, rpc_get_port(sap),
+			ia->ri_device->name, ia->ri_ops->ro_displayname,
+			ep, rdma_event_msg(event->event));
 		break;
 	}
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	if (connstate == 1) {
-		int ird = attr->max_dest_rd_atomic;
-		int tird = ep->rep_remote_cma.responder_resources;
-
-		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
-			sap, rpc_get_port(sap),
-			ia->ri_device->name,
-			ia->ri_ops->ro_displayname,
-			xprt->rx_buf.rb_max_requests,
-			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
-	} else if (connstate < 0) {
-		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
-			sap, rpc_get_port(sap), connstate);
-	}
-#endif
-
 	return 0;
 }
 
-- 
GitLab


From e2f6ef09156284e35f780be47559809753d4b499 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:53:08 -0400
Subject: [PATCH 1485/1958] xprtrdma: FMR does not need list_del_init()

Clean up.

Commit 38f1932e60ba ("xprtrdma: Remove FMRs from the unmap list
after unmapping") utilized list_del_init() to try to prevent some
list corruption. The corruption was actually caused by the reply
handler racing with a signal. Now that MR invalidation is properly
serialized, list_del_init() can safely be replaced.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/fmr_ops.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 2f4eacdde3f2b..d3f84bb1d4435 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -91,7 +91,7 @@ __fmr_unmap(struct rpcrdma_mw *mw)
 
 	list_add(&mw->fmr.fm_mr->list, &l);
 	rc = ib_unmap_fmr(&l);
-	list_del_init(&mw->fmr.fm_mr->list);
+	list_del(&mw->fmr.fm_mr->list);
 	return rc;
 }
 
@@ -261,7 +261,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 static void
 fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 {
-	struct rpcrdma_mw *mw, *tmp;
+	struct rpcrdma_mw *mw;
 	LIST_HEAD(unmap_list);
 	int rc;
 
@@ -283,9 +283,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	/* ORDER: Now DMA unmap all of the req's MRs, and return
 	 * them to the free MW list.
 	 */
-	list_for_each_entry_safe(mw, tmp, mws, mw_list) {
-		list_del_init(&mw->mw_list);
-		list_del_init(&mw->fmr.fm_mr->list);
+	while (!list_empty(mws)) {
+		mw = rpcrdma_pop_mw(mws);
+		dprintk("RPC:       %s: DMA unmapping fmr %p\n",
+			__func__, &mw->fmr);
+		list_del(&mw->fmr.fm_mr->list);
 		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
 				mw->mw_sg, mw->mw_nents, mw->mw_dir);
 		rpcrdma_put_mw(r_xprt, mw);
@@ -296,9 +298,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 out_reset:
 	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
 
-	list_for_each_entry_safe(mw, tmp, mws, mw_list) {
-		list_del_init(&mw->mw_list);
-		list_del_init(&mw->fmr.fm_mr->list);
+	while (!list_empty(mws)) {
+		mw = rpcrdma_pop_mw(mws);
+		list_del(&mw->fmr.fm_mr->list);
 		fmr_op_recover_mr(mw);
 	}
 }
-- 
GitLab


From d933cc32019063fc4beb8b20528ca724bd1b7a52 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:53:16 -0400
Subject: [PATCH 1486/1958] xprtrdma: Replace PAGE_MASK with offset_in_page()

Clean up.

Reported by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index b6584ae8e2517..ca4d6e4528f32 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -141,7 +141,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 
 	if (xdr->page_len) {
 		remaining = xdr->page_len;
-		offset = xdr->page_base & ~PAGE_MASK;
+		offset = offset_in_page(xdr->page_base);
 		count = 0;
 		while (remaining) {
 			remaining -= min_t(unsigned int,
@@ -222,7 +222,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
 
 	len = xdrbuf->page_len;
 	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
-	page_base = xdrbuf->page_base & ~PAGE_MASK;
+	page_base = offset_in_page(xdrbuf->page_base);
 	p = 0;
 	while (len && n < RPCRDMA_MAX_SEGS) {
 		if (!ppages[p]) {
@@ -540,7 +540,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 			goto out;
 
 		page = virt_to_page(xdr->tail[0].iov_base);
-		page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+		page_base = offset_in_page(xdr->tail[0].iov_base);
 
 		/* If the content in the page list is an odd length,
 		 * xdr_write_pages() has added a pad at the beginning
@@ -557,7 +557,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	 */
 	if (xdr->page_len) {
 		ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
-		page_base = xdr->page_base & ~PAGE_MASK;
+		page_base = offset_in_page(xdr->page_base);
 		remaining = xdr->page_len;
 		while (remaining) {
 			sge_no++;
@@ -587,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	 */
 	if (xdr->tail[0].iov_len) {
 		page = virt_to_page(xdr->tail[0].iov_base);
-		page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+		page_base = offset_in_page(xdr->tail[0].iov_base);
 		len = xdr->tail[0].iov_len;
 
 map_tail:
@@ -878,9 +878,9 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 	srcp += curlen;
 	copy_len -= curlen;
 
-	page_base = rqst->rq_rcv_buf.page_base;
-	ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
-	page_base &= ~PAGE_MASK;
+	ppages = rqst->rq_rcv_buf.pages +
+		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
+	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
 	fixup_copy_count = 0;
 	if (copy_len && rqst->rq_rcv_buf.page_len) {
 		int pagelist_len;
-- 
GitLab


From 6afafa7799cf6fa3c0efb6887704506d21965ad6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 8 Jun 2017 11:53:24 -0400
Subject: [PATCH 1487/1958] xprtrdma: Fix documenting comments in frwr_ops.c

Clean up.

FASTREG and LOCAL_INV WRs are typically not signaled. localinv_wake
is used for the last LOCAL_INV WR in a chain, which is always
signaled. The documenting comments should reflect that.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtrdma/frwr_ops.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 8f63d38e45a2a..6aea36a38bfdc 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -277,7 +277,7 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
 }
 
 /**
- * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
+ * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
  * @cq:	completion queue (ignored)
  * @wc:	completed WR
  *
@@ -298,7 +298,7 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
 }
 
 /**
- * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
  * @cq:	completion queue (ignored)
  * @wc:	completed WR
  *
@@ -319,7 +319,7 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
 }
 
 /**
- * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
  * @cq:	completion queue (ignored)
  * @wc:	completed WR
  *
-- 
GitLab


From 1ee48bdd22440c0b45f215fc41e16032383c1c81 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Thu, 6 Jul 2017 09:43:02 -0400
Subject: [PATCH 1488/1958] NFSv4.2 fix size storage for nfs42_proc_copy

Return size of COPY is u64 but it was assigned to an "int" status.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs42proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 319a47db218d1..6c2db51e67a77 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -146,7 +146,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	loff_t pos_src = args->src_pos;
 	loff_t pos_dst = args->dst_pos;
 	size_t count = args->count;
-	int status;
+	ssize_t status;
 
 	status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
 				     src_lock, FMODE_READ);
-- 
GitLab


From 15a8b93fd5690de017ce665382ea45e5d61811a4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 10 Jun 2017 04:59:07 +0200
Subject: [PATCH 1489/1958] sunrpc: use constant time memory comparison for mac

Otherwise, we enable a MAC forgery via timing attack.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: linux-nfs@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index fb39284ec1741..12649c9fedaba 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,6 +34,7 @@
  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  */
 
+#include <crypto/algapi.h>
 #include <crypto/hash.h>
 #include <crypto/skcipher.h>
 #include <linux/err.h>
@@ -927,7 +928,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	if (ret)
 		goto out_err;
 
-	if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
+	if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
 		ret = GSS_S_BAD_SIG;
 		goto out_err;
 	}
-- 
GitLab


From 774d9513a3f29048cce3ed5df3b0a0da9897678c Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Thu, 29 Jun 2017 06:34:50 -0700
Subject: [PATCH 1490/1958] nfs: replace d_add with d_splice_alias in
 atomic_open

It's a trival change but follows knfsd export document that asks
for d_splice_alias during lookup.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 90bc4025ca3cb..1255891e56955 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1518,7 +1518,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		d_drop(dentry);
 		switch (err) {
 		case -ENOENT:
-			d_add(dentry, NULL);
+			d_splice_alias(NULL, dentry);
 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 			break;
 		case -EISDIR:
-- 
GitLab


From f174ff7a0ab6a097455a94abfc99517940041c07 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Thu, 29 Jun 2017 06:34:51 -0700
Subject: [PATCH 1491/1958] nfs: add a nfs_ilookup helper

This helper will allow to find an existing NFS inode by the file handle
and fattr.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
[hch: split from a larger patch]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/inode.c         | 22 ++++++++++++++++++++++
 include/linux/nfs_fs.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7e7a894601b9b..109279d6d91bd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -386,6 +386,28 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
 #endif
 EXPORT_SYMBOL_GPL(nfs_setsecurity);
 
+/* Search for inode identified by fh, fileid and i_mode in inode cache. */
+struct inode *
+nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
+{
+	struct nfs_find_desc desc = {
+		.fh	= fh,
+		.fattr	= fattr,
+	};
+	struct inode *inode;
+	unsigned long hash;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_FILEID) ||
+	    !(fattr->valid & NFS_ATTR_FATTR_TYPE))
+		return NULL;
+
+	hash = nfs_fattr_to_ino_t(fattr);
+	inode = ilookup5(sb, hash, nfs_find_actor, &desc);
+
+	dprintk("%s: returning %p\n", __func__, inode);
+	return inode;
+}
+
 /*
  * This is our front-end to iget that looks up inodes by file handle
  * instead of inode number.
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bb0eb2c9acca7..e52cc55ac300f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *);
 extern void nfs_invalidate_atime(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 				struct nfs_fattr *, struct nfs4_label *);
+struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
-- 
GitLab


From 00422483ad415d6267d36711f0e51f4bbbd653ed Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Thu, 29 Jun 2017 06:34:53 -0700
Subject: [PATCH 1492/1958] nfs: add export operations

This support for opening files on NFS by file handle, both through the
open_by_handle syscall, and for re-exporting NFS (for example using a
different version).  The support is very basic for now, as each open by
handle will have to do an NFSv4 open operation on the wire.  In the
future this will hopefully be mitigated by an open file cache, as well
as various optimizations in NFS for this specific case.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
[hch: incorporated various changes, resplit the patches, new changelog]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfs/Makefile   |   2 +-
 fs/nfs/export.c   | 177 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/internal.h |   2 +
 fs/nfs/super.c    |   2 +
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 fs/nfs/export.c

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 98f4e5728a67c..1fb118902d57b 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 CFLAGS_nfstrace.o += -I$(src)
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
 			   io.o direct.o pagelist.o read.o symlink.o unlink.o \
-			   write.o namespace.o mount_clnt.o nfstrace.o
+			   write.o namespace.o mount_clnt.o nfstrace.o export.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
new file mode 100644
index 0000000000000..249cb96cc5b5c
--- /dev/null
+++ b/fs/nfs/export.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015, Primary Data, Inc. All rights reserved.
+ *
+ * Tao Peng <bergwolf@primarydata.com>
+ */
+#include <linux/dcache.h>
+#include <linux/exportfs.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+
+#include "internal.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY		NFSDBG_VFS
+
+enum {
+	FILEID_HIGH_OFF = 0,	/* inode fileid high */
+	FILEID_LOW_OFF,		/* inode fileid low */
+	FILE_I_TYPE_OFF,	/* inode type */
+	EMBED_FH_OFF		/* embeded server fh */
+};
+
+
+static struct nfs_fh *nfs_exp_embedfh(__u32 *p)
+{
+	return (struct nfs_fh *)(p + EMBED_FH_OFF);
+}
+
+/*
+ * Let's break subtree checking for now... otherwise we'll have to embed parent fh
+ * but there might not be enough space.
+ */
+static int
+nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
+{
+	struct nfs_fh *server_fh = NFS_FH(inode);
+	struct nfs_fh *clnt_fh = nfs_exp_embedfh(p);
+	size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+	int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+
+	dprintk("%s: max fh len %d inode %p parent %p",
+		__func__, *max_len, inode, parent);
+
+	if (*max_len < len || IS_AUTOMOUNT(inode)) {
+		dprintk("%s: fh len %d too small, required %d\n",
+			__func__, *max_len, len);
+		*max_len = len;
+		return FILEID_INVALID;
+	}
+	if (IS_AUTOMOUNT(inode)) {
+		*max_len = FILEID_INVALID;
+		goto out;
+	}
+
+	p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
+	p[FILEID_LOW_OFF] = NFS_FILEID(inode);
+	p[FILE_I_TYPE_OFF] = inode->i_mode & S_IFMT;
+	p[len - 1] = 0; /* Padding */
+	nfs_copy_fh(clnt_fh, server_fh);
+	*max_len = len;
+out:
+	dprintk("%s: result fh fileid %llu mode %u size %d\n",
+		__func__, NFS_FILEID(inode), inode->i_mode, *max_len);
+	return *max_len;
+}
+
+static struct dentry *
+nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		 int fh_len, int fh_type)
+{
+	struct nfs4_label *label = NULL;
+	struct nfs_fattr *fattr = NULL;
+	struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
+	size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+	const struct nfs_rpc_ops *rpc_ops;
+	struct dentry *dentry;
+	struct inode *inode;
+	int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+	u32 *p = fid->raw;
+	int ret;
+
+	/* NULL translates to ESTALE */
+	if (fh_len < len || fh_type != len)
+		return NULL;
+
+	fattr = nfs_alloc_fattr();
+	if (fattr == NULL) {
+		dentry = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	fattr->fileid = ((u64)p[FILEID_HIGH_OFF] << 32) + p[FILEID_LOW_OFF];
+	fattr->mode = p[FILE_I_TYPE_OFF];
+	fattr->valid |= NFS_ATTR_FATTR_FILEID | NFS_ATTR_FATTR_TYPE;
+
+	dprintk("%s: fileid %llu mode %d\n", __func__, fattr->fileid, fattr->mode);
+
+	inode = nfs_ilookup(sb, fattr, server_fh);
+	if (inode)
+		goto out_found;
+
+	label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL);
+	if (IS_ERR(label)) {
+		dentry = ERR_CAST(label);
+		goto out_free_fattr;
+	}
+
+	rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops;
+	ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label);
+	if (ret) {
+		dprintk("%s: getattr failed %d\n", __func__, ret);
+		dentry = ERR_PTR(ret);
+		goto out_free_label;
+	}
+
+	inode = nfs_fhget(sb, server_fh, fattr, label);
+
+out_found:
+	dentry = d_obtain_alias(inode);
+
+out_free_label:
+	nfs4_label_free(label);
+out_free_fattr:
+	nfs_free_fattr(fattr);
+out:
+	return dentry;
+}
+
+static struct dentry *
+nfs_get_parent(struct dentry *dentry)
+{
+	int ret;
+	struct inode *inode = d_inode(dentry), *pinode;
+	struct super_block *sb = inode->i_sb;
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fattr *fattr = NULL;
+	struct nfs4_label *label = NULL;
+	struct dentry *parent;
+	struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops;
+	struct nfs_fh fh;
+
+	if (!ops->lookupp)
+		return ERR_PTR(-EACCES);
+
+	fattr = nfs_alloc_fattr();
+	if (fattr == NULL) {
+		parent = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	label = nfs4_label_alloc(server, GFP_KERNEL);
+	if (IS_ERR(label)) {
+		parent = ERR_CAST(label);
+		goto out_free_fattr;
+	}
+
+	ret = ops->lookupp(inode, &fh, fattr, label);
+	if (ret) {
+		parent = ERR_PTR(ret);
+		goto out_free_label;
+	}
+
+	pinode = nfs_fhget(sb, &fh, fattr, label);
+	parent = d_obtain_alias(pinode);
+out_free_label:
+	nfs4_label_free(label);
+out_free_fattr:
+	nfs_free_fattr(fattr);
+out:
+	return parent;
+}
+
+const struct export_operations nfs_export_ops = {
+	.encode_fh = nfs_encode_fh,
+	.fh_to_dentry = nfs_fh_to_dentry,
+	.get_parent = nfs_get_parent,
+};
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c5054edb01571..2ebd574989868 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -10,6 +10,8 @@
 
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
+extern const struct export_operations nfs_export_ops;
+
 struct nfs_string;
 
 /* Maximum number of readahead requests
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b4176393f049f..b5271644b4726 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2339,6 +2339,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
 		 */
 		sb->s_flags |= MS_POSIXACL;
 		sb->s_time_gran = 1;
+		sb->s_export_op = &nfs_export_ops;
 	}
 
  	nfs_initialise_sb(sb);
@@ -2360,6 +2361,7 @@ static void nfs_clone_super(struct super_block *sb,
 	sb->s_xattr = old_sb->s_xattr;
 	sb->s_op = old_sb->s_op;
 	sb->s_time_gran = 1;
+	sb->s_export_op = old_sb->s_export_op;
 
 	if (server->nfs_client->rpc_ops->version != 2) {
 		/* The VFS shouldn't apply the umask to mode bits. We will do
-- 
GitLab


From 5b5faaf6df73412af0278997db36dbcb51011d9d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Thu, 29 Jun 2017 06:34:52 -0700
Subject: [PATCH 1493/1958] nfs4: add NFSv4 LOOKUPP handlers

This will be needed in order to implement the get_parent export op
for nfsd.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c       | 49 +++++++++++++++++++++++++++
 fs/nfs/nfs4trace.h      | 29 ++++++++++++++++
 fs/nfs/nfs4xdr.c        | 75 +++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h    |  1 +
 include/linux/nfs_xdr.h | 17 +++++++++-
 5 files changed, 170 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 01dcd4c8dc4f0..e1a26c653e786 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3805,6 +3805,54 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
 	return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
 }
 
+static int _nfs4_proc_lookupp(struct inode *inode,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+		struct nfs4_label *label)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_server *server = NFS_SERVER(inode);
+	int		       status;
+	struct nfs4_lookupp_arg args = {
+		.bitmask = server->attr_bitmask,
+		.fh = NFS_FH(inode),
+	};
+	struct nfs4_lookupp_res res = {
+		.server = server,
+		.fattr = fattr,
+		.label = label,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUPP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	args.bitmask = nfs4_bitmask(server, label);
+
+	nfs_fattr_init(fattr);
+
+	dprintk("NFS call  lookupp ino=0x%lx\n", inode->i_ino);
+	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
+				&res.seq_res, 0);
+	dprintk("NFS reply lookupp: %d\n", status);
+	return status;
+}
+
+static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
+			     struct nfs_fattr *fattr, struct nfs4_label *label)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
+		trace_nfs4_lookupp(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -9314,6 +9362,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
 	.lookup		= nfs4_proc_lookup,
+	.lookupp	= nfs4_proc_lookupp,
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
 	.create		= nfs4_proc_create,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 845d0eadefc94..be1da19c65d61 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -891,6 +891,35 @@ DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove);
 DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations);
 DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo);
 
+TRACE_EVENT(nfs4_lookupp,
+		TP_PROTO(
+			const struct inode *inode,
+			int error
+		),
+
+		TP_ARGS(inode, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, ino)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->ino = NFS_FILEID(inode);
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) inode=%02x:%02x:%llu",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->ino
+		)
+);
+
 TRACE_EVENT(nfs4_rename,
 		TP_PROTO(
 			const struct inode *olddir,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 495d493d3a35b..fa3eb361d4f86 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -159,6 +159,8 @@ static int nfs4_stat_to_errno(int);
 				(op_decode_hdr_maxsz)
 #define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
 #define decode_lookup_maxsz	(op_decode_hdr_maxsz)
+#define encode_lookupp_maxsz	(op_encode_hdr_maxsz)
+#define decode_lookupp_maxsz	(op_decode_hdr_maxsz)
 #define encode_share_access_maxsz \
 				(2)
 #define encode_createmode_maxsz	(1 + encode_attrs_maxsz + encode_verifier_maxsz)
@@ -618,6 +620,18 @@ static int nfs4_stat_to_errno(int);
 				decode_lookup_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
+#define NFS4_enc_lookupp_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz + \
+				encode_lookupp_maxsz + \
+				encode_getattr_maxsz + \
+				encode_getfh_maxsz)
+#define NFS4_dec_lookupp_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_lookupp_maxsz + \
+				decode_getattr_maxsz + \
+				decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putrootfh_maxsz + \
@@ -1368,6 +1382,11 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
 	encode_string(xdr, name->len, name->name);
 }
 
+static void encode_lookupp(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_LOOKUPP, decode_lookupp_maxsz, hdr);
+}
+
 static void encode_share_access(struct xdr_stream *xdr, u32 share_access)
 {
 	__be32 *p;
@@ -2122,6 +2141,26 @@ static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode LOOKUPP request
+ */
+static void nfs4_xdr_enc_lookupp(struct rpc_rqst *req, struct xdr_stream *xdr,
+		const void *data)
+{
+	const struct nfs4_lookupp_arg *args = data;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lookupp(xdr, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_nops(&hdr);
+}
+
 /*
  * Encode LOOKUP_ROOT request
  */
@@ -5058,6 +5097,11 @@ static int decode_lookup(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_LOOKUP);
 }
 
+static int decode_lookupp(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_LOOKUPP);
+}
+
 /* This is too sick! */
 static int decode_space_limit(struct xdr_stream *xdr,
 		unsigned long *pagemod_limit)
@@ -6237,6 +6281,36 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	return status;
 }
 
+/*
+ * Decode LOOKUPP response
+ */
+static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *data)
+{
+	struct nfs4_lookupp_res *res = data;
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_lookupp(xdr);
+	if (status)
+		goto out;
+	status = decode_getfh(xdr, res->fh);
+	if (status)
+		goto out;
+	status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+out:
+	return status;
+}
+
 /*
  * Decode LOOKUP_ROOT response
  */
@@ -7614,6 +7688,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
 	PROC(ACCESS,		enc_access,		dec_access),
 	PROC(GETATTR,		enc_getattr,		dec_getattr),
 	PROC(LOOKUP,		enc_lookup,		dec_lookup),
+	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
 	PROC(LOOKUP_ROOT,	enc_lookup_root,	dec_lookup_root),
 	PROC(REMOVE,		enc_remove,		dec_remove),
 	PROC(RENAME,		enc_rename,		dec_rename),
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 1b1ca04820a30..47239c3366882 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -479,6 +479,7 @@ enum {
 	NFSPROC4_CLNT_ACCESS,
 	NFSPROC4_CLNT_GETATTR,
 	NFSPROC4_CLNT_LOOKUP,
+	NFSPROC4_CLNT_LOOKUPP,
 	NFSPROC4_CLNT_LOOKUP_ROOT,
 	NFSPROC4_CLNT_REMOVE,
 	NFSPROC4_CLNT_RENAME,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 89093341f0761..ca3bcc4ed4e55 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1012,7 +1012,6 @@ struct nfs4_link_res {
 	struct nfs_fattr *		dir_attr;
 };
 
-
 struct nfs4_lookup_arg {
 	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		dir_fh;
@@ -1028,6 +1027,20 @@ struct nfs4_lookup_res {
 	struct nfs4_label		*label;
 };
 
+struct nfs4_lookupp_arg {
+	struct nfs4_sequence_args	seq_args;
+	const struct nfs_fh		*fh;
+	const u32			*bitmask;
+};
+
+struct nfs4_lookupp_res {
+	struct nfs4_sequence_res	seq_res;
+	const struct nfs_server		*server;
+	struct nfs_fattr		*fattr;
+	struct nfs_fh			*fh;
+	struct nfs4_label		*label;
+};
+
 struct nfs4_lookup_root_arg {
 	struct nfs4_sequence_args	seq_args;
 	const u32 *			bitmask;
@@ -1569,6 +1582,8 @@ struct nfs_rpc_ops {
 	int	(*lookup)  (struct inode *, const struct qstr *,
 			    struct nfs_fh *, struct nfs_fattr *,
 			    struct nfs4_label *);
+	int	(*lookupp) (struct inode *, struct nfs_fh *,
+			    struct nfs_fattr *, struct nfs4_label *);
 	int	(*access)  (struct inode *, struct nfs_access_entry *);
 	int	(*readlink)(struct inode *, struct page *, unsigned int,
 			    unsigned int);
-- 
GitLab


From 8fc646b44385ff0a9853f6590497e43049eeb311 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 11 Jul 2017 15:58:35 +0300
Subject: [PATCH 1494/1958] ovl: fix random return value on mount

On failure to prepare_creds(), mount fails with a random
return value, as err was last set to an integer cast of
a valid lower mnt pointer or set to 0 if inodes index feature
is enabled.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 3fe6e52f0626 ("ovl: override creds with the ones from ...")
Cc: <stable@vger.kernel.org> # v4.7
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 44dc2d6ffe0f0..1cf5d3538309a 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1090,6 +1090,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	else
 		sb->s_d_op = &ovl_dentry_operations;
 
+	err = -ENOMEM;
 	ufs->creator_cred = cred = prepare_creds();
 	if (!cred)
 		goto out_put_indexdir;
-- 
GitLab


From ea3dad18dc5f778cfd931311a91a9315aa0065a3 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 11 Jul 2017 15:58:34 +0300
Subject: [PATCH 1495/1958] ovl: mark parent impure on ovl_link()

When linking a file with copy up origin into a new parent, mark the
new parent dir "impure".

Fixes: ee1d6d37b6b8 ("ovl: mark upper dir with type origin entries "impure"")
Cc: <stable@vger.kernel.org> # v4.12
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 641d9ee97f91f..48b70e6490f32 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -481,17 +481,30 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 }
 
 static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
-			      struct cattr *attr, struct dentry *hardlink)
+			      struct cattr *attr, struct dentry *hardlink,
+			      bool origin)
 {
 	int err;
 	const struct cred *old_cred;
 	struct cred *override_cred;
+	struct dentry *parent = dentry->d_parent;
 
-	err = ovl_copy_up(dentry->d_parent);
+	err = ovl_copy_up(parent);
 	if (err)
 		return err;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
+
+	/*
+	 * When linking a file with copy up origin into a new parent, mark the
+	 * new parent dir "impure".
+	 */
+	if (origin) {
+		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
+		if (err)
+			goto out_revert_creds;
+	}
+
 	err = -ENOMEM;
 	override_cred = prepare_creds();
 	if (override_cred) {
@@ -550,7 +563,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
 	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
 	attr.mode = inode->i_mode;
 
-	err = ovl_create_or_link(dentry, inode, &attr, NULL);
+	err = ovl_create_or_link(dentry, inode, &attr, NULL, false);
 	if (err)
 		iput(inode);
 
@@ -609,7 +622,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	inode = d_inode(old);
 	ihold(inode);
 
-	err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old));
+	err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old),
+				 ovl_type_origin(old));
 	if (err)
 		iput(inode);
 
-- 
GitLab


From 961af647fc9ebcdb3e98c8f11b399ebe01169fb1 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 11 Jul 2017 15:58:36 +0300
Subject: [PATCH 1496/1958] ovl: fix origin verification of index dir

Commit 54fb347e836f ("ovl: verify index dir matches upper dir")
introduced a new ovl_fh flag OVL_FH_FLAG_PATH_UPPER to indicate
an upper file handle, but forgot to add the flag to the mask of
valid flags, so index dir origin verification always discards
existing origin and stores a new one.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/overlayfs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 60d26605e039e..032120a761c4c 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -47,7 +47,8 @@ enum ovl_flag {
 /* Is the real inode encoded in fid an upper inode? */
 #define OVL_FH_FLAG_PATH_UPPER	(1 << 2)
 
-#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
+#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \
+			 OVL_FH_FLAG_PATH_UPPER)
 
 #if defined(__LITTLE_ENDIAN)
 #define OVL_FH_FLAG_CPU_ENDIAN 0
-- 
GitLab


From a59f97ff66f0058702ed6f6e26dd8fa3c34caf62 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 11 Jul 2017 15:58:37 +0300
Subject: [PATCH 1497/1958] ovl: remove unneeded check for IS_ERR()

ovl_workdir_create() returns a valid index dentry or NULL.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 1cf5d3538309a..c88493b01d8db 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1058,10 +1058,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 		ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
 						   OVL_INDEXDIR_NAME, true);
-		err = PTR_ERR(ufs->indexdir);
-		if (IS_ERR(ufs->indexdir))
-			goto out_put_lower_mnt;
-
 		if (ufs->indexdir) {
 			/* Verify upper root is index dir origin */
 			err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
-- 
GitLab


From 5d89fb33223e0be32e4100623b915048b02beeec Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 13 Jul 2017 13:36:40 -0700
Subject: [PATCH 1498/1958] net: set fib rule refcount after malloc

The configure callback of fib_rules_ops can change the refcnt of a
fib rule. For instance, mlxsw takes a refcnt when adding the processing
of the rule to a work queue. Thus the rule refcnt can not be reset to
to 1 afterwards. Move the refcnt setting to after the allocation.

Fixes: 5361e209dd30 ("net: avoid one splat in fib_nl_delrule()")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a0093e1b02353..fdcb1bcd2afad 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -400,6 +400,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		err = -ENOMEM;
 		goto errout;
 	}
+	refcount_set(&rule->refcnt, 1);
 	rule->fr_net = net;
 
 	rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
@@ -517,8 +518,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		last = r;
 	}
 
-	refcount_set(&rule->refcnt, 1);
-
 	if (last)
 		list_add_rcu(&rule->list, &last->list);
 	else
-- 
GitLab


From 301bfa483016d48b7fb9cbad87c0a04a15c25b90 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:53:48 -0400
Subject: [PATCH 1499/1958] NFS: Don't run wake_up_bit() when nobody is
 waiting...

"perf lock" shows fairly heavy contention for the bit waitqueue locks
when doing an I/O heavy workload.
Use a bit to tell whether or not there has been contention for a lock
so that we can optimise away the bit waitqueue options in those cases.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        | 17 ++++++++++++++++-
 include/linux/nfs_page.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 8a23e2b40b04e..de9066a92c0d2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 	if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
 		return 0;
 
-	if (!nonblock)
+	if (!nonblock) {
+		set_bit(PG_CONTENDED1, &head->wb_flags);
+		smp_mb__after_atomic();
 		return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
 				TASK_UNINTERRUPTIBLE);
+	}
 
 	return -EAGAIN;
 }
@@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req)
 
 	WARN_ON_ONCE(head != head->wb_head);
 
+	if (!test_bit(PG_HEADLOCK, &head->wb_flags))
+		return;
+	set_bit(PG_CONTENDED1, &head->wb_flags);
+	smp_mb__after_atomic();
 	wait_on_bit(&head->wb_flags, PG_HEADLOCK,
 		TASK_UNINTERRUPTIBLE);
 }
@@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_HEADLOCK, &head->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED1, &head->wb_flags))
+		return;
 	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
 }
 
@@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED2, &req->wb_flags))
+		return;
 	wake_up_bit(&req->wb_flags, PG_BUSY);
 }
 
@@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		return 0;
+	set_bit(PG_CONTENDED2, &req->wb_flags);
+	smp_mb__after_atomic();
 	return wait_on_bit_io(&req->wb_flags, PG_BUSY,
 			      TASK_UNINTERRUPTIBLE);
 }
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index abbee2d15dce9..d67b67ae6c8bf 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,6 +33,8 @@ enum {
 	PG_UPTODATE,		/* page group sync bit in read path */
 	PG_WB_END,		/* page group sync bit in write path */
 	PG_REMOVE,		/* page group sync bit in write path */
+	PG_CONTENDED1,		/* Is someone waiting for a lock? */
+	PG_CONTENDED2,		/* Is someone waiting for a lock? */
 };
 
 struct nfs_inode;
-- 
GitLab


From 20fa19027286983ab2734b5910c4a687436e0c31 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Thu, 29 Jun 2017 06:34:53 -0700
Subject: [PATCH 1500/1958] nfs: add export operations

This support for opening files on NFS by file handle, both through the
open_by_handle syscall, and for re-exporting NFS (for example using a
different version).  The support is very basic for now, as each open by
handle will have to do an NFSv4 open operation on the wire.  In the
future this will hopefully be mitigated by an open file cache, as well
as various optimizations in NFS for this specific case.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
[hch: incorporated various changes, resplit the patches, new changelog]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/Makefile   |   2 +-
 fs/nfs/export.c   | 177 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/internal.h |   2 +
 fs/nfs/super.c    |   2 +
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 fs/nfs/export.c

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 98f4e5728a67c..1fb118902d57b 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 CFLAGS_nfstrace.o += -I$(src)
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
 			   io.o direct.o pagelist.o read.o symlink.o unlink.o \
-			   write.o namespace.o mount_clnt.o nfstrace.o
+			   write.o namespace.o mount_clnt.o nfstrace.o export.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
new file mode 100644
index 0000000000000..249cb96cc5b5c
--- /dev/null
+++ b/fs/nfs/export.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015, Primary Data, Inc. All rights reserved.
+ *
+ * Tao Peng <bergwolf@primarydata.com>
+ */
+#include <linux/dcache.h>
+#include <linux/exportfs.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+
+#include "internal.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY		NFSDBG_VFS
+
+enum {
+	FILEID_HIGH_OFF = 0,	/* inode fileid high */
+	FILEID_LOW_OFF,		/* inode fileid low */
+	FILE_I_TYPE_OFF,	/* inode type */
+	EMBED_FH_OFF		/* embeded server fh */
+};
+
+
+static struct nfs_fh *nfs_exp_embedfh(__u32 *p)
+{
+	return (struct nfs_fh *)(p + EMBED_FH_OFF);
+}
+
+/*
+ * Let's break subtree checking for now... otherwise we'll have to embed parent fh
+ * but there might not be enough space.
+ */
+static int
+nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
+{
+	struct nfs_fh *server_fh = NFS_FH(inode);
+	struct nfs_fh *clnt_fh = nfs_exp_embedfh(p);
+	size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+	int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+
+	dprintk("%s: max fh len %d inode %p parent %p",
+		__func__, *max_len, inode, parent);
+
+	if (*max_len < len || IS_AUTOMOUNT(inode)) {
+		dprintk("%s: fh len %d too small, required %d\n",
+			__func__, *max_len, len);
+		*max_len = len;
+		return FILEID_INVALID;
+	}
+	if (IS_AUTOMOUNT(inode)) {
+		*max_len = FILEID_INVALID;
+		goto out;
+	}
+
+	p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
+	p[FILEID_LOW_OFF] = NFS_FILEID(inode);
+	p[FILE_I_TYPE_OFF] = inode->i_mode & S_IFMT;
+	p[len - 1] = 0; /* Padding */
+	nfs_copy_fh(clnt_fh, server_fh);
+	*max_len = len;
+out:
+	dprintk("%s: result fh fileid %llu mode %u size %d\n",
+		__func__, NFS_FILEID(inode), inode->i_mode, *max_len);
+	return *max_len;
+}
+
+static struct dentry *
+nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		 int fh_len, int fh_type)
+{
+	struct nfs4_label *label = NULL;
+	struct nfs_fattr *fattr = NULL;
+	struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
+	size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+	const struct nfs_rpc_ops *rpc_ops;
+	struct dentry *dentry;
+	struct inode *inode;
+	int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+	u32 *p = fid->raw;
+	int ret;
+
+	/* NULL translates to ESTALE */
+	if (fh_len < len || fh_type != len)
+		return NULL;
+
+	fattr = nfs_alloc_fattr();
+	if (fattr == NULL) {
+		dentry = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	fattr->fileid = ((u64)p[FILEID_HIGH_OFF] << 32) + p[FILEID_LOW_OFF];
+	fattr->mode = p[FILE_I_TYPE_OFF];
+	fattr->valid |= NFS_ATTR_FATTR_FILEID | NFS_ATTR_FATTR_TYPE;
+
+	dprintk("%s: fileid %llu mode %d\n", __func__, fattr->fileid, fattr->mode);
+
+	inode = nfs_ilookup(sb, fattr, server_fh);
+	if (inode)
+		goto out_found;
+
+	label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL);
+	if (IS_ERR(label)) {
+		dentry = ERR_CAST(label);
+		goto out_free_fattr;
+	}
+
+	rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops;
+	ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label);
+	if (ret) {
+		dprintk("%s: getattr failed %d\n", __func__, ret);
+		dentry = ERR_PTR(ret);
+		goto out_free_label;
+	}
+
+	inode = nfs_fhget(sb, server_fh, fattr, label);
+
+out_found:
+	dentry = d_obtain_alias(inode);
+
+out_free_label:
+	nfs4_label_free(label);
+out_free_fattr:
+	nfs_free_fattr(fattr);
+out:
+	return dentry;
+}
+
+static struct dentry *
+nfs_get_parent(struct dentry *dentry)
+{
+	int ret;
+	struct inode *inode = d_inode(dentry), *pinode;
+	struct super_block *sb = inode->i_sb;
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fattr *fattr = NULL;
+	struct nfs4_label *label = NULL;
+	struct dentry *parent;
+	struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops;
+	struct nfs_fh fh;
+
+	if (!ops->lookupp)
+		return ERR_PTR(-EACCES);
+
+	fattr = nfs_alloc_fattr();
+	if (fattr == NULL) {
+		parent = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	label = nfs4_label_alloc(server, GFP_KERNEL);
+	if (IS_ERR(label)) {
+		parent = ERR_CAST(label);
+		goto out_free_fattr;
+	}
+
+	ret = ops->lookupp(inode, &fh, fattr, label);
+	if (ret) {
+		parent = ERR_PTR(ret);
+		goto out_free_label;
+	}
+
+	pinode = nfs_fhget(sb, &fh, fattr, label);
+	parent = d_obtain_alias(pinode);
+out_free_label:
+	nfs4_label_free(label);
+out_free_fattr:
+	nfs_free_fattr(fattr);
+out:
+	return parent;
+}
+
+const struct export_operations nfs_export_ops = {
+	.encode_fh = nfs_encode_fh,
+	.fh_to_dentry = nfs_fh_to_dentry,
+	.get_parent = nfs_get_parent,
+};
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c5054edb01571..2ebd574989868 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -10,6 +10,8 @@
 
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
+extern const struct export_operations nfs_export_ops;
+
 struct nfs_string;
 
 /* Maximum number of readahead requests
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b4176393f049f..b5271644b4726 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2339,6 +2339,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
 		 */
 		sb->s_flags |= MS_POSIXACL;
 		sb->s_time_gran = 1;
+		sb->s_export_op = &nfs_export_ops;
 	}
 
  	nfs_initialise_sb(sb);
@@ -2360,6 +2361,7 @@ static void nfs_clone_super(struct super_block *sb,
 	sb->s_xattr = old_sb->s_xattr;
 	sb->s_op = old_sb->s_op;
 	sb->s_time_gran = 1;
+	sb->s_export_op = old_sb->s_export_op;
 
 	if (server->nfs_client->rpc_ops->version != 2) {
 		/* The VFS shouldn't apply the umask to mode bits. We will do
-- 
GitLab


From b4f937cffa66b3d56eb8f586e620d0b223a281a3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:53:48 -0400
Subject: [PATCH 1501/1958] NFS: Don't run wake_up_bit() when nobody is
 waiting...

"perf lock" shows fairly heavy contention for the bit waitqueue locks
when doing an I/O heavy workload.
Use a bit to tell whether or not there has been contention for a lock
so that we can optimise away the bit waitqueue options in those cases.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pagelist.c        | 17 ++++++++++++++++-
 include/linux/nfs_page.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 8a23e2b40b04e..de9066a92c0d2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 	if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
 		return 0;
 
-	if (!nonblock)
+	if (!nonblock) {
+		set_bit(PG_CONTENDED1, &head->wb_flags);
+		smp_mb__after_atomic();
 		return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
 				TASK_UNINTERRUPTIBLE);
+	}
 
 	return -EAGAIN;
 }
@@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req)
 
 	WARN_ON_ONCE(head != head->wb_head);
 
+	if (!test_bit(PG_HEADLOCK, &head->wb_flags))
+		return;
+	set_bit(PG_CONTENDED1, &head->wb_flags);
+	smp_mb__after_atomic();
 	wait_on_bit(&head->wb_flags, PG_HEADLOCK,
 		TASK_UNINTERRUPTIBLE);
 }
@@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_HEADLOCK, &head->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED1, &head->wb_flags))
+		return;
 	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
 }
 
@@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED2, &req->wb_flags))
+		return;
 	wake_up_bit(&req->wb_flags, PG_BUSY);
 }
 
@@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		return 0;
+	set_bit(PG_CONTENDED2, &req->wb_flags);
+	smp_mb__after_atomic();
 	return wait_on_bit_io(&req->wb_flags, PG_BUSY,
 			      TASK_UNINTERRUPTIBLE);
 }
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index abbee2d15dce9..d67b67ae6c8bf 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,6 +33,8 @@ enum {
 	PG_UPTODATE,		/* page group sync bit in read path */
 	PG_WB_END,		/* page group sync bit in write path */
 	PG_REMOVE,		/* page group sync bit in write path */
+	PG_CONTENDED1,		/* Is someone waiting for a lock? */
+	PG_CONTENDED2,		/* Is someone waiting for a lock? */
 };
 
 struct nfs_inode;
-- 
GitLab


From cf69f8248cc89c0a0e82f8332f9e7f13ab014c98 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jul 2017 12:14:33 -0700
Subject: [PATCH 1502/1958] xfs: fixup xfs_attr_get_ilocked

The comment mentioned the wrong lock.  Also add an ASSERT to assert
this locking precondition.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ef8a1c75a467a..de7b9bd30becc 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -114,12 +114,14 @@ xfs_inode_hasattr(
  * Overall external interface routines.
  *========================================================================*/
 
-/* Retrieve an extended attribute and its value.  Must have iolock. */
+/* Retrieve an extended attribute and its value.  Must have ilock. */
 int
 xfs_attr_get_ilocked(
 	struct xfs_inode	*ip,
 	struct xfs_da_args	*args)
 {
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+
 	if (!xfs_inode_hasattr(ip))
 		return -ENOATTR;
 	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
-- 
GitLab


From 5af7777e11f5a91dd90247035978bef6c95065e4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jul 2017 12:14:33 -0700
Subject: [PATCH 1503/1958] =?UTF-8?q?xfs:=20assert=20locking=20precond?=
 =?UTF-8?q?=D1=96tion=20in=20xfs=5Fattr=5Flist=5Fint=5Filocked?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_attr_list.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 545eca508d42f..7740c8a5e7369 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -463,6 +463,8 @@ xfs_attr_list_int_ilocked(
 {
 	struct xfs_inode		*dp = context->dp;
 
+	ASSERT(xfs_isilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+
 	/*
 	 * Decide on what work routines to call based on the inode size.
 	 */
-- 
GitLab


From 29db2500f6a92d63834a40c5e9eebab34c78c53a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jul 2017 12:14:34 -0700
Subject: [PATCH 1504/1958] xfs: assert locking precondition in
 xfs_readlink_bmap_ilocked

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_symlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 12cd9cf7de416..23a50d7aa46a7 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -61,6 +61,8 @@ xfs_readlink_bmap_ilocked(
 	int			fsblocks = 0;
 	int			offset;
 
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+
 	fsblocks = xfs_symlink_blocks(mp, pathlen);
 	error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0);
 	if (error)
-- 
GitLab


From 0891f9971a3b00d243d5743cc78a628ad060adea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jul 2017 12:14:34 -0700
Subject: [PATCH 1505/1958] Revert "xfs: grab dquots without taking the ilock"

This reverts commit 50e0bdbe9f48f98bb02eac7030d682f4716884ae.

The new XFS_QMOPT_NOLOCK isn't used at all, and conditional locking based
on a flag is always the wrong thing to do - we should be having helpers
that can be called without the lock instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_quota_defs.h |  2 --
 fs/xfs/xfs_dquot.c             | 14 ++++----------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 2834574cb6e7d..d69c772271cb0 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -136,8 +136,6 @@ typedef uint16_t	xfs_qwarncnt_t;
  */
 #define XFS_QMOPT_INHERIT	0x1000000
 
-#define XFS_QMOPT_NOLOCK	0x2000000 /* don't ilock during dqget */
-
 /*
  * flags to xfs_trans_mod_dquot.
  */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index f89f7b5241e68..fd2ef8c2c9a74 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -472,23 +472,18 @@ xfs_qm_dqtobp(
 	struct xfs_mount	*mp = dqp->q_mount;
 	xfs_dqid_t		id = be32_to_cpu(dqp->q_core.d_id);
 	struct xfs_trans	*tp = (tpp ? *tpp : NULL);
-	uint			lock_mode = 0;
+	uint			lock_mode;
 
 	quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
 	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
-	ASSERT(!(flags & XFS_QMOPT_NOLOCK) ||
-		xfs_isilocked(quotip, XFS_ILOCK_SHARED) ||
-		xfs_isilocked(quotip, XFS_ILOCK_EXCL));
-	if (!(flags & XFS_QMOPT_NOLOCK))
-		lock_mode = xfs_ilock_data_map_shared(quotip);
+	lock_mode = xfs_ilock_data_map_shared(quotip);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
 		/*
 		 * Return if this type of quotas is turned off while we
 		 * didn't have the quota inode lock.
 		 */
-		if (lock_mode)
-			xfs_iunlock(quotip, lock_mode);
+		xfs_iunlock(quotip, lock_mode);
 		return -ESRCH;
 	}
 
@@ -498,8 +493,7 @@ xfs_qm_dqtobp(
 	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
 			       XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
 
-	if (lock_mode)
-		xfs_iunlock(quotip, lock_mode);
+	xfs_iunlock(quotip, lock_mode);
 	if (error)
 		return error;
 
-- 
GitLab


From d6ab17f261919d212ec0a9e33d01f46df0ec1fde Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 12 Jul 2017 10:26:47 -0700
Subject: [PATCH 1506/1958] vfs: in iomap seek_{hole,data}, return -ENXIO for
 negative offsets

In the iomap implementations of SEEK_HOLE and SEEK_DATA, make sure we
return -ENXIO for negative offsets.

Inspired-by: Mateusz S <muttdini@gmail.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 432eed8f091f1..16f5c07451bf4 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -610,8 +610,8 @@ iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
 	loff_t length = size - offset;
 	loff_t ret;
 
-	/* Nothing to be found beyond the end of the file. */
-	if (offset >= size)
+	/* Nothing to be found before or beyond the end of the file. */
+	if (offset < 0 || offset >= size)
 		return -ENXIO;
 
 	while (length > 0) {
@@ -656,8 +656,8 @@ iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
 	loff_t length = size - offset;
 	loff_t ret;
 
-	/* Nothing to be found beyond the end of the file. */
-	if (offset >= size)
+	/* Nothing to be found before or beyond the end of the file. */
+	if (offset < 0 || offset >= size)
 		return -ENXIO;
 
 	while (length > 0) {
-- 
GitLab


From 6e34e1f23d780978da65968327cbba6d7013a73f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 13 Jul 2017 15:03:51 -0700
Subject: [PATCH 1507/1958] cpufreq: intel_pstate: Correct the busy calculation
 for KNL

The busy percent calculated for the Knights Landing (KNL) platform
is 1024 times smaller than the correct busy value.  This causes
performance to get stuck at the lowest ratio.

The scaling algorithm used for KNL is performance-based, but it still
looks at the CPU load to set the scaled busy factor to 0 when the
load is less than 1 percent.  In this case, since the computed load
is 1024x smaller than it should be, the scaled busy factor will
always be 0, irrespective of CPU business.

This needs a fix similar to the turbostat one in commit b2b34dfe4d9a
(tools/power turbostat: KNL workaround for %Busy and Avg_MHz).

For this reason, add one more callback to processor-specific
callbacks to specify an MPERF multiplier represented by a number of
bit positions to shift the value of that register to the left to
copmensate for its rate difference with respect to the TSC.  This
shift value is used during CPU busy calculations.

Fixes: ffb810563c (intel_pstate: Avoid getting stuck in high P-states when idle)
Reported-and-tested-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: 4.6+ <stable@vger.kernel.org> # 4.6+
[ rjw: Changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2386d7036e909..0ddc0d0458315 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -225,6 +225,9 @@ struct global_params {
  * @vid:		Stores VID limits for this CPU
  * @pid:		Stores PID parameters for this CPU
  * @last_sample_time:	Last Sample time
+ * @aperf_mperf_shift:	Number of clock cycles after aperf, merf is incremented
+ *			This shift is a multiplier to mperf delta to
+ *			calculate CPU busy.
  * @prev_aperf:		Last APERF value read from APERF MSR
  * @prev_mperf:		Last MPERF value read from MPERF MSR
  * @prev_tsc:		Last timestamp counter (TSC) value
@@ -259,6 +262,7 @@ struct cpudata {
 
 	u64	last_update;
 	u64	last_sample_time;
+	u64	aperf_mperf_shift;
 	u64	prev_aperf;
 	u64	prev_mperf;
 	u64	prev_tsc;
@@ -321,6 +325,7 @@ struct pstate_funcs {
 	int (*get_min)(void);
 	int (*get_turbo)(void);
 	int (*get_scaling)(void);
+	int (*get_aperf_mperf_shift)(void);
 	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
 	void (*update_util)(struct update_util_data *data, u64 time,
@@ -1490,6 +1495,11 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
 	return val;
 }
 
+static int knl_get_aperf_mperf_shift(void)
+{
+	return 10;
+}
+
 static int knl_get_turbo_pstate(void)
 {
 	u64 value;
@@ -1547,6 +1557,9 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
 	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 
+	if (pstate_funcs.get_aperf_mperf_shift)
+		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
+
 	if (pstate_funcs.get_vid)
 		pstate_funcs.get_vid(cpu);
 
@@ -1620,7 +1633,8 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 	int32_t busy_frac, boost;
 	int target, avg_pstate;
 
-	busy_frac = div_fp(sample->mperf, sample->tsc);
+	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
+			   sample->tsc);
 
 	boost = cpu->iowait_boost;
 	cpu->iowait_boost >>= 1;
@@ -1679,7 +1693,8 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 		sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
 		perf_scaled = mul_fp(perf_scaled, sample_ratio);
 	} else {
-		sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
+		sample_ratio = div_fp(100 * (cpu->sample.mperf << cpu->aperf_mperf_shift),
+				      cpu->sample.tsc);
 		if (sample_ratio < int_tofp(1))
 			perf_scaled = 0;
 	}
@@ -1811,6 +1826,7 @@ static const struct pstate_funcs knl_funcs = {
 	.get_max_physical = core_get_max_pstate_physical,
 	.get_min = core_get_min_pstate,
 	.get_turbo = knl_get_turbo_pstate,
+	.get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
 	.get_scaling = core_get_scaling,
 	.get_val = core_get_val,
 	.update_util = intel_pstate_update_util_pid,
@@ -2407,6 +2423,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 	pstate_funcs.get_val   = funcs->get_val;
 	pstate_funcs.get_vid   = funcs->get_vid;
 	pstate_funcs.update_util = funcs->update_util;
+	pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
 
 	intel_pstate_use_acpi_profile();
 }
-- 
GitLab


From 5ccbdbf9870fe9efe78d77df23d5e7d8641c36f8 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Date: Thu, 13 Jul 2017 13:16:49 +0100
Subject: [PATCH 1508/1958] modsign: add markers to endif-statements in
 certs/Makefile

It's a bit hard for eye to track certs/Makefile if you are not
accustomed to it. This commit adds comments to key endif statements in
order to help to keep the context while reading this file.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 certs/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/certs/Makefile b/certs/Makefile
index 4119bb376ea11..847361ce14d10 100644
--- a/certs/Makefile
+++ b/certs/Makefile
@@ -26,7 +26,7 @@ quiet_cmd_extract_certs  = EXTRACT_CERTS   $(patsubst "%",%,$(2))
 targets += x509_certificate_list
 $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(SYSTEM_TRUSTED_KEYS_FILENAME) FORCE
 	$(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS))
-endif
+endif # CONFIG_SYSTEM_TRUSTED_KEYRING
 
 clean-files := x509_certificate_list .x509.list
 
@@ -87,7 +87,7 @@ $(obj)/x509.genkey:
 	@echo >>$@ "keyUsage=digitalSignature"
 	@echo >>$@ "subjectKeyIdentifier=hash"
 	@echo >>$@ "authorityKeyIdentifier=keyid"
-endif
+endif # CONFIG_MODULE_SIG_KEY
 
 $(eval $(call config_filename,MODULE_SIG_KEY))
 
@@ -102,4 +102,4 @@ $(obj)/system_certificates.o: $(obj)/signing_key.x509
 targets += signing_key.x509
 $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE
 	$(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY))
-endif
+endif # CONFIG_MODULE_SIG
-- 
GitLab


From 4f9dabfaf8df971f8a3b6aa324f8f817be38d538 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 13 Jul 2017 13:16:56 +0100
Subject: [PATCH 1509/1958] KEYS: DH: validate __spare field

Syscalls must validate that their reserved arguments are zero and return
EINVAL otherwise.  Otherwise, it will be impossible to actually use them
for anything in the future because existing programs may be passing
garbage in.  This is standard practice when adding new APIs.

Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/keys/compat_dh.c | 2 ++
 security/keys/dh.c        | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/security/keys/compat_dh.c b/security/keys/compat_dh.c
index a6a659b6bcb6d..aa6b34cafe5f9 100644
--- a/security/keys/compat_dh.c
+++ b/security/keys/compat_dh.c
@@ -33,6 +33,8 @@ long compat_keyctl_dh_compute(struct keyctl_dh_params __user *params,
 	kdfcopy.hashname = compat_ptr(compat_kdfcopy.hashname);
 	kdfcopy.otherinfo = compat_ptr(compat_kdfcopy.otherinfo);
 	kdfcopy.otherinfolen = compat_kdfcopy.otherinfolen;
+	memcpy(kdfcopy.__spare, compat_kdfcopy.__spare,
+	       sizeof(kdfcopy.__spare));
 
 	return __keyctl_dh_compute(params, buffer, buflen, &kdfcopy);
 }
diff --git a/security/keys/dh.c b/security/keys/dh.c
index 4755d4b4f9454..d1ea9f325f947 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -266,6 +266,11 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 	if (kdfcopy) {
 		char *hashname;
 
+		if (memchr_inv(kdfcopy->__spare, 0, sizeof(kdfcopy->__spare))) {
+			ret = -EINVAL;
+			goto out1;
+		}
+
 		if (buflen > KEYCTL_KDF_MAX_OUTPUT_LEN ||
 		    kdfcopy->otherinfolen > KEYCTL_KDF_MAX_OI_LEN) {
 			ret = -EMSGSIZE;
-- 
GitLab


From 7228b66aaf723a623e578aa4db7d083bb39546c9 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Thu, 13 Jul 2017 13:17:03 +0100
Subject: [PATCH 1510/1958] KEYS: Add documentation for asymmetric keyring
 restrictions

Provide more specific examples of keyring restrictions as applied to
X.509 signature chain verification.

Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 Documentation/crypto/asymmetric-keys.txt | 65 +++++++++++++++++++++---
 Documentation/security/keys/core.rst     |  6 +++
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
index b82b6ad484887..5969bf42562a8 100644
--- a/Documentation/crypto/asymmetric-keys.txt
+++ b/Documentation/crypto/asymmetric-keys.txt
@@ -10,6 +10,7 @@ Contents:
     - Signature verification.
   - Asymmetric key subtypes.
   - Instantiation data parsers.
+  - Keyring link restrictions.
 
 
 ========
@@ -318,7 +319,8 @@ KEYRING LINK RESTRICTIONS
 =========================
 
 Keyrings created from userspace using add_key can be configured to check the
-signature of the key being linked.
+signature of the key being linked.  Keys without a valid signature are not
+allowed to link.
 
 Several restriction methods are available:
 
@@ -327,9 +329,10 @@ Several restriction methods are available:
      - Option string used with KEYCTL_RESTRICT_KEYRING:
        - "builtin_trusted"
 
-     The kernel builtin trusted keyring will be searched for the signing
-     key. The ca_keys kernel parameter also affects which keys are used for
-     signature verification.
+     The kernel builtin trusted keyring will be searched for the signing key.
+     If the builtin trusted keyring is not configured, all links will be
+     rejected.  The ca_keys kernel parameter also affects which keys are used
+     for signature verification.
 
  (2) Restrict using the kernel builtin and secondary trusted keyrings
 
@@ -337,8 +340,10 @@ Several restriction methods are available:
        - "builtin_and_secondary_trusted"
 
      The kernel builtin and secondary trusted keyrings will be searched for the
-     signing key. The ca_keys kernel parameter also affects which keys are used
-     for signature verification.
+     signing key.  If the secondary trusted keyring is not configured, this
+     restriction will behave like the "builtin_trusted" option.  The ca_keys
+     kernel parameter also affects which keys are used for signature
+     verification.
 
  (3) Restrict using a separate key or keyring
 
@@ -346,7 +351,7 @@ Several restriction methods are available:
        - "key_or_keyring:<key or keyring serial number>[:chain]"
 
      Whenever a key link is requested, the link will only succeed if the key
-     being linked is signed by one of the designated keys. This key may be
+     being linked is signed by one of the designated keys.  This key may be
      specified directly by providing a serial number for one asymmetric key, or
      a group of keys may be searched for the signing key by providing the
      serial number for a keyring.
@@ -354,7 +359,51 @@ Several restriction methods are available:
      When the "chain" option is provided at the end of the string, the keys
      within the destination keyring will also be searched for signing keys.
      This allows for verification of certificate chains by adding each
-     cert in order (starting closest to the root) to one keyring.
+     certificate in order (starting closest to the root) to a keyring.  For
+     instance, one keyring can be populated with links to a set of root
+     certificates, with a separate, restricted keyring set up for each
+     certificate chain to be validated:
+
+	# Create and populate a keyring for root certificates
+	root_id=`keyctl add keyring root-certs "" @s`
+	keyctl padd asymmetric "" $root_id < root1.cert
+	keyctl padd asymmetric "" $root_id < root2.cert
+
+	# Create and restrict a keyring for the certificate chain
+	chain_id=`keyctl add keyring chain "" @s`
+	keyctl restrict_keyring $chain_id asymmetric key_or_keyring:$root_id:chain
+
+	# Attempt to add each certificate in the chain, starting with the
+	# certificate closest to the root.
+	keyctl padd asymmetric "" $chain_id < intermediateA.cert
+	keyctl padd asymmetric "" $chain_id < intermediateB.cert
+	keyctl padd asymmetric "" $chain_id < end-entity.cert
+
+     If the final end-entity certificate is successfully added to the "chain"
+     keyring, we can be certain that it has a valid signing chain going back to
+     one of the root certificates.
+
+     A single keyring can be used to verify a chain of signatures by
+     restricting the keyring after linking the root certificate:
+
+	# Create a keyring for the certificate chain and add the root
+	chain2_id=`keyctl add keyring chain2 "" @s`
+	keyctl padd asymmetric "" $chain2_id < root1.cert
+
+	# Restrict the keyring that already has root1.cert linked.  The cert
+	# will remain linked by the keyring.
+	keyctl restrict_keyring $chain2_id asymmetric key_or_keyring:0:chain
+
+	# Attempt to add each certificate in the chain, starting with the
+	# certificate closest to the root.
+	keyctl padd asymmetric "" $chain2_id < intermediateA.cert
+	keyctl padd asymmetric "" $chain2_id < intermediateB.cert
+	keyctl padd asymmetric "" $chain2_id < end-entity.cert
+
+     If the final end-entity certificate is successfully added to the "chain2"
+     keyring, we can be certain that there is a valid signing chain going back
+     to the root certificate that was added before the keyring was restricted.
+
 
 In all of these cases, if the signing key is found the signature of the key to
 be linked will be verified using the signing key.  The requested key is added
diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 0d831a7afe4fe..1648fa80b3bfd 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -894,6 +894,12 @@ The keyctl syscall functions are:
      To apply a keyring restriction the process must have Set Attribute
      permission and the keyring must not be previously restricted.
 
+     One application of restricted keyrings is to verify X.509 certificate
+     chains or individual certificate signatures using the asymmetric key type.
+     See Documentation/crypto/asymmetric-keys.txt for specific restrictions
+     applicable to the asymmetric key type.
+
+
 Kernel Services
 ===============
 
-- 
GitLab


From 3447d220155bd9f4b5435ea6e9d58b536c7e94dd Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Thu, 13 Jul 2017 20:21:53 -0500
Subject: [PATCH 1511/1958] drm/amdgpu: Fix KFD oversubscription by tracking
 queues correctly

The number of compute queues available to the KFD was erroneously
calculated as 64. Only the first MEC can execute compute queues and
it has 32 queue slots.

This caused the oversubscription limit to be calculated incorrectly,
leading to a missing chained runlist command at the end of an
oversubscribed runlist.

v2: Remove unused num_mec field to avoid duplicate logic
v3: Separate num_mec removal into separate patches

Change-Id: I9e7bba2cc1928b624e3eeb1edb06fdb602e5294f
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 5f8ada1d872bc..76ddedcd1e65c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -122,7 +122,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
 		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
 		 * nbits is not compile time constant */
-		last_valid_bit = adev->gfx.mec.num_mec
+		last_valid_bit = 1 /* only first MEC can have compute queues */
 				* adev->gfx.mec.num_pipe_per_mec
 				* adev->gfx.mec.num_queue_per_pipe;
 		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
-- 
GitLab


From 13c4a2c78e7ed932cff019f54cdd4f6976dad140 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Thu, 13 Jul 2017 20:21:54 -0500
Subject: [PATCH 1512/1958] drm/amdkfd: Remove unused references to
 shared_resources.num_mec

Dead code.

Change-Id: Ic0bb1bcca87e96bc5e8fa9894727b0de152e8818
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c               | 4 ----
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 -------
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 1 -
 3 files changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 88187bfc5ea3f..3f95f7cb40194 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -226,10 +226,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	kfd->shared_resources = *gpu_resources;
 
-	/* We only use the first MEC */
-	if (kfd->shared_resources.num_mec > 1)
-		kfd->shared_resources.num_mec = 1;
-
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 955aa304ff486..602769ced3bd3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -77,13 +77,6 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
 	return false;
 }
 
-unsigned int get_mec_num(struct device_queue_manager *dqm)
-{
-	BUG_ON(!dqm || !dqm->dev);
-
-	return dqm->dev->shared_resources.num_mec;
-}
-
 unsigned int get_queues_num(struct device_queue_manager *dqm)
 {
 	BUG_ON(!dqm || !dqm->dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 66b9615bc3c17..faf820a064000 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -180,7 +180,6 @@ void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
 void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-unsigned int get_mec_num(struct device_queue_manager *dqm);
 unsigned int get_queues_num(struct device_queue_manager *dqm);
 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
-- 
GitLab


From 1ff036dc391b169ff5a74ef0f0215c174c07166f Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Thu, 13 Jul 2017 20:21:55 -0500
Subject: [PATCH 1513/1958] drm/radeon: Remove initialization of
 shared_resources.num_mec

Dead code.

Change-Id: I2383e0b541ed55288570b6a0ec8a0d49cdd4df89
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/radeon/radeon_kfd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 699fe7f9b8bfc..a2ab6dcdf4a25 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -184,7 +184,6 @@ void radeon_kfd_device_init(struct radeon_device *rdev)
 	if (rdev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-			.num_mec = 1,
 			.num_pipe_per_mec = 4,
 			.num_queue_per_pipe = 8
 		};
-- 
GitLab


From f835edf9aec13bc8abb3dd4836151773cc359f42 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Thu, 13 Jul 2017 20:21:56 -0500
Subject: [PATCH 1514/1958] drm/amdgpu: Remove unused field
 kgd2kfd_shared_resources.num_mec

Dead code.

Change-Id: I9575aa73b5741b80dc340f953cc773385c92b2be
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c      | 1 -
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 76ddedcd1e65c..37971d9402e3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -101,7 +101,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-			.num_mec = adev->gfx.mec.num_mec,
 			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
 			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 91ef1484b3bb6..36f376677a533 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -63,9 +63,6 @@ struct kgd2kfd_shared_resources {
 	/* Bit n == 1 means VMID n is available for KFD. */
 	unsigned int compute_vmid_bitmap;
 
-	/* number of mec available from the hardware */
-	uint32_t num_mec;
-
 	/* number of pipes per mec */
 	uint32_t num_pipe_per_mec;
 
-- 
GitLab


From 0e4097c3354e2f5a5ad8affd9dc7f7f7d00bb6b9 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Sun, 9 Jul 2017 00:40:28 -0700
Subject: [PATCH 1515/1958] sched/cputime: Don't use smp_processor_id() in
 preemptible context

Recent kernels trigger this warning:

 BUG: using smp_processor_id() in preemptible [00000000] code: 99-trinity/181
 caller is debug_smp_processor_id+0x17/0x19
 CPU: 0 PID: 181 Comm: 99-trinity Not tainted 4.12.0-01059-g2a42eb9 #1
 Call Trace:
  dump_stack+0x82/0xb8
  check_preemption_disabled()
  debug_smp_processor_id()
  vtime_delta()
  task_cputime()
  thread_group_cputime()
  thread_group_cputime_adjusted()
  wait_consider_task()
  do_wait()
  SYSC_wait4()
  do_syscall_64()
  entry_SYSCALL64_slow_path()

As Frederic pointed out:

| Although those sched_clock_cpu() things seem to only matter when the
| sched_clock() is unstable. And that stability is a condition for nohz_full
| to work anyway. So probably sched_clock() alone would be enough.

This patch fixes it by replacing sched_clock_cpu() with sched_clock() to
avoid calling smp_processor_id() in a preemptible context.

Reported-by: Xiaolong Ye <xiaolong.ye@intel.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1499586028-7402-1-git-send-email-wanpeng.li@hotmail.com
[ Prettified the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/cputime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 6e3ea4ac1bdaf..14d2dbf97c531 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -683,7 +683,7 @@ static u64 vtime_delta(struct vtime *vtime)
 {
 	unsigned long long clock;
 
-	clock = sched_clock_cpu(smp_processor_id());
+	clock = sched_clock();
 	if (clock < vtime->starttime)
 		return 0;
 
@@ -814,7 +814,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
 
 	write_seqcount_begin(&vtime->seqcount);
 	vtime->state = VTIME_SYS;
-	vtime->starttime = sched_clock_cpu(smp_processor_id());
+	vtime->starttime = sched_clock();
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -826,7 +826,7 @@ void vtime_init_idle(struct task_struct *t, int cpu)
 	local_irq_save(flags);
 	write_seqcount_begin(&vtime->seqcount);
 	vtime->state = VTIME_SYS;
-	vtime->starttime = sched_clock_cpu(cpu);
+	vtime->starttime = sched_clock();
 	write_seqcount_end(&vtime->seqcount);
 	local_irq_restore(flags);
 }
-- 
GitLab


From 193be41e33168a3a06eb9d356d9e39c69de161d2 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Wed, 12 Jul 2017 19:24:29 -0700
Subject: [PATCH 1516/1958] sched/deadline: Fix confusing comments about
 selection of top pi-waiter

This comment in the code is incomplete, and I believe it begs a definition of
dl_boosted to make sense of the condition that follows. Rewrite the comment and
also rearrange the condition that follows to reflect the first condition "we
have a top pi-waiter which is a SCHED_DEADLINE task" in that order. Also fix a
typo that follows.

Signed-off-by: Joel Fernandes <joelaf@google.com>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170713022429.10307-1-joelaf@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/deadline.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a84299f44b5d8..755bd3f1a1a93 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1392,17 +1392,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_dl_entity *pi_se = &p->dl;
 
 	/*
-	 * Use the scheduling parameters of the top pi-waiter
-	 * task if we have one and its (absolute) deadline is
-	 * smaller than our one... OTW we keep our runtime and
-	 * deadline.
+	 * Use the scheduling parameters of the top pi-waiter task if:
+	 * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
+	 * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
+	 *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
+	 *   boosted due to a SCHED_DEADLINE pi-waiter).
+	 * Otherwise we keep our runtime and deadline.
 	 */
-	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
+	if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
 		pi_se = &pi_task->dl;
 	} else if (!dl_prio(p->normal_prio)) {
 		/*
 		 * Special case in which we have a !SCHED_DEADLINE task
-		 * that is going to be deboosted, but exceedes its
+		 * that is going to be deboosted, but exceeds its
 		 * runtime while doing so. No point in replenishing
 		 * it, as it's going to return back to its original
 		 * scheduling class after this.
-- 
GitLab


From cfcd20e5caad6ba552978c16ed8bed7edb0143cf Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 13 Jul 2017 18:30:39 -0700
Subject: [PATCH 1517/1958] KVM: x86: Simplify kvm_x86_ops->queue_exception
 parameter list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch removes all arguments except the first in
kvm_x86_ops->queue_exception since they can extract the arguments from
vcpu->arch.exception themselves.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 4 +---
 arch/x86/kvm/svm.c              | 8 +++++---
 arch/x86/kvm/vmx.c              | 8 +++++---
 arch/x86/kvm/x86.c              | 5 +----
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9d8de5dd7546d..8d11ddcb0dbfd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -954,9 +954,7 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	void (*set_irq)(struct kvm_vcpu *vcpu);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
-	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
-				bool has_error_code, u32 error_code,
-				bool reinject);
+	void (*queue_exception)(struct kvm_vcpu *vcpu);
 	void (*cancel_injection)(struct kvm_vcpu *vcpu);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4c98d362e3e44..cde756a02b1aa 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -637,11 +637,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	svm_set_interrupt_shadow(vcpu, 0);
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-				bool has_error_code, u32 error_code,
-				bool reinject)
+static void svm_queue_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	unsigned nr = vcpu->arch.exception.nr;
+	bool has_error_code = vcpu->arch.exception.has_error_code;
+	bool reinject = vcpu->arch.exception.reinject;
+	u32 error_code = vcpu->arch.exception.error_code;
 
 	/*
 	 * If we are within a nested VM we'd better #VMEXIT and let the guest
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 25f2fdccf6251..69cc228436ea6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2435,11 +2435,13 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 	return 1;
 }
 
-static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-				bool has_error_code, u32 error_code,
-				bool reinject)
+static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned nr = vcpu->arch.exception.nr;
+	bool has_error_code = vcpu->arch.exception.has_error_code;
+	bool reinject = vcpu->arch.exception.reinject;
+	u32 error_code = vcpu->arch.exception.error_code;
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
 	if (!reinject && is_guest_mode(vcpu) &&
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f41c5222ecd4..e149c92476f14 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6356,10 +6356,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			kvm_update_dr7(vcpu);
 		}
 
-		kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
-					  vcpu->arch.exception.has_error_code,
-					  vcpu->arch.exception.error_code,
-					  vcpu->arch.exception.reinject);
+		kvm_x86_ops->queue_exception(vcpu);
 		return 0;
 	}
 
-- 
GitLab


From 1261bfa326f5e903166498628a1894edce0caabc Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 13 Jul 2017 18:30:40 -0700
Subject: [PATCH 1518/1958] KVM: async_pf: Add L1 guest async_pf #PF vmexit
 handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds the L1 guest async page fault #PF vmexit handler, such
by L1 similar to ordinary async page fault.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
[Passed insn parameters to kvm_mmu_page_fault().]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c              | 33 ++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu.h              |  3 +++
 arch/x86/kvm/svm.c              | 36 ++++++---------------------------
 arch/x86/kvm/vmx.c              | 15 +++++++-------
 5 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8d11ddcb0dbfd..4f20ee6c79a16 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -650,6 +650,7 @@ struct kvm_vcpu_arch {
 		u64 msr_val;
 		u32 id;
 		bool send_user_only;
+		u32 host_apf_reason;
 	} apf;
 
 	/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aafd399cf8c6f..3825a35cd7524 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/vmx.h>
 #include <asm/kvm_page_track.h>
+#include "trace.h"
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	return false;
 }
 
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+				u64 fault_address, char *insn, int insn_len,
+				bool need_unprotect)
+{
+	int r = 1;
+
+	switch (vcpu->arch.apf.host_apf_reason) {
+	default:
+		trace_kvm_page_fault(fault_address, error_code);
+
+		if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+			kvm_mmu_unprotect_page_virt(vcpu, fault_address);
+		r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
+				insn_len);
+		break;
+	case KVM_PV_REASON_PAGE_NOT_PRESENT:
+		vcpu->arch.apf.host_apf_reason = 0;
+		local_irq_disable();
+		kvm_async_pf_task_wait(fault_address);
+		local_irq_enable();
+		break;
+	case KVM_PV_REASON_PAGE_READY:
+		vcpu->arch.apf.host_apf_reason = 0;
+		local_irq_disable();
+		kvm_async_pf_task_wake(fault_address);
+		local_irq_enable();
+		break;
+	}
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+
 static bool
 check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a276834950c14..d7d248a000dd6 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+				u64 fault_address, char *insn, int insn_len,
+				bool need_unprotect);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cde756a02b1aa..fb23497cf9157 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -194,7 +194,6 @@ struct vcpu_svm {
 
 	unsigned int3_injected;
 	unsigned long int3_rip;
-	u32 apf_reason;
 
 	/* cached guest cpuid flags for faster access */
 	bool nrips_enabled	: 1;
@@ -2122,34 +2121,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 static int pf_interception(struct vcpu_svm *svm)
 {
 	u64 fault_address = svm->vmcb->control.exit_info_2;
-	u64 error_code;
-	int r = 1;
+	u64 error_code = svm->vmcb->control.exit_info_1;
 
-	switch (svm->apf_reason) {
-	default:
-		error_code = svm->vmcb->control.exit_info_1;
-
-		trace_kvm_page_fault(fault_address, error_code);
-		if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
-			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-		r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
+	return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
 			svm->vmcb->control.insn_bytes,
-			svm->vmcb->control.insn_len);
-		break;
-	case KVM_PV_REASON_PAGE_NOT_PRESENT:
-		svm->apf_reason = 0;
-		local_irq_disable();
-		kvm_async_pf_task_wait(fault_address);
-		local_irq_enable();
-		break;
-	case KVM_PV_REASON_PAGE_READY:
-		svm->apf_reason = 0;
-		local_irq_disable();
-		kvm_async_pf_task_wake(fault_address);
-		local_irq_enable();
-		break;
-	}
-	return r;
+			svm->vmcb->control.insn_len, !npt_enabled);
 }
 
 static int db_interception(struct vcpu_svm *svm)
@@ -2630,7 +2606,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
 		break;
 	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
 		/* When we're shadowing, trap PFs, but not async PF */
-		if (!npt_enabled && svm->apf_reason == 0)
+		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
 			return NESTED_EXIT_HOST;
 		break;
 	default:
@@ -2677,7 +2653,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
 		}
 		/* async page fault always cause vmexit */
 		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-			 svm->apf_reason != 0)
+			 svm->vcpu.arch.apf.host_apf_reason != 0)
 			vmexit = NESTED_EXIT_DONE;
 		break;
 	}
@@ -4998,7 +4974,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	/* if exit due to PF check for async PF */
 	if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
-		svm->apf_reason = kvm_read_and_reset_pf_reason();
+		svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
 	if (npt_enabled) {
 		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 69cc228436ea6..c9c46e63f744e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5698,14 +5698,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	}
 
 	if (is_page_fault(intr_info)) {
-		/* EPT won't cause page fault directly */
-		BUG_ON(enable_ept);
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
-		trace_kvm_page_fault(cr2, error_code);
-
-		if (kvm_event_needs_reinjection(vcpu))
-			kvm_mmu_unprotect_page_virt(vcpu, cr2);
-		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+		/* EPT won't cause page fault directly */
+		WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
+		return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
+				true);
 	}
 
 	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@ -8643,6 +8640,10 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	vmx->exit_intr_info = exit_intr_info;
 
+	/* if exit due to PF check for async PF */
+	if (is_page_fault(exit_intr_info))
+		vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
+
 	/* Handle machine checks before interrupts are enabled */
 	if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
 	    is_machine_check(exit_intr_info))
-- 
GitLab


From adfe20fb48785dd73af3bf91407196eb5403c8cf Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 13 Jul 2017 18:30:41 -0700
Subject: [PATCH 1519/1958] KVM: async_pf: Force a nested vmexit if the
 injected #PF is async_pf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an nested_apf field to vcpu->arch.exception to identify an async page
fault, and constructs the expected vm-exit information fields. Force a
nested VM exit from nested_vmx_check_exception() if the injected #PF is
async page fault.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/svm.c                 | 16 ++++++++++------
 arch/x86/kvm/vmx.c                 | 17 ++++++++++++++---
 arch/x86/kvm/x86.c                 |  9 ++++++++-
 5 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 722d0e5688634..fde36f189836d 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -23,6 +23,7 @@ struct x86_exception {
 	u16 error_code;
 	bool nested_page_fault;
 	u64 address; /* cr2 or nested page fault gpa */
+	u8 async_page_fault;
 };
 
 /*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f20ee6c79a16..5e9ac508f7189 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -550,6 +550,7 @@ struct kvm_vcpu_arch {
 		bool reinject;
 		u8 nr;
 		u32 error_code;
+		u8 nested_apf;
 	} exception;
 
 	struct kvm_queued_interrupt {
@@ -651,6 +652,7 @@ struct kvm_vcpu_arch {
 		u32 id;
 		bool send_user_only;
 		u32 host_apf_reason;
+		unsigned long nested_apf_token;
 	} apf;
 
 	/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fb23497cf9157..4d8141e533c36 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2423,15 +2423,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 	if (!is_guest_mode(&svm->vcpu))
 		return 0;
 
+	vmexit = nested_svm_intercept(svm);
+	if (vmexit != NESTED_EXIT_DONE)
+		return 0;
+
 	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
 	svm->vmcb->control.exit_code_hi = 0;
 	svm->vmcb->control.exit_info_1 = error_code;
-	svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
-	vmexit = nested_svm_intercept(svm);
-	if (vmexit == NESTED_EXIT_DONE)
-		svm->nested.exit_required = true;
+	if (svm->vcpu.arch.exception.nested_apf)
+		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+	else
+		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
+	svm->nested.exit_required = true;
 	return vmexit;
 }
 
@@ -2653,7 +2657,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
 		}
 		/* async page fault always cause vmexit */
 		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-			 svm->vcpu.arch.apf.host_apf_reason != 0)
+			 svm->vcpu.arch.exception.nested_apf != 0)
 			vmexit = NESTED_EXIT_DONE;
 		break;
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c9c46e63f744e..5a3bb1a697a22 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2422,13 +2422,24 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	unsigned int nr = vcpu->arch.exception.nr;
 
-	if (!(vmcs12->exception_bitmap & (1u << nr)))
+	if (!((vmcs12->exception_bitmap & (1u << nr)) ||
+		(nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
 		return 0;
 
+	if (vcpu->arch.exception.nested_apf) {
+		vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+			PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+			INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+			vcpu->arch.apf.nested_apf_token);
+		return 1;
+	}
+
 	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 			  vmcs_read32(VM_EXIT_INTR_INFO),
 			  vmcs_readl(EXIT_QUALIFICATION));
@@ -2445,7 +2456,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
 	if (!reinject && is_guest_mode(vcpu) &&
-	    nested_vmx_check_exception(vcpu, nr))
+	    nested_vmx_check_exception(vcpu))
 		return;
 
 	if (has_error_code) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e149c92476f14..f3f10154c1339 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -450,7 +450,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
 	++vcpu->stat.pf_guest;
-	vcpu->arch.cr2 = fault->address;
+	vcpu->arch.exception.nested_apf =
+		is_guest_mode(vcpu) && fault->async_page_fault;
+	if (vcpu->arch.exception.nested_apf)
+		vcpu->arch.apf.nested_apf_token = fault->address;
+	else
+		vcpu->arch.cr2 = fault->address;
 	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
@@ -8582,6 +8587,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 		fault.error_code = 0;
 		fault.nested_page_fault = false;
 		fault.address = work->arch.token;
+		fault.async_page_fault = true;
 		kvm_inject_page_fault(vcpu, &fault);
 	}
 }
@@ -8604,6 +8610,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 		fault.error_code = 0;
 		fault.nested_page_fault = false;
 		fault.address = work->arch.token;
+		fault.async_page_fault = true;
 		kvm_inject_page_fault(vcpu, &fault);
 	}
 	vcpu->arch.apf.halted = false;
-- 
GitLab


From 52a5c155cf79f1f059bffebf4d06d0249573e659 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 13 Jul 2017 18:30:42 -0700
Subject: [PATCH 1520/1958] KVM: async_pf: Let guest support delivery of
 async_pf from guest mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds another flag bit (bit 2) to MSR_KVM_ASYNC_PF_EN. If bit 2 is 1,
async page faults are delivered to L1 as #PF vmexits; if bit 2 is 0,
kvm_can_do_async_pf returns 0 if in guest mode.

This is similar to what svm.c wanted to do all along, but it is only
enabled for Linux as L1 hypervisor.  Foreign hypervisors must never
receive async page faults as vmexits, because they'd probably be very
confused about that.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 Documentation/virtual/kvm/msr.txt    | 5 +++--
 arch/x86/include/asm/kvm_host.h      | 1 +
 arch/x86/include/uapi/asm/kvm_para.h | 1 +
 arch/x86/kernel/kvm.c                | 7 ++++++-
 arch/x86/kvm/mmu.c                   | 2 +-
 arch/x86/kvm/vmx.c                   | 2 +-
 arch/x86/kvm/x86.c                   | 5 +++--
 7 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 0a9ea515512a2..1ebecc115dc6e 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
 MSR_KVM_ASYNC_PF_EN: 0x4b564d02
 	data: Bits 63-6 hold 64-byte aligned physical address of a
 	64 byte memory area which must be in guest RAM and must be
-	zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+	zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
 	when asynchronous page faults are enabled on the vcpu 0 when
 	disabled. Bit 1 is 1 if asynchronous page faults can be injected
-	when vcpu is in cpl == 0.
+	when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+	are delivered to L1 as #PF vmexits.
 
 	First 4 byte of 64 byte memory location will be written to by
 	the hypervisor at the time of asynchronous page fault (APF)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5e9ac508f7189..da3261e384d3a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -653,6 +653,7 @@ struct kvm_vcpu_arch {
 		bool send_user_only;
 		u32 host_apf_reason;
 		unsigned long nested_apf_token;
+		bool delivery_as_pf_vmexit;
 	} apf;
 
 	/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index cff0bb6556f88..a965e5b0d3280 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
 
 #define KVM_ASYNC_PF_ENABLED			(1 << 0)
 #define KVM_ASYNC_PF_SEND_ALWAYS		(1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT	(1 << 2)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 43e10d6fdbeda..71c17a5be9835 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
 #ifdef CONFIG_PREEMPT
 		pa |= KVM_ASYNC_PF_SEND_ALWAYS;
 #endif
-		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+		pa |= KVM_ASYNC_PF_ENABLED;
+
+		/* Async page fault support for L1 hypervisor is optional */
+		if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
+			(pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
+			wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 		__this_cpu_write(apf_reason.enabled, 1);
 		printk(KERN_INFO"KVM setup async PF for cpu %d\n",
 		       smp_processor_id());
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3825a35cd7524..9b1dd114956a8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3749,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 		     kvm_event_needs_reinjection(vcpu)))
 		return false;
 
-	if (is_guest_mode(vcpu))
+	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
 		return false;
 
 	return kvm_x86_ops->interrupt_allowed(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5a3bb1a697a22..84e62acf2dd86 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8037,7 +8037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		if (is_nmi(intr_info))
 			return false;
 		else if (is_page_fault(intr_info))
-			return enable_ept;
+			return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
 		else if (is_no_device(intr_info) &&
 			 !(vmcs12->guest_cr0 & X86_CR0_TS))
 			return false;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f3f10154c1339..6753f09827914 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2063,8 +2063,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
 
-	/* Bits 2:5 are reserved, Should be zero */
-	if (data & 0x3c)
+	/* Bits 3:5 are reserved, Should be zero */
+	if (data & 0x38)
 		return 1;
 
 	vcpu->arch.apf.msr_val = data;
@@ -2080,6 +2080,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 		return 1;
 
 	vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+	vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
 	kvm_async_pf_wakeup_all(vcpu);
 	return 0;
 }
-- 
GitLab


From d3457c877b14aaee8c52923eedf05a3b78af0476 Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan@virtuozzo.com>
Date: Fri, 14 Jul 2017 17:13:20 +0300
Subject: [PATCH 1521/1958] kvm: x86: hyperv: make VP_INDEX managed by
 userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hyper-V identifies vCPUs by Virtual Processor Index, which can be
queried via HV_X64_MSR_VP_INDEX msr.  It is defined by the spec as a
sequential number which can't exceed the maximum number of vCPUs per VM.
APIC ids can be sparse and thus aren't a valid replacement for VP
indices.

Current KVM uses its internal vcpu index as VP_INDEX.  However, to make
it predictable and persistent across VM migrations, the userspace has to
control the value of VP_INDEX.

This patch achieves that, by storing vp_index explicitly on vcpu, and
allowing HV_X64_MSR_VP_INDEX to be set from the host side.  For
compatibility it's initialized to KVM vcpu index.  Also a few variables
are renamed to make clear distinction betweed this Hyper-V vp_index and
KVM vcpu_id (== APIC id).  Besides, a new capability,
KVM_CAP_HYPERV_VP_INDEX, is added to allow the userspace to skip
attempting msr writes where unsupported, to avoid spamming error logs.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  9 ++++++
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/hyperv.c             | 54 ++++++++++++++++++++-----------
 arch/x86/kvm/hyperv.h             |  1 +
 arch/x86/kvm/x86.c                |  3 ++
 include/uapi/linux/kvm.h          |  1 +
 6 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 78ac577c93781..e63a35fafef0e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4338,3 +4338,12 @@ This capability enables a newer version of Hyper-V Synthetic interrupt
 controller (SynIC).  The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
 doesn't clear SynIC message and event flags pages when they are enabled by
 writing to the respective MSRs.
+
+8.12 KVM_CAP_HYPERV_VP_INDEX
+
+Architectures: x86
+
+This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr.  Its
+value is used to denote the target vcpu for a SynIC interrupt.  For
+compatibilty, KVM initializes this msr to KVM's internal vcpu index.  When this
+capability is absent, userspace can still query this msr's value.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index da3261e384d3a..87ac4fba6d8e1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -467,6 +467,7 @@ struct kvm_vcpu_hv_synic {
 
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
+	u32 vp_index;
 	u64 hv_vapic;
 	s64 runtime_offset;
 	struct kvm_vcpu_hv_synic synic;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a8084406707e3..2695a34fa1c51 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
 	return 0;
 }
 
-static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
+static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	if (vpidx < KVM_MAX_VCPUS)
+		vcpu = kvm_get_vcpu(kvm, vpidx);
+	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+		return vcpu;
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+			return vcpu;
+	return NULL;
+}
+
+static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu_hv_synic *synic;
 
-	if (vcpu_id >= atomic_read(&kvm->online_vcpus))
-		return NULL;
-	vcpu = kvm_get_vcpu(kvm, vcpu_id);
+	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
 	if (!vcpu)
 		return NULL;
 	synic = vcpu_to_synic(vcpu);
@@ -320,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
 	return ret;
 }
 
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
 {
 	struct kvm_vcpu_hv_synic *synic;
 
-	synic = synic_get(kvm, vcpu_id);
+	synic = synic_get(kvm, vpidx);
 	if (!synic)
 		return -EINVAL;
 
@@ -343,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
 			kvm_hv_notify_acked_sint(vcpu, i);
 }
 
-static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
+static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
 {
 	struct kvm_vcpu_hv_synic *synic;
 
-	synic = synic_get(kvm, vcpu_id);
+	synic = synic_get(kvm, vpidx);
 	if (!synic)
 		return -EINVAL;
 
@@ -689,6 +702,13 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 		stimer_init(&hv_vcpu->stimer[i], i);
 }
 
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+
+	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+}
+
 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 {
 	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
@@ -983,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
 	switch (msr) {
+	case HV_X64_MSR_VP_INDEX:
+		if (!host)
+			return 1;
+		hv->vp_index = (u32)data;
+		break;
 	case HV_X64_MSR_APIC_ASSIST_PAGE: {
 		u64 gfn;
 		unsigned long addr;
@@ -1094,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
 	switch (msr) {
-	case HV_X64_MSR_VP_INDEX: {
-		int r;
-		struct kvm_vcpu *v;
-
-		kvm_for_each_vcpu(r, v, vcpu->kvm) {
-			if (v == vcpu) {
-				data = r;
-				break;
-			}
-		}
+	case HV_X64_MSR_VP_INDEX:
+		data = hv->vp_index;
 		break;
-	}
 	case HV_X64_MSR_EOI:
 		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
 	case HV_X64_MSR_ICR:
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 12f65fe1011d5..e637631a9574f 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -59,6 +59,7 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 
 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6753f09827914..5b8f07889f6a5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2666,6 +2666,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_SPIN:
 	case KVM_CAP_HYPERV_SYNIC:
 	case KVM_CAP_HYPERV_SYNIC2:
+	case KVM_CAP_HYPERV_VP_INDEX:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -7688,6 +7689,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 	struct msr_data msr;
 	struct kvm *kvm = vcpu->kvm;
 
+	kvm_hv_vcpu_postcreate(vcpu);
+
 	if (vcpu_load(vcpu))
 		return;
 	msr.data = 0x0;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 38b2cfbc81120..6cd63c18708ae 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -928,6 +928,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_FWNMI 146
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
 #define KVM_CAP_HYPERV_SYNIC2 148
+#define KVM_CAP_HYPERV_VP_INDEX 149
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From 457839ed3e78618cf0354cf79a1f47fe6eb26aef Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Mon, 10 Jul 2017 14:35:23 +0200
Subject: [PATCH 1522/1958] mdio: mux: fix parsing mux registers outside of the
 PHY address range

mdio_mux_init parses the child nodes of the MDIO mux. When using
"mdio-mux-mmioreg" the child nodes are describing the register value
that is written to switch between the MDIO busses.

The change which makes the error messages more verbose changed the
parsing of the "reg" property from a simple of_property_read_u32 call
to of_mdio_parse_addr. On a Khadas VIM (based on the Meson GXL SoC,
which uses mdio-mux-mmioreg) this prevents registering the MDIO mux
(because the "reg" values on the MDIO mux child nodes are 0x2009087f
and 0xe40908ff) and leads to the following errors:
  mdio-mux-mmioreg c883455c.eth-phy-mux: /soc/periphs@c8834000/eth-phy-mux/mdio@e40908ff PHY address -469169921 is too large
  mdio-mux-mmioreg c883455c.eth-phy-mux: Error: Failed to find reg for child /soc/periphs@c8834000/eth-phy-mux/mdio@e40908ff
  mdio-mux-mmioreg c883455c.eth-phy-mux: /soc/periphs@c8834000/eth-phy-mux/mdio@2009087f PHY address 537462911 is too large
  mdio-mux-mmioreg c883455c.eth-phy-mux: Error: Failed to find reg for child /soc/periphs@c8834000/eth-phy-mux/mdio@2009087f
  mdio-mux-mmioreg c883455c.eth-phy-mux: Error: No acceptable child buses found
  mdio-mux-mmioreg c883455c.eth-phy-mux: failed to register mdio-mux bus /soc/periphs@c8834000/eth-phy-mux
(as a result of that ethernet is not working, because the PHY which is
connected through the mux' child MDIO bus, which is not being
registered).

Fix this by reverting the change from of_mdio_parse_addr to
of_mdio_parse_addr.

Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose")
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mux.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index 00755b6a42cf1..c608e1dfaf097 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -135,8 +135,8 @@ int mdio_mux_init(struct device *dev,
 	for_each_available_child_of_node(dev->of_node, child_bus_node) {
 		int v;
 
-		v = of_mdio_parse_addr(dev, child_bus_node);
-		if (v < 0) {
+		r = of_property_read_u32(child_bus_node, "reg", &v);
+		if (r) {
 			dev_err(dev,
 				"Error: Failed to find reg for child %s\n",
 				of_node_full_name(child_bus_node));
-- 
GitLab


From 2b02c20ce0c28974b44e69a2e2f5ddc6a470ad6f Mon Sep 17 00:00:00 2001
From: Enrico Mioso <mrkiko.rs@gmail.com>
Date: Tue, 11 Jul 2017 17:21:52 +0200
Subject: [PATCH 1523/1958] cdc_ncm: Set NTB format again after altsetting
 switch for Huawei devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some firmwares in Huawei E3372H devices have been observed to switch back
to NTB 32-bit format after altsetting switch.
This patch implements a driver flag to check for the device settings and
set NTB format to 16-bit again if needed.
The flag has been activated for devices controlled by the huawei_cdc_ncm.c
driver.

V1->V2:
- fixed broken error checks
- some corrections to the commit message
V2->V3:
- variable name changes, to clarify what's happening
- check (and possibly set) the NTB format later in the common bind code path

Signed-off-by: Enrico Mioso <mrkiko.rs@gmail.com>
Reported-and-tested-by: Christian Panton <christian@panton.org>
Reviewed-by: Bjørn Mork <bjorn@mork.no>
CC: Bjørn Mork <bjorn@mork.no>
CC: Christian Panton <christian@panton.org>
CC: linux-usb@vger.kernel.org
CC: netdev@vger.kernel.org
CC: Oliver Neukum <oliver@neukum.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c        | 28 ++++++++++++++++++++++++++++
 drivers/net/usb/huawei_cdc_ncm.c |  6 ++++++
 include/linux/usb/cdc_ncm.h      |  1 +
 3 files changed, 35 insertions(+)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index d103a1d4fb367..8f572b9f36255 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -768,8 +768,10 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	u8 *buf;
 	int len;
 	int temp;
+	int err;
 	u8 iface_no;
 	struct usb_cdc_parsed_header hdr;
+	u16 curr_ntb_format;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -874,6 +876,32 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 		goto error2;
 	}
 
+	/*
+	 * Some Huawei devices have been observed to come out of reset in NDP32 mode.
+	 * Let's check if this is the case, and set the device to NDP16 mode again if
+	 * needed.
+	*/
+	if (ctx->drvflags & CDC_NCM_FLAG_RESET_NTB16) {
+		err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_FORMAT,
+				      USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
+				      0, iface_no, &curr_ntb_format, 2);
+		if (err < 0) {
+			goto error2;
+		}
+
+		if (curr_ntb_format == USB_CDC_NCM_NTB32_FORMAT) {
+			dev_info(&intf->dev, "resetting NTB format to 16-bit");
+			err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT,
+					       USB_TYPE_CLASS | USB_DIR_OUT
+					       | USB_RECIP_INTERFACE,
+					       USB_CDC_NCM_NTB16_FORMAT,
+					       iface_no, NULL, 0);
+
+			if (err < 0)
+				goto error2;
+		}
+	}
+
 	cdc_ncm_find_endpoints(dev, ctx->data);
 	cdc_ncm_find_endpoints(dev, ctx->control);
 	if (!dev->in || !dev->out || !dev->status) {
diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c
index 2680a65cd5e4f..63f28908afda7 100644
--- a/drivers/net/usb/huawei_cdc_ncm.c
+++ b/drivers/net/usb/huawei_cdc_ncm.c
@@ -80,6 +80,12 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev,
 	 * be at the end of the frame.
 	 */
 	drvflags |= CDC_NCM_FLAG_NDP_TO_END;
+
+	/* Additionally, it has been reported that some Huawei E3372H devices, with
+	 * firmware version 21.318.01.00.541, come out of reset in NTB32 format mode, hence
+	 * needing to be set to the NTB16 one again.
+	 */
+	drvflags |= CDC_NCM_FLAG_RESET_NTB16;
 	ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags);
 	if (ret)
 		goto err;
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 021f7a88f52c9..1a59699cf82a9 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -83,6 +83,7 @@
 /* Driver flags */
 #define CDC_NCM_FLAG_NDP_TO_END			0x02	/* NDP is placed at end of frame */
 #define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE	0x04	/* Avoid altsetting toggle during init */
+#define CDC_NCM_FLAG_RESET_NTB16 0x08	/* set NDP16 one more time after altsetting switch */
 
 #define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
 				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
-- 
GitLab


From 230cd1279d0019d52f9529c7d91c96d095cae755 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 12 Jul 2017 15:56:41 -0700
Subject: [PATCH 1524/1958] netpoll: shut up a kernel warning on refcount

When we convert atomic_t to refcount_t, a new kernel warning
on "increment on 0" is introduced in the netpoll code,
zap_completion_queue(). In fact for this special case, we know
the refcount is 0 and we just have to set it to 1 to satisfy
the following dev_kfree_skb_any(), so we can just use
refcount_set(..., 1) instead.

Fixes: 633547973ffc ("net: convert sk_buff.users from atomic_t to refcount_t")
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Cc: Reshetova, Elena <elena.reshetova@intel.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d3408a6931662..8357f164c6609 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -277,7 +277,7 @@ static void zap_completion_queue(void)
 			struct sk_buff *skb = clist;
 			clist = clist->next;
 			if (!skb_irq_freeable(skb)) {
-				refcount_inc(&skb->users);
+				refcount_set(&skb->users, 1);
 				dev_kfree_skb_any(skb); /* put this one back */
 			} else {
 				__kfree_skb(skb);
-- 
GitLab


From 1e4babee70a2e2d8f9e0da06f013563b0e26f654 Mon Sep 17 00:00:00 2001
From: LiuJian <liujian56@huawei.com>
Date: Thu, 13 Jul 2017 18:57:54 +0800
Subject: [PATCH 1525/1958] net: hns: add acpi function of xge led control

The current code only support DT method to control xge led.
This patch is the implementation of acpi method to control xge led.

Signed-off-by: LiuJian <liujian56@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Yunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: Daode Huang <huangdaode@hisilicon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns/hns_ae_adapt.c |  3 +-
 .../ethernet/hisilicon/hns/hns_dsaf_misc.c    | 58 ++++++++++++++++++-
 2 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index ff864a187d5a7..a37166ee577b7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -776,8 +776,9 @@ void hns_ae_update_led_status(struct hnae_handle *handle)
 
 	assert(handle);
 	mac_cb = hns_get_mac_cb(handle);
-	if (!mac_cb->cpld_ctrl)
+	if (mac_cb->media_type != HNAE_MEDIA_TYPE_FIBER)
 		return;
+
 	hns_set_led_opt(mac_cb);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 7a8addda726e3..408b63faf9a81 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -53,6 +53,34 @@ static u32 dsaf_read_sub(struct dsaf_device *dsaf_dev, u32 reg)
 	return ret;
 }
 
+static void hns_dsaf_acpi_ledctrl_by_port(struct hns_mac_cb *mac_cb, u8 op_type,
+                                      u32 link, u32 port, u32 act)
+{
+       union acpi_object *obj;
+       union acpi_object obj_args[3], argv4;
+
+       obj_args[0].integer.type = ACPI_TYPE_INTEGER;
+       obj_args[0].integer.value = link;
+       obj_args[1].integer.type = ACPI_TYPE_INTEGER;
+       obj_args[1].integer.value = port;
+       obj_args[2].integer.type = ACPI_TYPE_INTEGER;
+       obj_args[2].integer.value = act;
+
+       argv4.type = ACPI_TYPE_PACKAGE;
+       argv4.package.count = 3;
+       argv4.package.elements = obj_args;
+
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
+                               &hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
+       if (!obj) {
+               dev_warn(mac_cb->dev, "ledctrl fail, link:%d port:%d act:%d!\n",
+                        link, port, act);
+               return;
+       }
+
+       ACPI_FREE(obj);
+}
+
 static void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status,
 			     u16 speed, int data)
 {
@@ -93,6 +121,18 @@ static void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status,
 	}
 }
 
+static void hns_cpld_set_led_acpi(struct hns_mac_cb *mac_cb, int link_status,
+                            u16 speed, int data)
+{
+       if (!mac_cb) {
+               pr_err("cpld_led_set mac_cb is null!\n");
+               return;
+       }
+
+       hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC,
+               link_status, mac_cb->mac_id, data);
+}
+
 static void cpld_led_reset(struct hns_mac_cb *mac_cb)
 {
 	if (!mac_cb || !mac_cb->cpld_ctrl)
@@ -103,6 +143,20 @@ static void cpld_led_reset(struct hns_mac_cb *mac_cb)
 	mac_cb->cpld_led_value = CPLD_LED_DEFAULT_VALUE;
 }
 
+static void cpld_led_reset_acpi(struct hns_mac_cb *mac_cb)
+{
+       if (!mac_cb) {
+               pr_err("cpld_led_reset mac_cb is null!\n");
+               return;
+       }
+
+       if (mac_cb->media_type != HNAE_MEDIA_TYPE_FIBER)
+                return;
+
+       hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC,
+               0, mac_cb->mac_id, 0);
+}
+
 static int cpld_set_led_id(struct hns_mac_cb *mac_cb,
 			   enum hnae_led_state status)
 {
@@ -604,8 +658,8 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 
 		misc_op->cfg_serdes_loopback = hns_mac_config_sds_loopback;
 	} else if (is_acpi_node(dsaf_dev->dev->fwnode)) {
-		misc_op->cpld_set_led = hns_cpld_set_led;
-		misc_op->cpld_reset_led = cpld_led_reset;
+		misc_op->cpld_set_led = hns_cpld_set_led_acpi;
+		misc_op->cpld_reset_led = cpld_led_reset_acpi;
 		misc_op->cpld_set_led_id = cpld_set_led_id;
 
 		misc_op->dsaf_reset = hns_dsaf_rst_acpi;
-- 
GitLab


From 40fbbce007cb458da4c15cbf53beacf6b755cedc Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Thu, 13 Jul 2017 18:36:50 +0530
Subject: [PATCH 1526/1958] cxgb4: ptp_clock_register() returns error pointers

Check ptp_clock_register() return not only for NULL but
also for error pointers, and also nullify adapter->ptp_clock
if ptp_clock_register() fails.

Fixes: 9c33e4208bce ("cxgb4: Add PTP Hardware Clock (PHC) support")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
index 50517cfd96715..9f9d6cae39d55 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
@@ -441,7 +441,8 @@ void cxgb4_ptp_init(struct adapter *adapter)
 
 	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_clock_info,
 						&adapter->pdev->dev);
-	if (!adapter->ptp_clock) {
+	if (IS_ERR_OR_NULL(adapter->ptp_clock)) {
+		adapter->ptp_clock = NULL;
 		dev_err(adapter->pdev_dev,
 			"PTP %s Clock registration has failed\n", __func__);
 		return;
-- 
GitLab


From 31a4562d7408493c6377933ff2f7d7302dbdea80 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Thu, 13 Jul 2017 16:09:10 +0300
Subject: [PATCH 1527/1958] net: bridge: fix dest lookup when vlan proto
 doesn't match

With 802.1ad support the vlan_ingress code started checking for vlan
protocol mismatch which causes the current tag to be inserted and the
bridge vlan protocol & pvid to be set. The vlan tag insertion changes
the skb mac_header and thus the lookup mac dest pointer which was loaded
prior to calling br_allowed_ingress in br_handle_frame_finish is VLAN_HLEN
bytes off now, pointing to the last two bytes of the destination mac and
the first four of the source mac causing lookups to always fail and
broadcasting all such packets to all ports. Same thing happens for locally
originated packets when passing via br_dev_xmit. So load the dest pointer
after the vlan checks and possible skb change.

Fixes: 8580e2117c06 ("bridge: Prepare for 802.1ad vlan filtering support")
Reported-by: Anitha Narasimha Murthy <anitha@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Acked-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 3 ++-
 net/bridge/br_input.c  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f0f3447e8aa48..861ae2a165f4d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -34,11 +34,11 @@ static struct lock_class_key bridge_netdev_addr_lock_key;
 netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	const unsigned char *dest = skb->data;
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
 	struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
 	const struct nf_br_ops *nf_ops;
+	const unsigned char *dest;
 	u16 vid = 0;
 
 	rcu_read_lock();
@@ -61,6 +61,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
 		goto out;
 
+	dest = eth_hdr(skb)->h_dest;
 	if (is_broadcast_ether_addr(dest)) {
 		br_flood(br, skb, BR_PKT_BROADCAST, false, true);
 	} else if (is_multicast_ether_addr(dest)) {
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 013f2290bfa56..7637f58c12263 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,11 +131,11 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
-	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	enum br_pkt_type pkt_type = BR_PKT_UNICAST;
 	struct net_bridge_fdb_entry *dst = NULL;
 	struct net_bridge_mdb_entry *mdst;
 	bool local_rcv, mcast_hit = false;
+	const unsigned char *dest;
 	struct net_bridge *br;
 	u16 vid = 0;
 
@@ -153,6 +153,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
 
 	local_rcv = !!(br->dev->flags & IFF_PROMISC);
+	dest = eth_hdr(skb)->h_dest;
 	if (is_multicast_ether_addr(dest)) {
 		/* by definition the broadcast is also a multicast address */
 		if (is_broadcast_ether_addr(dest)) {
-- 
GitLab


From ccd4eb49f3392ebf989d58bd013a7bf44cdca4d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Iv=C3=A1n=20Briano?= <ivan.briano@intel.com>
Date: Thu, 13 Jul 2017 09:46:58 -0700
Subject: [PATCH 1528/1958] net/packet: Fix Tx queue selection for AF_PACKET
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When PACKET_QDISC_BYPASS is not used, Tx queue selection will be done
before the packet is enqueued, taking into account any mappings set by
a queuing discipline such as mqprio without hardware offloading. This
selection may be affected by a previously saved queue_mapping, either on
the Rx path, or done before the packet reaches the device, as it's
currently the case for AF_PACKET.

In order for queue selection to work as expected when using traffic
control, there can't be another selection done before that point is
reached, so move the call to packet_pick_tx_queue to
packet_direct_xmit, leaving the default xmit path as it was before
PACKET_QDISC_BYPASS was introduced.

A forward declaration of packet_pick_tx_queue() is introduced to avoid
the need to reorder the functions within the file.

Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option")
Signed-off-by: Iván Briano <ivan.briano@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e3beb28203ebe..008bb34ee3240 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -214,6 +214,7 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *,
 static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
 		struct tpacket3_hdr *);
 static void packet_flush_mclist(struct sock *sk);
+static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb);
 
 struct packet_skb_cb {
 	union {
@@ -260,6 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
 	if (skb != orig_skb)
 		goto drop;
 
+	packet_pick_tx_queue(dev, skb);
 	txq = skb_get_tx_queue(dev, skb);
 
 	local_bh_disable();
@@ -2747,8 +2749,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			goto tpacket_error;
 		}
 
-		packet_pick_tx_queue(dev, skb);
-
 		skb->destructor = tpacket_destruct_skb;
 		__packet_set_status(po, ph, TP_STATUS_SENDING);
 		packet_inc_pending(&po->tx_ring);
@@ -2931,8 +2931,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	skb->priority = sk->sk_priority;
 	skb->mark = sockc.mark;
 
-	packet_pick_tx_queue(dev, skb);
-
 	if (po->has_vnet_hdr) {
 		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
 		if (err)
-- 
GitLab


From c4c4290c17bd099b7654d683f8ab6233b4f4a364 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Thu, 13 Jul 2017 13:12:18 -0400
Subject: [PATCH 1529/1958] net sched actions: rename act_get_notify() to
 tcf_get_notify()

Make name consistent with other TC event notification routines, such as
tcf_add_notify() and tcf_del_notify()

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index aed6cf2e9fd87..f2e9ed34a963e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -835,7 +835,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
 }
 
 static int
-act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
+tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 	       struct list_head *actions, int event)
 {
 	struct sk_buff *skb;
@@ -1018,7 +1018,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	}
 
 	if (event == RTM_GETACTION)
-		ret = act_get_notify(net, portid, n, &actions, event);
+		ret = tcf_get_notify(net, portid, n, &actions, event);
 	else { /* delete */
 		ret = tcf_del_notify(net, n, &actions, portid);
 		if (ret)
-- 
GitLab


From a8f5cb9e79913a6918f87495d4969706f8817b8e Mon Sep 17 00:00:00 2001
From: Petr Kulhavy <brain@jikos.cz>
Date: Thu, 13 Jul 2017 19:40:57 +0200
Subject: [PATCH 1530/1958] smsc95xx: use ethtool_op_get_ts_info()

This change enables the use of SW timestamping on Raspberry PI.

smsc95xx uses the usbnet transmit function usbnet_start_xmit(), which
implements software timestamping. However the SOF_TIMESTAMPING_TX_SOFTWARE
capability was missing and only SOF_TIMESTAMPING_RX_SOFTWARE was announced.
By using ethtool_op_get_ts_info() as get_ts_info() also the
SOF_TIMESTAMPING_TX_SOFTWARE is announced.

Signed-off-by: Petr Kulhavy <brain@jikos.cz>
Reviewed-by: Woojung Huh <Woojung.Huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc95xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 2dfca96a63b60..340c13484e5cc 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -898,6 +898,7 @@ static const struct ethtool_ops smsc95xx_ethtool_ops = {
 	.set_wol	= smsc95xx_ethtool_set_wol,
 	.get_link_ksettings	= smsc95xx_get_link_ksettings,
 	.set_link_ksettings	= smsc95xx_set_link_ksettings,
+	.get_ts_info	= ethtool_op_get_ts_info,
 };
 
 static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
-- 
GitLab


From c98b0537f0d962b46c62c27b6c2d783257f7400f Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Thu, 13 Jul 2017 15:45:41 -0500
Subject: [PATCH 1531/1958] net: qcom/emac: fix double free of SGMII IRQ during
 shutdown

If the interface is not up, then don't try to close it during a
shutdown.  This avoids possible double free of the IRQ, which
can happen during a shutdown.

Fixes: 03eb3eb4d4d5 ("net: qcom/emac: add shutdown function")
Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/emac/emac.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 746d94e28470f..60850bfa3d32e 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -766,11 +766,13 @@ static void emac_shutdown(struct platform_device *pdev)
 	struct emac_adapter *adpt = netdev_priv(netdev);
 	struct emac_sgmii *sgmii = &adpt->phy;
 
-	/* Closing the SGMII turns off its interrupts */
-	sgmii->close(adpt);
+	if (netdev->flags & IFF_UP) {
+		/* Closing the SGMII turns off its interrupts */
+		sgmii->close(adpt);
 
-	/* Resetting the MAC turns off all DMA and its interrupts */
-	emac_mac_reset(adpt);
+		/* Resetting the MAC turns off all DMA and its interrupts */
+		emac_mac_reset(adpt);
+	}
 }
 
 static struct platform_driver emac_platform_driver = {
-- 
GitLab


From 45e0b4b3d532ea67bf90701e6162f31855ab3c98 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 14:07:00 +0200
Subject: [PATCH 1532/1958] isdn: divert: fix sprintf buffer overflow warning

One string we pass into the cs->info buffer might be too long,
as pointed out by gcc:

drivers/isdn/divert/isdn_divert.c: In function 'll_callback':
drivers/isdn/divert/isdn_divert.c:488:22: error: '%d' directive writing between 1 and 3 bytes into a region of size between 1 and 69 [-Werror=format-overflow=]
 sprintf(cs->info, "%d 0x%lx %s %s %s %s 0x%x 0x%x %d %d %s\n",
                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/isdn/divert/isdn_divert.c:488:22: note: directive argument in the range [0, 255]
drivers/isdn/divert/isdn_divert.c:488:4: note: 'sprintf' output 25 or more bytes (assuming 129) into a destination of size 90

This is unlikely to actually cause problems, so let's use snprintf
as a simple workaround to shut  up the warning and truncate the
buffer instead.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/divert/isdn_divert.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/isdn/divert/isdn_divert.c b/drivers/isdn/divert/isdn_divert.c
index 060d357f107f8..6f423bc49d0dc 100644
--- a/drivers/isdn/divert/isdn_divert.c
+++ b/drivers/isdn/divert/isdn_divert.c
@@ -485,18 +485,19 @@ static int isdn_divert_icall(isdn_ctrl *ic)
 				cs->deflect_dest[0] = '\0';
 				retval = 4; /* only proceed */
 			}
-			sprintf(cs->info, "%d 0x%lx %s %s %s %s 0x%x 0x%x %d %d %s\n",
-				cs->akt_state,
-				cs->divert_id,
-				divert_if.drv_to_name(cs->ics.driver),
-				(ic->command == ISDN_STAT_ICALLW) ? "1" : "0",
-				cs->ics.parm.setup.phone,
-				cs->ics.parm.setup.eazmsn,
-				cs->ics.parm.setup.si1,
-				cs->ics.parm.setup.si2,
-				cs->ics.parm.setup.screen,
-				dv->rule.waittime,
-				cs->deflect_dest);
+			snprintf(cs->info, sizeof(cs->info),
+				 "%d 0x%lx %s %s %s %s 0x%x 0x%x %d %d %s\n",
+				 cs->akt_state,
+				 cs->divert_id,
+				 divert_if.drv_to_name(cs->ics.driver),
+				 (ic->command == ISDN_STAT_ICALLW) ? "1" : "0",
+				 cs->ics.parm.setup.phone,
+				 cs->ics.parm.setup.eazmsn,
+				 cs->ics.parm.setup.si1,
+				 cs->ics.parm.setup.si2,
+				 cs->ics.parm.setup.screen,
+				 dv->rule.waittime,
+				 cs->deflect_dest);
 			if ((dv->rule.action == DEFLECT_REPORT) ||
 			    (dv->rule.action == DEFLECT_REJECT)) {
 				put_info_buffer(cs->info);
-- 
GitLab


From 73066f6c53fea76f95caadd70d802d24db747303 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 14:07:01 +0200
Subject: [PATCH 1533/1958] net: niu: fix format string overflow warning:

We get a warning for the port_name string that might be longer than
six characters if we had more than 10 ports:

drivers/net/ethernet/sun/niu.c: In function 'niu_put_parent':
drivers/net/ethernet/sun/niu.c:9563:21: error: '%d' directive writing between 1 and 3 bytes into a region of size 2 [-Werror=format-overflow=]
  sprintf(port_name, "port%d", port);
                     ^~~~~~~~
drivers/net/ethernet/sun/niu.c:9563:21: note: directive argument in the range [0, 255]
drivers/net/ethernet/sun/niu.c:9563:2: note: 'sprintf' output between 6 and 8 bytes into a destination of size 6
  sprintf(port_name, "port%d", port);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/sun/niu.c: In function 'niu_pci_init_one':
drivers/net/ethernet/sun/niu.c:9538:22: error: '%d' directive writing between 1 and 3 bytes into a region of size 2 [-Werror=format-overflow=]
   sprintf(port_name, "port%d", port);
                      ^~~~~~~~
drivers/net/ethernet/sun/niu.c:9538:22: note: directive argument in the range [0, 255]
drivers/net/ethernet/sun/niu.c:9538:3: note: 'sprintf' output between 6 and 8 bytes into a destination of size 6

While we know that the port number is small, there is no harm in
making the format string two bytes longer to avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/niu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 46cb7f8955a22..4bb04aaf9650e 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9532,7 +9532,7 @@ static struct niu_parent *niu_get_parent(struct niu *np,
 		p = niu_new_parent(np, id, ptype);
 
 	if (p) {
-		char port_name[6];
+		char port_name[8];
 		int err;
 
 		sprintf(port_name, "port%d", port);
@@ -9553,7 +9553,7 @@ static void niu_put_parent(struct niu *np)
 {
 	struct niu_parent *p = np->parent;
 	u8 port = np->port;
-	char port_name[6];
+	char port_name[8];
 
 	BUG_ON(!p || p->ports[port] != np);
 
-- 
GitLab


From be9cdf1b97023cf5548bd4f6920bb546272ebabc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 14:07:02 +0200
Subject: [PATCH 1534/1958] bnx2x: fix format overflow warning

gcc notices that large queue numbers would overflow the queue name
string:

drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c: In function 'bnx2x_get_strings':
drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c:3165:25: error: '%d' directive writing between 1 and 10 bytes into a region of size 5 [-Werror=format-overflow=]
drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c:3165:25: note: directive argument in the range [0, 2147483647]
drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c:3165:5: note: 'sprintf' output between 2 and 11 bytes into a destination of size 5

There is a hard limit in place that makes the number at most two
digits, so the code is fine. This changes it to use snprintf()
to truncate instead of overflowing, which shuts up that warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 21bc4bed6b260..1e33abde4a3e8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -3162,7 +3162,8 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 		if (is_multi(bp)) {
 			for_each_eth_queue(bp, i) {
 				memset(queue_name, 0, sizeof(queue_name));
-				sprintf(queue_name, "%d", i);
+				snprintf(queue_name, sizeof(queue_name),
+					 "%d", i);
 				for (j = 0; j < BNX2X_NUM_Q_STATS; j++)
 					snprintf(buf + (k + j)*ETH_GSTRING_LEN,
 						ETH_GSTRING_LEN,
-- 
GitLab


From c41626ce3113eb2d40b5aa1c4fc2b0cd2785367b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 14:07:03 +0200
Subject: [PATCH 1535/1958] net: thunder_bgx: avoid format string overflow
 warning

gcc warns that the temporary buffer might be too small here:

drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function 'bgx_probe':
drivers/net/ethernet/cavium/thunder/thunder_bgx.c:1020:16: error: '%d' directive writing between 1 and 10 bytes into a region of size between 9 and 11 [-Werror=format-overflow=]
sprintf(str, "BGX%d LMAC%d mode", bgx->bgx_id, lmacid);
             ^~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/cavium/thunder/thunder_bgx.c:1020:16: note: directive argument in the range [0, 2147483647]
drivers/net/ethernet/cavium/thunder/thunder_bgx.c:1020:3: note: 'sprintf' output between 16 and 27 bytes into a destination of size 20

This probably can't happen, but it can't hurt to make it long
enough for the theoretical limit.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index a0ca68ce3fbb1..79112563a25ae 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1008,7 +1008,7 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
 {
 	struct device *dev = &bgx->pdev->dev;
 	struct lmac *lmac;
-	char str[20];
+	char str[27];
 
 	if (!bgx->is_dlm && lmacid)
 		return;
-- 
GitLab


From c7673e4dea9a338e00fa26cdd42d3697e8e22319 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 14:07:04 +0200
Subject: [PATCH 1536/1958] vmxnet3: avoid format strint overflow warning

gcc-7 notices that "-event-%d" could be more than 11 characters long
if we had larger 'vector' numbers:

drivers/net/vmxnet3/vmxnet3_drv.c: In function 'vmxnet3_activate_dev':
drivers/net/vmxnet3/vmxnet3_drv.c:2095:40: error: 'sprintf' may write a terminating nul past the end of the destination [-Werror=format-overflow=]
sprintf(intr->event_msi_vector_name, "%s-event-%d",
                                     ^~~~~~~~~~~~~
drivers/net/vmxnet3/vmxnet3_drv.c:2095:3: note: 'sprintf' output between 9 and 33 bytes into a destination of size 32

The current code is safe, but making the string a little longer
is harmless and lets gcc see that it's ok.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_int.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index ba1c9f93592b8..9c51b8be00388 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -311,7 +311,7 @@ struct vmxnet3_intr {
 	u8  num_intrs;			/* # of intr vectors */
 	u8  event_intr_idx;		/* idx of the intr vector for event */
 	u8  mod_levels[VMXNET3_LINUX_MAX_MSIX_VECT]; /* moderation level */
-	char	event_msi_vector_name[IFNAMSIZ+11];
+	char	event_msi_vector_name[IFNAMSIZ+17];
 #ifdef CONFIG_PCI_MSI
 	struct msix_entry msix_entries[VMXNET3_LINUX_MAX_MSIX_VECT];
 #endif
-- 
GitLab


From 56c0da495a0b38f8ac0c0c0e3fcc750ea449daea Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 14:07:05 +0200
Subject: [PATCH 1537/1958] liquidio: fix possible eeprom format string
 overflow

gcc reports that the temporary buffer for computing the
string length may be too small here:

drivers/net/ethernet/cavium/liquidio/lio_ethtool.c: In function 'lio_get_eeprom_len':
/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c:345:21: error: 'sprintf' may write a terminating nul past the end of the destination [-Werror=format-overflow=]
  len = sprintf(buf, "boardname:%s serialnum:%s maj:%lld min:%lld\n",
                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c:345:6: note: 'sprintf' output between 35 and 167 bytes into a destination of size 128
  len = sprintf(buf, "boardname:%s serialnum:%s maj:%lld min:%lld\n",

This extends it to 192 bytes, which is certainly enough. As far
as I could tell, there are no other constraints that require a specific
maximum size.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 28ecda3d34049..ebd353bc78ffa 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -335,7 +335,7 @@ lio_ethtool_get_channels(struct net_device *dev,
 
 static int lio_get_eeprom_len(struct net_device *netdev)
 {
-	u8 buf[128];
+	u8 buf[192];
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct_dev = lio->oct_dev;
 	struct octeon_board_info *board_info;
-- 
GitLab


From 10b3bf54406bb7f4e78da9bb2a485c5c986678ad Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 14 Jul 2017 22:07:33 +0800
Subject: [PATCH 1538/1958] sctp: fix an array overflow when all ext chunks are
 set

Marcelo noticed an array overflow caused by commit c28445c3cb07
("sctp: add reconf_enable in asoc ep and netns"), in which sctp
would add SCTP_CID_RECONF into extensions when reconf_enable is
set in sctp_make_init and sctp_make_init_ack.

Then now when all ext chunks are set, 4 ext chunk ids can be put
into extensions array while extensions array size is 3. It would
cause a kernel panic because of this overflow.

This patch is to fix it by defining extensions array size is 4 in
both sctp_make_init and sctp_make_init_ack.

Fixes: c28445c3cb07 ("sctp: add reconf_enable in asoc ep and netns")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4e16b02ed8321..6110447fe51dc 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	sctp_adaptation_ind_param_t aiparam;
 	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
-	__u8 extensions[3];
+	__u8 extensions[4];
 	struct sctp_paramhdr *auth_chunks = NULL,
 			*auth_hmacs = NULL;
 
@@ -396,7 +396,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	sctp_adaptation_ind_param_t aiparam;
 	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
-	__u8 extensions[3];
+	__u8 extensions[4];
 	struct sctp_paramhdr *auth_chunks = NULL,
 			*auth_hmacs = NULL,
 			*auth_random = NULL;
-- 
GitLab


From 9d53caec84c7c5700e7c1ed744ea584fff55f9ac Mon Sep 17 00:00:00 2001
From: Jane Chu <jane.chu@oracle.com>
Date: Tue, 11 Jul 2017 12:00:54 -0600
Subject: [PATCH 1539/1958] sparc64: Measure receiver forward progress to avoid
 send mondo timeout

A large sun4v SPARC system may have moments of intensive xcall activities,
usually caused by unmapping many pages on many CPUs concurrently. This can
flood receivers with CPU mondo interrupts for an extended period, causing
some unlucky senders to hit send-mondo timeout. This problem gets worse
as cpu count increases because sometimes mappings must be invalidated on
all CPUs, and sometimes all CPUs may gang up on a single CPU.

But a busy system is not a broken system. In the above scenario, as long
as the receiver is making forward progress processing mondo interrupts,
the sender should continue to retry.

This patch implements the receiver's forward progress meter by introducing
a per cpu counter 'cpu_mondo_counter[cpu]' where 'cpu' is in the range
of 0..NR_CPUS. The receiver increments its counter as soon as it receives
a mondo and the sender tracks the receiver's counter. If the receiver has
stopped making forward progress when the retry limit is reached, the sender
declares send-mondo-timeout and panic; otherwise, the receiver is allowed
to keep making forward progress.

In addition, it's been observed that PCIe hotplug events generate Correctable
Errors that are handled by hypervisor and then OS. Hypervisor 'borrows'
a guest cpu strand briefly to provide the service. If the cpu strand is
simultaneously the only cpu targeted by a mondo, it may not be available
for the mondo in 20msec, causing SUN4V mondo timeout. It appears that 1 second
is the agreed wait time between hypervisor and guest OS, this patch makes
the adjustment.

Orabug: 25476541
Orabug: 26417466

Signed-off-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Anthony Yznaga <anthony.yznaga@oracle.com>
Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
Reviewed-by: Thomas Tai <thomas.tai@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/trap_block.h |   1 +
 arch/sparc/kernel/smp_64.c          | 185 +++++++++++++++++-----------
 arch/sparc/kernel/sun4v_ivec.S      |  15 +++
 arch/sparc/kernel/traps_64.c        |   1 +
 4 files changed, 132 insertions(+), 70 deletions(-)

diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index ec9c04de36649..ff05992dae7a3 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS];
 void init_cur_cpu_trap(struct thread_info *);
 void setup_tba(void);
 extern int ncpus_probed;
+extern u64 cpu_mondo_counter[NR_CPUS];
 
 unsigned long real_hard_smp_processor_id(void);
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index fdf31040a7dc5..3218bc43302e1 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -622,22 +622,48 @@ static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 	}
 }
 
-/* Multi-cpu list version.  */
+#define	CPU_MONDO_COUNTER(cpuid)	(cpu_mondo_counter[cpuid])
+#define	MONDO_USEC_WAIT_MIN		2
+#define	MONDO_USEC_WAIT_MAX		100
+#define	MONDO_RETRY_LIMIT		500000
+
+/* Multi-cpu list version.
+ *
+ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
+ * Sometimes not all cpus receive the mondo, requiring us to re-send
+ * the mondo until all cpus have received, or cpus are truly stuck
+ * unable to receive mondo, and we timeout.
+ * Occasionally a target cpu strand is borrowed briefly by hypervisor to
+ * perform guest service, such as PCIe error handling. Consider the
+ * service time, 1 second overall wait is reasonable for 1 cpu.
+ * Here two in-between mondo check wait time are defined: 2 usec for
+ * single cpu quick turn around and up to 100usec for large cpu count.
+ * Deliver mondo to large number of cpus could take longer, we adjusts
+ * the retry count as long as target cpus are making forward progress.
+ */
 static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int retries, this_cpu, prev_sent, i, saw_cpu_error;
+	int this_cpu, tot_cpus, prev_sent, i, rem;
+	int usec_wait, retries, tot_retries;
+	u16 first_cpu = 0xffff;
+	unsigned long xc_rcvd = 0;
 	unsigned long status;
+	int ecpuerror_id = 0;
+	int enocpu_id = 0;
 	u16 *cpu_list;
+	u16 cpu;
 
 	this_cpu = smp_processor_id();
-
 	cpu_list = __va(tb->cpu_list_pa);
-
-	saw_cpu_error = 0;
-	retries = 0;
+	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
+	if (usec_wait > MONDO_USEC_WAIT_MAX)
+		usec_wait = MONDO_USEC_WAIT_MAX;
+	retries = tot_retries = 0;
+	tot_cpus = cnt;
 	prev_sent = 0;
+
 	do {
-		int forward_progress, n_sent;
+		int n_sent, mondo_delivered, target_cpu_busy;
 
 		status = sun4v_cpu_mondo_send(cnt,
 					      tb->cpu_list_pa,
@@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 
 		/* HV_EOK means all cpus received the xcall, we're done.  */
 		if (likely(status == HV_EOK))
-			break;
+			goto xcall_done;
+
+		/* If not these non-fatal errors, panic */
+		if (unlikely((status != HV_EWOULDBLOCK) &&
+			(status != HV_ECPUERROR) &&
+			(status != HV_ENOCPU)))
+			goto fatal_errors;
 
 		/* First, see if we made any forward progress.
+		 *
+		 * Go through the cpu_list, count the target cpus that have
+		 * received our mondo (n_sent), and those that did not (rem).
+		 * Re-pack cpu_list with the cpus remain to be retried in the
+		 * front - this simplifies tracking the truly stalled cpus.
 		 *
 		 * The hypervisor indicates successful sends by setting
 		 * cpu list entries to the value 0xffff.
+		 *
+		 * EWOULDBLOCK means some target cpus did not receive the
+		 * mondo and retry usually helps.
+		 *
+		 * ECPUERROR means at least one target cpu is in error state,
+		 * it's usually safe to skip the faulty cpu and retry.
+		 *
+		 * ENOCPU means one of the target cpu doesn't belong to the
+		 * domain, perhaps offlined which is unexpected, but not
+		 * fatal and it's okay to skip the offlined cpu.
 		 */
+		rem = 0;
 		n_sent = 0;
 		for (i = 0; i < cnt; i++) {
-			if (likely(cpu_list[i] == 0xffff))
+			cpu = cpu_list[i];
+			if (likely(cpu == 0xffff)) {
 				n_sent++;
+			} else if ((status == HV_ECPUERROR) &&
+				(sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
+				ecpuerror_id = cpu + 1;
+			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
+				enocpu_id = cpu + 1;
+			} else {
+				cpu_list[rem++] = cpu;
+			}
 		}
 
-		forward_progress = 0;
-		if (n_sent > prev_sent)
-			forward_progress = 1;
+		/* No cpu remained, we're done. */
+		if (rem == 0)
+			break;
 
-		prev_sent = n_sent;
+		/* Otherwise, update the cpu count for retry. */
+		cnt = rem;
 
-		/* If we get a HV_ECPUERROR, then one or more of the cpus
-		 * in the list are in error state.  Use the cpu_state()
-		 * hypervisor call to find out which cpus are in error state.
+		/* Record the overall number of mondos received by the
+		 * first of the remaining cpus.
 		 */
-		if (unlikely(status == HV_ECPUERROR)) {
-			for (i = 0; i < cnt; i++) {
-				long err;
-				u16 cpu;
+		if (first_cpu != cpu_list[0]) {
+			first_cpu = cpu_list[0];
+			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
+		}
 
-				cpu = cpu_list[i];
-				if (cpu == 0xffff)
-					continue;
+		/* Was any mondo delivered successfully? */
+		mondo_delivered = (n_sent > prev_sent);
+		prev_sent = n_sent;
 
-				err = sun4v_cpu_state(cpu);
-				if (err == HV_CPU_STATE_ERROR) {
-					saw_cpu_error = (cpu + 1);
-					cpu_list[i] = 0xffff;
-				}
-			}
-		} else if (unlikely(status != HV_EWOULDBLOCK))
-			goto fatal_mondo_error;
+		/* or, was any target cpu busy processing other mondos? */
+		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
+		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
 
-		/* Don't bother rewriting the CPU list, just leave the
-		 * 0xffff and non-0xffff entries in there and the
-		 * hypervisor will do the right thing.
-		 *
-		 * Only advance timeout state if we didn't make any
-		 * forward progress.
+		/* Retry count is for no progress. If we're making progress,
+		 * reset the retry count.
 		 */
-		if (unlikely(!forward_progress)) {
-			if (unlikely(++retries > 10000))
-				goto fatal_mondo_timeout;
-
-			/* Delay a little bit to let other cpus catch up
-			 * on their cpu mondo queue work.
-			 */
-			udelay(2 * cnt);
+		if (likely(mondo_delivered || target_cpu_busy)) {
+			tot_retries += retries;
+			retries = 0;
+		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
+			goto fatal_mondo_timeout;
 		}
-	} while (1);
 
-	if (unlikely(saw_cpu_error))
-		goto fatal_mondo_cpu_error;
+		/* Delay a little bit to let other cpus catch up on
+		 * their cpu mondo queue work.
+		 */
+		if (!mondo_delivered)
+			udelay(usec_wait);
 
-	return;
+		retries++;
+	} while (1);
 
-fatal_mondo_cpu_error:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
-	       "(including %d) were in error state\n",
-	       this_cpu, saw_cpu_error - 1);
+xcall_done:
+	if (unlikely(ecpuerror_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
+		       this_cpu, ecpuerror_id - 1);
+	} else if (unlikely(enocpu_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
+		       this_cpu, enocpu_id - 1);
+	}
 	return;
 
+fatal_errors:
+	/* fatal errors include bad alignment, etc */
+	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
+	       this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+	panic("Unexpected SUN4V mondo error %lu\n", status);
+
 fatal_mondo_timeout:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
-	       " progress after %d retries.\n",
-	       this_cpu, retries);
-	goto dump_cpu_list_and_out;
-
-fatal_mondo_error:
-	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
-	       this_cpu, status);
-	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
-	       "mondo_block_pa(%lx)\n",
-	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
-
-dump_cpu_list_and_out:
-	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
-	for (i = 0; i < cnt; i++)
-		printk("%u ", cpu_list[i]);
-	printk("]\n");
+	/* some cpus being non-responsive to the cpu mondo */
+	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
+	       this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
+	panic("SUN4V mondo timeout panic\n");
 }
 
 static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S
index 559bc5e9c1992..34631995859af 100644
--- a/arch/sparc/kernel/sun4v_ivec.S
+++ b/arch/sparc/kernel/sun4v_ivec.S
@@ -26,6 +26,21 @@ sun4v_cpu_mondo:
 	ldxa	[%g0] ASI_SCRATCHPAD, %g4
 	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
 
+	/* Get smp_processor_id() into %g3 */
+	sethi	%hi(trap_block), %g5
+	or	%g5, %lo(trap_block), %g5
+	sub	%g4, %g5, %g3
+	srlx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
+
+	/* Increment cpu_mondo_counter[smp_processor_id()] */
+	sethi	%hi(cpu_mondo_counter), %g5
+	or	%g5, %lo(cpu_mondo_counter), %g5
+	sllx	%g3, 3, %g3
+	add	%g5, %g3, %g5
+	ldx	[%g5], %g3
+	add	%g3, 1, %g3
+	stx	%g3, [%g5]
+
 	/* Get CPU mondo queue base phys address into %g7.  */
 	ldx	[%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
 
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 196ee5eb4d489..ad31af1dd7265 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs)
 	}
 }
 
+u64 cpu_mondo_counter[NR_CPUS] = {0};
 struct trap_per_cpu trap_block[NR_CPUS];
 EXPORT_SYMBOL(trap_block);
 
-- 
GitLab


From c09abff87f90c81d74b6483837a665f97448a475 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 13 Jul 2017 18:10:07 +0200
Subject: [PATCH 1540/1958] btrfs: cloned bios must not be iterated by
 bio_for_each_segment_all

We've started using cloned bios more in 4.13, there are some specifics
regarding the iteration.  Filipe found [1] that the raid56 iterated a
cloned bio using bio_for_each_segment_all, which is incorrect. The
cloned bios have wrong bi_vcnt and this could lead to silent
corruptions.  This patch adds assertions to all remaining
bio_for_each_segment_all cases.

[1] https://patchwork.kernel.org/patch/9838535/

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 1 +
 fs/btrfs/disk-io.c     | 1 +
 fs/btrfs/extent_io.c   | 3 +++
 fs/btrfs/inode.c       | 2 ++
 4 files changed, 7 insertions(+)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index fcd323eceb5bb..8ba1b86c9b725 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -152,6 +152,7 @@ static void end_compressed_bio_read(struct bio *bio)
 		 * we have verified the checksum already, set page
 		 * checked so the end_io handlers know about it
 		 */
+		ASSERT(!bio_flagged(bio, BIO_CLONED));
 		bio_for_each_segment_all(bvec, cb->orig_bio, i)
 			SetPageChecked(bvec->bv_page);
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b6758892874f3..075beedb43521 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -964,6 +964,7 @@ static int btree_csum_one_bio(struct bio *bio)
 	struct btrfs_root *root;
 	int i, ret = 0;
 
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, i) {
 		root = BTRFS_I(bvec->bv_page->mapping->host)->root;
 		ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e6f699083033..a3122fba54e8d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2452,6 +2452,7 @@ static void end_bio_extent_writepage(struct bio *bio)
 	u64 end;
 	int i;
 
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
@@ -2522,6 +2523,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 	int ret;
 	int i;
 
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
@@ -3675,6 +3677,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
 	struct extent_buffer *eb;
 	int i, done;
 
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5d3c6ac960fd2..529437c337b49 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8060,6 +8060,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
 	ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode));
 
 	done->uptodate = 1;
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, i)
 		clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
 				 io_tree, done->start, bvec->bv_page,
@@ -8151,6 +8152,7 @@ static void btrfs_retry_endio(struct bio *bio)
 	io_tree = &BTRFS_I(inode)->io_tree;
 	failure_tree = &BTRFS_I(inode)->io_failure_tree;
 
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, i) {
 		ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
 					     bvec->bv_offset, done->start,
-- 
GitLab


From e8f5b395d5332b5d720d62d33d6e06836392fd85 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 13 Jul 2017 17:42:15 +0200
Subject: [PATCH 1541/1958] btrfs: btrfs_create_repair_bio never fails, skip
 error handling

As the function uses the non-failing bio allocation, we can remove error
handling from the callers as well.

Signed-off-by: David Sterba <dsterba@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 4 ----
 fs/btrfs/inode.c     | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a3122fba54e8d..3a9ef02cad49e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2394,10 +2394,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 				      start - page_offset(page),
 				      (int)phy_offset, failed_bio->bi_end_io,
 				      NULL);
-	if (!bio) {
-		free_io_failure(failure_tree, tree, failrec);
-		return -EIO;
-	}
 	bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
 
 	btrfs_debug(btrfs_sb(inode->i_sb),
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 529437c337b49..eb495e956d53e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8017,10 +8017,6 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
 	isector >>= inode->i_sb->s_blocksize_bits;
 	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
 				pgoff, isector, repair_endio, repair_arg);
-	if (!bio) {
-		free_io_failure(failure_tree, io_tree, failrec);
-		return -EIO;
-	}
 	bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
 
 	btrfs_debug(BTRFS_I(inode)->root->fs_info,
-- 
GitLab


From c3cfb656307583ddfea45375c10183737593c195 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 13 Jul 2017 15:00:50 -0700
Subject: [PATCH 1542/1958] Btrfs: fix unexpected return value of
 bio_readpage_error

With blk_status_t conversion (that are now present in master),
bio_readpage_error() may return 1 as now ->submit_bio_hook() may not set
%ret if it runs without problems.

This fixes that unexpected return value by changing
btrfs_check_repairable() to return a bool instead of updating %ret, and
patch is applicable to both codebases with and without blk_status_t.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 12 ++++++------
 fs/btrfs/extent_io.h |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3a9ef02cad49e..eb484a0d1320c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2257,7 +2257,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 	return 0;
 }
 
-int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
+bool btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
 			   struct io_failure_record *failrec, int failed_mirror)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2273,7 +2273,7 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
 		btrfs_debug(fs_info,
 			"Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
 			num_copies, failrec->this_mirror, failed_mirror);
-		return 0;
+		return false;
 	}
 
 	/*
@@ -2314,10 +2314,10 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
 		btrfs_debug(fs_info,
 			"Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
 			num_copies, failrec->this_mirror, failed_mirror);
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
 
@@ -2380,8 +2380,8 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	if (ret)
 		return ret;
 
-	ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
-	if (!ret) {
+	if (!btrfs_check_repairable(inode, failed_bio, failrec,
+				    failed_mirror)) {
 		free_io_failure(failure_tree, tree, failrec);
 		return -EIO;
 	}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index aeafdb35d90ba..cfdbb9efaaed4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -539,8 +539,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
 		u64 end);
 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 				struct io_failure_record **failrec_ret);
-int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
-			   struct io_failure_record *failrec, int fail_mirror);
+bool btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
+			    struct io_failure_record *failrec, int fail_mirror);
 struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
 				    struct io_failure_record *failrec,
 				    struct page *page, int pg_offset, int icsum,
-- 
GitLab


From 76250f2b743b72cb685cc51ac0cdabb32957180b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 14 Feb 2017 12:40:01 +0000
Subject: [PATCH 1543/1958] dma-buf/fence: Avoid use of uninitialised timestamp

[  236.821534] WARNING: kmemcheck: Caught 64-bit read from uninitialized memory (ffff8802538683d0)
[  236.828642] 420000001e7f0000000000000000000000080000000000000000000000000000
[  236.839543]  i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
[  236.850420]                                  ^
[  236.854123] RIP: 0010:[<ffffffff81396f07>]  [<ffffffff81396f07>] fence_signal+0x17/0xd0
[  236.861313] RSP: 0018:ffff88024acd7ba0  EFLAGS: 00010282
[  236.865027] RAX: ffffffff812f6a90 RBX: ffff8802527ca800 RCX: ffff880252cb30e0
[  236.868801] RDX: ffff88024ac5d918 RSI: ffff880252f780e0 RDI: ffff880253868380
[  236.872579] RBP: ffff88024acd7bc0 R08: ffff88024acd7be0 R09: 0000000000000000
[  236.876407] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880253868380
[  236.880185] R13: ffff8802538684d0 R14: ffff880253868380 R15: ffff88024cd48e00
[  236.883983] FS:  00007f1646d1a740(0000) GS:ffff88025d000000(0000) knlGS:0000000000000000
[  236.890959] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  236.894702] CR2: ffff880251360318 CR3: 000000024ad21000 CR4: 00000000001406f0
[  236.898481]  [<ffffffff8130d1ad>] i915_gem_request_retire+0x1cd/0x230
[  236.902439]  [<ffffffff8130e2b3>] i915_gem_request_alloc+0xa3/0x2f0
[  236.906435]  [<ffffffff812fb1bd>] i915_gem_do_execbuffer.isra.41+0xb6d/0x18b0
[  236.910434]  [<ffffffff812fc265>] i915_gem_execbuffer2+0x95/0x1e0
[  236.914390]  [<ffffffff812ad625>] drm_ioctl+0x1e5/0x460
[  236.918275]  [<ffffffff8110d4cf>] do_vfs_ioctl+0x8f/0x5c0
[  236.922168]  [<ffffffff8110da3c>] SyS_ioctl+0x3c/0x70
[  236.926090]  [<ffffffff814b7a5f>] entry_SYSCALL_64_fastpath+0x17/0x93
[  236.930045]  [<ffffffffffffffff>] 0xffffffffffffffff

We only set the timestamp before we mark the fence as signaled. It is
done before to avoid observers having a window in which they may see the
fence as complete but no timestamp. Having it does incur a potential for
the timestamp to be written twice, and even for it to be corrupted if
the u64 write is not atomic. Instead use a new bit to record the
presence of the timestamp, and teach the readers to wait until it is set
if the fence is complete. There still remains a race where the timestamp
for the signaled fence may be shown before the fence is reported as
signaled, but that's a pre-existing error.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Reported-by: Rafael Antognolli <rafael.antognolli@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170214124001.1930-1-chris@chris-wilson.co.uk
---
 drivers/dma-buf/dma-fence.c  | 17 ++++++-----------
 drivers/dma-buf/sync_debug.c |  2 +-
 drivers/dma-buf/sync_file.c  |  8 +++++++-
 include/linux/dma-fence.h    |  2 ++
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 0918d3f003d65..13556fdda2a52 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -75,11 +75,6 @@ int dma_fence_signal_locked(struct dma_fence *fence)
 	if (WARN_ON(!fence))
 		return -EINVAL;
 
-	if (!ktime_to_ns(fence->timestamp)) {
-		fence->timestamp = ktime_get();
-		smp_mb__before_atomic();
-	}
-
 	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
 		ret = -EINVAL;
 
@@ -87,8 +82,11 @@ int dma_fence_signal_locked(struct dma_fence *fence)
 		 * we might have raced with the unlocked dma_fence_signal,
 		 * still run through all callbacks
 		 */
-	} else
+	} else {
+		fence->timestamp = ktime_get();
+		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
 		trace_dma_fence_signaled(fence);
+	}
 
 	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
 		list_del_init(&cur->node);
@@ -115,14 +113,11 @@ int dma_fence_signal(struct dma_fence *fence)
 	if (!fence)
 		return -EINVAL;
 
-	if (!ktime_to_ns(fence->timestamp)) {
-		fence->timestamp = ktime_get();
-		smp_mb__before_atomic();
-	}
-
 	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return -EINVAL;
 
+	fence->timestamp = ktime_get();
+	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
 	trace_dma_fence_signaled(fence);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) {
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index c769dc653b344..bfead12390f24 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -84,7 +84,7 @@ static void sync_print_fence(struct seq_file *s,
 		   show ? "_" : "",
 		   sync_status_str(status));
 
-	if (status) {
+	if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) {
 		struct timespec64 ts64 =
 			ktime_to_timespec64(fence->timestamp);
 
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2321035f6204b..95f259b719fc4 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -375,7 +375,13 @@ static void sync_fill_fence_info(struct dma_fence *fence,
 		sizeof(info->driver_name));
 
 	info->status = dma_fence_get_status(fence);
-	info->timestamp_ns = ktime_to_ns(fence->timestamp);
+	while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
+	       !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+		cpu_relax();
+	info->timestamp_ns =
+		test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ?
+		ktime_to_ns(fence->timestamp) :
+		ktime_set(0, 0);
 }
 
 static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index a5195a7d6f77e..0a186c4f3981c 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -55,6 +55,7 @@ struct dma_fence_cb;
  * of the time.
  *
  * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
+ * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
  * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
  * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the
  * implementer of the fence for its own purposes. Can be used in different
@@ -84,6 +85,7 @@ struct dma_fence {
 
 enum dma_fence_flag_bits {
 	DMA_FENCE_FLAG_SIGNALED_BIT,
+	DMA_FENCE_FLAG_TIMESTAMP_BIT,
 	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
 };
-- 
GitLab


From a966ac73d7772a2b067c50fa16bbbfe418fc6374 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 07:49:15 -0300
Subject: [PATCH 1544/1958] bcache.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add a title for the document;
- Use a list for the listed URLs;
- mark literal blocks;
- adjust whitespaces;
- Don't capitalize section titles.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/bcache.txt | 190 ++++++++++++++++++++++-----------------
 1 file changed, 107 insertions(+), 83 deletions(-)

diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt
index a9259b562d5c8..c0ce64d75bbf7 100644
--- a/Documentation/bcache.txt
+++ b/Documentation/bcache.txt
@@ -1,10 +1,15 @@
+============================
+A block layer cache (bcache)
+============================
+
 Say you've got a big slow raid 6, and an ssd or three. Wouldn't it be
 nice if you could use them as cache... Hence bcache.
 
 Wiki and git repositories are at:
-  http://bcache.evilpiepirate.org
-  http://evilpiepirate.org/git/linux-bcache.git
-  http://evilpiepirate.org/git/bcache-tools.git
+
+  - http://bcache.evilpiepirate.org
+  - http://evilpiepirate.org/git/linux-bcache.git
+  - http://evilpiepirate.org/git/bcache-tools.git
 
 It's designed around the performance characteristics of SSDs - it only allocates
 in erase block sized buckets, and it uses a hybrid btree/log to track cached
@@ -37,17 +42,19 @@ to be flushed.
 
 Getting started:
 You'll need make-bcache from the bcache-tools repository. Both the cache device
-and backing device must be formatted before use.
+and backing device must be formatted before use::
+
   make-bcache -B /dev/sdb
   make-bcache -C /dev/sdc
 
 make-bcache has the ability to format multiple devices at the same time - if
 you format your backing devices and cache device at the same time, you won't
-have to manually attach:
+have to manually attach::
+
   make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
 
 bcache-tools now ships udev rules, and bcache devices are known to the kernel
-immediately.  Without udev, you can manually register devices like this:
+immediately.  Without udev, you can manually register devices like this::
 
   echo /dev/sdb > /sys/fs/bcache/register
   echo /dev/sdc > /sys/fs/bcache/register
@@ -60,16 +67,16 @@ slow devices as bcache backing devices without a cache, and you can choose to ad
 a caching device later.
 See 'ATTACHING' section below.
 
-The devices show up as:
+The devices show up as::
 
   /dev/bcache<N>
 
-As well as (with udev):
+As well as (with udev)::
 
   /dev/bcache/by-uuid/<uuid>
   /dev/bcache/by-label/<label>
 
-To get started:
+To get started::
 
   mkfs.ext4 /dev/bcache0
   mount /dev/bcache0 /mnt
@@ -81,13 +88,13 @@ Cache devices are managed as sets; multiple caches per set isn't supported yet
 but will allow for mirroring of metadata and dirty data in the future. Your new
 cache set shows up as /sys/fs/bcache/<UUID>
 
-ATTACHING
+Attaching
 ---------
 
 After your cache device and backing device are registered, the backing device
 must be attached to your cache set to enable caching. Attaching a backing
 device to a cache set is done thusly, with the UUID of the cache set in
-/sys/fs/bcache:
+/sys/fs/bcache::
 
   echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
 
@@ -97,7 +104,7 @@ your bcache devices. If a backing device has data in a cache somewhere, the
 important if you have writeback caching turned on.
 
 If you're booting up and your cache device is gone and never coming back, you
-can force run the backing device:
+can force run the backing device::
 
   echo 1 > /sys/block/sdb/bcache/running
 
@@ -110,7 +117,7 @@ but all the cached data will be invalidated. If there was dirty data in the
 cache, don't expect the filesystem to be recoverable - you will have massive
 filesystem corruption, though ext4's fsck does work miracles.
 
-ERROR HANDLING
+Error Handling
 --------------
 
 Bcache tries to transparently handle IO errors to/from the cache device without
@@ -134,25 +141,27 @@ the backing devices to passthrough mode.
    read some of the dirty data, though.
 
 
-HOWTO/COOKBOOK
+Howto/cookbook
 --------------
 
 A) Starting a bcache with a missing caching device
 
 If registering the backing device doesn't help, it's already there, you just need
-to force it to run without the cache:
+to force it to run without the cache::
+
 	host:~# echo /dev/sdb1 > /sys/fs/bcache/register
 	[  119.844831] bcache: register_bcache() error opening /dev/sdb1: device already registered
 
 Next, you try to register your caching device if it's present. However
 if it's absent, or registration fails for some reason, you can still
-start your bcache without its cache, like so:
+start your bcache without its cache, like so::
+
 	host:/sys/block/sdb/sdb1/bcache# echo 1 > running
 
 Note that this may cause data loss if you were running in writeback mode.
 
 
-B) Bcache does not find its cache
+B) Bcache does not find its cache::
 
 	host:/sys/block/md5/bcache# echo 0226553a-37cf-41d5-b3ce-8b1e944543a8 > attach
 	[ 1933.455082] bcache: bch_cached_dev_attach() Couldn't find uuid for md5 in set
@@ -160,7 +169,8 @@ B) Bcache does not find its cache
 	[ 1933.478179] : cache set not found
 
 In this case, the caching device was simply not registered at boot
-or disappeared and came back, and needs to be (re-)registered:
+or disappeared and came back, and needs to be (re-)registered::
+
 	host:/sys/block/md5/bcache# echo /dev/sdh2 > /sys/fs/bcache/register
 
 
@@ -180,7 +190,8 @@ device is still available at an 8KiB offset. So either via a loopdev
 of the backing device created with --offset 8K, or any value defined by
 --data-offset when you originally formatted bcache with `make-bcache`.
 
-For example:
+For example::
+
 	losetup -o 8192 /dev/loop0 /dev/your_bcache_backing_dev
 
 This should present your unmodified backing device data in /dev/loop0
@@ -191,33 +202,38 @@ cache device without loosing data.
 
 E) Wiping a cache device
 
-host:~# wipefs -a /dev/sdh2
-16 bytes were erased at offset 0x1018 (bcache)
-they were: c6 85 73 f6 4e 1a 45 ca 82 65 f5 7f 48 ba 6d 81
+::
+
+	host:~# wipefs -a /dev/sdh2
+	16 bytes were erased at offset 0x1018 (bcache)
+	they were: c6 85 73 f6 4e 1a 45 ca 82 65 f5 7f 48 ba 6d 81
+
+After you boot back with bcache enabled, you recreate the cache and attach it::
 
-After you boot back with bcache enabled, you recreate the cache and attach it:
-host:~# make-bcache -C /dev/sdh2
-UUID:                   7be7e175-8f4c-4f99-94b2-9c904d227045
-Set UUID:               5bc072a8-ab17-446d-9744-e247949913c1
-version:                0
-nbuckets:               106874
-block_size:             1
-bucket_size:            1024
-nr_in_set:              1
-nr_this_dev:            0
-first_bucket:           1
-[  650.511912] bcache: run_cache_set() invalidating existing data
-[  650.549228] bcache: register_cache() registered cache device sdh2
+	host:~# make-bcache -C /dev/sdh2
+	UUID:                   7be7e175-8f4c-4f99-94b2-9c904d227045
+	Set UUID:               5bc072a8-ab17-446d-9744-e247949913c1
+	version:                0
+	nbuckets:               106874
+	block_size:             1
+	bucket_size:            1024
+	nr_in_set:              1
+	nr_this_dev:            0
+	first_bucket:           1
+	[  650.511912] bcache: run_cache_set() invalidating existing data
+	[  650.549228] bcache: register_cache() registered cache device sdh2
 
-start backing device with missing cache:
-host:/sys/block/md5/bcache# echo 1 > running
+start backing device with missing cache::
 
-attach new cache:
-host:/sys/block/md5/bcache# echo 5bc072a8-ab17-446d-9744-e247949913c1 > attach
-[  865.276616] bcache: bch_cached_dev_attach() Caching md5 as bcache0 on set 5bc072a8-ab17-446d-9744-e247949913c1
+	host:/sys/block/md5/bcache# echo 1 > running
 
+attach new cache::
 
-F) Remove or replace a caching device
+	host:/sys/block/md5/bcache# echo 5bc072a8-ab17-446d-9744-e247949913c1 > attach
+	[  865.276616] bcache: bch_cached_dev_attach() Caching md5 as bcache0 on set 5bc072a8-ab17-446d-9744-e247949913c1
+
+
+F) Remove or replace a caching device::
 
 	host:/sys/block/sda/sda7/bcache# echo 1 > detach
 	[  695.872542] bcache: cached_dev_detach_finish() Caching disabled for sda7
@@ -226,13 +242,15 @@ F) Remove or replace a caching device
 	wipefs: error: /dev/nvme0n1p4: probing initialization failed: Device or resource busy
 	Ooops, it's disabled, but not unregistered, so it's still protected
 
-We need to go and unregister it:
+We need to go and unregister it::
+
 	host:/sys/fs/bcache/b7ba27a1-2398-4649-8ae3-0959f57ba128# ls -l cache0
 	lrwxrwxrwx 1 root root 0 Feb 25 18:33 cache0 -> ../../../devices/pci0000:00/0000:00:1d.0/0000:70:00.0/nvme/nvme0/nvme0n1/nvme0n1p4/bcache/
 	host:/sys/fs/bcache/b7ba27a1-2398-4649-8ae3-0959f57ba128# echo 1 > stop
 	kernel: [  917.041908] bcache: cache_set_free() Cache set b7ba27a1-2398-4649-8ae3-0959f57ba128 unregistered
 
-Now we can wipe it:
+Now we can wipe it::
+
 	host:~# wipefs -a /dev/nvme0n1p4
 	/dev/nvme0n1p4: 16 bytes were erased at offset 0x00001018 (bcache): c6 85 73 f6 4e 1a 45 ca 82 65 f5 7f 48 ba 6d 81
 
@@ -252,40 +270,44 @@ if there are any active backing or caching devices left on it:
 
 1) Is it present in /dev/bcache* ? (there are times where it won't be)
 
-If so, it's easy:
+   If so, it's easy::
+
 	host:/sys/block/bcache0/bcache# echo 1 > stop
 
-2) But if your backing device is gone, this won't work:
+2) But if your backing device is gone, this won't work::
+
 	host:/sys/block/bcache0# cd bcache
 	bash: cd: bcache: No such file or directory
 
-In this case, you may have to unregister the dmcrypt block device that
-references this bcache to free it up:
+   In this case, you may have to unregister the dmcrypt block device that
+   references this bcache to free it up::
+
 	host:~# dmsetup remove oldds1
 	bcache: bcache_device_free() bcache0 stopped
 	bcache: cache_set_free() Cache set 5bc072a8-ab17-446d-9744-e247949913c1 unregistered
 
-This causes the backing bcache to be removed from /sys/fs/bcache and
-then it can be reused.  This would be true of any block device stacking
-where bcache is a lower device.
+   This causes the backing bcache to be removed from /sys/fs/bcache and
+   then it can be reused.  This would be true of any block device stacking
+   where bcache is a lower device.
+
+3) In other cases, you can also look in /sys/fs/bcache/::
 
-3) In other cases, you can also look in /sys/fs/bcache/:
+	host:/sys/fs/bcache# ls -l */{cache?,bdev?}
+	lrwxrwxrwx 1 root root 0 Mar  5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/bdev1 -> ../../../devices/virtual/block/dm-1/bcache/
+	lrwxrwxrwx 1 root root 0 Mar  5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/cache0 -> ../../../devices/virtual/block/dm-4/bcache/
+	lrwxrwxrwx 1 root root 0 Mar  5 09:39 5bc072a8-ab17-446d-9744-e247949913c1/cache0 -> ../../../devices/pci0000:00/0000:00:01.0/0000:01:00.0/ata10/host9/target9:0:0/9:0:0:0/block/sdl/sdl2/bcache/
 
-host:/sys/fs/bcache# ls -l */{cache?,bdev?}
-lrwxrwxrwx 1 root root 0 Mar  5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/bdev1 -> ../../../devices/virtual/block/dm-1/bcache/
-lrwxrwxrwx 1 root root 0 Mar  5 09:39 0226553a-37cf-41d5-b3ce-8b1e944543a8/cache0 -> ../../../devices/virtual/block/dm-4/bcache/
-lrwxrwxrwx 1 root root 0 Mar  5 09:39 5bc072a8-ab17-446d-9744-e247949913c1/cache0 -> ../../../devices/pci0000:00/0000:00:01.0/0000:01:00.0/ata10/host9/target9:0:0/9:0:0:0/block/sdl/sdl2/bcache/
+   The device names will show which UUID is relevant, cd in that directory
+   and stop the cache::
 
-The device names will show which UUID is relevant, cd in that directory
-and stop the cache:
 	host:/sys/fs/bcache/5bc072a8-ab17-446d-9744-e247949913c1# echo 1 > stop
 
-This will free up bcache references and let you reuse the partition for
-other purposes.
+   This will free up bcache references and let you reuse the partition for
+   other purposes.
 
 
-TROUBLESHOOTING PERFORMANCE
+Troubleshooting performance
 ---------------------------
 
 Bcache has a bunch of config options and tunables. The defaults are intended to
@@ -301,11 +323,13 @@ want for getting the best possible numbers when benchmarking.
    raid stripe size to get the disk multiples that you would like.
 
    For example:  If you have a 64k stripe size, then the following offset
-   would provide alignment for many common RAID5 data spindle counts:
+   would provide alignment for many common RAID5 data spindle counts::
+
 	64k * 2*2*2*3*3*5*7 bytes = 161280k
 
    That space is wasted, but for only 157.5MB you can grow your RAID 5
-   volume to the following data-spindle counts without re-aligning:
+   volume to the following data-spindle counts without re-aligning::
+
 	3,4,5,6,7,8,9,10,12,14,15,18,20,21 ...
 
  - Bad write performance
@@ -313,9 +337,9 @@ want for getting the best possible numbers when benchmarking.
    If write performance is not what you expected, you probably wanted to be
    running in writeback mode, which isn't the default (not due to a lack of
    maturity, but simply because in writeback mode you'll lose data if something
-   happens to your SSD)
+   happens to your SSD)::
 
-   # echo writeback > /sys/block/bcache0/bcache/cache_mode
+	# echo writeback > /sys/block/bcache0/bcache/cache_mode
 
  - Bad performance, or traffic not going to the SSD that you'd expect
 
@@ -325,13 +349,13 @@ want for getting the best possible numbers when benchmarking.
    accessed data out of your cache.
 
    But if you want to benchmark reads from cache, and you start out with fio
-   writing an 8 gigabyte test file - so you want to disable that.
+   writing an 8 gigabyte test file - so you want to disable that::
 
-   # echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
+	# echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
 
-   To set it back to the default (4 mb), do
+   To set it back to the default (4 mb), do::
 
-   # echo 4M > /sys/block/bcache0/bcache/sequential_cutoff
+	# echo 4M > /sys/block/bcache0/bcache/sequential_cutoff
 
  - Traffic's still going to the spindle/still getting cache misses
 
@@ -344,10 +368,10 @@ want for getting the best possible numbers when benchmarking.
    throttles traffic if the latency exceeds a threshold (it does this by
    cranking down the sequential bypass).
 
-   You can disable this if you need to by setting the thresholds to 0:
+   You can disable this if you need to by setting the thresholds to 0::
 
-   # echo 0 > /sys/fs/bcache/<cache set>/congested_read_threshold_us
-   # echo 0 > /sys/fs/bcache/<cache set>/congested_write_threshold_us
+	# echo 0 > /sys/fs/bcache/<cache set>/congested_read_threshold_us
+	# echo 0 > /sys/fs/bcache/<cache set>/congested_write_threshold_us
 
    The default is 2000 us (2 milliseconds) for reads, and 20000 for writes.
 
@@ -369,7 +393,7 @@ want for getting the best possible numbers when benchmarking.
    a fix for the issue there).
 
 
-SYSFS - BACKING DEVICE
+Sysfs - backing device
 ----------------------
 
 Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
@@ -454,7 +478,8 @@ writeback_running
   still be added to the cache until it is mostly full; only meant for
   benchmarking. Defaults to on.
 
-SYSFS - BACKING DEVICE STATS:
+Sysfs - backing device stats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 There are directories with these numbers for a running total, as well as
 versions that decay over the past day, hour and 5 minutes; they're also
@@ -463,14 +488,11 @@ aggregated in the cache set directory as well.
 bypassed
   Amount of IO (both reads and writes) that has bypassed the cache
 
-cache_hits
-cache_misses
-cache_hit_ratio
+cache_hits, cache_misses, cache_hit_ratio
   Hits and misses are counted per individual IO as bcache sees them; a
   partial hit is counted as a miss.
 
-cache_bypass_hits
-cache_bypass_misses
+cache_bypass_hits, cache_bypass_misses
   Hits and misses for IO that is intended to skip the cache are still counted,
   but broken out here.
 
@@ -482,7 +504,8 @@ cache_miss_collisions
 cache_readaheads
   Count of times readahead occurred.
 
-SYSFS - CACHE SET:
+Sysfs - cache set
+~~~~~~~~~~~~~~~~~
 
 Available at /sys/fs/bcache/<cset-uuid>
 
@@ -520,8 +543,7 @@ flash_vol_create
   Echoing a size to this file (in human readable units, k/M/G) creates a thinly
   provisioned volume backed by the cache set.
 
-io_error_halflife
-io_error_limit
+io_error_halflife, io_error_limit
   These determines how many errors we accept before disabling the cache.
   Each error is decayed by the half life (in # ios).  If the decaying count
   reaches io_error_limit dirty data is written out and the cache is disabled.
@@ -545,7 +567,8 @@ unregister
   Detaches all backing devices and closes the cache devices; if dirty data is
   present it will disable writeback caching and wait for it to be flushed.
 
-SYSFS - CACHE SET INTERNAL:
+Sysfs - cache set internal
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 This directory also exposes timings for a number of internal operations, with
 separate files for average duration, average frequency, last occurrence and max
@@ -574,7 +597,8 @@ cache_read_races
 trigger_gc
   Writing to this file forces garbage collection to run.
 
-SYSFS - CACHE DEVICE:
+Sysfs - Cache device
+~~~~~~~~~~~~~~~~~~~~
 
 Available at /sys/block/<cdev>/bcache
 
-- 
GitLab


From b4b8af5c4eb69fb5394fb24bb82a5cb404ce86c1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 07:55:40 -0300
Subject: [PATCH 1545/1958] bt8xxgpio.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Adjust titles to match the convention;
- use a literal block for ascii artwork.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/bt8xxgpio.txt | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt
index d8297e4ebd265..a845feb074de7 100644
--- a/Documentation/bt8xxgpio.txt
+++ b/Documentation/bt8xxgpio.txt
@@ -1,12 +1,8 @@
-===============================================================
-==  BT8XXGPIO driver                                         ==
-==                                                           ==
-==  A driver for a selfmade cheap BT8xx based PCI GPIO-card  ==
-==                                                           ==
-==  For advanced documentation, see                          ==
-==  http://www.bu3sch.de/btgpio.php                          ==
-===============================================================
+===================================================================
+A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio)
+===================================================================
 
+For advanced documentation, see http://www.bu3sch.de/btgpio.php
 
 A generic digital 24-port PCI GPIO card can be built out of an ordinary
 Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
@@ -17,9 +13,8 @@ The bt8xx chip does have 24 digital GPIO ports.
 These ports are accessible via 24 pins on the SMD chip package.
 
 
-==============================================
-==  How to physically access the GPIO pins  ==
-==============================================
+How to physically access the GPIO pins
+======================================
 
 The are several ways to access these pins. One might unsolder the whole chip
 and put it on a custom PCI board, or one might only unsolder each individual
@@ -27,7 +22,7 @@ GPIO pin and solder that to some tiny wire. As the chip package really is tiny
 there are some advanced soldering skills needed in any case.
 
 The physical pinouts are drawn in the following ASCII art.
-The GPIO pins are marked with G00-G23
+The GPIO pins are marked with G00-G23::
 
                                            G G G G G G G G G G G G     G G G G G G
                                            0 0 0 0 0 0 0 0 0 0 1 1     1 1 1 1 1 1
-- 
GitLab


From 7f4e012133644726acb7d1f912bd2a4b0b7c2205 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 08:03:53 -0300
Subject: [PATCH 1546/1958] btmrvl.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Use the right notation for titles;
- Use a list for image names;
- Use literal blocks where needed;
- Whitespace fixes.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/btmrvl.txt | 65 +++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/Documentation/btmrvl.txt b/Documentation/btmrvl.txt
index 34916a46c0997..ec57740ead0c7 100644
--- a/Documentation/btmrvl.txt
+++ b/Documentation/btmrvl.txt
@@ -1,18 +1,16 @@
-=======================================================================
-		README for btmrvl driver
-=======================================================================
-
+=============
+btmrvl driver
+=============
 
 All commands are used via debugfs interface.
 
-=====================
-Set/get driver configurations:
+Set/get driver configurations
+=============================
 
 Path:	/debug/btmrvl/config/
 
-gpiogap=[n]
-hscfgcmd
-	These commands are used to configure the host sleep parameters.
+gpiogap=[n], hscfgcmd
+	These commands are used to configure the host sleep parameters::
 	bit 8:0  -- Gap
 	bit 16:8 -- GPIO
 
@@ -23,7 +21,8 @@ hscfgcmd
 	where Gap is the gap in milli seconds between wakeup signal and
 	wakeup event, or 0xff for special host sleep setting.
 
-	Usage:
+	Usage::
+
 		# Use SDIO interface to wake up the host and set GAP to 0x80:
 		echo 0xff80 > /debug/btmrvl/config/gpiogap
 		echo 1 > /debug/btmrvl/config/hscfgcmd
@@ -32,15 +31,16 @@ hscfgcmd
 		echo 0x03ff >  /debug/btmrvl/config/gpiogap
 		echo 1 > /debug/btmrvl/config/hscfgcmd
 
-psmode=[n]
-pscmd
+psmode=[n], pscmd
 	These commands are used to enable/disable auto sleep mode
 
-	where the option is:
+	where the option is::
+
 			1 	-- Enable auto sleep mode
 			0 	-- Disable auto sleep mode
 
-	Usage:
+	Usage::
+
 		# Enable auto sleep mode
 		echo 1 > /debug/btmrvl/config/psmode
 		echo 1 > /debug/btmrvl/config/pscmd
@@ -50,15 +50,16 @@ pscmd
 		echo 1 > /debug/btmrvl/config/pscmd
 
 
-hsmode=[n]
-hscmd
+hsmode=[n], hscmd
 	These commands are used to enable host sleep or wake up firmware
 
-	where the option is:
+	where the option is::
+
 			1	-- Enable host sleep
 			0	-- Wake up firmware
 
-	Usage:
+	Usage::
+
 		# Enable host sleep
 		echo 1 > /debug/btmrvl/config/hsmode
 		echo 1 > /debug/btmrvl/config/hscmd
@@ -68,12 +69,13 @@ hscmd
 		echo 1 > /debug/btmrvl/config/hscmd
 
 
-======================
-Get driver status:
+Get driver status
+=================
 
 Path:	/debug/btmrvl/status/
 
-Usage:
+Usage::
+
 	cat /debug/btmrvl/status/<args>
 
 where the args are:
@@ -90,14 +92,17 @@ hsstate
 txdnldrdy
 	This command displays the value of Tx download ready flag.
 
-
-=====================
+Issuing a raw hci command
+=========================
 
 Use hcitool to issue raw hci command, refer to hcitool manual
 
-	Usage: Hcitool cmd <ogf> <ocf> [Parameters]
+Usage::
+
+	Hcitool cmd <ogf> <ocf> [Parameters]
+
+Interface Control Command::
 
-	Interface Control Command
 	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x00    --Enable All interface
 	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x01    --Enable Wlan interface
 	hcitool cmd 0x3f 0x5b 0xf5 0x01 0x02    --Enable BT interface
@@ -105,13 +110,13 @@ Use hcitool to issue raw hci command, refer to hcitool manual
 	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x01    --Disable Wlan interface
 	hcitool cmd 0x3f 0x5b 0xf5 0x00 0x02    --Disable BT interface
 
-=======================================================================
-
+SD8688 firmware
+===============
 
-SD8688 firmware:
+Images:
 
-/lib/firmware/sd8688_helper.bin
-/lib/firmware/sd8688.bin
+- /lib/firmware/sd8688_helper.bin
+- /lib/firmware/sd8688.bin
 
 
 The images can be downloaded from:
-- 
GitLab


From 38975e905a510349bb913f60824d2268ccbb833d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 08:13:48 -0300
Subject: [PATCH 1547/1958] bus-virt-phys-mapping.txt: standardize document
 format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Move author info to the beginning of file and use :Author:
- use warning/note annotation;
- mark literal blocks as such;
- Add a title for the document;
- use **emphasis** instead of _emphasis_.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/bus-virt-phys-mapping.txt | 64 +++++++++++++++----------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/Documentation/bus-virt-phys-mapping.txt b/Documentation/bus-virt-phys-mapping.txt
index 2bc55ff3b4d1e..4bb07c2f3e7d3 100644
--- a/Documentation/bus-virt-phys-mapping.txt
+++ b/Documentation/bus-virt-phys-mapping.txt
@@ -1,17 +1,27 @@
-[ NOTE: The virt_to_bus() and bus_to_virt() functions have been
+==========================================================
+How to access I/O mapped memory from within device drivers
+==========================================================
+
+:Author: Linus
+
+.. warning::
+
+	The virt_to_bus() and bus_to_virt() functions have been
 	superseded by the functionality provided by the PCI DMA interface
 	(see Documentation/DMA-API-HOWTO.txt).  They continue
 	to be documented below for historical purposes, but new code
-	must not use them. --davidm 00/12/12 ]
+	must not use them. --davidm 00/12/12
 
-[ This is a mail message in response to a query on IO mapping, thus the
-  strange format for a "document" ]
+::
+
+  [ This is a mail message in response to a query on IO mapping, thus the
+    strange format for a "document" ]
 
 The AHA-1542 is a bus-master device, and your patch makes the driver give the
 controller the physical address of the buffers, which is correct on x86
 (because all bus master devices see the physical memory mappings directly). 
 
-However, on many setups, there are actually _three_ different ways of looking
+However, on many setups, there are actually **three** different ways of looking
 at memory addresses, and in this case we actually want the third, the
 so-called "bus address". 
 
@@ -38,7 +48,7 @@ because the memory and the devices share the same address space, and that is
 not generally necessarily true on other PCI/ISA setups. 
 
 Now, just as an example, on the PReP (PowerPC Reference Platform), the 
-CPU sees a memory map something like this (this is from memory):
+CPU sees a memory map something like this (this is from memory)::
 
 	0-2 GB		"real memory"
 	2 GB-3 GB	"system IO" (inb/out and similar accesses on x86)
@@ -52,7 +62,7 @@ So when the CPU wants any bus master to write to physical memory 0, it
 has to give the master address 0x80000000 as the memory address.
 
 So, for example, depending on how the kernel is actually mapped on the 
-PPC, you can end up with a setup like this:
+PPC, you can end up with a setup like this::
 
  physical address:	0
  virtual address:	0xC0000000
@@ -61,7 +71,7 @@ PPC, you can end up with a setup like this:
 where all the addresses actually point to the same thing.  It's just seen 
 through different translations..
 
-Similarly, on the Alpha, the normal translation is
+Similarly, on the Alpha, the normal translation is::
 
  physical address:	0
  virtual address:	0xfffffc0000000000
@@ -70,7 +80,7 @@ Similarly, on the Alpha, the normal translation is
 (but there are also Alphas where the physical address and the bus address
 are the same). 
 
-Anyway, the way to look up all these translations, you do
+Anyway, the way to look up all these translations, you do::
 
 	#include <asm/io.h>
 
@@ -81,8 +91,8 @@ Anyway, the way to look up all these translations, you do
 
 Now, when do you need these?
 
-You want the _virtual_ address when you are actually going to access that 
-pointer from the kernel. So you can have something like this:
+You want the **virtual** address when you are actually going to access that
+pointer from the kernel. So you can have something like this::
 
 	/*
 	 * this is the hardware "mailbox" we use to communicate with
@@ -104,7 +114,7 @@ pointer from the kernel. So you can have something like this:
 				...
 
 on the other hand, you want the bus address when you have a buffer that 
-you want to give to the controller:
+you want to give to the controller::
 
 	/* ask the controller to read the sense status into "sense_buffer" */
 	mbox.bufstart = virt_to_bus(&sense_buffer);
@@ -112,7 +122,7 @@ you want to give to the controller:
 	mbox.status = 0;
 	notify_controller(&mbox);
 
-And you generally _never_ want to use the physical address, because you can't
+And you generally **never** want to use the physical address, because you can't
 use that from the CPU (the CPU only uses translated virtual addresses), and
 you can't use it from the bus master. 
 
@@ -124,8 +134,10 @@ be remapped as measured in units of pages, a.k.a. the pfn (the memory
 management layer doesn't know about devices outside the CPU, so it
 shouldn't need to know about "bus addresses" etc).
 
-NOTE NOTE NOTE! The above is only one part of the whole equation. The above
-only talks about "real memory", that is, CPU memory (RAM). 
+.. note::
+
+	The above is only one part of the whole equation. The above
+	only talks about "real memory", that is, CPU memory (RAM).
 
 There is a completely different type of memory too, and that's the "shared
 memory" on the PCI or ISA bus. That's generally not RAM (although in the case
@@ -137,20 +149,22 @@ whatever, and there is only one way to access it: the readb/writeb and
 related functions. You should never take the address of such memory, because
 there is really nothing you can do with such an address: it's not
 conceptually in the same memory space as "real memory" at all, so you cannot
-just dereference a pointer. (Sadly, on x86 it _is_ in the same memory space,
+just dereference a pointer. (Sadly, on x86 it **is** in the same memory space,
 so on x86 it actually works to just deference a pointer, but it's not
 portable). 
 
-For such memory, you can do things like
+For such memory, you can do things like:
+
+ - reading::
 
- - reading:
 	/*
 	 * read first 32 bits from ISA memory at 0xC0000, aka
 	 * C000:0000 in DOS terms
 	 */
 	unsigned int signature = isa_readl(0xC0000);
 
- - remapping and writing:
+ - remapping and writing::
+
 	/*
 	 * remap framebuffer PCI memory area at 0xFC000000,
 	 * size 1MB, so that we can access it: We can directly
@@ -165,7 +179,8 @@ For such memory, you can do things like
 	/* unmap when we unload the driver */
 	iounmap(baseptr);
 
- - copying and clearing:
+ - copying and clearing::
+
 	/* get the 6-byte Ethernet address at ISA address E000:0040 */
 	memcpy_fromio(kernel_buffer, 0xE0040, 6);
 	/* write a packet to the driver */
@@ -181,10 +196,10 @@ happy that your driver works ;)
 Note that kernel versions 2.0.x (and earlier) mistakenly called the
 ioremap() function "vremap()".  ioremap() is the proper name, but I
 didn't think straight when I wrote it originally.  People who have to
-support both can do something like:
+support both can do something like::
  
 	/* support old naming silliness */
-	#if LINUX_VERSION_CODE < 0x020100                                     
+	#if LINUX_VERSION_CODE < 0x020100
 	#define ioremap vremap
 	#define iounmap vfree                                                     
 	#endif
@@ -196,13 +211,10 @@ And the above sounds worse than it really is.  Most real drivers really
 don't do all that complex things (or rather: the complexity is not so
 much in the actual IO accesses as in error handling and timeouts etc). 
 It's generally not hard to fix drivers, and in many cases the code
-actually looks better afterwards:
+actually looks better afterwards::
 
 	unsigned long signature = *(unsigned int *) 0xC0000;
 		vs
 	unsigned long signature = readl(0xC0000);
 
 I think the second version actually is more readable, no?
-
-		Linus
-
-- 
GitLab


From fdefdbca7e3295341aefa13036f1af7fd5401e1d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 08:25:51 -0300
Subject: [PATCH 1548/1958] cachetlb.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Adjust the title format;
- use :Author: for author's name;
- mark literals as such;
- use note and important notation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cachetlb.txt | 92 +++++++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 40 deletions(-)

diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 3f9f808b51198..6eb9d3f090cdf 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -1,7 +1,8 @@
-		Cache and TLB Flushing
-		     Under Linux
+==================================
+Cache and TLB Flushing Under Linux
+==================================
 
-	    David S. Miller <davem@redhat.com>
+:Author: David S. Miller <davem@redhat.com>
 
 This document describes the cache/tlb flushing interfaces called
 by the Linux VM subsystem.  It enumerates over each interface,
@@ -28,7 +29,7 @@ Therefore when software page table changes occur, the kernel will
 invoke one of the following flush methods _after_ the page table
 changes occur:
 
-1) void flush_tlb_all(void)
+1) ``void flush_tlb_all(void)``
 
 	The most severe flush of all.  After this interface runs,
 	any previous page table modification whatsoever will be
@@ -37,7 +38,7 @@ changes occur:
 	This is usually invoked when the kernel page tables are
 	changed, since such translations are "global" in nature.
 
-2) void flush_tlb_mm(struct mm_struct *mm)
+2) ``void flush_tlb_mm(struct mm_struct *mm)``
 
 	This interface flushes an entire user address space from
 	the TLB.  After running, this interface must make sure that
@@ -49,8 +50,8 @@ changes occur:
 	page table operations such as what happens during
 	fork, and exec.
 
-3) void flush_tlb_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
+3) ``void flush_tlb_range(struct vm_area_struct *vma,
+   unsigned long start, unsigned long end)``
 
 	Here we are flushing a specific range of (user) virtual
 	address translations from the TLB.  After running, this
@@ -69,7 +70,7 @@ changes occur:
 	call flush_tlb_page (see below) for each entry which may be
 	modified.
 
-4) void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+4) ``void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)``
 
 	This time we need to remove the PAGE_SIZE sized translation
 	from the TLB.  The 'vma' is the backing structure used by
@@ -87,8 +88,8 @@ changes occur:
 
 	This is used primarily during fault processing.
 
-5) void update_mmu_cache(struct vm_area_struct *vma,
-			 unsigned long address, pte_t *ptep)
+5) ``void update_mmu_cache(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep)``
 
 	At the end of every page fault, this routine is invoked to
 	tell the architecture specific code that a translation
@@ -100,7 +101,7 @@ changes occur:
 	translations for software managed TLB configurations.
 	The sparc64 port currently does this.
 
-6) void tlb_migrate_finish(struct mm_struct *mm)
+6) ``void tlb_migrate_finish(struct mm_struct *mm)``
 
 	This interface is called at the end of an explicit
 	process migration. This interface provides a hook
@@ -112,7 +113,7 @@ changes occur:
 
 Next, we have the cache flushing interfaces.  In general, when Linux
 is changing an existing virtual-->physical mapping to a new value,
-the sequence will be in one of the following forms:
+the sequence will be in one of the following forms::
 
 	1) flush_cache_mm(mm);
 	   change_all_page_tables_of(mm);
@@ -143,7 +144,7 @@ and have no dependency on translation information.
 
 Here are the routines, one by one:
 
-1) void flush_cache_mm(struct mm_struct *mm)
+1) ``void flush_cache_mm(struct mm_struct *mm)``
 
 	This interface flushes an entire user address space from
 	the caches.  That is, after running, there will be no cache
@@ -152,7 +153,7 @@ Here are the routines, one by one:
 	This interface is used to handle whole address space
 	page table operations such as what happens during exit and exec.
 
-2) void flush_cache_dup_mm(struct mm_struct *mm)
+2) ``void flush_cache_dup_mm(struct mm_struct *mm)``
 
 	This interface flushes an entire user address space from
 	the caches.  That is, after running, there will be no cache
@@ -164,8 +165,8 @@ Here are the routines, one by one:
 	This option is separate from flush_cache_mm to allow some
 	optimizations for VIPT caches.
 
-3) void flush_cache_range(struct vm_area_struct *vma,
-			  unsigned long start, unsigned long end)
+3) ``void flush_cache_range(struct vm_area_struct *vma,
+   unsigned long start, unsigned long end)``
 
 	Here we are flushing a specific range of (user) virtual
 	addresses from the cache.  After running, there will be no
@@ -181,7 +182,7 @@ Here are the routines, one by one:
 	call flush_cache_page (see below) for each entry which may be
 	modified.
 
-4) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
+4) ``void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)``
 
 	This time we need to remove a PAGE_SIZE sized range
 	from the cache.  The 'vma' is the backing structure used by
@@ -202,7 +203,7 @@ Here are the routines, one by one:
 
 	This is used primarily during fault processing.
 
-5) void flush_cache_kmaps(void)
+5) ``void flush_cache_kmaps(void)``
 
 	This routine need only be implemented if the platform utilizes
 	highmem.  It will be called right before all of the kmaps
@@ -214,8 +215,8 @@ Here are the routines, one by one:
 
 	This routing should be implemented in asm/highmem.h
 
-6) void flush_cache_vmap(unsigned long start, unsigned long end)
-   void flush_cache_vunmap(unsigned long start, unsigned long end)
+6) ``void flush_cache_vmap(unsigned long start, unsigned long end)``
+   ``void flush_cache_vunmap(unsigned long start, unsigned long end)``
 
 	Here in these two interfaces we are flushing a specific range
 	of (kernel) virtual addresses from the cache.  After running,
@@ -243,8 +244,10 @@ size).  This setting will force the SYSv IPC layer to only allow user
 processes to mmap shared memory at address which are a multiple of
 this value.
 
-NOTE: This does not fix shared mmaps, check out the sparc64 port for
-one way to solve this (in particular SPARC_FLAG_MMAPSHARED).
+.. note::
+
+  This does not fix shared mmaps, check out the sparc64 port for
+  one way to solve this (in particular SPARC_FLAG_MMAPSHARED).
 
 Next, you have to solve the D-cache aliasing issue for all
 other cases.  Please keep in mind that fact that, for a given page
@@ -255,8 +258,8 @@ physical page into its address space, by implication the D-cache
 aliasing problem has the potential to exist since the kernel already
 maps this page at its virtual address.
 
-  void copy_user_page(void *to, void *from, unsigned long addr, struct page *page)
-  void clear_user_page(void *to, unsigned long addr, struct page *page)
+  ``void copy_user_page(void *to, void *from, unsigned long addr, struct page *page)``
+  ``void clear_user_page(void *to, unsigned long addr, struct page *page)``
 
 	These two routines store data in user anonymous or COW
 	pages.  It allows a port to efficiently avoid D-cache alias
@@ -276,14 +279,16 @@ maps this page at its virtual address.
 	If D-cache aliasing is not an issue, these two routines may
 	simply call memcpy/memset directly and do nothing more.
 
-  void flush_dcache_page(struct page *page)
+  ``void flush_dcache_page(struct page *page)``
 
 	Any time the kernel writes to a page cache page, _OR_
 	the kernel is about to read from a page cache page and
 	user space shared/writable mappings of this page potentially
 	exist, this routine is called.
 
-	NOTE: This routine need only be called for page cache pages
+	.. note::
+
+	      This routine need only be called for page cache pages
 	      which can potentially ever be mapped into the address
 	      space of a user process.  So for example, VFS layer code
 	      handling vfs symlinks in the page cache need not call
@@ -322,18 +327,19 @@ maps this page at its virtual address.
 	made of this flag bit, and if set the flush is done and the flag
 	bit is cleared.
 
-	IMPORTANT NOTE: It is often important, if you defer the flush,
+	.. important::
+
+			It is often important, if you defer the flush,
 			that the actual flush occurs on the same CPU
 			as did the cpu stores into the page to make it
 			dirty.  Again, see sparc64 for examples of how
 			to deal with this.
 
-  void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
-                         unsigned long user_vaddr,
-                         void *dst, void *src, int len)
-  void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
-                           unsigned long user_vaddr,
-                           void *dst, void *src, int len)
+  ``void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+  unsigned long user_vaddr, void *dst, void *src, int len)``
+  ``void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+  unsigned long user_vaddr, void *dst, void *src, int len)``
+
 	When the kernel needs to copy arbitrary data in and out
 	of arbitrary user pages (f.e. for ptrace()) it will use
 	these two routines.
@@ -344,8 +350,9 @@ maps this page at its virtual address.
 	likely that you will need to flush the instruction cache
 	for copy_to_user_page().
 
-  void flush_anon_page(struct vm_area_struct *vma, struct page *page,
-                       unsigned long vmaddr)
+  ``void flush_anon_page(struct vm_area_struct *vma, struct page *page,
+  unsigned long vmaddr)``
+
   	When the kernel needs to access the contents of an anonymous
 	page, it calls this function (currently only
 	get_user_pages()).  Note: flush_dcache_page() deliberately
@@ -354,7 +361,8 @@ maps this page at its virtual address.
 	architectures).  For incoherent architectures, it should flush
 	the cache of the page at vmaddr.
 
-  void flush_kernel_dcache_page(struct page *page)
+  ``void flush_kernel_dcache_page(struct page *page)``
+
 	When the kernel needs to modify a user page is has obtained
 	with kmap, it calls this function after all modifications are
 	complete (but before kunmapping it) to bring the underlying
@@ -366,14 +374,16 @@ maps this page at its virtual address.
 	the kernel cache for page (using page_address(page)).
 
 
-  void flush_icache_range(unsigned long start, unsigned long end)
+  ``void flush_icache_range(unsigned long start, unsigned long end)``
+
   	When the kernel stores into addresses that it will execute
 	out of (eg when loading modules), this function is called.
 
 	If the icache does not snoop stores then this routine will need
 	to flush it.
 
-  void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+  ``void flush_icache_page(struct vm_area_struct *vma, struct page *page)``
+
 	All the functionality of flush_icache_page can be implemented in
 	flush_dcache_page and update_mmu_cache. In the future, the hope
 	is to remove this interface completely.
@@ -387,7 +397,8 @@ the kernel trying to do I/O to vmap areas must manually manage
 coherency.  It must do this by flushing the vmap range before doing
 I/O and invalidating it after the I/O returns.
 
-  void flush_kernel_vmap_range(void *vaddr, int size)
+  ``void flush_kernel_vmap_range(void *vaddr, int size)``
+
        flushes the kernel cache for a given virtual address range in
        the vmap area.  This is to make sure that any data the kernel
        modified in the vmap range is made visible to the physical
@@ -395,7 +406,8 @@ I/O and invalidating it after the I/O returns.
        Note that this API does *not* also flush the offset map alias
        of the area.
 
-  void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates
+  ``void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates``
+
        the cache for a given virtual address range in the vmap area
        which prevents the processor from making the cache stale by
        speculatively reading data while the I/O was occurring to the
-- 
GitLab


From f1d8b71c5fefea29ef4ea856e2192c07446e0c98 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 08:55:25 -0300
Subject: [PATCH 1549/1958] circular-buffers.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Mark titles with ReST notation;
- comment the contents table;
- Use :Author: tag for authorship;
- mark literal blocks as such;
- use valid numbered list markups;
- Don't capitalize titles.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/circular-buffers.txt | 51 ++++++++++++++----------------
 1 file changed, 23 insertions(+), 28 deletions(-)

diff --git a/Documentation/circular-buffers.txt b/Documentation/circular-buffers.txt
index 4a824d232472d..d4628174b7c57 100644
--- a/Documentation/circular-buffers.txt
+++ b/Documentation/circular-buffers.txt
@@ -1,9 +1,9 @@
-			       ================
-			       CIRCULAR BUFFERS
-			       ================
+================
+Circular Buffers
+================
 
-By: David Howells <dhowells@redhat.com>
-    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+:Author: David Howells <dhowells@redhat.com>
+:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 
 
 Linux provides a number of features that can be used to implement circular
@@ -20,7 +20,7 @@ producer and just one consumer.  It is possible to handle multiple producers by
 serialising them, and to handle multiple consumers by serialising them.
 
 
-Contents:
+.. Contents:
 
  (*) What is a circular buffer?
 
@@ -31,8 +31,8 @@ Contents:
      - The consumer.
 
 
-==========================
-WHAT IS A CIRCULAR BUFFER?
+
+What is a circular buffer?
 ==========================
 
 First of all, what is a circular buffer?  A circular buffer is a buffer of
@@ -60,9 +60,7 @@ buffer, provided that neither index overtakes the other.  The implementer must
 be careful, however, as a region more than one unit in size may wrap the end of
 the buffer and be broken into two segments.
 
-
-============================
-MEASURING POWER-OF-2 BUFFERS
+Measuring power-of-2 buffers
 ============================
 
 Calculation of the occupancy or the remaining capacity of an arbitrarily sized
@@ -71,13 +69,13 @@ modulus (divide) instruction.  However, if the buffer is of a power-of-2 size,
 then a much quicker bitwise-AND instruction can be used instead.
 
 Linux provides a set of macros for handling power-of-2 circular buffers.  These
-can be made use of by:
+can be made use of by::
 
 	#include <linux/circ_buf.h>
 
 The macros are:
 
- (*) Measure the remaining capacity of a buffer:
+ (#) Measure the remaining capacity of a buffer::
 
 	CIRC_SPACE(head_index, tail_index, buffer_size);
 
@@ -85,7 +83,7 @@ The macros are:
      can be inserted.
 
 
- (*) Measure the maximum consecutive immediate space in a buffer:
+ (#) Measure the maximum consecutive immediate space in a buffer::
 
 	CIRC_SPACE_TO_END(head_index, tail_index, buffer_size);
 
@@ -94,14 +92,14 @@ The macros are:
      beginning of the buffer.
 
 
- (*) Measure the occupancy of a buffer:
+ (#) Measure the occupancy of a buffer::
 
 	CIRC_CNT(head_index, tail_index, buffer_size);
 
      This returns the number of items currently occupying a buffer[2].
 
 
- (*) Measure the non-wrapping occupancy of a buffer:
+ (#) Measure the non-wrapping occupancy of a buffer::
 
 	CIRC_CNT_TO_END(head_index, tail_index, buffer_size);
 
@@ -112,7 +110,7 @@ The macros are:
 Each of these macros will nominally return a value between 0 and buffer_size-1,
 however:
 
- [1] CIRC_SPACE*() are intended to be used in the producer.  To the producer
+ (1) CIRC_SPACE*() are intended to be used in the producer.  To the producer
      they will return a lower bound as the producer controls the head index,
      but the consumer may still be depleting the buffer on another CPU and
      moving the tail index.
@@ -120,7 +118,7 @@ however:
      To the consumer it will show an upper bound as the producer may be busy
      depleting the space.
 
- [2] CIRC_CNT*() are intended to be used in the consumer.  To the consumer they
+ (2) CIRC_CNT*() are intended to be used in the consumer.  To the consumer they
      will return a lower bound as the consumer controls the tail index, but the
      producer may still be filling the buffer on another CPU and moving the
      head index.
@@ -128,14 +126,12 @@ however:
      To the producer it will show an upper bound as the consumer may be busy
      emptying the buffer.
 
- [3] To a third party, the order in which the writes to the indices by the
+ (3) To a third party, the order in which the writes to the indices by the
      producer and consumer become visible cannot be guaranteed as they are
      independent and may be made on different CPUs - so the result in such a
      situation will merely be a guess, and may even be negative.
 
-
-===========================================
-USING MEMORY BARRIERS WITH CIRCULAR BUFFERS
+Using memory barriers with circular buffers
 ===========================================
 
 By using memory barriers in conjunction with circular buffers, you can avoid
@@ -152,10 +148,10 @@ time, and only one thing should be emptying a buffer at any one time, but the
 two sides can operate simultaneously.
 
 
-THE PRODUCER
+The producer
 ------------
 
-The producer will look something like this:
+The producer will look something like this::
 
 	spin_lock(&producer_lock);
 
@@ -193,10 +189,10 @@ ordering between the read of the index indicating that the consumer has
 vacated a given element and the write by the producer to that same element.
 
 
-THE CONSUMER
+The Consumer
 ------------
 
-The consumer will look something like this:
+The consumer will look something like this::
 
 	spin_lock(&consumer_lock);
 
@@ -235,8 +231,7 @@ prevents the compiler from tearing the store, and enforces ordering
 against previous accesses.
 
 
-===============
-FURTHER READING
+Further reading
 ===============
 
 See also Documentation/memory-barriers.txt for a description of Linux's memory
-- 
GitLab


From f68ac62d11e760de68a55b05c8bbc2c47f275ee9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 09:15:12 -0300
Subject: [PATCH 1550/1958] clk.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Use section/title markups;
- Use :Author: for authorship;
- Mark literals and literal blocks;
- Mark tables;
- Use ReST notation for footnotes.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/clk.txt | 189 +++++++++++++++++++++++-------------------
 1 file changed, 106 insertions(+), 83 deletions(-)

diff --git a/Documentation/clk.txt b/Documentation/clk.txt
index 22f026aa2f342..be909ed45970b 100644
--- a/Documentation/clk.txt
+++ b/Documentation/clk.txt
@@ -1,12 +1,16 @@
-		The Common Clk Framework
-		Mike Turquette <mturquette@ti.com>
+========================
+The Common Clk Framework
+========================
+
+:Author: Mike Turquette <mturquette@ti.com>
 
 This document endeavours to explain the common clk framework details,
 and how to port a platform over to this framework.  It is not yet a
 detailed explanation of the clock api in include/linux/clk.h, but
 perhaps someday it will include that information.
 
-	Part 1 - introduction and interface split
+Introduction and interface split
+================================
 
 The common clk framework is an interface to control the clock nodes
 available on various devices today.  This may come in the form of clock
@@ -35,10 +39,11 @@ is defined in struct clk_foo and pointed to within struct clk_core.  This
 allows for easy navigation between the two discrete halves of the common
 clock interface.
 
-	Part 2 - common data structures and api
+Common data structures and api
+==============================
 
 Below is the common struct clk_core definition from
-drivers/clk/clk.c, modified for brevity:
+drivers/clk/clk.c, modified for brevity::
 
 	struct clk_core {
 		const char		*name;
@@ -59,7 +64,7 @@ struct clk.  That api is documented in include/linux/clk.h.
 
 Platforms and devices utilizing the common struct clk_core use the struct
 clk_ops pointer in struct clk_core to perform the hardware-specific parts of
-the operations defined in clk-provider.h:
+the operations defined in clk-provider.h::
 
 	struct clk_ops {
 		int		(*prepare)(struct clk_hw *hw);
@@ -95,19 +100,20 @@ the operations defined in clk-provider.h:
 					      struct dentry *dentry);
 	};
 
-	Part 3 - hardware clk implementations
+Hardware clk implementations
+============================
 
 The strength of the common struct clk_core comes from its .ops and .hw pointers
 which abstract the details of struct clk from the hardware-specific bits, and
 vice versa.  To illustrate consider the simple gateable clk implementation in
-drivers/clk/clk-gate.c:
+drivers/clk/clk-gate.c::
 
-struct clk_gate {
-	struct clk_hw	hw;
-	void __iomem    *reg;
-	u8              bit_idx;
-	...
-};
+	struct clk_gate {
+		struct clk_hw	hw;
+		void __iomem    *reg;
+		u8              bit_idx;
+		...
+	};
 
 struct clk_gate contains struct clk_hw hw as well as hardware-specific
 knowledge about which register and bit controls this clk's gating.
@@ -115,7 +121,7 @@ Nothing about clock topology or accounting, such as enable_count or
 notifier_count, is needed here.  That is all handled by the common
 framework code and struct clk_core.
 
-Let's walk through enabling this clk from driver code:
+Let's walk through enabling this clk from driver code::
 
 	struct clk *clk;
 	clk = clk_get(NULL, "my_gateable_clk");
@@ -123,70 +129,71 @@ Let's walk through enabling this clk from driver code:
 	clk_prepare(clk);
 	clk_enable(clk);
 
-The call graph for clk_enable is very simple:
+The call graph for clk_enable is very simple::
 
-clk_enable(clk);
-	clk->ops->enable(clk->hw);
-	[resolves to...]
-		clk_gate_enable(hw);
-		[resolves struct clk gate with to_clk_gate(hw)]
-			clk_gate_set_bit(gate);
+	clk_enable(clk);
+		clk->ops->enable(clk->hw);
+		[resolves to...]
+			clk_gate_enable(hw);
+			[resolves struct clk gate with to_clk_gate(hw)]
+				clk_gate_set_bit(gate);
 
-And the definition of clk_gate_set_bit:
+And the definition of clk_gate_set_bit::
 
-static void clk_gate_set_bit(struct clk_gate *gate)
-{
-	u32 reg;
+	static void clk_gate_set_bit(struct clk_gate *gate)
+	{
+		u32 reg;
 
-	reg = __raw_readl(gate->reg);
-	reg |= BIT(gate->bit_idx);
-	writel(reg, gate->reg);
-}
+		reg = __raw_readl(gate->reg);
+		reg |= BIT(gate->bit_idx);
+		writel(reg, gate->reg);
+	}
 
-Note that to_clk_gate is defined as:
+Note that to_clk_gate is defined as::
 
-#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw)
+	#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw)
 
 This pattern of abstraction is used for every clock hardware
 representation.
 
-	Part 4 - supporting your own clk hardware
+Supporting your own clk hardware
+================================
 
 When implementing support for a new type of clock it is only necessary to
-include the following header:
+include the following header::
 
-#include <linux/clk-provider.h>
+	#include <linux/clk-provider.h>
 
 To construct a clk hardware structure for your platform you must define
-the following:
+the following::
 
-struct clk_foo {
-	struct clk_hw hw;
-	... hardware specific data goes here ...
-};
+	struct clk_foo {
+		struct clk_hw hw;
+		... hardware specific data goes here ...
+	};
 
 To take advantage of your data you'll need to support valid operations
-for your clk:
+for your clk::
 
-struct clk_ops clk_foo_ops {
-	.enable		= &clk_foo_enable;
-	.disable	= &clk_foo_disable;
-};
+	struct clk_ops clk_foo_ops {
+		.enable		= &clk_foo_enable;
+		.disable	= &clk_foo_disable;
+	};
 
-Implement the above functions using container_of:
+Implement the above functions using container_of::
 
-#define to_clk_foo(_hw) container_of(_hw, struct clk_foo, hw)
+	#define to_clk_foo(_hw) container_of(_hw, struct clk_foo, hw)
 
-int clk_foo_enable(struct clk_hw *hw)
-{
-	struct clk_foo *foo;
+	int clk_foo_enable(struct clk_hw *hw)
+	{
+		struct clk_foo *foo;
 
-	foo = to_clk_foo(hw);
+		foo = to_clk_foo(hw);
 
-	... perform magic on foo ...
+		... perform magic on foo ...
 
-	return 0;
-};
+		return 0;
+	};
 
 Below is a matrix detailing which clk_ops are mandatory based upon the
 hardware capabilities of that clock.  A cell marked as "y" means
@@ -194,41 +201,56 @@ mandatory, a cell marked as "n" implies that either including that
 callback is invalid or otherwise unnecessary.  Empty cells are either
 optional or must be evaluated on a case-by-case basis.
 
-                              clock hardware characteristics
-                -----------------------------------------------------------
-                | gate | change rate | single parent | multiplexer | root |
-                |------|-------------|---------------|-------------|------|
-.prepare        |      |             |               |             |      |
-.unprepare      |      |             |               |             |      |
-                |      |             |               |             |      |
-.enable         | y    |             |               |             |      |
-.disable        | y    |             |               |             |      |
-.is_enabled     | y    |             |               |             |      |
-                |      |             |               |             |      |
-.recalc_rate    |      | y           |               |             |      |
-.round_rate     |      | y [1]       |               |             |      |
-.determine_rate |      | y [1]       |               |             |      |
-.set_rate       |      | y           |               |             |      |
-                |      |             |               |             |      |
-.set_parent     |      |             | n             | y           | n    |
-.get_parent     |      |             | n             | y           | n    |
-                |      |             |               |             |      |
-.recalc_accuracy|      |             |               |             |      |
-                |      |             |               |             |      |
-.init           |      |             |               |             |      |
-                -----------------------------------------------------------
-[1] either one of round_rate or determine_rate is required.
+.. table:: clock hardware characteristics
+
+   +----------------+------+-------------+---------------+-------------+------+
+   |                | gate | change rate | single parent | multiplexer | root |
+   +================+======+=============+===============+=============+======+
+   |.prepare        |      |             |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   |.unprepare      |      |             |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   +----------------+------+-------------+---------------+-------------+------+
+   |.enable         | y    |             |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   |.disable        | y    |             |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   |.is_enabled     | y    |             |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   +----------------+------+-------------+---------------+-------------+------+
+   |.recalc_rate    |      | y           |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   |.round_rate     |      | y [1]_      |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   |.determine_rate |      | y [1]_      |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   |.set_rate       |      | y           |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   +----------------+------+-------------+---------------+-------------+------+
+   |.set_parent     |      |             | n             | y           | n    |
+   +----------------+------+-------------+---------------+-------------+------+
+   |.get_parent     |      |             | n             | y           | n    |
+   +----------------+------+-------------+---------------+-------------+------+
+   +----------------+------+-------------+---------------+-------------+------+
+   |.recalc_accuracy|      |             |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+   +----------------+------+-------------+---------------+-------------+------+
+   |.init           |      |             |               |             |      |
+   +----------------+------+-------------+---------------+-------------+------+
+
+.. [1] either one of round_rate or determine_rate is required.
 
 Finally, register your clock at run-time with a hardware-specific
 registration function.  This function simply populates struct clk_foo's
 data and then passes the common struct clk parameters to the framework
-with a call to:
+with a call to::
 
-clk_register(...)
+	clk_register(...)
 
-See the basic clock types in drivers/clk/clk-*.c for examples.
+See the basic clock types in ``drivers/clk/clk-*.c`` for examples.
 
-	Part 5 - Disabling clock gating of unused clocks
+Disabling clock gating of unused clocks
+=======================================
 
 Sometimes during development it can be useful to be able to bypass the
 default disabling of unused clocks. For example, if drivers aren't enabling
@@ -239,7 +261,8 @@ are sorted out.
 To bypass this disabling, include "clk_ignore_unused" in the bootargs to the
 kernel.
 
-	Part 6 - Locking
+Locking
+=======
 
 The common clock framework uses two global locks, the prepare lock and the
 enable lock.
-- 
GitLab


From 09338fb0f4c333a99ccc5bf551042869e07eeec4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 09:42:08 -0300
Subject: [PATCH 1551/1958] cpu-load: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark literals;
- Adjust document title;
- Use a list for references.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cpu-load.txt | 131 +++++++++++++++++++------------------
 1 file changed, 66 insertions(+), 65 deletions(-)

diff --git a/Documentation/cpu-load.txt b/Documentation/cpu-load.txt
index 287224e57cfc5..2d01ce43d2a2c 100644
--- a/Documentation/cpu-load.txt
+++ b/Documentation/cpu-load.txt
@@ -1,9 +1,10 @@
+========
 CPU load
---------
+========
 
-Linux exports various bits of information via `/proc/stat' and
-`/proc/uptime' that userland tools, such as top(1), use to calculate
-the average time system spent in a particular state, for example:
+Linux exports various bits of information via ``/proc/stat`` and
+``/proc/uptime`` that userland tools, such as top(1), use to calculate
+the average time system spent in a particular state, for example::
 
     $ iostat
     Linux 2.6.18.3-exp (linmac)     02/20/2007
@@ -17,7 +18,7 @@ Here the system thinks that over the default sampling period the
 system spent 10.01% of the time doing work in user space, 2.92% in the
 kernel, and was overall 81.63% of the time idle.
 
-In most cases the `/proc/stat' information reflects the reality quite
+In most cases the ``/proc/stat``	 information reflects the reality quite
 closely, however due to the nature of how/when the kernel collects
 this data sometimes it can not be trusted at all.
 
@@ -33,78 +34,78 @@ Example
 -------
 
 If we imagine the system with one task that periodically burns cycles
-in the following manner:
+in the following manner::
 
- time line between two timer interrupts
-|--------------------------------------|
- ^                                    ^
- |_ something begins working          |
-                                      |_ something goes to sleep
-                                     (only to be awaken quite soon)
+     time line between two timer interrupts
+    |--------------------------------------|
+     ^                                    ^
+     |_ something begins working          |
+                                          |_ something goes to sleep
+                                         (only to be awaken quite soon)
 
 In the above situation the system will be 0% loaded according to the
-`/proc/stat' (since the timer interrupt will always happen when the
+``/proc/stat`` (since the timer interrupt will always happen when the
 system is executing the idle handler), but in reality the load is
 closer to 99%.
 
 One can imagine many more situations where this behavior of the kernel
-will lead to quite erratic information inside `/proc/stat'.
-
-
-/* gcc -o hog smallhog.c */
-#include <time.h>
-#include <limits.h>
-#include <signal.h>
-#include <sys/time.h>
-#define HIST 10
-
-static volatile sig_atomic_t stop;
-
-static void sighandler (int signr)
-{
-     (void) signr;
-     stop = 1;
-}
-static unsigned long hog (unsigned long niters)
-{
-     stop = 0;
-     while (!stop && --niters);
-     return niters;
-}
-int main (void)
-{
-     int i;
-     struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
-                             .it_value = { .tv_sec = 0, .tv_usec = 1 } };
-     sigset_t set;
-     unsigned long v[HIST];
-     double tmp = 0.0;
-     unsigned long n;
-     signal (SIGALRM, &sighandler);
-     setitimer (ITIMER_REAL, &it, NULL);
-
-     hog (ULONG_MAX);
-     for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
-     for (i = 0; i < HIST; ++i) tmp += v[i];
-     tmp /= HIST;
-     n = tmp - (tmp / 3.0);
-
-     sigemptyset (&set);
-     sigaddset (&set, SIGALRM);
-
-     for (;;) {
-         hog (n);
-         sigwait (&set, &i);
-     }
-     return 0;
-}
+will lead to quite erratic information inside ``/proc/stat``::
+
+
+	/* gcc -o hog smallhog.c */
+	#include <time.h>
+	#include <limits.h>
+	#include <signal.h>
+	#include <sys/time.h>
+	#define HIST 10
+
+	static volatile sig_atomic_t stop;
+
+	static void sighandler (int signr)
+	{
+	(void) signr;
+	stop = 1;
+	}
+	static unsigned long hog (unsigned long niters)
+	{
+	stop = 0;
+	while (!stop && --niters);
+	return niters;
+	}
+	int main (void)
+	{
+	int i;
+	struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
+				.it_value = { .tv_sec = 0, .tv_usec = 1 } };
+	sigset_t set;
+	unsigned long v[HIST];
+	double tmp = 0.0;
+	unsigned long n;
+	signal (SIGALRM, &sighandler);
+	setitimer (ITIMER_REAL, &it, NULL);
+
+	hog (ULONG_MAX);
+	for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
+	for (i = 0; i < HIST; ++i) tmp += v[i];
+	tmp /= HIST;
+	n = tmp - (tmp / 3.0);
+
+	sigemptyset (&set);
+	sigaddset (&set, SIGALRM);
+
+	for (;;) {
+		hog (n);
+		sigwait (&set, &i);
+	}
+	return 0;
+	}
 
 
 References
 ----------
 
-http://lkml.org/lkml/2007/2/12/6
-Documentation/filesystems/proc.txt (1.8)
+- http://lkml.org/lkml/2007/2/12/6
+- Documentation/filesystems/proc.txt (1.8)
 
 
 Thanks
-- 
GitLab


From e8cb6f1edc57e5c729ce4e37a54a5f84fac0c6a4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 09:52:01 -0300
Subject: [PATCH 1552/1958] cputopology.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add a title for the document;
- mark literals;
- use table markup for existing table.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cputopology.txt | 37 +++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 127c9d8c2174a..c6e7e9196a8b4 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -1,3 +1,6 @@
+===========================================
+How CPU topology info is exported via sysfs
+===========================================
 
 Export CPU topology info via sysfs. Items (attributes) are similar
 to /proc/cpuinfo output of some architectures:
@@ -75,24 +78,26 @@ CONFIG_SCHED_BOOK and CONFIG_DRAWER are currently only used on s390, where
 they reflect the cpu and cache hierarchy.
 
 For an architecture to support this feature, it must define some of
-these macros in include/asm-XXX/topology.h:
-#define topology_physical_package_id(cpu)
-#define topology_core_id(cpu)
-#define topology_book_id(cpu)
-#define topology_drawer_id(cpu)
-#define topology_sibling_cpumask(cpu)
-#define topology_core_cpumask(cpu)
-#define topology_book_cpumask(cpu)
-#define topology_drawer_cpumask(cpu)
-
-The type of **_id macros is int.
-The type of **_cpumask macros is (const) struct cpumask *. The latter
-correspond with appropriate **_siblings sysfs attributes (except for
+these macros in include/asm-XXX/topology.h::
+
+	#define topology_physical_package_id(cpu)
+	#define topology_core_id(cpu)
+	#define topology_book_id(cpu)
+	#define topology_drawer_id(cpu)
+	#define topology_sibling_cpumask(cpu)
+	#define topology_core_cpumask(cpu)
+	#define topology_book_cpumask(cpu)
+	#define topology_drawer_cpumask(cpu)
+
+The type of ``**_id macros`` is int.
+The type of ``**_cpumask macros`` is ``(const) struct cpumask *``. The latter
+correspond with appropriate ``**_siblings`` sysfs attributes (except for
 topology_sibling_cpumask() which corresponds with thread_siblings).
 
 To be consistent on all architectures, include/linux/topology.h
 provides default definitions for any of the above macros that are
 not defined by include/asm-XXX/topology.h:
+
 1) physical_package_id: -1
 2) core_id: 0
 3) sibling_cpumask: just the given CPU
@@ -107,6 +112,7 @@ Additionally, CPU topology information is provided under
 /sys/devices/system/cpu and includes these files.  The internal
 source for the output is in brackets ("[]").
 
+    =========== ==========================================================
     kernel_max: the maximum CPU index allowed by the kernel configuration.
 		[NR_CPUS-1]
 
@@ -122,6 +128,7 @@ source for the output is in brackets ("[]").
 
     present:	CPUs that have been identified as being present in the
 		system. [cpu_present_mask]
+    =========== ==========================================================
 
 The format for the above output is compatible with cpulist_parse()
 [see <linux/cpumask.h>].  Some examples follow.
@@ -129,7 +136,7 @@ The format for the above output is compatible with cpulist_parse()
 In this example, there are 64 CPUs in the system but cpus 32-63 exceed
 the kernel max which is limited to 0..31 by the NR_CPUS config option
 being 32.  Note also that CPUs 2 and 4-31 are not online but could be
-brought online as they are both present and possible.
+brought online as they are both present and possible::
 
      kernel_max: 31
         offline: 2,4-31,32-63
@@ -140,7 +147,7 @@ brought online as they are both present and possible.
 In this example, the NR_CPUS config option is 128, but the kernel was
 started with possible_cpus=144.  There are 4 CPUs in the system and cpu2
 was manually taken offline (and is the only CPU that can be brought
-online.)
+online.)::
 
      kernel_max: 127
         offline: 2,4-127,128-143
-- 
GitLab


From 2e4e6f30f7ff84632e893786fab5e8ff44ff3e03 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 09:56:02 -0300
Subject: [PATCH 1553/1958] crc32.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add a title for the document;
- Mark literal blocks.

While here, replace a comma by a dot at the end of a paragraph.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/crc32.txt | 75 ++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/Documentation/crc32.txt b/Documentation/crc32.txt
index a08a7dd9d6255..8a6860f33b4e7 100644
--- a/Documentation/crc32.txt
+++ b/Documentation/crc32.txt
@@ -1,4 +1,6 @@
-A brief CRC tutorial.
+=================================
+brief tutorial on CRC computation
+=================================
 
 A CRC is a long-division remainder.  You add the CRC to the message,
 and the whole thing (message+CRC) is a multiple of the given
@@ -8,7 +10,8 @@ remainder computed on the message+CRC is 0.  This latter approach
 is used by a lot of hardware implementations, and is why so many
 protocols put the end-of-frame flag after the CRC.
 
-It's actually the same long division you learned in school, except that
+It's actually the same long division you learned in school, except that:
+
 - We're working in binary, so the digits are only 0 and 1, and
 - When dividing polynomials, there are no carries.  Rather than add and
   subtract, we just xor.  Thus, we tend to get a bit sloppy about
@@ -40,11 +43,12 @@ throw the quotient bit away, but subtract the appropriate multiple of
 the polynomial from the remainder and we're back to where we started,
 ready to process the next bit.
 
-A big-endian CRC written this way would be coded like:
-for (i = 0; i < input_bits; i++) {
-	multiple = remainder & 0x80000000 ? CRCPOLY : 0;
-	remainder = (remainder << 1 | next_input_bit()) ^ multiple;
-}
+A big-endian CRC written this way would be coded like::
+
+	for (i = 0; i < input_bits; i++) {
+		multiple = remainder & 0x80000000 ? CRCPOLY : 0;
+		remainder = (remainder << 1 | next_input_bit()) ^ multiple;
+	}
 
 Notice how, to get at bit 32 of the shifted remainder, we look
 at bit 31 of the remainder *before* shifting it.
@@ -54,25 +58,26 @@ the remainder don't actually affect any decision-making until
 32 bits later.  Thus, the first 32 cycles of this are pretty boring.
 Also, to add the CRC to a message, we need a 32-bit-long hole for it at
 the end, so we have to add 32 extra cycles shifting in zeros at the
-end of every message,
+end of every message.
 
 These details lead to a standard trick: rearrange merging in the
 next_input_bit() until the moment it's needed.  Then the first 32 cycles
 can be precomputed, and merging in the final 32 zero bits to make room
-for the CRC can be skipped entirely.  This changes the code to:
+for the CRC can be skipped entirely.  This changes the code to::
 
-for (i = 0; i < input_bits; i++) {
-	remainder ^= next_input_bit() << 31;
-	multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
-	remainder = (remainder << 1) ^ multiple;
-}
+	for (i = 0; i < input_bits; i++) {
+		remainder ^= next_input_bit() << 31;
+		multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+		remainder = (remainder << 1) ^ multiple;
+	}
 
-With this optimization, the little-endian code is particularly simple:
-for (i = 0; i < input_bits; i++) {
-	remainder ^= next_input_bit();
-	multiple = (remainder & 1) ? CRCPOLY : 0;
-	remainder = (remainder >> 1) ^ multiple;
-}
+With this optimization, the little-endian code is particularly simple::
+
+	for (i = 0; i < input_bits; i++) {
+		remainder ^= next_input_bit();
+		multiple = (remainder & 1) ? CRCPOLY : 0;
+		remainder = (remainder >> 1) ^ multiple;
+	}
 
 The most significant coefficient of the remainder polynomial is stored
 in the least significant bit of the binary "remainder" variable.
@@ -81,23 +86,25 @@ be bit-reversed) and next_input_bit().
 
 As long as next_input_bit is returning the bits in a sensible order, we don't
 *have* to wait until the last possible moment to merge in additional bits.
-We can do it 8 bits at a time rather than 1 bit at a time:
-for (i = 0; i < input_bytes; i++) {
-	remainder ^= next_input_byte() << 24;
-	for (j = 0; j < 8; j++) {
-		multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
-		remainder = (remainder << 1) ^ multiple;
+We can do it 8 bits at a time rather than 1 bit at a time::
+
+	for (i = 0; i < input_bytes; i++) {
+		remainder ^= next_input_byte() << 24;
+		for (j = 0; j < 8; j++) {
+			multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+			remainder = (remainder << 1) ^ multiple;
+		}
 	}
-}
 
-Or in little-endian:
-for (i = 0; i < input_bytes; i++) {
-	remainder ^= next_input_byte();
-	for (j = 0; j < 8; j++) {
-		multiple = (remainder & 1) ? CRCPOLY : 0;
-		remainder = (remainder >> 1) ^ multiple;
+Or in little-endian::
+
+	for (i = 0; i < input_bytes; i++) {
+		remainder ^= next_input_byte();
+		for (j = 0; j < 8; j++) {
+			multiple = (remainder & 1) ? CRCPOLY : 0;
+			remainder = (remainder >> 1) ^ multiple;
+		}
 	}
-}
 
 If the input is a multiple of 32 bits, you can even XOR in a 32-bit
 word at a time and increase the inner loop count to 32.
-- 
GitLab


From 7effa5b0e8feab19e723db4d072d8c65d4cdafa3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Wed, 17 May 2017 11:59:38 -0300
Subject: [PATCH 1554/1958] dcdbas.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Mark titles;
- Mark literal blocks;
- Mark table.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dcdbas.txt | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/Documentation/dcdbas.txt b/Documentation/dcdbas.txt
index e1c52e2dc3616..309cc57a7c1c8 100644
--- a/Documentation/dcdbas.txt
+++ b/Documentation/dcdbas.txt
@@ -1,4 +1,9 @@
+===================================
+Dell Systems Management Base Driver
+===================================
+
 Overview
+========
 
 The Dell Systems Management Base Driver provides a sysfs interface for
 systems management software such as Dell OpenManage to perform system
@@ -17,6 +22,7 @@ more information about the libsmbios project.
 
 
 System Management Interrupt
+===========================
 
 On some Dell systems, systems management software must access certain
 management information via a system management interrupt (SMI).  The SMI data
@@ -24,12 +30,12 @@ buffer must reside in 32-bit address space, and the physical address of the
 buffer is required for the SMI.  The driver maintains the memory required for
 the SMI and provides a way for the application to generate the SMI.
 The driver creates the following sysfs entries for systems management
-software to perform these system management interrupts:
+software to perform these system management interrupts::
 
-/sys/devices/platform/dcdbas/smi_data
-/sys/devices/platform/dcdbas/smi_data_buf_phys_addr
-/sys/devices/platform/dcdbas/smi_data_buf_size
-/sys/devices/platform/dcdbas/smi_request
+	/sys/devices/platform/dcdbas/smi_data
+	/sys/devices/platform/dcdbas/smi_data_buf_phys_addr
+	/sys/devices/platform/dcdbas/smi_data_buf_size
+	/sys/devices/platform/dcdbas/smi_request
 
 Systems management software must perform the following steps to execute
 a SMI using this driver:
@@ -43,6 +49,7 @@ a SMI using this driver:
 
 
 Host Control Action
+===================
 
 Dell OpenManage supports a host control feature that allows the administrator
 to perform a power cycle or power off of the system after the OS has finished
@@ -69,12 +76,14 @@ power off host control action using this driver:
 
 
 Host Control SMI Type
+=====================
 
 The following table shows the value to write to host_control_smi_type to
 perform a power cycle or power off host control action:
 
+=================== =====================
 PowerEdge System    Host Control SMI Type
-----------------    ---------------------
+=================== =====================
       300             HC_SMITYPE_TYPE1
      1300             HC_SMITYPE_TYPE1
      1400             HC_SMITYPE_TYPE2
@@ -87,5 +96,4 @@ PowerEdge System    Host Control SMI Type
      1655MC           HC_SMITYPE_TYPE2
       700             HC_SMITYPE_TYPE3
       750             HC_SMITYPE_TYPE3
-
-
+=================== =====================
-- 
GitLab


From a2fbbcea7bb9b1b6d29a24b1e74a6cffd6dd44e6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 10:04:21 -0300
Subject: [PATCH 1555/1958] digsig.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- comment the internal index;
- use the proper markups for titles;
- mark literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/digsig.txt | 131 ++++++++++++++++++++-------------------
 1 file changed, 68 insertions(+), 63 deletions(-)

diff --git a/Documentation/digsig.txt b/Documentation/digsig.txt
index 3f682889068bf..f6a8902d3ef77 100644
--- a/Documentation/digsig.txt
+++ b/Documentation/digsig.txt
@@ -1,13 +1,20 @@
+==================================
 Digital Signature Verification API
+==================================
 
-CONTENTS
+:Author: Dmitry Kasatkin
+:Date: 06.10.2011
 
-1. Introduction
-2. API
-3. User-space utilities
 
+.. CONTENTS
 
-1. Introduction
+   1. Introduction
+   2. API
+   3. User-space utilities
+
+
+Introduction
+============
 
 Digital signature verification API provides a method to verify digital signature.
 Currently digital signatures are used by the IMA/EVM integrity protection subsystem.
@@ -17,25 +24,25 @@ GnuPG multi-precision integers (MPI) library. The kernel port provides
 memory allocation errors handling, has been refactored according to kernel
 coding style, and checkpatch.pl reported errors and warnings have been fixed.
 
-Public key and signature consist of header and MPIs.
-
-struct pubkey_hdr {
-	uint8_t		version;	/* key format version */
-	time_t		timestamp;	/* key made, always 0 for now */
-	uint8_t		algo;
-	uint8_t		nmpi;
-	char		mpi[0];
-} __packed;
-
-struct signature_hdr {
-	uint8_t		version;	/* signature format version */
-	time_t		timestamp;	/* signature made */
-	uint8_t		algo;
-	uint8_t		hash;
-	uint8_t		keyid[8];
-	uint8_t		nmpi;
-	char		mpi[0];
-} __packed;
+Public key and signature consist of header and MPIs::
+
+	struct pubkey_hdr {
+		uint8_t		version;	/* key format version */
+		time_t		timestamp;	/* key made, always 0 for now */
+		uint8_t		algo;
+		uint8_t		nmpi;
+		char		mpi[0];
+	} __packed;
+
+	struct signature_hdr {
+		uint8_t		version;	/* signature format version */
+		time_t		timestamp;	/* signature made */
+		uint8_t		algo;
+		uint8_t		hash;
+		uint8_t		keyid[8];
+		uint8_t		nmpi;
+		char		mpi[0];
+	} __packed;
 
 keyid equals to SHA1[12-19] over the total key content.
 Signature header is used as an input to generate a signature.
@@ -43,31 +50,33 @@ Such approach insures that key or signature header could not be changed.
 It protects timestamp from been changed and can be used for rollback
 protection.
 
-2. API
+API
+===
 
-API currently includes only 1 function:
+API currently includes only 1 function::
 
 	digsig_verify() - digital signature verification with public key
 
 
-/**
- * digsig_verify() - digital signature verification with public key
- * @keyring:	keyring to search key in
- * @sig:	digital signature
- * @sigen:	length of the signature
- * @data:	data
- * @datalen:	length of the data
- * @return:	0 on success, -EINVAL otherwise
- *
- * Verifies data integrity against digital signature.
- * Currently only RSA is supported.
- * Normally hash of the content is used as a data for this function.
- *
- */
-int digsig_verify(struct key *keyring, const char *sig, int siglen,
-						const char *data, int datalen);
-
-3. User-space utilities
+	/**
+	* digsig_verify() - digital signature verification with public key
+	* @keyring:	keyring to search key in
+	* @sig:	digital signature
+	* @sigen:	length of the signature
+	* @data:	data
+	* @datalen:	length of the data
+	* @return:	0 on success, -EINVAL otherwise
+	*
+	* Verifies data integrity against digital signature.
+	* Currently only RSA is supported.
+	* Normally hash of the content is used as a data for this function.
+	*
+	*/
+	int digsig_verify(struct key *keyring, const char *sig, int siglen,
+			  const char *data, int datalen);
+
+User-space utilities
+====================
 
 The signing and key management utilities evm-utils provide functionality
 to generate signatures, to load keys into the kernel keyring.
@@ -75,22 +84,18 @@ Keys can be in PEM or converted to the kernel format.
 When the key is added to the kernel keyring, the keyid defines the name
 of the key: 5D2B05FC633EE3E8 in the example bellow.
 
-Here is example output of the keyctl utility.
-
-$ keyctl show
-Session Keyring
-       -3 --alswrv      0     0  keyring: _ses
-603976250 --alswrv      0    -1   \_ keyring: _uid.0
-817777377 --alswrv      0     0       \_ user: kmk
-891974900 --alswrv      0     0       \_ encrypted: evm-key
-170323636 --alswrv      0     0       \_ keyring: _module
-548221616 --alswrv      0     0       \_ keyring: _ima
-128198054 --alswrv      0     0       \_ keyring: _evm
-
-$ keyctl list 128198054
-1 key in keyring:
-620789745: --alswrv     0     0 user: 5D2B05FC633EE3E8
-
-
-Dmitry Kasatkin
-06.10.2011
+Here is example output of the keyctl utility::
+
+	$ keyctl show
+	Session Keyring
+	-3 --alswrv      0     0  keyring: _ses
+	603976250 --alswrv      0    -1   \_ keyring: _uid.0
+	817777377 --alswrv      0     0       \_ user: kmk
+	891974900 --alswrv      0     0       \_ encrypted: evm-key
+	170323636 --alswrv      0     0       \_ keyring: _module
+	548221616 --alswrv      0     0       \_ keyring: _ima
+	128198054 --alswrv      0     0       \_ keyring: _evm
+
+	$ keyctl list 128198054
+	1 key in keyring:
+	620789745: --alswrv     0     0 user: 5D2B05FC633EE3E8
-- 
GitLab


From 03158a70adde2fd46b0f266b5d1090efe2ff6887 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 07:27:52 -0300
Subject: [PATCH 1556/1958] DMA-API.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Fix some title marks to match ReST;
- use :Author: for author name;
- foo_ is an hyperlink. Get rid of it;
- Mark literal blocks as such;
- Use tables on some places that are almost using the
  table format.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/DMA-API.txt | 580 +++++++++++++++++++++-----------------
 1 file changed, 329 insertions(+), 251 deletions(-)

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 71200dfa09228..45b29326d719b 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -1,7 +1,8 @@
-               Dynamic DMA mapping using the generic device
-               ============================================
+============================================
+Dynamic DMA mapping using the generic device
+============================================
 
-        James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
+:Author: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
 
 This document describes the DMA API.  For a more gentle introduction
 of the API (and actual examples), see Documentation/DMA-API-HOWTO.txt.
@@ -12,10 +13,10 @@ machines.  Unless you know that your driver absolutely has to support
 non-consistent platforms (this is usually only legacy platforms) you
 should only use the API described in part I.
 
-Part I - dma_ API
--------------------------------------
+Part I - dma_API
+----------------
 
-To get the dma_ API, you must #include <linux/dma-mapping.h>.  This
+To get the dma_API, you must #include <linux/dma-mapping.h>.  This
 provides dma_addr_t and the interfaces described below.
 
 A dma_addr_t can hold any valid DMA address for the platform.  It can be
@@ -26,9 +27,11 @@ address space and the DMA address space.
 Part Ia - Using large DMA-coherent buffers
 ------------------------------------------
 
-void *
-dma_alloc_coherent(struct device *dev, size_t size,
-			     dma_addr_t *dma_handle, gfp_t flag)
+::
+
+	void *
+	dma_alloc_coherent(struct device *dev, size_t size,
+			   dma_addr_t *dma_handle, gfp_t flag)
 
 Consistent memory is memory for which a write by either the device or
 the processor can immediately be read by the processor or device
@@ -51,20 +54,24 @@ consolidate your requests for consistent memory as much as possible.
 The simplest way to do that is to use the dma_pool calls (see below).
 
 The flag parameter (dma_alloc_coherent() only) allows the caller to
-specify the GFP_ flags (see kmalloc()) for the allocation (the
+specify the ``GFP_`` flags (see kmalloc()) for the allocation (the
 implementation may choose to ignore flags that affect the location of
 the returned memory, like GFP_DMA).
 
-void *
-dma_zalloc_coherent(struct device *dev, size_t size,
-			     dma_addr_t *dma_handle, gfp_t flag)
+::
+
+	void *
+	dma_zalloc_coherent(struct device *dev, size_t size,
+			    dma_addr_t *dma_handle, gfp_t flag)
 
 Wraps dma_alloc_coherent() and also zeroes the returned memory if the
 allocation attempt succeeded.
 
-void
-dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
-			   dma_addr_t dma_handle)
+::
+
+	void
+	dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+			  dma_addr_t dma_handle)
 
 Free a region of consistent memory you previously allocated.  dev,
 size and dma_handle must all be the same as those passed into
@@ -78,7 +85,7 @@ may only be called with IRQs enabled.
 Part Ib - Using small DMA-coherent buffers
 ------------------------------------------
 
-To get this part of the dma_ API, you must #include <linux/dmapool.h>
+To get this part of the dma_API, you must #include <linux/dmapool.h>
 
 Many drivers need lots of small DMA-coherent memory regions for DMA
 descriptors or I/O buffers.  Rather than allocating in units of a page
@@ -88,6 +95,8 @@ not __get_free_pages().  Also, they understand common hardware constraints
 for alignment, like queue heads needing to be aligned on N-byte boundaries.
 
 
+::
+
 	struct dma_pool *
 	dma_pool_create(const char *name, struct device *dev,
 			size_t size, size_t align, size_t alloc);
@@ -103,16 +112,21 @@ in bytes, and must be a power of two).  If your device has no boundary
 crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
 from this pool must not cross 4KByte boundaries.
 
+::
 
-	void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
-			      dma_addr_t *handle)
+	void *
+	dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
+		        dma_addr_t *handle)
 
 Wraps dma_pool_alloc() and also zeroes the returned memory if the
 allocation attempt succeeded.
 
 
-	void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
-			dma_addr_t *dma_handle);
+::
+
+	void *
+	dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
+		       dma_addr_t *dma_handle);
 
 This allocates memory from the pool; the returned memory will meet the
 size and alignment requirements specified at creation time.  Pass
@@ -122,16 +136,20 @@ blocking.  Like dma_alloc_coherent(), this returns two values:  an
 address usable by the CPU, and the DMA address usable by the pool's
 device.
 
+::
 
-	void dma_pool_free(struct dma_pool *pool, void *vaddr,
-			dma_addr_t addr);
+	void
+	dma_pool_free(struct dma_pool *pool, void *vaddr,
+		      dma_addr_t addr);
 
 This puts memory back into the pool.  The pool is what was passed to
 dma_pool_alloc(); the CPU (vaddr) and DMA addresses are what
 were returned when that routine allocated the memory being freed.
 
+::
 
-	void dma_pool_destroy(struct dma_pool *pool);
+	void
+	dma_pool_destroy(struct dma_pool *pool);
 
 dma_pool_destroy() frees the resources of the pool.  It must be
 called in a context which can sleep.  Make sure you've freed all allocated
@@ -141,32 +159,40 @@ memory back to the pool before you destroy it.
 Part Ic - DMA addressing limitations
 ------------------------------------
 
-int
-dma_set_mask_and_coherent(struct device *dev, u64 mask)
+::
+
+	int
+	dma_set_mask_and_coherent(struct device *dev, u64 mask)
 
 Checks to see if the mask is possible and updates the device
 streaming and coherent DMA mask parameters if it is.
 
 Returns: 0 if successful and a negative error if not.
 
-int
-dma_set_mask(struct device *dev, u64 mask)
+::
+
+	int
+	dma_set_mask(struct device *dev, u64 mask)
 
 Checks to see if the mask is possible and updates the device
 parameters if it is.
 
 Returns: 0 if successful and a negative error if not.
 
-int
-dma_set_coherent_mask(struct device *dev, u64 mask)
+::
+
+	int
+	dma_set_coherent_mask(struct device *dev, u64 mask)
 
 Checks to see if the mask is possible and updates the device
 parameters if it is.
 
 Returns: 0 if successful and a negative error if not.
 
-u64
-dma_get_required_mask(struct device *dev)
+::
+
+	u64
+	dma_get_required_mask(struct device *dev)
 
 This API returns the mask that the platform requires to
 operate efficiently.  Usually this means the returned mask
@@ -182,94 +208,107 @@ call to set the mask to the value returned.
 Part Id - Streaming DMA mappings
 --------------------------------
 
-dma_addr_t
-dma_map_single(struct device *dev, void *cpu_addr, size_t size,
-		      enum dma_data_direction direction)
+::
+
+	dma_addr_t
+	dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+		       enum dma_data_direction direction)
 
 Maps a piece of processor virtual memory so it can be accessed by the
 device and returns the DMA address of the memory.
 
 The direction for both APIs may be converted freely by casting.
-However the dma_ API uses a strongly typed enumerator for its
+However the dma_API uses a strongly typed enumerator for its
 direction:
 
+======================= =============================================
 DMA_NONE		no direction (used for debugging)
 DMA_TO_DEVICE		data is going from the memory to the device
 DMA_FROM_DEVICE		data is coming from the device to the memory
 DMA_BIDIRECTIONAL	direction isn't known
+======================= =============================================
+
+.. note::
+
+	Not all memory regions in a machine can be mapped by this API.
+	Further, contiguous kernel virtual space may not be contiguous as
+	physical memory.  Since this API does not provide any scatter/gather
+	capability, it will fail if the user tries to map a non-physically
+	contiguous piece of memory.  For this reason, memory to be mapped by
+	this API should be obtained from sources which guarantee it to be
+	physically contiguous (like kmalloc).
+
+	Further, the DMA address of the memory must be within the
+	dma_mask of the device (the dma_mask is a bit mask of the
+	addressable region for the device, i.e., if the DMA address of
+	the memory ANDed with the dma_mask is still equal to the DMA
+	address, then the device can perform DMA to the memory).  To
+	ensure that the memory allocated by kmalloc is within the dma_mask,
+	the driver may specify various platform-dependent flags to restrict
+	the DMA address range of the allocation (e.g., on x86, GFP_DMA
+	guarantees to be within the first 16MB of available DMA addresses,
+	as required by ISA devices).
+
+	Note also that the above constraints on physical contiguity and
+	dma_mask may not apply if the platform has an IOMMU (a device which
+	maps an I/O DMA address to a physical memory address).  However, to be
+	portable, device driver writers may *not* assume that such an IOMMU
+	exists.
+
+.. warning::
+
+	Memory coherency operates at a granularity called the cache
+	line width.  In order for memory mapped by this API to operate
+	correctly, the mapped region must begin exactly on a cache line
+	boundary and end exactly on one (to prevent two separately mapped
+	regions from sharing a single cache line).  Since the cache line size
+	may not be known at compile time, the API will not enforce this
+	requirement.  Therefore, it is recommended that driver writers who
+	don't take special care to determine the cache line size at run time
+	only map virtual regions that begin and end on page boundaries (which
+	are guaranteed also to be cache line boundaries).
+
+	DMA_TO_DEVICE synchronisation must be done after the last modification
+	of the memory region by the software and before it is handed off to
+	the device.  Once this primitive is used, memory covered by this
+	primitive should be treated as read-only by the device.  If the device
+	may write to it at any point, it should be DMA_BIDIRECTIONAL (see
+	below).
+
+	DMA_FROM_DEVICE synchronisation must be done before the driver
+	accesses data that may be changed by the device.  This memory should
+	be treated as read-only by the driver.  If the driver needs to write
+	to it at any point, it should be DMA_BIDIRECTIONAL (see below).
+
+	DMA_BIDIRECTIONAL requires special handling: it means that the driver
+	isn't sure if the memory was modified before being handed off to the
+	device and also isn't sure if the device will also modify it.  Thus,
+	you must always sync bidirectional memory twice: once before the
+	memory is handed off to the device (to make sure all memory changes
+	are flushed from the processor) and once before the data may be
+	accessed after being used by the device (to make sure any processor
+	cache lines are updated with data that the device may have changed).
+
+::
 
-Notes:  Not all memory regions in a machine can be mapped by this API.
-Further, contiguous kernel virtual space may not be contiguous as
-physical memory.  Since this API does not provide any scatter/gather
-capability, it will fail if the user tries to map a non-physically
-contiguous piece of memory.  For this reason, memory to be mapped by
-this API should be obtained from sources which guarantee it to be
-physically contiguous (like kmalloc).
-
-Further, the DMA address of the memory must be within the
-dma_mask of the device (the dma_mask is a bit mask of the
-addressable region for the device, i.e., if the DMA address of
-the memory ANDed with the dma_mask is still equal to the DMA
-address, then the device can perform DMA to the memory).  To
-ensure that the memory allocated by kmalloc is within the dma_mask,
-the driver may specify various platform-dependent flags to restrict
-the DMA address range of the allocation (e.g., on x86, GFP_DMA
-guarantees to be within the first 16MB of available DMA addresses,
-as required by ISA devices).
-
-Note also that the above constraints on physical contiguity and
-dma_mask may not apply if the platform has an IOMMU (a device which
-maps an I/O DMA address to a physical memory address).  However, to be
-portable, device driver writers may *not* assume that such an IOMMU
-exists.
-
-Warnings:  Memory coherency operates at a granularity called the cache
-line width.  In order for memory mapped by this API to operate
-correctly, the mapped region must begin exactly on a cache line
-boundary and end exactly on one (to prevent two separately mapped
-regions from sharing a single cache line).  Since the cache line size
-may not be known at compile time, the API will not enforce this
-requirement.  Therefore, it is recommended that driver writers who
-don't take special care to determine the cache line size at run time
-only map virtual regions that begin and end on page boundaries (which
-are guaranteed also to be cache line boundaries).
-
-DMA_TO_DEVICE synchronisation must be done after the last modification
-of the memory region by the software and before it is handed off to
-the device.  Once this primitive is used, memory covered by this
-primitive should be treated as read-only by the device.  If the device
-may write to it at any point, it should be DMA_BIDIRECTIONAL (see
-below).
-
-DMA_FROM_DEVICE synchronisation must be done before the driver
-accesses data that may be changed by the device.  This memory should
-be treated as read-only by the driver.  If the driver needs to write
-to it at any point, it should be DMA_BIDIRECTIONAL (see below).
-
-DMA_BIDIRECTIONAL requires special handling: it means that the driver
-isn't sure if the memory was modified before being handed off to the
-device and also isn't sure if the device will also modify it.  Thus,
-you must always sync bidirectional memory twice: once before the
-memory is handed off to the device (to make sure all memory changes
-are flushed from the processor) and once before the data may be
-accessed after being used by the device (to make sure any processor
-cache lines are updated with data that the device may have changed).
-
-void
-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		 enum dma_data_direction direction)
+	void
+	dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+			 enum dma_data_direction direction)
 
 Unmaps the region previously mapped.  All the parameters passed in
 must be identical to those passed in (and returned) by the mapping
 API.
 
-dma_addr_t
-dma_map_page(struct device *dev, struct page *page,
-		    unsigned long offset, size_t size,
-		    enum dma_data_direction direction)
-void
-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-	       enum dma_data_direction direction)
+::
+
+	dma_addr_t
+	dma_map_page(struct device *dev, struct page *page,
+		     unsigned long offset, size_t size,
+		     enum dma_data_direction direction)
+
+	void
+	dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+		       enum dma_data_direction direction)
 
 API for mapping and unmapping for pages.  All the notes and warnings
 for the other mapping APIs apply here.  Also, although the <offset>
@@ -277,20 +316,24 @@ and <size> parameters are provided to do partial page mapping, it is
 recommended that you never use these unless you really know what the
 cache width is.
 
-dma_addr_t
-dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
-		 enum dma_data_direction dir, unsigned long attrs)
+::
 
-void
-dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
-		   enum dma_data_direction dir, unsigned long attrs)
+	dma_addr_t
+	dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
+			 enum dma_data_direction dir, unsigned long attrs)
+
+	void
+	dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
+			   enum dma_data_direction dir, unsigned long attrs)
 
 API for mapping and unmapping for MMIO resources. All the notes and
 warnings for the other mapping APIs apply here. The API should only be
 used to map device MMIO resources, mapping of RAM is not permitted.
 
-int
-dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+::
+
+	int
+	dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 
 In some circumstances dma_map_single(), dma_map_page() and dma_map_resource()
 will fail to create a mapping. A driver can check for these errors by testing
@@ -298,9 +341,11 @@ the returned DMA address with dma_mapping_error(). A non-zero return value
 means the mapping could not be created and the driver should take appropriate
 action (e.g. reduce current DMA mapping usage or delay and try again later).
 
+::
+
 	int
 	dma_map_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction direction)
+		   int nents, enum dma_data_direction direction)
 
 Returns: the number of DMA address segments mapped (this may be shorter
 than <nents> passed in if some elements of the scatter/gather list are
@@ -316,7 +361,7 @@ critical that the driver do something, in the case of a block driver
 aborting the request or even oopsing is better than doing nothing and
 corrupting the filesystem.
 
-With scatterlists, you use the resulting mapping like this:
+With scatterlists, you use the resulting mapping like this::
 
 	int i, count = dma_map_sg(dev, sglist, nents, direction);
 	struct scatterlist *sg;
@@ -337,9 +382,11 @@ Then you should loop count times (note: this can be less than nents times)
 and use sg_dma_address() and sg_dma_len() macros where you previously
 accessed sg->address and sg->length as shown above.
 
+::
+
 	void
 	dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction direction)
+		     int nents, enum dma_data_direction direction)
 
 Unmap the previously mapped scatter/gather list.  All the parameters
 must be the same as those and passed in to the scatter/gather mapping
@@ -348,18 +395,27 @@ API.
 Note: <nents> must be the number you passed in, *not* the number of
 DMA address entries returned.
 
-void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-			enum dma_data_direction direction)
-void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
-			   enum dma_data_direction direction)
-void
-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
-		    enum dma_data_direction direction)
-void
-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents,
-		       enum dma_data_direction direction)
+::
+
+	void
+	dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				size_t size,
+				enum dma_data_direction direction)
+
+	void
+	dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+				   size_t size,
+				   enum dma_data_direction direction)
+
+	void
+	dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			    int nents,
+			    enum dma_data_direction direction)
+
+	void
+	dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			       int nents,
+			       enum dma_data_direction direction)
 
 Synchronise a single contiguous or scatter/gather mapping for the CPU
 and device. With the sync_sg API, all the parameters must be the same
@@ -367,36 +423,41 @@ as those passed into the single mapping API. With the sync_single API,
 you can use dma_handle and size parameters that aren't identical to
 those passed into the single mapping API to do a partial sync.
 
-Notes:  You must do this:
 
-- Before reading values that have been written by DMA from the device
-  (use the DMA_FROM_DEVICE direction)
-- After writing values that will be written to the device using DMA
-  (use the DMA_TO_DEVICE) direction
-- before *and* after handing memory to the device if the memory is
-  DMA_BIDIRECTIONAL
+.. note::
+
+   You must do this:
+
+   - Before reading values that have been written by DMA from the device
+     (use the DMA_FROM_DEVICE direction)
+   - After writing values that will be written to the device using DMA
+     (use the DMA_TO_DEVICE) direction
+   - before *and* after handing memory to the device if the memory is
+     DMA_BIDIRECTIONAL
 
 See also dma_map_single().
 
-dma_addr_t
-dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size,
-		     enum dma_data_direction dir,
-		     unsigned long attrs)
+::
+
+	dma_addr_t
+	dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size,
+			     enum dma_data_direction dir,
+			     unsigned long attrs)
 
-void
-dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
-		       size_t size, enum dma_data_direction dir,
-		       unsigned long attrs)
+	void
+	dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
+			       size_t size, enum dma_data_direction dir,
+			       unsigned long attrs)
 
-int
-dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
-		 int nents, enum dma_data_direction dir,
-		 unsigned long attrs)
+	int
+	dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+			 int nents, enum dma_data_direction dir,
+			 unsigned long attrs)
 
-void
-dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
-		   int nents, enum dma_data_direction dir,
-		   unsigned long attrs)
+	void
+	dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
+			   int nents, enum dma_data_direction dir,
+			   unsigned long attrs)
 
 The four functions above are just like the counterpart functions
 without the _attrs suffixes, except that they pass an optional
@@ -410,37 +471,38 @@ is identical to those of the corresponding function
 without the _attrs suffix. As a result dma_map_single_attrs()
 can generally replace dma_map_single(), etc.
 
-As an example of the use of the *_attrs functions, here's how
+As an example of the use of the ``*_attrs`` functions, here's how
 you could pass an attribute DMA_ATTR_FOO when mapping memory
-for DMA:
+for DMA::
 
-#include <linux/dma-mapping.h>
-/* DMA_ATTR_FOO should be defined in linux/dma-mapping.h and
- * documented in Documentation/DMA-attributes.txt */
-...
+	#include <linux/dma-mapping.h>
+	/* DMA_ATTR_FOO should be defined in linux/dma-mapping.h and
+	* documented in Documentation/DMA-attributes.txt */
+	...
 
-	unsigned long attr;
-	attr |= DMA_ATTR_FOO;
-	....
-	n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, attr);
-	....
+		unsigned long attr;
+		attr |= DMA_ATTR_FOO;
+		....
+		n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, attr);
+		....
 
 Architectures that care about DMA_ATTR_FOO would check for its
 presence in their implementations of the mapping and unmapping
-routines, e.g.:
-
-void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
-			     size_t size, enum dma_data_direction dir,
-			     unsigned long attrs)
-{
-	....
-	if (attrs & DMA_ATTR_FOO)
-		/* twizzle the frobnozzle */
-	....
+routines, e.g.:::
+
+	void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
+				     size_t size, enum dma_data_direction dir,
+				     unsigned long attrs)
+	{
+		....
+		if (attrs & DMA_ATTR_FOO)
+			/* twizzle the frobnozzle */
+		....
+	}
 
 
-Part II - Advanced dma_ usage
------------------------------
+Part II - Advanced dma usage
+----------------------------
 
 Warning: These pieces of the DMA API should not be used in the
 majority of cases, since they cater for unlikely corner cases that
@@ -450,9 +512,11 @@ If you don't understand how cache line coherency works between a
 processor and an I/O device, you should not be using this part of the
 API at all.
 
-void *
-dma_alloc_noncoherent(struct device *dev, size_t size,
-			       dma_addr_t *dma_handle, gfp_t flag)
+::
+
+	void *
+	dma_alloc_noncoherent(struct device *dev, size_t size,
+			      dma_addr_t *dma_handle, gfp_t flag)
 
 Identical to dma_alloc_coherent() except that the platform will
 choose to return either consistent or non-consistent memory as it sees
@@ -468,39 +532,49 @@ only use this API if you positively know your driver will be
 required to work on one of the rare (usually non-PCI) architectures
 that simply cannot make consistent memory.
 
-void
-dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
-			      dma_addr_t dma_handle)
+::
+
+	void
+	dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
+			     dma_addr_t dma_handle)
 
 Free memory allocated by the nonconsistent API.  All parameters must
 be identical to those passed in (and returned by
 dma_alloc_noncoherent()).
 
-int
-dma_get_cache_alignment(void)
+::
+
+	int
+	dma_get_cache_alignment(void)
 
 Returns the processor cache alignment.  This is the absolute minimum
 alignment *and* width that you must observe when either mapping
 memory or doing partial flushes.
 
-Notes: This API may return a number *larger* than the actual cache
-line, but it will guarantee that one or more cache lines fit exactly
-into the width returned by this call.  It will also always be a power
-of two for easy alignment.
+.. note::
 
-void
-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
-	       enum dma_data_direction direction)
+	This API may return a number *larger* than the actual cache
+	line, but it will guarantee that one or more cache lines fit exactly
+	into the width returned by this call.  It will also always be a power
+	of two for easy alignment.
+
+::
+
+	void
+	dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+		       enum dma_data_direction direction)
 
 Do a partial sync of memory that was allocated by
 dma_alloc_noncoherent(), starting at virtual address vaddr and
 continuing on for size.  Again, you *must* observe the cache line
 boundaries when doing this.
 
-int
-dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-			    dma_addr_t device_addr, size_t size, int
-			    flags)
+::
+
+	int
+	dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
+				    dma_addr_t device_addr, size_t size, int
+				    flags)
 
 Declare region of memory to be handed out by dma_alloc_coherent() when
 it's asked for coherent memory for this device.
@@ -516,21 +590,21 @@ size is the size of the area (must be multiples of PAGE_SIZE).
 
 flags can be ORed together and are:
 
-DMA_MEMORY_MAP - request that the memory returned from
-dma_alloc_coherent() be directly writable.
+- DMA_MEMORY_MAP - request that the memory returned from
+  dma_alloc_coherent() be directly writable.
 
-DMA_MEMORY_IO - request that the memory returned from
-dma_alloc_coherent() be addressable using read()/write()/memcpy_toio() etc.
+- DMA_MEMORY_IO - request that the memory returned from
+  dma_alloc_coherent() be addressable using read()/write()/memcpy_toio() etc.
 
 One or both of these flags must be present.
 
-DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by
-dma_alloc_coherent of any child devices of this one (for memory residing
-on a bridge).
+- DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by
+  dma_alloc_coherent of any child devices of this one (for memory residing
+  on a bridge).
 
-DMA_MEMORY_EXCLUSIVE - only allocate memory from the declared regions. 
-Do not allow dma_alloc_coherent() to fall back to system memory when
-it's out of memory in the declared region.
+- DMA_MEMORY_EXCLUSIVE - only allocate memory from the declared regions.
+  Do not allow dma_alloc_coherent() to fall back to system memory when
+  it's out of memory in the declared region.
 
 The return value will be either DMA_MEMORY_MAP or DMA_MEMORY_IO and
 must correspond to a passed in flag (i.e. no returning DMA_MEMORY_IO
@@ -543,15 +617,17 @@ must be accessed using the correct bus functions.  If your driver
 isn't prepared to handle this contingency, it should not specify
 DMA_MEMORY_IO in the input flags.
 
-As a simplification for the platforms, only *one* such region of
+As a simplification for the platforms, only **one** such region of
 memory may be declared per device.
 
 For reasons of efficiency, most platforms choose to track the declared
 region only at the granularity of a page.  For smaller allocations,
 you should use the dma_pool() API.
 
-void
-dma_release_declared_memory(struct device *dev)
+::
+
+	void
+	dma_release_declared_memory(struct device *dev)
 
 Remove the memory region previously declared from the system.  This
 API performs *no* in-use checking for this region and will return
@@ -559,9 +635,11 @@ unconditionally having removed all the required structures.  It is the
 driver's job to ensure that no parts of this memory region are
 currently in use.
 
-void *
-dma_mark_declared_memory_occupied(struct device *dev,
-				  dma_addr_t device_addr, size_t size)
+::
+
+	void *
+	dma_mark_declared_memory_occupied(struct device *dev,
+					  dma_addr_t device_addr, size_t size)
 
 This is used to occupy specific regions of the declared space
 (dma_alloc_coherent() will hand out the first free region it finds).
@@ -592,38 +670,37 @@ option has a performance impact. Do not enable it in production kernels.
 If you boot the resulting kernel will contain code which does some bookkeeping
 about what DMA memory was allocated for which device. If this code detects an
 error it prints a warning message with some details into your kernel log. An
-example warning message may look like this:
-
-------------[ cut here ]------------
-WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448
-	check_unmap+0x203/0x490()
-Hardware name:
-forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong
-	function [device address=0x00000000640444be] [size=66 bytes] [mapped as
-single] [unmapped as page]
-Modules linked in: nfsd exportfs bridge stp llc r8169
-Pid: 0, comm: swapper Tainted: G        W  2.6.28-dmatest-09289-g8bb99c0 #1
-Call Trace:
- <IRQ>  [<ffffffff80240b22>] warn_slowpath+0xf2/0x130
- [<ffffffff80647b70>] _spin_unlock+0x10/0x30
- [<ffffffff80537e75>] usb_hcd_link_urb_to_ep+0x75/0xc0
- [<ffffffff80647c22>] _spin_unlock_irqrestore+0x12/0x40
- [<ffffffff8055347f>] ohci_urb_enqueue+0x19f/0x7c0
- [<ffffffff80252f96>] queue_work+0x56/0x60
- [<ffffffff80237e10>] enqueue_task_fair+0x20/0x50
- [<ffffffff80539279>] usb_hcd_submit_urb+0x379/0xbc0
- [<ffffffff803b78c3>] cpumask_next_and+0x23/0x40
- [<ffffffff80235177>] find_busiest_group+0x207/0x8a0
- [<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50
- [<ffffffff803c7ea3>] check_unmap+0x203/0x490
- [<ffffffff803c8259>] debug_dma_unmap_page+0x49/0x50
- [<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0
- [<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0
- [<ffffffff8026df84>] handle_IRQ_event+0x34/0x70
- [<ffffffff8026ffe9>] handle_edge_irq+0xc9/0x150
- [<ffffffff8020e3ab>] do_IRQ+0xcb/0x1c0
- [<ffffffff8020c093>] ret_from_intr+0x0/0xa
- <EOI> <4>---[ end trace f6435a98e2a38c0e ]---
+example warning message may look like this::
+
+	WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448
+		check_unmap+0x203/0x490()
+	Hardware name:
+	forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong
+		function [device address=0x00000000640444be] [size=66 bytes] [mapped as
+	single] [unmapped as page]
+	Modules linked in: nfsd exportfs bridge stp llc r8169
+	Pid: 0, comm: swapper Tainted: G        W  2.6.28-dmatest-09289-g8bb99c0 #1
+	Call Trace:
+	<IRQ>  [<ffffffff80240b22>] warn_slowpath+0xf2/0x130
+	[<ffffffff80647b70>] _spin_unlock+0x10/0x30
+	[<ffffffff80537e75>] usb_hcd_link_urb_to_ep+0x75/0xc0
+	[<ffffffff80647c22>] _spin_unlock_irqrestore+0x12/0x40
+	[<ffffffff8055347f>] ohci_urb_enqueue+0x19f/0x7c0
+	[<ffffffff80252f96>] queue_work+0x56/0x60
+	[<ffffffff80237e10>] enqueue_task_fair+0x20/0x50
+	[<ffffffff80539279>] usb_hcd_submit_urb+0x379/0xbc0
+	[<ffffffff803b78c3>] cpumask_next_and+0x23/0x40
+	[<ffffffff80235177>] find_busiest_group+0x207/0x8a0
+	[<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50
+	[<ffffffff803c7ea3>] check_unmap+0x203/0x490
+	[<ffffffff803c8259>] debug_dma_unmap_page+0x49/0x50
+	[<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0
+	[<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0
+	[<ffffffff8026df84>] handle_IRQ_event+0x34/0x70
+	[<ffffffff8026ffe9>] handle_edge_irq+0xc9/0x150
+	[<ffffffff8020e3ab>] do_IRQ+0xcb/0x1c0
+	[<ffffffff8020c093>] ret_from_intr+0x0/0xa
+	<EOI> <4>---[ end trace f6435a98e2a38c0e ]---
 
 The driver developer can find the driver and the device including a stacktrace
 of the DMA-API call which caused this warning.
@@ -637,43 +714,42 @@ details.
 The debugfs directory for the DMA-API debugging code is called dma-api/. In
 this directory the following files can currently be found:
 
-	dma-api/all_errors	This file contains a numeric value. If this
+=============================== ===============================================
+dma-api/all_errors		This file contains a numeric value. If this
 				value is not equal to zero the debugging code
 				will print a warning for every error it finds
 				into the kernel log. Be careful with this
 				option, as it can easily flood your logs.
 
-	dma-api/disabled	This read-only file contains the character 'Y'
+dma-api/disabled		This read-only file contains the character 'Y'
 				if the debugging code is disabled. This can
 				happen when it runs out of memory or if it was
 				disabled at boot time
 
-	dma-api/error_count	This file is read-only and shows the total
+dma-api/error_count		This file is read-only and shows the total
 				numbers of errors found.
 
-	dma-api/num_errors	The number in this file shows how many
+dma-api/num_errors		The number in this file shows how many
 				warnings will be printed to the kernel log
 				before it stops. This number is initialized to
 				one at system boot and be set by writing into
 				this file
 
-	dma-api/min_free_entries
-				This read-only file can be read to get the
+dma-api/min_free_entries	This read-only file can be read to get the
 				minimum number of free dma_debug_entries the
 				allocator has ever seen. If this value goes
 				down to zero the code will disable itself
 				because it is not longer reliable.
 
-	dma-api/num_free_entries
-				The current number of free dma_debug_entries
+dma-api/num_free_entries	The current number of free dma_debug_entries
 				in the allocator.
 
-	dma-api/driver-filter
-				You can write a name of a driver into this file
+dma-api/driver-filter		You can write a name of a driver into this file
 				to limit the debug output to requests from that
 				particular driver. Write an empty string to
 				that file to disable the filter and see
 				all errors again.
+=============================== ===============================================
 
 If you have this code compiled into your kernel it will be enabled by default.
 If you want to boot without the bookkeeping anyway you can provide
@@ -692,7 +768,10 @@ of preallocated entries is defined per architecture. If it is too low for you
 boot with 'dma_debug_entries=<your_desired_number>' to overwrite the
 architectural default.
 
-void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+::
+
+	void
+	debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
 dma-debug interface debug_dma_mapping_error() to debug drivers that fail
 to check DMA mapping errors on addresses returned by dma_map_single() and
@@ -702,4 +781,3 @@ the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
 this flag is still set, prints warning message that includes call trace that
 leads up to the unmap. This interface can be called from dma_mapping_error()
 routines to enable DMA mapping error check debugging.
-
-- 
GitLab


From 266921bdb53ca8a171b96c113e3995b231228b66 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 10:27:28 -0300
Subject: [PATCH 1557/1958] DMA-API-HOWTO.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Mark titles;
- Mark literal blocks;
- Mark some literals that would otherwise produce warnings;
- Mark authorship.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/DMA-API-HOWTO.txt | 153 ++++++++++++++++++--------------
 1 file changed, 88 insertions(+), 65 deletions(-)

diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index 4ed3883568984..f0cc3f7722654 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -1,22 +1,24 @@
-		     Dynamic DMA mapping Guide
-		     =========================
+=========================
+Dynamic DMA mapping Guide
+=========================
 
-		 David S. Miller <davem@redhat.com>
-		 Richard Henderson <rth@cygnus.com>
-		  Jakub Jelinek <jakub@redhat.com>
+:Author: David S. Miller <davem@redhat.com>
+:Author: Richard Henderson <rth@cygnus.com>
+:Author: Jakub Jelinek <jakub@redhat.com>
 
 This is a guide to device driver writers on how to use the DMA API
 with example pseudo-code.  For a concise description of the API, see
 DMA-API.txt.
 
-                       CPU and DMA addresses
+CPU and DMA addresses
+=====================
 
 There are several kinds of addresses involved in the DMA API, and it's
 important to understand the differences.
 
 The kernel normally uses virtual addresses.  Any address returned by
 kmalloc(), vmalloc(), and similar interfaces is a virtual address and can
-be stored in a "void *".
+be stored in a ``void *``.
 
 The virtual memory system (TLB, page tables, etc.) translates virtual
 addresses to CPU physical addresses, which are stored as "phys_addr_t" or
@@ -37,7 +39,7 @@ be restricted to a subset of that space.  For example, even if a system
 supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU
 so devices only need to use 32-bit DMA addresses.
 
-Here's a picture and some examples:
+Here's a picture and some examples::
 
                CPU                  CPU                  Bus
              Virtual              Physical             Address
@@ -98,15 +100,16 @@ microprocessor architecture. You should use the DMA API rather than the
 bus-specific DMA API, i.e., use the dma_map_*() interfaces rather than the
 pci_map_*() interfaces.
 
-First of all, you should make sure
+First of all, you should make sure::
 
-#include <linux/dma-mapping.h>
+	#include <linux/dma-mapping.h>
 
 is in your driver, which provides the definition of dma_addr_t.  This type
 can hold any valid DMA address for the platform and should be used
 everywhere you hold a DMA address returned from the DMA mapping functions.
 
-			 What memory is DMA'able?
+What memory is DMA'able?
+========================
 
 The first piece of information you must know is what kernel memory can
 be used with the DMA mapping facilities.  There has been an unwritten
@@ -143,7 +146,8 @@ What about block I/O and networking buffers?  The block I/O and
 networking subsystems make sure that the buffers they use are valid
 for you to DMA from/to.
 
-			DMA addressing limitations
+DMA addressing limitations
+==========================
 
 Does your device have any DMA addressing limitations?  For example, is
 your device only capable of driving the low order 24-bits of address?
@@ -166,7 +170,7 @@ style to do this even if your device holds the default setting,
 because this shows that you did think about these issues wrt. your
 device.
 
-The query is performed via a call to dma_set_mask_and_coherent():
+The query is performed via a call to dma_set_mask_and_coherent()::
 
 	int dma_set_mask_and_coherent(struct device *dev, u64 mask);
 
@@ -175,12 +179,12 @@ If you have some special requirements, then the following two separate
 queries can be used instead:
 
 	The query for streaming mappings is performed via a call to
-	dma_set_mask():
+	dma_set_mask()::
 
 		int dma_set_mask(struct device *dev, u64 mask);
 
 	The query for consistent allocations is performed via a call
-	to dma_set_coherent_mask():
+	to dma_set_coherent_mask()::
 
 		int dma_set_coherent_mask(struct device *dev, u64 mask);
 
@@ -209,7 +213,7 @@ of your driver reports that performance is bad or that the device is not
 even detected, you can ask them for the kernel messages to find out
 exactly why.
 
-The standard 32-bit addressing device would do something like this:
+The standard 32-bit addressing device would do something like this::
 
 	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
 		dev_warn(dev, "mydev: No suitable DMA available\n");
@@ -225,7 +229,7 @@ than 64-bit addressing.  For example, Sparc64 PCI SAC addressing is
 more efficient than DAC addressing.
 
 Here is how you would handle a 64-bit capable device which can drive
-all 64-bits when accessing streaming DMA:
+all 64-bits when accessing streaming DMA::
 
 	int using_dac;
 
@@ -239,7 +243,7 @@ all 64-bits when accessing streaming DMA:
 	}
 
 If a card is capable of using 64-bit consistent allocations as well,
-the case would look like this:
+the case would look like this::
 
 	int using_dac, consistent_using_dac;
 
@@ -260,7 +264,7 @@ uses consistent allocations, one would have to check the return value from
 dma_set_coherent_mask().
 
 Finally, if your device can only drive the low 24-bits of
-address you might do something like:
+address you might do something like::
 
 	if (dma_set_mask(dev, DMA_BIT_MASK(24))) {
 		dev_warn(dev, "mydev: 24-bit DMA addressing not available\n");
@@ -280,7 +284,7 @@ only provide the functionality which the machine can handle.  It
 is important that the last call to dma_set_mask() be for the
 most specific mask.
 
-Here is pseudo-code showing how this might be done:
+Here is pseudo-code showing how this might be done::
 
 	#define PLAYBACK_ADDRESS_BITS	DMA_BIT_MASK(32)
 	#define RECORD_ADDRESS_BITS	DMA_BIT_MASK(24)
@@ -308,7 +312,8 @@ A sound card was used as an example here because this genre of PCI
 devices seems to be littered with ISA chips given a PCI front end,
 and thus retaining the 16MB DMA addressing limitations of ISA.
 
-			Types of DMA mappings
+Types of DMA mappings
+=====================
 
 There are two types of DMA mappings:
 
@@ -336,12 +341,14 @@ There are two types of DMA mappings:
   to memory is immediately visible to the device, and vice
   versa.  Consistent mappings guarantee this.
 
-  IMPORTANT: Consistent DMA memory does not preclude the usage of
-             proper memory barriers.  The CPU may reorder stores to
+  .. important::
+
+	     Consistent DMA memory does not preclude the usage of
+	     proper memory barriers.  The CPU may reorder stores to
 	     consistent memory just as it may normal memory.  Example:
 	     if it is important for the device to see the first word
 	     of a descriptor updated before the second, you must do
-	     something like:
+	     something like::
 
 		desc->word0 = address;
 		wmb();
@@ -377,16 +384,17 @@ Also, systems with caches that aren't DMA-coherent will work better
 when the underlying buffers don't share cache lines with other data.
 
 
-		 Using Consistent DMA mappings.
+Using Consistent DMA mappings
+=============================
 
 To allocate and map large (PAGE_SIZE or so) consistent DMA regions,
-you should do:
+you should do::
 
 	dma_addr_t dma_handle;
 
 	cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, gfp);
 
-where device is a struct device *. This may be called in interrupt
+where device is a ``struct device *``. This may be called in interrupt
 context with the GFP_ATOMIC flag.
 
 Size is the length of the region you want to allocate, in bytes.
@@ -415,7 +423,7 @@ exists (for example) to guarantee that if you allocate a chunk
 which is smaller than or equal to 64 kilobytes, the extent of the
 buffer you receive will not cross a 64K boundary.
 
-To unmap and free such a DMA region, you call:
+To unmap and free such a DMA region, you call::
 
 	dma_free_coherent(dev, size, cpu_addr, dma_handle);
 
@@ -430,7 +438,7 @@ a kmem_cache, but it uses dma_alloc_coherent(), not __get_free_pages().
 Also, it understands common hardware constraints for alignment,
 like queue heads needing to be aligned on N byte boundaries.
 
-Create a dma_pool like this:
+Create a dma_pool like this::
 
 	struct dma_pool *pool;
 
@@ -444,7 +452,7 @@ pass 0 for boundary; passing 4096 says memory allocated from this pool
 must not cross 4KByte boundaries (but at that time it may be better to
 use dma_alloc_coherent() directly instead).
 
-Allocate memory from a DMA pool like this:
+Allocate memory from a DMA pool like this::
 
 	cpu_addr = dma_pool_alloc(pool, flags, &dma_handle);
 
@@ -452,7 +460,7 @@ flags are GFP_KERNEL if blocking is permitted (not in_interrupt nor
 holding SMP locks), GFP_ATOMIC otherwise.  Like dma_alloc_coherent(),
 this returns two values, cpu_addr and dma_handle.
 
-Free memory that was allocated from a dma_pool like this:
+Free memory that was allocated from a dma_pool like this::
 
 	dma_pool_free(pool, cpu_addr, dma_handle);
 
@@ -460,7 +468,7 @@ where pool is what you passed to dma_pool_alloc(), and cpu_addr and
 dma_handle are the values dma_pool_alloc() returned. This function
 may be called in interrupt context.
 
-Destroy a dma_pool by calling:
+Destroy a dma_pool by calling::
 
 	dma_pool_destroy(pool);
 
@@ -468,11 +476,12 @@ Make sure you've called dma_pool_free() for all memory allocated
 from a pool before you destroy the pool. This function may not
 be called in interrupt context.
 
-			DMA Direction
+DMA Direction
+=============
 
 The interfaces described in subsequent portions of this document
 take a DMA direction argument, which is an integer and takes on
-one of the following values:
+one of the following values::
 
  DMA_BIDIRECTIONAL
  DMA_TO_DEVICE
@@ -521,14 +530,15 @@ packets, map/unmap them with the DMA_TO_DEVICE direction
 specifier.  For receive packets, just the opposite, map/unmap them
 with the DMA_FROM_DEVICE direction specifier.
 
-		  Using Streaming DMA mappings
+Using Streaming DMA mappings
+============================
 
 The streaming DMA mapping routines can be called from interrupt
 context.  There are two versions of each map/unmap, one which will
 map/unmap a single memory region, and one which will map/unmap a
 scatterlist.
 
-To map a single region, you do:
+To map a single region, you do::
 
 	struct device *dev = &my_dev->dev;
 	dma_addr_t dma_handle;
@@ -545,7 +555,7 @@ To map a single region, you do:
 		goto map_error_handling;
 	}
 
-and to unmap it:
+and to unmap it::
 
 	dma_unmap_single(dev, dma_handle, size, direction);
 
@@ -563,7 +573,7 @@ Using CPU pointers like this for single mappings has a disadvantage:
 you cannot reference HIGHMEM memory in this way.  Thus, there is a
 map/unmap interface pair akin to dma_{map,unmap}_single().  These
 interfaces deal with page/offset pairs instead of CPU pointers.
-Specifically:
+Specifically::
 
 	struct device *dev = &my_dev->dev;
 	dma_addr_t dma_handle;
@@ -593,7 +603,7 @@ error as outlined under the dma_map_single() discussion.
 You should call dma_unmap_page() when the DMA activity is finished, e.g.,
 from the interrupt which told you that the DMA transfer is done.
 
-With scatterlists, you map a region gathered from several regions by:
+With scatterlists, you map a region gathered from several regions by::
 
 	int i, count = dma_map_sg(dev, sglist, nents, direction);
 	struct scatterlist *sg;
@@ -617,16 +627,18 @@ Then you should loop count times (note: this can be less than nents times)
 and use sg_dma_address() and sg_dma_len() macros where you previously
 accessed sg->address and sg->length as shown above.
 
-To unmap a scatterlist, just call:
+To unmap a scatterlist, just call::
 
 	dma_unmap_sg(dev, sglist, nents, direction);
 
 Again, make sure DMA activity has already finished.
 
-PLEASE NOTE:  The 'nents' argument to the dma_unmap_sg call must be
-              the _same_ one you passed into the dma_map_sg call,
-	      it should _NOT_ be the 'count' value _returned_ from the
-              dma_map_sg call.
+.. note::
+
+	The 'nents' argument to the dma_unmap_sg call must be
+	the _same_ one you passed into the dma_map_sg call,
+	it should _NOT_ be the 'count' value _returned_ from the
+	dma_map_sg call.
 
 Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}()
 counterpart, because the DMA address space is a shared resource and
@@ -638,11 +650,11 @@ properly in order for the CPU and device to see the most up-to-date and
 correct copy of the DMA buffer.
 
 So, firstly, just map it with dma_map_{single,sg}(), and after each DMA
-transfer call either:
+transfer call either::
 
 	dma_sync_single_for_cpu(dev, dma_handle, size, direction);
 
-or:
+or::
 
 	dma_sync_sg_for_cpu(dev, sglist, nents, direction);
 
@@ -650,17 +662,19 @@ as appropriate.
 
 Then, if you wish to let the device get at the DMA area again,
 finish accessing the data with the CPU, and then before actually
-giving the buffer to the hardware call either:
+giving the buffer to the hardware call either::
 
 	dma_sync_single_for_device(dev, dma_handle, size, direction);
 
-or:
+or::
 
 	dma_sync_sg_for_device(dev, sglist, nents, direction);
 
 as appropriate.
 
-PLEASE NOTE:  The 'nents' argument to dma_sync_sg_for_cpu() and
+.. note::
+
+	      The 'nents' argument to dma_sync_sg_for_cpu() and
 	      dma_sync_sg_for_device() must be the same passed to
 	      dma_map_sg(). It is _NOT_ the count returned by
 	      dma_map_sg().
@@ -671,7 +685,7 @@ dma_map_*() call till dma_unmap_*(), then you don't have to call the
 dma_sync_*() routines at all.
 
 Here is pseudo code which shows a situation in which you would need
-to use the dma_sync_*() interfaces.
+to use the dma_sync_*() interfaces::
 
 	my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len)
 	{
@@ -747,7 +761,8 @@ is planned to completely remove virt_to_bus() and bus_to_virt() as
 they are entirely deprecated.  Some ports already do not provide these
 as it is impossible to correctly support them.
 
-			Handling Errors
+Handling Errors
+===============
 
 DMA address space is limited on some architectures and an allocation
 failure can be determined by:
@@ -755,7 +770,7 @@ failure can be determined by:
 - checking if dma_alloc_coherent() returns NULL or dma_map_sg returns 0
 
 - checking the dma_addr_t returned from dma_map_single() and dma_map_page()
-  by using dma_mapping_error():
+  by using dma_mapping_error()::
 
 	dma_addr_t dma_handle;
 
@@ -773,7 +788,8 @@ failure can be determined by:
   of a multiple page mapping attempt. These example are applicable to
   dma_map_page() as well.
 
-Example 1:
+Example 1::
+
 	dma_addr_t dma_handle1;
 	dma_addr_t dma_handle2;
 
@@ -802,8 +818,12 @@ Example 1:
 		dma_unmap_single(dma_handle1);
 	map_error_handling1:
 
-Example 2: (if buffers are allocated in a loop, unmap all mapped buffers when
-	    mapping error is detected in the middle)
+Example 2::
+
+	/*
+	 * if buffers are allocated in a loop, unmap all mapped buffers when
+	 * mapping error is detected in the middle
+	 */
 
 	dma_addr_t dma_addr;
 	dma_addr_t array[DMA_BUFFERS];
@@ -846,7 +866,8 @@ SCSI drivers must return SCSI_MLQUEUE_HOST_BUSY if the DMA mapping
 fails in the queuecommand hook. This means that the SCSI subsystem
 passes the command to the driver again later.
 
-		Optimizing Unmap State Space Consumption
+Optimizing Unmap State Space Consumption
+========================================
 
 On many platforms, dma_unmap_{single,page}() is simply a nop.
 Therefore, keeping track of the mapping address and length is a waste
@@ -858,7 +879,7 @@ Actually, instead of describing the macros one by one, we'll
 transform some example code.
 
 1) Use DEFINE_DMA_UNMAP_{ADDR,LEN} in state saving structures.
-   Example, before:
+   Example, before::
 
 	struct ring_state {
 		struct sk_buff *skb;
@@ -866,7 +887,7 @@ transform some example code.
 		__u32 len;
 	};
 
-   after:
+   after::
 
 	struct ring_state {
 		struct sk_buff *skb;
@@ -875,23 +896,23 @@ transform some example code.
 	};
 
 2) Use dma_unmap_{addr,len}_set() to set these values.
-   Example, before:
+   Example, before::
 
 	ringp->mapping = FOO;
 	ringp->len = BAR;
 
-   after:
+   after::
 
 	dma_unmap_addr_set(ringp, mapping, FOO);
 	dma_unmap_len_set(ringp, len, BAR);
 
 3) Use dma_unmap_{addr,len}() to access these values.
-   Example, before:
+   Example, before::
 
 	dma_unmap_single(dev, ringp->mapping, ringp->len,
 			 DMA_FROM_DEVICE);
 
-   after:
+   after::
 
 	dma_unmap_single(dev,
 			 dma_unmap_addr(ringp, mapping),
@@ -902,7 +923,8 @@ It really should be self-explanatory.  We treat the ADDR and LEN
 separately, because it is possible for an implementation to only
 need the address in order to perform the unmap operation.
 
-			Platform Issues
+Platform Issues
+===============
 
 If you are just writing drivers for Linux and do not maintain
 an architecture port for the kernel, you can safely skip down
@@ -928,12 +950,13 @@ to "Closing".
    alignment constraints (e.g. the alignment constraints about 64-bit
    objects).
 
-			   Closing
+Closing
+=======
 
 This document, and the API itself, would not be in its current
 form without the feedback and suggestions from numerous individuals.
 We would like to specifically mention, in no particular order, the
-following people:
+following people::
 
 	Russell King <rmk@arm.linux.org.uk>
 	Leo Dagum <dagum@barrel.engr.sgi.com>
-- 
GitLab


From 36c682f68e8ec56564172f82f05e42f8c1754667 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:08:35 -0300
Subject: [PATCH 1558/1958] DMA-attributes.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

This file is almost on the standard format. All it needs
to be parsed by Sphinx is to fix its main title and add
a few blank lines.

Yet, it is worth to replace NOTE: by the corresponding
ReST markup.

While here, remove the extra "---" from a subtitle.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/DMA-attributes.txt | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index 44c6bc496eee6..8f8d97f65d737 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -1,5 +1,6 @@
-			DMA attributes
-			==============
+==============
+DMA attributes
+==============
 
 This document describes the semantics of the DMA attributes that are
 defined in linux/dma-mapping.h.
@@ -108,6 +109,7 @@ This is a hint to the DMA-mapping subsystem that it's probably not worth
 the time to try to allocate memory to in a way that gives better TLB
 efficiency (AKA it's not worth trying to build the mapping out of larger
 pages).  You might want to specify this if:
+
 - You know that the accesses to this memory won't thrash the TLB.
   You might know that the accesses are likely to be sequential or
   that they aren't sequential but it's unlikely you'll ping-pong
@@ -121,11 +123,12 @@ pages).  You might want to specify this if:
   the mapping to have a short lifetime then it may be worth it to
   optimize allocation (avoid coming up with large pages) instead of
   getting the slight performance win of larger pages.
+
 Setting this hint doesn't guarantee that you won't get huge pages, but it
 means that we won't try quite as hard to get them.
 
-NOTE: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM,
-though ARM64 patches will likely be posted soon.
+.. note:: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM,
+	  though ARM64 patches will likely be posted soon.
 
 DMA_ATTR_NO_WARN
 ----------------
@@ -142,10 +145,10 @@ problem at all, depending on the implementation of the retry mechanism.
 So, this provides a way for drivers to avoid those error messages on calls
 where allocation failures are not a problem, and shouldn't bother the logs.
 
-NOTE: At the moment DMA_ATTR_NO_WARN is only implemented on PowerPC.
+.. note:: At the moment DMA_ATTR_NO_WARN is only implemented on PowerPC.
 
 DMA_ATTR_PRIVILEGED
-------------------------------
+-------------------
 
 Some advanced peripherals such as remote processors and GPUs perform
 accesses to DMA buffers in both privileged "supervisor" and unprivileged
-- 
GitLab


From 5d75cf6d37e5a88a702e96288dc040663ba7d915 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:16:36 -0300
Subject: [PATCH 1559/1958] DMA-ISA-LPC.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use proper markups for titles;
- use :Author: for authorship;
- identify the literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/DMA-ISA-LPC.txt | 71 ++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
index 7a065ac4a9d1c..8c2b8be6e45b9 100644
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/DMA-ISA-LPC.txt
@@ -1,19 +1,20 @@
-                        DMA with ISA and LPC devices
-                        ============================
+============================
+DMA with ISA and LPC devices
+============================
 
-                      Pierre Ossman <drzeus@drzeus.cx>
+:Author: Pierre Ossman <drzeus@drzeus.cx>
 
 This document describes how to do DMA transfers using the old ISA DMA
 controller. Even though ISA is more or less dead today the LPC bus
 uses the same DMA system so it will be around for quite some time.
 
-Part I - Headers and dependencies
----------------------------------
+Headers and dependencies
+------------------------
 
-To do ISA style DMA you need to include two headers:
+To do ISA style DMA you need to include two headers::
 
-#include <linux/dma-mapping.h>
-#include <asm/dma.h>
+	#include <linux/dma-mapping.h>
+	#include <asm/dma.h>
 
 The first is the generic DMA API used to convert virtual addresses to
 bus addresses (see Documentation/DMA-API.txt for details).
@@ -23,8 +24,8 @@ this is not present on all platforms make sure you construct your
 Kconfig to be dependent on ISA_DMA_API (not ISA) so that nobody tries
 to build your driver on unsupported platforms.
 
-Part II - Buffer allocation
----------------------------
+Buffer allocation
+-----------------
 
 The ISA DMA controller has some very strict requirements on which
 memory it can access so extra care must be taken when allocating
@@ -47,8 +48,8 @@ __GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
 (This scarcity also means that you should allocate the buffer as
 early as possible and not release it until the driver is unloaded.)
 
-Part III - Address translation
-------------------------------
+Address translation
+-------------------
 
 To translate the virtual address to a bus address, use the normal DMA
 API. Do _not_ use isa_virt_to_phys() even though it does the same
@@ -61,8 +62,8 @@ Note: x86_64 had a broken DMA API when it came to ISA but has since
 been fixed. If your arch has problems then fix the DMA API instead of
 reverting to the ISA functions.
 
-Part IV - Channels
-------------------
+Channels
+--------
 
 A normal ISA DMA controller has 8 channels. The lower four are for
 8-bit transfers and the upper four are for 16-bit transfers.
@@ -80,8 +81,8 @@ The ability to use 16-bit or 8-bit transfers is _not_ up to you as a
 driver author but depends on what the hardware supports. Check your
 specs or test different channels.
 
-Part V - Transfer data
-----------------------
+Transfer data
+-------------
 
 Now for the good stuff, the actual DMA transfer. :)
 
@@ -112,37 +113,37 @@ Once the DMA transfer is finished (or timed out) you should disable
 the channel again. You should also check get_dma_residue() to make
 sure that all data has been transferred.
 
-Example:
+Example::
 
-int flags, residue;
+	int flags, residue;
 
-flags = claim_dma_lock();
+	flags = claim_dma_lock();
 
-clear_dma_ff();
+	clear_dma_ff();
 
-set_dma_mode(channel, DMA_MODE_WRITE);
-set_dma_addr(channel, phys_addr);
-set_dma_count(channel, num_bytes);
+	set_dma_mode(channel, DMA_MODE_WRITE);
+	set_dma_addr(channel, phys_addr);
+	set_dma_count(channel, num_bytes);
 
-dma_enable(channel);
+	dma_enable(channel);
 
-release_dma_lock(flags);
+	release_dma_lock(flags);
 
-while (!device_done());
+	while (!device_done());
 
-flags = claim_dma_lock();
+	flags = claim_dma_lock();
 
-dma_disable(channel);
+	dma_disable(channel);
 
-residue = dma_get_residue(channel);
-if (residue != 0)
-	printk(KERN_ERR "driver: Incomplete DMA transfer!"
-		" %d bytes left!\n", residue);
+	residue = dma_get_residue(channel);
+	if (residue != 0)
+		printk(KERN_ERR "driver: Incomplete DMA transfer!"
+			" %d bytes left!\n", residue);
 
-release_dma_lock(flags);
+	release_dma_lock(flags);
 
-Part VI - Suspend/resume
-------------------------
+Suspend/resume
+--------------
 
 It is the driver's responsibility to make sure that the machine isn't
 suspended while a DMA transfer is in progress. Also, all DMA settings
-- 
GitLab


From 7c6a2d229dd6dab7cc7563f72b78dd5867d351b4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 10:29:58 -0300
Subject: [PATCH 1560/1958] debugging-via-ohci1394.txt: standardize document
 format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Mark and adjust titles;
- Mark and indent literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/debugging-via-ohci1394.txt | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
index 9ff026d22b751..981ad4f89fd3e 100644
--- a/Documentation/debugging-via-ohci1394.txt
+++ b/Documentation/debugging-via-ohci1394.txt
@@ -1,6 +1,6 @@
-
-  Using physical DMA provided by OHCI-1394 FireWire controllers for debugging
-  ---------------------------------------------------------------------------
+===========================================================================
+Using physical DMA provided by OHCI-1394 FireWire controllers for debugging
+===========================================================================
 
 Introduction
 ------------
@@ -91,10 +91,10 @@ Step-by-step instructions for using firescope with early OHCI initialization:
 1) Verify that your hardware is supported:
 
    Load the firewire-ohci module and check your kernel logs.
-   You should see a line similar to
+   You should see a line similar to::
 
-   firewire_ohci 0000:15:00.1: added OHCI v1.0 device as card 2, 4 IR + 4 IT
-   ... contexts, quirks 0x11
+     firewire_ohci 0000:15:00.1: added OHCI v1.0 device as card 2, 4 IR + 4 IT
+     ... contexts, quirks 0x11
 
    when loading the driver. If you have no supported controller, many PCI,
    CardBus and even some Express cards which are fully compliant to OHCI-1394
@@ -113,9 +113,9 @@ Step-by-step instructions for using firescope with early OHCI initialization:
    stable connection and has matching connectors (there are small 4-pin and
    large 6-pin FireWire ports) will do.
 
-   If an driver is running on both machines you should see a line like
+   If an driver is running on both machines you should see a line like::
 
-   firewire_core 0000:15:00.1: created device fw1: GUID 00061b0020105917, S400
+     firewire_core 0000:15:00.1: created device fw1: GUID 00061b0020105917, S400
 
    on both machines in the kernel log when the cable is plugged in
    and connects the two machines.
@@ -123,7 +123,7 @@ Step-by-step instructions for using firescope with early OHCI initialization:
 3) Test physical DMA using firescope:
 
    On the debug host, make sure that /dev/fw* is accessible,
-   then start firescope:
+   then start firescope::
 
 	$ firescope
 	Port 0 (/dev/fw1) opened, 2 nodes detected
@@ -163,7 +163,7 @@ Step-by-step instructions for using firescope with early OHCI initialization:
    host loaded, reboot the debugged machine, booting the kernel which has
    CONFIG_PROVIDE_OHCI1394_DMA_INIT enabled, with the option ohci1394_dma=early.
 
-   Then, on the debugging host, run firescope, for example by using -A:
+   Then, on the debugging host, run firescope, for example by using -A::
 
 	firescope -A System.map-of-debug-target-kernel
 
@@ -178,6 +178,7 @@ Step-by-step instructions for using firescope with early OHCI initialization:
 
 Notes
 -----
+
 Documentation and specifications: http://halobates.de/firewire/
 
 FireWire is a trademark of Apple Inc. - for more information please refer to:
-- 
GitLab


From ef16bcc7f72d5084f2da551c1d88374c014f76a0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:19:44 -0300
Subject: [PATCH 1561/1958] efi-stub.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use proper markups for titles;
- identify literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/efi-stub.txt | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/Documentation/efi-stub.txt b/Documentation/efi-stub.txt
index e157469882614..41df801f9a507 100644
--- a/Documentation/efi-stub.txt
+++ b/Documentation/efi-stub.txt
@@ -1,5 +1,6 @@
-			  The EFI Boot Stub
-		     ---------------------------
+=================
+The EFI Boot Stub
+=================
 
 On the x86 and ARM platforms, a kernel zImage/bzImage can masquerade
 as a PE/COFF image, thereby convincing EFI firmware loaders to load
@@ -25,7 +26,8 @@ a certain sense it *IS* the boot loader.
 The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option.
 
 
-**** How to install bzImage.efi
+How to install bzImage.efi
+--------------------------
 
 The bzImage located in arch/x86/boot/bzImage must be copied to the EFI
 System Partition (ESP) and renamed with the extension ".efi". Without
@@ -37,14 +39,16 @@ may not need to be renamed. Similarly for arm64, arch/arm64/boot/Image
 should be copied but not necessarily renamed.
 
 
-**** Passing kernel parameters from the EFI shell
+Passing kernel parameters from the EFI shell
+--------------------------------------------
 
-Arguments to the kernel can be passed after bzImage.efi, e.g.
+Arguments to the kernel can be passed after bzImage.efi, e.g.::
 
 	fs0:> bzImage.efi console=ttyS0 root=/dev/sda4
 
 
-**** The "initrd=" option
+The "initrd=" option
+--------------------
 
 Like most boot loaders, the EFI stub allows the user to specify
 multiple initrd files using the "initrd=" option. This is the only EFI
@@ -54,9 +58,9 @@ kernel when it boots.
 The path to the initrd file must be an absolute path from the
 beginning of the ESP, relative path names do not work. Also, the path
 is an EFI-style path and directory elements must be separated with
-backslashes (\). For example, given the following directory layout,
+backslashes (\). For example, given the following directory layout::
 
-fs0:>
+  fs0:>
 	Kernels\
 			bzImage.efi
 			initrd-large.img
@@ -66,7 +70,7 @@ fs0:>
 			initrd-medium.img
 
 to boot with the initrd-large.img file if the current working
-directory is fs0:\Kernels, the following command must be used,
+directory is fs0:\Kernels, the following command must be used::
 
 	fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img
 
@@ -76,7 +80,8 @@ which understands relative paths, whereas the rest of the command line
 is passed to bzImage.efi.
 
 
-**** The "dtb=" option
+The "dtb=" option
+-----------------
 
 For the ARM and arm64 architectures, we also need to be able to provide a
 device tree to the kernel. This is done with the "dtb=" command line option,
-- 
GitLab


From 72fd15c01554694bd8d69652cc9bb77c2cda193b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:27:55 -0300
Subject: [PATCH 1562/1958] eisa.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use ReST notation for titles;
- identify literal blocks;
- use :Author: for document authorship;
- use the proper notation for tables;
- adjust whitespaces where needed.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/eisa.txt | 273 ++++++++++++++++++++++-------------------
 1 file changed, 150 insertions(+), 123 deletions(-)

diff --git a/Documentation/eisa.txt b/Documentation/eisa.txt
index a55e4910924ea..2806e5544e435 100644
--- a/Documentation/eisa.txt
+++ b/Documentation/eisa.txt
@@ -1,4 +1,8 @@
-EISA bus support (Marc Zyngier <maz@wild-wind.fr.eu.org>)
+================
+EISA bus support
+================
+
+:Author: Marc Zyngier <maz@wild-wind.fr.eu.org>
 
 This document groups random notes about porting EISA drivers to the
 new EISA/sysfs API.
@@ -14,168 +18,189 @@ detection code is generally also used to probe ISA cards). Moreover,
 most EISA drivers are among the oldest Linux drivers so, as you can
 imagine, some dust has settled here over the years.
 
-The EISA infrastructure is made up of three parts :
+The EISA infrastructure is made up of three parts:
 
     - The bus code implements most of the generic code. It is shared
-    among all the architectures that the EISA code runs on. It
-    implements bus probing (detecting EISA cards available on the bus),
-    allocates I/O resources, allows fancy naming through sysfs, and
-    offers interfaces for driver to register.
+      among all the architectures that the EISA code runs on. It
+      implements bus probing (detecting EISA cards available on the bus),
+      allocates I/O resources, allows fancy naming through sysfs, and
+      offers interfaces for driver to register.
 
     - The bus root driver implements the glue between the bus hardware
-    and the generic bus code. It is responsible for discovering the
-    device implementing the bus, and setting it up to be latter probed
-    by the bus code. This can go from something as simple as reserving
-    an I/O region on x86, to the rather more complex, like the hppa
-    EISA code. This is the part to implement in order to have EISA
-    running on an "new" platform.
+      and the generic bus code. It is responsible for discovering the
+      device implementing the bus, and setting it up to be latter probed
+      by the bus code. This can go from something as simple as reserving
+      an I/O region on x86, to the rather more complex, like the hppa
+      EISA code. This is the part to implement in order to have EISA
+      running on an "new" platform.
 
     - The driver offers the bus a list of devices that it manages, and
-    implements the necessary callbacks to probe and release devices
-    whenever told to.
+      implements the necessary callbacks to probe and release devices
+      whenever told to.
 
 Every function/structure below lives in <linux/eisa.h>, which depends
 heavily on <linux/device.h>.
 
-** Bus root driver :
+Bus root driver
+===============
+
+::
 
-int eisa_root_register (struct eisa_root_device *root);
+	int eisa_root_register (struct eisa_root_device *root);
 
 The eisa_root_register function is used to declare a device as the
 root of an EISA bus. The eisa_root_device structure holds a reference
-to this device, as well as some parameters for probing purposes.
-
-struct eisa_root_device {
-	struct device   *dev;	 /* Pointer to bridge device */
-	struct resource *res;
-	unsigned long    bus_base_addr;
-	int		 slots;  /* Max slot number */
-	int		 force_probe; /* Probe even when no slot 0 */
-	u64		 dma_mask; /* from bridge device */
-	int              bus_nr; /* Set by eisa_root_register */
-	struct resource  eisa_root_res;	/* ditto */
-};
-
-node          : used for eisa_root_register internal purpose
-dev           : pointer to the root device
-res           : root device I/O resource
-bus_base_addr : slot 0 address on this bus
-slots	      : max slot number to probe
-force_probe   : Probe even when slot 0 is empty (no EISA mainboard)
-dma_mask      : Default DMA mask. Usually the bridge device dma_mask.
-bus_nr	      : unique bus id, set by eisa_root_register
-
-** Driver :
-
-int eisa_driver_register (struct eisa_driver *edrv);
-void eisa_driver_unregister (struct eisa_driver *edrv);
+to this device, as well as some parameters for probing purposes::
+
+	struct eisa_root_device {
+		struct device   *dev;	 /* Pointer to bridge device */
+		struct resource *res;
+		unsigned long    bus_base_addr;
+		int		 slots;  /* Max slot number */
+		int		 force_probe; /* Probe even when no slot 0 */
+		u64		 dma_mask; /* from bridge device */
+		int              bus_nr; /* Set by eisa_root_register */
+		struct resource  eisa_root_res;	/* ditto */
+	};
+
+============= ======================================================
+node          used for eisa_root_register internal purpose
+dev           pointer to the root device
+res           root device I/O resource
+bus_base_addr slot 0 address on this bus
+slots	      max slot number to probe
+force_probe   Probe even when slot 0 is empty (no EISA mainboard)
+dma_mask      Default DMA mask. Usually the bridge device dma_mask.
+bus_nr	      unique bus id, set by eisa_root_register
+============= ======================================================
+
+Driver
+======
+
+::
+
+	int eisa_driver_register (struct eisa_driver *edrv);
+	void eisa_driver_unregister (struct eisa_driver *edrv);
 
 Clear enough ?
 
-struct eisa_device_id {
-        char sig[EISA_SIG_LEN];
-	unsigned long driver_data;
-};
-
-struct eisa_driver {
-        const struct eisa_device_id *id_table;
-        struct device_driver         driver;
-};
-
-id_table	: an array of NULL terminated EISA id strings,
-		  followed by an empty string. Each string can
-		  optionally be paired with a driver-dependent value
-		  (driver_data).
-
-driver		: a generic driver, such as described in
-		  Documentation/driver-model/driver.txt. Only .name,
-		  .probe and .remove members are mandatory.
-
-An example is the 3c59x driver :
-
-static struct eisa_device_id vortex_eisa_ids[] = {
-	{ "TCM5920", EISA_3C592_OFFSET },
-	{ "TCM5970", EISA_3C597_OFFSET },
-	{ "" }
-};
-
-static struct eisa_driver vortex_eisa_driver = {
-	.id_table = vortex_eisa_ids,
-	.driver   = {
-		.name    = "3c59x",
-		.probe   = vortex_eisa_probe,
-		.remove  = vortex_eisa_remove
-	}
-};
-
-** Device :
+::
+
+	struct eisa_device_id {
+		char sig[EISA_SIG_LEN];
+		unsigned long driver_data;
+	};
+
+	struct eisa_driver {
+		const struct eisa_device_id *id_table;
+		struct device_driver         driver;
+	};
+
+=============== ====================================================
+id_table	an array of NULL terminated EISA id strings,
+		followed by an empty string. Each string can
+		optionally be paired with a driver-dependent value
+		(driver_data).
+
+driver		a generic driver, such as described in
+		Documentation/driver-model/driver.txt. Only .name,
+		.probe and .remove members are mandatory.
+=============== ====================================================
+
+An example is the 3c59x driver::
+
+	static struct eisa_device_id vortex_eisa_ids[] = {
+		{ "TCM5920", EISA_3C592_OFFSET },
+		{ "TCM5970", EISA_3C597_OFFSET },
+		{ "" }
+	};
+
+	static struct eisa_driver vortex_eisa_driver = {
+		.id_table = vortex_eisa_ids,
+		.driver   = {
+			.name    = "3c59x",
+			.probe   = vortex_eisa_probe,
+			.remove  = vortex_eisa_remove
+		}
+	};
+
+Device
+======
 
 The sysfs framework calls .probe and .remove functions upon device
 discovery and removal (note that the .remove function is only called
 when driver is built as a module).
 
 Both functions are passed a pointer to a 'struct device', which is
-encapsulated in a 'struct eisa_device' described as follows :
-
-struct eisa_device {
-        struct eisa_device_id id;
-        int                   slot;
-	int                   state;
-	unsigned long         base_addr;
-	struct resource       res[EISA_MAX_RESOURCES];
-	u64                   dma_mask;
-        struct device         dev; /* generic device */
-};
-
-id	: EISA id, as read from device. id.driver_data is set from the
-	  matching driver EISA id.
-slot	: slot number which the device was detected on
-state   : set of flags indicating the state of the device. Current
-	  flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
-res	: set of four 256 bytes I/O regions allocated to this device
-dma_mask: DMA mask set from the parent device.
-dev	: generic device (see Documentation/driver-model/device.txt)
+encapsulated in a 'struct eisa_device' described as follows::
+
+	struct eisa_device {
+		struct eisa_device_id id;
+		int                   slot;
+		int                   state;
+		unsigned long         base_addr;
+		struct resource       res[EISA_MAX_RESOURCES];
+		u64                   dma_mask;
+		struct device         dev; /* generic device */
+	};
+
+======== ============================================================
+id	 EISA id, as read from device. id.driver_data is set from the
+	 matching driver EISA id.
+slot	 slot number which the device was detected on
+state    set of flags indicating the state of the device. Current
+	 flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
+res	 set of four 256 bytes I/O regions allocated to this device
+dma_mask DMA mask set from the parent device.
+dev	 generic device (see Documentation/driver-model/device.txt)
+======== ============================================================
 
 You can get the 'struct eisa_device' from 'struct device' using the
 'to_eisa_device' macro.
 
-** Misc stuff :
+Misc stuff
+==========
+
+::
 
-void eisa_set_drvdata (struct eisa_device *edev, void *data);
+	void eisa_set_drvdata (struct eisa_device *edev, void *data);
 
 Stores data into the device's driver_data area.
 
-void *eisa_get_drvdata (struct eisa_device *edev):
+::
+
+	void *eisa_get_drvdata (struct eisa_device *edev):
 
 Gets the pointer previously stored into the device's driver_data area.
 
-int eisa_get_region_index (void *addr);
+::
+
+	int eisa_get_region_index (void *addr);
 
 Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given
 address.
 
-** Kernel parameters :
+Kernel parameters
+=================
 
-eisa_bus.enable_dev :
+eisa_bus.enable_dev
+	A comma-separated list of slots to be enabled, even if the firmware
+	set the card as disabled. The driver must be able to properly
+	initialize the device in such conditions.
 
-A comma-separated list of slots to be enabled, even if the firmware
-set the card as disabled. The driver must be able to properly
-initialize the device in such conditions.
+eisa_bus.disable_dev
+	A comma-separated list of slots to be enabled, even if the firmware
+	set the card as enabled. The driver won't be called to handle this
+	device.
 
-eisa_bus.disable_dev :
+virtual_root.force_probe
+	Force the probing code to probe EISA slots even when it cannot find an
+	EISA compliant mainboard (nothing appears on slot 0). Defaults to 0
+	(don't force), and set to 1 (force probing) when either
+	CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set.
 
-A comma-separated list of slots to be enabled, even if the firmware
-set the card as enabled. The driver won't be called to handle this
-device.
-
-virtual_root.force_probe :
-
-Force the probing code to probe EISA slots even when it cannot find an
-EISA compliant mainboard (nothing appears on slot 0). Defaults to 0
-(don't force), and set to 1 (force probing) when either
-CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set.
-
-** Random notes :
+Random notes
+============
 
 Converting an EISA driver to the new API mostly involves *deleting*
 code (since probing is now in the core EISA code). Unfortunately, most
@@ -194,9 +219,11 @@ routine.
 For example, switching your favorite EISA SCSI card to the "hotplug"
 model is "the right thing"(tm).
 
-** Thanks :
+Thanks
+======
+
+I'd like to thank the following people for their help:
 
-I'd like to thank the following people for their help :
 - Xavier Benigni for lending me a wonderful Alpha Jensen,
 - James Bottomley, Jeff Garzik for getting this stuff into the kernel,
 - Andries Brouwer for contributing numerous EISA ids,
-- 
GitLab


From af7175bc21e3c2a73f85bc920852a136469c6112 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:32:50 -0300
Subject: [PATCH 1563/1958] flexible-arrays.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use :Author: and :Updated: markups;
- use proper markup for the document title;
- mark the literal-blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/flexible-arrays.txt | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt
index df904aec99044..a0f2989dd8040 100644
--- a/Documentation/flexible-arrays.txt
+++ b/Documentation/flexible-arrays.txt
@@ -1,6 +1,9 @@
+===================================
 Using flexible arrays in the kernel
-Last updated for 2.6.32
-Jonathan Corbet <corbet@lwn.net>
+===================================
+
+:Updated: Last updated for 2.6.32
+:Author: Jonathan Corbet <corbet@lwn.net>
 
 Large contiguous memory allocations can be unreliable in the Linux kernel.
 Kernel programmers will sometimes respond to this problem by allocating
@@ -26,7 +29,7 @@ operation.  It's also worth noting that flexible arrays do no internal
 locking at all; if concurrent access to an array is possible, then the
 caller must arrange for appropriate mutual exclusion.
 
-The creation of a flexible array is done with:
+The creation of a flexible array is done with::
 
     #include <linux/flex_array.h>
 
@@ -40,14 +43,14 @@ argument is passed directly to the internal memory allocation calls.  With
 the current code, using flags to ask for high memory is likely to lead to
 notably unpleasant side effects.
 
-It is also possible to define flexible arrays at compile time with:
+It is also possible to define flexible arrays at compile time with::
 
     DEFINE_FLEX_ARRAY(name, element_size, total);
 
 This macro will result in a definition of an array with the given name; the
 element size and total will be checked for validity at compile time.
 
-Storing data into a flexible array is accomplished with a call to:
+Storing data into a flexible array is accomplished with a call to::
 
     int flex_array_put(struct flex_array *array, unsigned int element_nr,
     		       void *src, gfp_t flags);
@@ -63,7 +66,7 @@ running in some sort of atomic context; in this situation, sleeping in the
 memory allocator would be a bad thing.  That can be avoided by using
 GFP_ATOMIC for the flags value, but, often, there is a better way.  The
 trick is to ensure that any needed memory allocations are done before
-entering atomic context, using:
+entering atomic context, using::
 
     int flex_array_prealloc(struct flex_array *array, unsigned int start,
 			    unsigned int nr_elements, gfp_t flags);
@@ -73,7 +76,7 @@ defined by start and nr_elements has been allocated.  Thereafter, a
 flex_array_put() call on an element in that range is guaranteed not to
 block.
 
-Getting data back out of the array is done with:
+Getting data back out of the array is done with::
 
     void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
 
@@ -89,7 +92,7 @@ involving that number probably result from use of unstored array entries.
 Note that, if array elements are allocated with __GFP_ZERO, they will be
 initialized to zero and this poisoning will not happen.
 
-Individual elements in the array can be cleared with:
+Individual elements in the array can be cleared with::
 
     int flex_array_clear(struct flex_array *array, unsigned int element_nr);
 
@@ -97,7 +100,7 @@ This function will set the given element to FLEX_ARRAY_FREE and return
 zero.  If storage for the indicated element is not allocated for the array,
 flex_array_clear() will return -EINVAL instead.  Note that clearing an
 element does not release the storage associated with it; to reduce the
-allocated size of an array, call:
+allocated size of an array, call::
 
     int flex_array_shrink(struct flex_array *array);
 
@@ -106,12 +109,12 @@ This function works by scanning the array for pages containing nothing but
 FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work
 if the array's pages are allocated with __GFP_ZERO.
 
-It is possible to remove all elements of an array with a call to:
+It is possible to remove all elements of an array with a call to::
 
     void flex_array_free_parts(struct flex_array *array);
 
 This call frees all elements, but leaves the array itself in place.
-Freeing the entire array is done with:
+Freeing the entire array is done with::
 
     void flex_array_free(struct flex_array *array);
 
-- 
GitLab


From db4df4819c8c3f7e749752cf95966034d4c42ed2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:35:29 -0300
Subject: [PATCH 1564/1958] futex-requeue-pi.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- promote level for the document title;
- mark literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/futex-requeue-pi.txt | 93 +++++++++++++++---------------
 1 file changed, 47 insertions(+), 46 deletions(-)

diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
index 77b36f59d16b4..14ab5787b9a77 100644
--- a/Documentation/futex-requeue-pi.txt
+++ b/Documentation/futex-requeue-pi.txt
@@ -1,5 +1,6 @@
+================
 Futex Requeue PI
-----------------
+================
 
 Requeueing of tasks from a non-PI futex to a PI futex requires
 special handling in order to ensure the underlying rt_mutex is never
@@ -20,28 +21,28 @@ implementation would wake the highest-priority waiter, and leave the
 rest to the natural wakeup inherent in unlocking the mutex
 associated with the condvar.
 
-Consider the simplified glibc calls:
-
-/* caller must lock mutex */
-pthread_cond_wait(cond, mutex)
-{
-	lock(cond->__data.__lock);
-	unlock(mutex);
-	do {
-	   unlock(cond->__data.__lock);
-	   futex_wait(cond->__data.__futex);
-	   lock(cond->__data.__lock);
-	} while(...)
-	unlock(cond->__data.__lock);
-	lock(mutex);
-}
-
-pthread_cond_broadcast(cond)
-{
-	lock(cond->__data.__lock);
-	unlock(cond->__data.__lock);
-	futex_requeue(cond->data.__futex, cond->mutex);
-}
+Consider the simplified glibc calls::
+
+	/* caller must lock mutex */
+	pthread_cond_wait(cond, mutex)
+	{
+		lock(cond->__data.__lock);
+		unlock(mutex);
+		do {
+		unlock(cond->__data.__lock);
+		futex_wait(cond->__data.__futex);
+		lock(cond->__data.__lock);
+		} while(...)
+		unlock(cond->__data.__lock);
+		lock(mutex);
+	}
+
+	pthread_cond_broadcast(cond)
+	{
+		lock(cond->__data.__lock);
+		unlock(cond->__data.__lock);
+		futex_requeue(cond->data.__futex, cond->mutex);
+	}
 
 Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
 has waiters. Note that pthread_cond_wait() attempts to lock the
@@ -53,29 +54,29 @@ In order to support PI-aware pthread_condvar's, the kernel needs to
 be able to requeue tasks to PI futexes.  This support implies that
 upon a successful futex_wait system call, the caller would return to
 user space already holding the PI futex.  The glibc implementation
-would be modified as follows:
-
-
-/* caller must lock mutex */
-pthread_cond_wait_pi(cond, mutex)
-{
-	lock(cond->__data.__lock);
-	unlock(mutex);
-	do {
-	   unlock(cond->__data.__lock);
-	   futex_wait_requeue_pi(cond->__data.__futex);
-	   lock(cond->__data.__lock);
-	} while(...)
-	unlock(cond->__data.__lock);
-        /* the kernel acquired the mutex for us */
-}
-
-pthread_cond_broadcast_pi(cond)
-{
-	lock(cond->__data.__lock);
-	unlock(cond->__data.__lock);
-	futex_requeue_pi(cond->data.__futex, cond->mutex);
-}
+would be modified as follows::
+
+
+	/* caller must lock mutex */
+	pthread_cond_wait_pi(cond, mutex)
+	{
+		lock(cond->__data.__lock);
+		unlock(mutex);
+		do {
+		unlock(cond->__data.__lock);
+		futex_wait_requeue_pi(cond->__data.__futex);
+		lock(cond->__data.__lock);
+		} while(...)
+		unlock(cond->__data.__lock);
+		/* the kernel acquired the mutex for us */
+	}
+
+	pthread_cond_broadcast_pi(cond)
+	{
+		lock(cond->__data.__lock);
+		unlock(cond->__data.__lock);
+		futex_requeue_pi(cond->data.__futex, cond->mutex);
+	}
 
 The actual glibc implementation will likely test for PI and make the
 necessary changes inside the existing calls rather than creating new
-- 
GitLab


From ccd8d5577c17ac276388dc77c34f16b4e9c1b62e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:45:26 -0300
Subject: [PATCH 1565/1958] gcc-plugins.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- promote main title;
- use the right markup for footnotes;
- use bold markup for files name;
- identify literal blocks;
- add blank lines to avoid Sphinx to complain;
- remove numeration from titles.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/gcc-plugins.txt | 58 +++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/Documentation/gcc-plugins.txt b/Documentation/gcc-plugins.txt
index 433eaefb4aa17..8502f24396fbb 100644
--- a/Documentation/gcc-plugins.txt
+++ b/Documentation/gcc-plugins.txt
@@ -1,14 +1,15 @@
+=========================
 GCC plugin infrastructure
 =========================
 
 
-1. Introduction
-===============
+Introduction
+============
 
 GCC plugins are loadable modules that provide extra features to the
-compiler [1]. They are useful for runtime instrumentation and static analysis.
+compiler [1]_. They are useful for runtime instrumentation and static analysis.
 We can analyse, change and add further code during compilation via
-callbacks [2], GIMPLE [3], IPA [4] and RTL passes [5].
+callbacks [2]_, GIMPLE [3]_, IPA [4]_ and RTL passes [5]_.
 
 The GCC plugin infrastructure of the kernel supports all gcc versions from
 4.5 to 6.0, building out-of-tree modules, cross-compilation and building in a
@@ -21,56 +22,61 @@ and versions 4.8+ can only be compiled by a C++ compiler.
 Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and
 powerpc architectures.
 
-This infrastructure was ported from grsecurity [6] and PaX [7].
+This infrastructure was ported from grsecurity [6]_ and PaX [7]_.
 
 --
-[1] https://gcc.gnu.org/onlinedocs/gccint/Plugins.html
-[2] https://gcc.gnu.org/onlinedocs/gccint/Plugin-API.html#Plugin-API
-[3] https://gcc.gnu.org/onlinedocs/gccint/GIMPLE.html
-[4] https://gcc.gnu.org/onlinedocs/gccint/IPA.html
-[5] https://gcc.gnu.org/onlinedocs/gccint/RTL.html
-[6] https://grsecurity.net/
-[7] https://pax.grsecurity.net/
 
+.. [1] https://gcc.gnu.org/onlinedocs/gccint/Plugins.html
+.. [2] https://gcc.gnu.org/onlinedocs/gccint/Plugin-API.html#Plugin-API
+.. [3] https://gcc.gnu.org/onlinedocs/gccint/GIMPLE.html
+.. [4] https://gcc.gnu.org/onlinedocs/gccint/IPA.html
+.. [5] https://gcc.gnu.org/onlinedocs/gccint/RTL.html
+.. [6] https://grsecurity.net/
+.. [7] https://pax.grsecurity.net/
+
+
+Files
+=====
 
-2. Files
-========
+**$(src)/scripts/gcc-plugins**
 
-$(src)/scripts/gcc-plugins
 	This is the directory of the GCC plugins.
 
-$(src)/scripts/gcc-plugins/gcc-common.h
+**$(src)/scripts/gcc-plugins/gcc-common.h**
+
 	This is a compatibility header for GCC plugins.
 	It should be always included instead of individual gcc headers.
 
-$(src)/scripts/gcc-plugin.sh
+**$(src)/scripts/gcc-plugin.sh**
+
 	This script checks the availability of the included headers in
 	gcc-common.h and chooses the proper host compiler to build the plugins
 	(gcc-4.7 can be built by either gcc or g++).
 
-$(src)/scripts/gcc-plugins/gcc-generate-gimple-pass.h
-$(src)/scripts/gcc-plugins/gcc-generate-ipa-pass.h
-$(src)/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h
-$(src)/scripts/gcc-plugins/gcc-generate-rtl-pass.h
+**$(src)/scripts/gcc-plugins/gcc-generate-gimple-pass.h,
+$(src)/scripts/gcc-plugins/gcc-generate-ipa-pass.h,
+$(src)/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h,
+$(src)/scripts/gcc-plugins/gcc-generate-rtl-pass.h**
+
 	These headers automatically generate the registration structures for
 	GIMPLE, SIMPLE_IPA, IPA and RTL passes. They support all gcc versions
 	from 4.5 to 6.0.
 	They should be preferred to creating the structures by hand.
 
 
-3. Usage
-========
+Usage
+=====
 
 You must install the gcc plugin headers for your gcc version,
-e.g., on Ubuntu for gcc-4.9:
+e.g., on Ubuntu for gcc-4.9::
 
 	apt-get install gcc-4.9-plugin-dev
 
-Enable a GCC plugin based feature in the kernel config:
+Enable a GCC plugin based feature in the kernel config::
 
 	CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y
 
-To compile only the plugin(s):
+To compile only the plugin(s)::
 
 	make gcc-plugins
 
-- 
GitLab


From e45b082d463fea5bb1b85aff00d049cc51f593fa Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:51:39 -0300
Subject: [PATCH 1566/1958] highuid.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use a markup for the title;
- use :Author: and :Last updated: markups at the beginning with
  authorship info;
- use proper markups for the lists.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/highuid.txt | 47 +++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/Documentation/highuid.txt b/Documentation/highuid.txt
index 6bad6f1d1cac4..6ee70465c0ead 100644
--- a/Documentation/highuid.txt
+++ b/Documentation/highuid.txt
@@ -1,4 +1,9 @@
-Notes on the change from 16-bit UIDs to 32-bit UIDs:
+===================================================
+Notes on the change from 16-bit UIDs to 32-bit UIDs
+===================================================
+
+:Author: Chris Wing <wingc@umich.edu>
+:Last updated: January 11, 2000
 
 - kernel code MUST take into account __kernel_uid_t and __kernel_uid32_t
   when communicating between user and kernel space in an ioctl or data
@@ -28,30 +33,34 @@ What's left to be done for 32-bit UIDs on all Linux architectures:
   uses the 32-bit UID system calls properly otherwise.
 
   This affects at least:
-	iBCS on Intel
 
-	sparc32 emulation on sparc64
-	(need to support whatever new 32-bit UID system calls are added to
-	sparc32)
+	- iBCS on Intel
+
+	- sparc32 emulation on sparc64
+	  (need to support whatever new 32-bit UID system calls are added to
+	  sparc32)
 
 - Validate that all filesystems behave properly.
 
   At present, 32-bit UIDs _should_ work for:
-	ext2
-	ufs
-	isofs
-	nfs
-	coda
-	udf
+
+	- ext2
+	- ufs
+	- isofs
+	- nfs
+	- coda
+	- udf
 
   Ioctl() fixups have been made for:
-	ncpfs
-	smbfs
+
+	- ncpfs
+	- smbfs
 
   Filesystems with simple fixups to prevent 16-bit UID wraparound:
-	minix
-	sysv
-	qnx4
+
+	- minix
+	- sysv
+	- qnx4
 
   Other filesystems have not been checked yet.
 
@@ -69,9 +78,3 @@ What's left to be done for 32-bit UIDs on all Linux architectures:
 - make sure that the UID mapping feature of AX25 networking works properly
   (it should be safe because it's always used a 32-bit integer to
   communicate between user and kernel)
-
-
-Chris Wing
-wingc@umich.edu
-
-last updated: January 11, 2000
-- 
GitLab


From 440e4f6d293c5e599013a04a8e3a8ae375bbaf71 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 13:59:18 -0300
Subject: [PATCH 1567/1958] hw_random.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use proper markups for titles;
- adjust section identation;
- use proper markup for notes and fix it to properly show the
  numbered list.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/hw_random.txt | 159 ++++++++++++++++++++----------------
 1 file changed, 87 insertions(+), 72 deletions(-)

diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt
index fce1634907d0a..121de96e395e2 100644
--- a/Documentation/hw_random.txt
+++ b/Documentation/hw_random.txt
@@ -1,90 +1,105 @@
-Introduction:
-
-	The hw_random framework is software that makes use of a
-	special hardware feature on your CPU or motherboard,
-	a Random Number Generator (RNG).  The software has two parts:
-	a core providing the /dev/hwrng character device and its
-	sysfs support, plus a hardware-specific driver that plugs
-	into that core.
-
-	To make the most effective use of these mechanisms, you
-	should download the support software as well.  Download the
-	latest version of the "rng-tools" package from the
-	hw_random driver's official Web site:
-
-		http://sourceforge.net/projects/gkernel/
-
-	Those tools use /dev/hwrng to fill the kernel entropy pool,
-	which is used internally and exported by the /dev/urandom and
-	/dev/random special files.
-
-Theory of operation:
-
-	CHARACTER DEVICE.  Using the standard open()
-	and read() system calls, you can read random data from
-	the hardware RNG device.  This data is NOT CHECKED by any
-	fitness tests, and could potentially be bogus (if the
-	hardware is faulty or has been tampered with).  Data is only
-	output if the hardware "has-data" flag is set, but nevertheless
-	a security-conscious person would run fitness tests on the
-	data before assuming it is truly random.
-
-	The rng-tools package uses such tests in "rngd", and lets you
-	run them by hand with a "rngtest" utility.
-
-	/dev/hwrng is char device major 10, minor 183.
-
-	CLASS DEVICE.  There is a /sys/class/misc/hw_random node with
-	two unique attributes, "rng_available" and "rng_current".  The
-	"rng_available" attribute lists the hardware-specific drivers
-	available, while "rng_current" lists the one which is currently
-	connected to /dev/hwrng.  If your system has more than one
-	RNG available, you may change the one used by writing a name from
-	the list in "rng_available" into "rng_current".
+==========================================================
+Linux support for random number generator in i8xx chipsets
+==========================================================
+
+Introduction
+============
+
+The hw_random framework is software that makes use of a
+special hardware feature on your CPU or motherboard,
+a Random Number Generator (RNG).  The software has two parts:
+a core providing the /dev/hwrng character device and its
+sysfs support, plus a hardware-specific driver that plugs
+into that core.
+
+To make the most effective use of these mechanisms, you
+should download the support software as well.  Download the
+latest version of the "rng-tools" package from the
+hw_random driver's official Web site:
+
+	http://sourceforge.net/projects/gkernel/
+
+Those tools use /dev/hwrng to fill the kernel entropy pool,
+which is used internally and exported by the /dev/urandom and
+/dev/random special files.
+
+Theory of operation
+===================
+
+CHARACTER DEVICE.  Using the standard open()
+and read() system calls, you can read random data from
+the hardware RNG device.  This data is NOT CHECKED by any
+fitness tests, and could potentially be bogus (if the
+hardware is faulty or has been tampered with).  Data is only
+output if the hardware "has-data" flag is set, but nevertheless
+a security-conscious person would run fitness tests on the
+data before assuming it is truly random.
+
+The rng-tools package uses such tests in "rngd", and lets you
+run them by hand with a "rngtest" utility.
+
+/dev/hwrng is char device major 10, minor 183.
+
+CLASS DEVICE.  There is a /sys/class/misc/hw_random node with
+two unique attributes, "rng_available" and "rng_current".  The
+"rng_available" attribute lists the hardware-specific drivers
+available, while "rng_current" lists the one which is currently
+connected to /dev/hwrng.  If your system has more than one
+RNG available, you may change the one used by writing a name from
+the list in "rng_available" into "rng_current".
 
 ==========================================================================
 
-	Hardware driver for Intel/AMD/VIA Random Number Generators (RNG)
-	Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
-	Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
 
+Hardware driver for Intel/AMD/VIA Random Number Generators (RNG)
+	- Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
+	- Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
 
-About the Intel RNG hardware, from the firmware hub datasheet:
 
-	The Firmware Hub integrates a Random Number Generator (RNG)
-	using thermal noise generated from inherently random quantum
-	mechanical properties of silicon. When not generating new random
-	bits the RNG circuitry will enter a low power state. Intel will
-	provide a binary software driver to give third party software
-	access to our RNG for use as a security feature. At this time,
-	the RNG is only to be used with a system in an OS-present state.
+About the Intel RNG hardware, from the firmware hub datasheet
+=============================================================
 
-Intel RNG Driver notes:
+The Firmware Hub integrates a Random Number Generator (RNG)
+using thermal noise generated from inherently random quantum
+mechanical properties of silicon. When not generating new random
+bits the RNG circuitry will enter a low power state. Intel will
+provide a binary software driver to give third party software
+access to our RNG for use as a security feature. At this time,
+the RNG is only to be used with a system in an OS-present state.
 
-	* FIXME: support poll(2)
+Intel RNG Driver notes
+======================
 
-	NOTE: request_mem_region was removed, for three reasons:
-	1) Only one RNG is supported by this driver, 2) The location
-	used by the RNG is a fixed location in MMIO-addressable memory,
+FIXME: support poll(2)
+
+.. note::
+
+	request_mem_region was removed, for three reasons:
+
+	1) Only one RNG is supported by this driver;
+	2) The location used by the RNG is a fixed location in
+	   MMIO-addressable memory;
 	3) users with properly working BIOS e820 handling will always
-	have the region in which the RNG is located reserved, so
-	request_mem_region calls always fail for proper setups.
-	However, for people who use mem=XX, BIOS e820 information is
-	-not- in /proc/iomem, and request_mem_region(RNG_ADDR) can
-	succeed.
+	   have the region in which the RNG is located reserved, so
+	   request_mem_region calls always fail for proper setups.
+	   However, for people who use mem=XX, BIOS e820 information is
+	   **not** in /proc/iomem, and request_mem_region(RNG_ADDR) can
+	   succeed.
 
-Driver details:
+Driver details
+==============
 
-	Based on:
+Based on:
 	Intel 82802AB/82802AC Firmware Hub (FWH) Datasheet
-		May 1999 Order Number: 290658-002 R
+	May 1999 Order Number: 290658-002 R
 
-	Intel 82802 Firmware Hub: Random Number Generator
+Intel 82802 Firmware Hub:
+	Random Number Generator
 	Programmer's Reference Manual
-		December 1999 Order Number: 298029-001 R
+	December 1999 Order Number: 298029-001 R
 
-	Intel 82802 Firmware HUB Random Number Generator Driver
+Intel 82802 Firmware HUB Random Number Generator Driver
 	Copyright (c) 2000 Matt Sottek <msottek@quiknet.com>
 
-	Special thanks to Matt Sottek.  I did the "guts", he
-	did the "brains" and all the testing.
+Special thanks to Matt Sottek.  I did the "guts", he
+did the "brains" and all the testing.
-- 
GitLab


From e2862b25dcce61e1d19bfb5cf1840415ef3664e6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 14:23:08 -0300
Subject: [PATCH 1568/1958] hwspinlock.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Adjust title markups;
- remove explicit numeration from titles;
- mark literal blocks as such;
- replace _foo_ by **foo** for emphasis;
- adjust whitespaces and add blank lines where needed.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/hwspinlock.txt | 527 ++++++++++++++++++++---------------
 1 file changed, 307 insertions(+), 220 deletions(-)

diff --git a/Documentation/hwspinlock.txt b/Documentation/hwspinlock.txt
index 61c1ee98e59f2..ed640a278185c 100644
--- a/Documentation/hwspinlock.txt
+++ b/Documentation/hwspinlock.txt
@@ -1,6 +1,9 @@
+===========================
 Hardware Spinlock Framework
+===========================
 
-1. Introduction
+Introduction
+============
 
 Hardware spinlock modules provide hardware assistance for synchronization
 and mutual exclusion between heterogeneous processors and those not operating
@@ -32,286 +35,370 @@ structure).
 A common hwspinlock interface makes it possible to have generic, platform-
 independent, drivers.
 
-2. User API
+User API
+========
+
+::
 
   struct hwspinlock *hwspin_lock_request(void);
-   - dynamically assign an hwspinlock and return its address, or NULL
-     in case an unused hwspinlock isn't available. Users of this
-     API will usually want to communicate the lock's id to the remote core
-     before it can be used to achieve synchronization.
-     Should be called from a process context (might sleep).
+
+Dynamically assign an hwspinlock and return its address, or NULL
+in case an unused hwspinlock isn't available. Users of this
+API will usually want to communicate the lock's id to the remote core
+before it can be used to achieve synchronization.
+
+Should be called from a process context (might sleep).
+
+::
 
   struct hwspinlock *hwspin_lock_request_specific(unsigned int id);
-   - assign a specific hwspinlock id and return its address, or NULL
-     if that hwspinlock is already in use. Usually board code will
-     be calling this function in order to reserve specific hwspinlock
-     ids for predefined purposes.
-     Should be called from a process context (might sleep).
+
+Assign a specific hwspinlock id and return its address, or NULL
+if that hwspinlock is already in use. Usually board code will
+be calling this function in order to reserve specific hwspinlock
+ids for predefined purposes.
+
+Should be called from a process context (might sleep).
+
+::
 
   int of_hwspin_lock_get_id(struct device_node *np, int index);
-   - retrieve the global lock id for an OF phandle-based specific lock.
-     This function provides a means for DT users of a hwspinlock module
-     to get the global lock id of a specific hwspinlock, so that it can
-     be requested using the normal hwspin_lock_request_specific() API.
-     The function returns a lock id number on success, -EPROBE_DEFER if
-     the hwspinlock device is not yet registered with the core, or other
-     error values.
-     Should be called from a process context (might sleep).
+
+Retrieve the global lock id for an OF phandle-based specific lock.
+This function provides a means for DT users of a hwspinlock module
+to get the global lock id of a specific hwspinlock, so that it can
+be requested using the normal hwspin_lock_request_specific() API.
+
+The function returns a lock id number on success, -EPROBE_DEFER if
+the hwspinlock device is not yet registered with the core, or other
+error values.
+
+Should be called from a process context (might sleep).
+
+::
 
   int hwspin_lock_free(struct hwspinlock *hwlock);
-   - free a previously-assigned hwspinlock; returns 0 on success, or an
-     appropriate error code on failure (e.g. -EINVAL if the hwspinlock
-     is already free).
-     Should be called from a process context (might sleep).
+
+Free a previously-assigned hwspinlock; returns 0 on success, or an
+appropriate error code on failure (e.g. -EINVAL if the hwspinlock
+is already free).
+
+Should be called from a process context (might sleep).
+
+::
 
   int hwspin_lock_timeout(struct hwspinlock *hwlock, unsigned int timeout);
-   - lock a previously-assigned hwspinlock with a timeout limit (specified in
-     msecs). If the hwspinlock is already taken, the function will busy loop
-     waiting for it to be released, but give up when the timeout elapses.
-     Upon a successful return from this function, preemption is disabled so
-     the caller must not sleep, and is advised to release the hwspinlock as
-     soon as possible, in order to minimize remote cores polling on the
-     hardware interconnect.
-     Returns 0 when successful and an appropriate error code otherwise (most
-     notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
-     The function will never sleep.
+
+Lock a previously-assigned hwspinlock with a timeout limit (specified in
+msecs). If the hwspinlock is already taken, the function will busy loop
+waiting for it to be released, but give up when the timeout elapses.
+Upon a successful return from this function, preemption is disabled so
+the caller must not sleep, and is advised to release the hwspinlock as
+soon as possible, in order to minimize remote cores polling on the
+hardware interconnect.
+
+Returns 0 when successful and an appropriate error code otherwise (most
+notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+The function will never sleep.
+
+::
 
   int hwspin_lock_timeout_irq(struct hwspinlock *hwlock, unsigned int timeout);
-   - lock a previously-assigned hwspinlock with a timeout limit (specified in
-     msecs). If the hwspinlock is already taken, the function will busy loop
-     waiting for it to be released, but give up when the timeout elapses.
-     Upon a successful return from this function, preemption and the local
-     interrupts are disabled, so the caller must not sleep, and is advised to
-     release the hwspinlock as soon as possible.
-     Returns 0 when successful and an appropriate error code otherwise (most
-     notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
-     The function will never sleep.
+
+Lock a previously-assigned hwspinlock with a timeout limit (specified in
+msecs). If the hwspinlock is already taken, the function will busy loop
+waiting for it to be released, but give up when the timeout elapses.
+Upon a successful return from this function, preemption and the local
+interrupts are disabled, so the caller must not sleep, and is advised to
+release the hwspinlock as soon as possible.
+
+Returns 0 when successful and an appropriate error code otherwise (most
+notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+The function will never sleep.
+
+::
 
   int hwspin_lock_timeout_irqsave(struct hwspinlock *hwlock, unsigned int to,
-							unsigned long *flags);
-   - lock a previously-assigned hwspinlock with a timeout limit (specified in
-     msecs). If the hwspinlock is already taken, the function will busy loop
-     waiting for it to be released, but give up when the timeout elapses.
-     Upon a successful return from this function, preemption is disabled,
-     local interrupts are disabled and their previous state is saved at the
-     given flags placeholder. The caller must not sleep, and is advised to
-     release the hwspinlock as soon as possible.
-     Returns 0 when successful and an appropriate error code otherwise (most
-     notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
-     The function will never sleep.
+				  unsigned long *flags);
+
+Lock a previously-assigned hwspinlock with a timeout limit (specified in
+msecs). If the hwspinlock is already taken, the function will busy loop
+waiting for it to be released, but give up when the timeout elapses.
+Upon a successful return from this function, preemption is disabled,
+local interrupts are disabled and their previous state is saved at the
+given flags placeholder. The caller must not sleep, and is advised to
+release the hwspinlock as soon as possible.
+
+Returns 0 when successful and an appropriate error code otherwise (most
+notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+
+The function will never sleep.
+
+::
 
   int hwspin_trylock(struct hwspinlock *hwlock);
-   - attempt to lock a previously-assigned hwspinlock, but immediately fail if
-     it is already taken.
-     Upon a successful return from this function, preemption is disabled so
-     caller must not sleep, and is advised to release the hwspinlock as soon as
-     possible, in order to minimize remote cores polling on the hardware
-     interconnect.
-     Returns 0 on success and an appropriate error code otherwise (most
-     notably -EBUSY if the hwspinlock was already taken).
-     The function will never sleep.
+
+
+Attempt to lock a previously-assigned hwspinlock, but immediately fail if
+it is already taken.
+
+Upon a successful return from this function, preemption is disabled so
+caller must not sleep, and is advised to release the hwspinlock as soon as
+possible, in order to minimize remote cores polling on the hardware
+interconnect.
+
+Returns 0 on success and an appropriate error code otherwise (most
+notably -EBUSY if the hwspinlock was already taken).
+The function will never sleep.
+
+::
 
   int hwspin_trylock_irq(struct hwspinlock *hwlock);
-   - attempt to lock a previously-assigned hwspinlock, but immediately fail if
-     it is already taken.
-     Upon a successful return from this function, preemption and the local
-     interrupts are disabled so caller must not sleep, and is advised to
-     release the hwspinlock as soon as possible.
-     Returns 0 on success and an appropriate error code otherwise (most
-     notably -EBUSY if the hwspinlock was already taken).
-     The function will never sleep.
+
+
+Attempt to lock a previously-assigned hwspinlock, but immediately fail if
+it is already taken.
+
+Upon a successful return from this function, preemption and the local
+interrupts are disabled so caller must not sleep, and is advised to
+release the hwspinlock as soon as possible.
+
+Returns 0 on success and an appropriate error code otherwise (most
+notably -EBUSY if the hwspinlock was already taken).
+
+The function will never sleep.
+
+::
 
   int hwspin_trylock_irqsave(struct hwspinlock *hwlock, unsigned long *flags);
-   - attempt to lock a previously-assigned hwspinlock, but immediately fail if
-     it is already taken.
-     Upon a successful return from this function, preemption is disabled,
-     the local interrupts are disabled and their previous state is saved
-     at the given flags placeholder. The caller must not sleep, and is advised
-     to release the hwspinlock as soon as possible.
-     Returns 0 on success and an appropriate error code otherwise (most
-     notably -EBUSY if the hwspinlock was already taken).
-     The function will never sleep.
+
+Attempt to lock a previously-assigned hwspinlock, but immediately fail if
+it is already taken.
+
+Upon a successful return from this function, preemption is disabled,
+the local interrupts are disabled and their previous state is saved
+at the given flags placeholder. The caller must not sleep, and is advised
+to release the hwspinlock as soon as possible.
+
+Returns 0 on success and an appropriate error code otherwise (most
+notably -EBUSY if the hwspinlock was already taken).
+The function will never sleep.
+
+::
 
   void hwspin_unlock(struct hwspinlock *hwlock);
-   - unlock a previously-locked hwspinlock. Always succeed, and can be called
-     from any context (the function never sleeps). Note: code should _never_
-     unlock an hwspinlock which is already unlocked (there is no protection
-     against this).
+
+Unlock a previously-locked hwspinlock. Always succeed, and can be called
+from any context (the function never sleeps).
+
+.. note::
+
+  code should **never** unlock an hwspinlock which is already unlocked
+  (there is no protection against this).
+
+::
 
   void hwspin_unlock_irq(struct hwspinlock *hwlock);
-   - unlock a previously-locked hwspinlock and enable local interrupts.
-     The caller should _never_ unlock an hwspinlock which is already unlocked.
-     Doing so is considered a bug (there is no protection against this).
-     Upon a successful return from this function, preemption and local
-     interrupts are enabled. This function will never sleep.
+
+Unlock a previously-locked hwspinlock and enable local interrupts.
+The caller should **never** unlock an hwspinlock which is already unlocked.
+
+Doing so is considered a bug (there is no protection against this).
+Upon a successful return from this function, preemption and local
+interrupts are enabled. This function will never sleep.
+
+::
 
   void
   hwspin_unlock_irqrestore(struct hwspinlock *hwlock, unsigned long *flags);
-   - unlock a previously-locked hwspinlock.
-     The caller should _never_ unlock an hwspinlock which is already unlocked.
-     Doing so is considered a bug (there is no protection against this).
-     Upon a successful return from this function, preemption is reenabled,
-     and the state of the local interrupts is restored to the state saved at
-     the given flags. This function will never sleep.
+
+Unlock a previously-locked hwspinlock.
+
+The caller should **never** unlock an hwspinlock which is already unlocked.
+Doing so is considered a bug (there is no protection against this).
+Upon a successful return from this function, preemption is reenabled,
+and the state of the local interrupts is restored to the state saved at
+the given flags. This function will never sleep.
+
+::
 
   int hwspin_lock_get_id(struct hwspinlock *hwlock);
-   - retrieve id number of a given hwspinlock. This is needed when an
-     hwspinlock is dynamically assigned: before it can be used to achieve
-     mutual exclusion with a remote cpu, the id number should be communicated
-     to the remote task with which we want to synchronize.
-     Returns the hwspinlock id number, or -EINVAL if hwlock is null.
-
-3. Typical usage
-
-#include <linux/hwspinlock.h>
-#include <linux/err.h>
-
-int hwspinlock_example1(void)
-{
-	struct hwspinlock *hwlock;
-	int ret;
-
-	/* dynamically assign a hwspinlock */
-	hwlock = hwspin_lock_request();
-	if (!hwlock)
-		...
-
-	id = hwspin_lock_get_id(hwlock);
-	/* probably need to communicate id to a remote processor now */
-
-	/* take the lock, spin for 1 sec if it's already taken */
-	ret = hwspin_lock_timeout(hwlock, 1000);
-	if (ret)
-		...
-
-	/*
-	 * we took the lock, do our thing now, but do NOT sleep
-	 */
-
-	/* release the lock */
-	hwspin_unlock(hwlock);
-
-	/* free the lock */
-	ret = hwspin_lock_free(hwlock);
-	if (ret)
-		...
-
-	return ret;
-}
-
-int hwspinlock_example2(void)
-{
-	struct hwspinlock *hwlock;
-	int ret;
-
-	/*
-	 * assign a specific hwspinlock id - this should be called early
-	 * by board init code.
-	 */
-	hwlock = hwspin_lock_request_specific(PREDEFINED_LOCK_ID);
-	if (!hwlock)
-		...
-
-	/* try to take it, but don't spin on it */
-	ret = hwspin_trylock(hwlock);
-	if (!ret) {
-		pr_info("lock is already taken\n");
-		return -EBUSY;
-	}
 
-	/*
-	 * we took the lock, do our thing now, but do NOT sleep
-	 */
+Retrieve id number of a given hwspinlock. This is needed when an
+hwspinlock is dynamically assigned: before it can be used to achieve
+mutual exclusion with a remote cpu, the id number should be communicated
+to the remote task with which we want to synchronize.
+
+Returns the hwspinlock id number, or -EINVAL if hwlock is null.
+
+Typical usage
+=============
 
-	/* release the lock */
-	hwspin_unlock(hwlock);
+::
 
-	/* free the lock */
-	ret = hwspin_lock_free(hwlock);
-	if (ret)
-		...
+	#include <linux/hwspinlock.h>
+	#include <linux/err.h>
 
-	return ret;
-}
+	int hwspinlock_example1(void)
+	{
+		struct hwspinlock *hwlock;
+		int ret;
 
+		/* dynamically assign a hwspinlock */
+		hwlock = hwspin_lock_request();
+		if (!hwlock)
+			...
 
-4. API for implementors
+		id = hwspin_lock_get_id(hwlock);
+		/* probably need to communicate id to a remote processor now */
+
+		/* take the lock, spin for 1 sec if it's already taken */
+		ret = hwspin_lock_timeout(hwlock, 1000);
+		if (ret)
+			...
+
+		/*
+		* we took the lock, do our thing now, but do NOT sleep
+		*/
+
+		/* release the lock */
+		hwspin_unlock(hwlock);
+
+		/* free the lock */
+		ret = hwspin_lock_free(hwlock);
+		if (ret)
+			...
+
+		return ret;
+	}
+
+	int hwspinlock_example2(void)
+	{
+		struct hwspinlock *hwlock;
+		int ret;
+
+		/*
+		* assign a specific hwspinlock id - this should be called early
+		* by board init code.
+		*/
+		hwlock = hwspin_lock_request_specific(PREDEFINED_LOCK_ID);
+		if (!hwlock)
+			...
+
+		/* try to take it, but don't spin on it */
+		ret = hwspin_trylock(hwlock);
+		if (!ret) {
+			pr_info("lock is already taken\n");
+			return -EBUSY;
+		}
+
+		/*
+		* we took the lock, do our thing now, but do NOT sleep
+		*/
+
+		/* release the lock */
+		hwspin_unlock(hwlock);
+
+		/* free the lock */
+		ret = hwspin_lock_free(hwlock);
+		if (ret)
+			...
+
+		return ret;
+	}
+
+
+API for implementors
+====================
+
+::
 
   int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
 		const struct hwspinlock_ops *ops, int base_id, int num_locks);
-   - to be called from the underlying platform-specific implementation, in
-     order to register a new hwspinlock device (which is usually a bank of
-     numerous locks). Should be called from a process context (this function
-     might sleep).
-     Returns 0 on success, or appropriate error code on failure.
+
+To be called from the underlying platform-specific implementation, in
+order to register a new hwspinlock device (which is usually a bank of
+numerous locks). Should be called from a process context (this function
+might sleep).
+
+Returns 0 on success, or appropriate error code on failure.
+
+::
 
   int hwspin_lock_unregister(struct hwspinlock_device *bank);
-   - to be called from the underlying vendor-specific implementation, in order
-     to unregister an hwspinlock device (which is usually a bank of numerous
-     locks).
-     Should be called from a process context (this function might sleep).
-     Returns the address of hwspinlock on success, or NULL on error (e.g.
-     if the hwspinlock is still in use).
 
-5. Important structs
+To be called from the underlying vendor-specific implementation, in order
+to unregister an hwspinlock device (which is usually a bank of numerous
+locks).
+
+Should be called from a process context (this function might sleep).
+
+Returns the address of hwspinlock on success, or NULL on error (e.g.
+if the hwspinlock is still in use).
+
+Important structs
+=================
 
 struct hwspinlock_device is a device which usually contains a bank
 of hardware locks. It is registered by the underlying hwspinlock
 implementation using the hwspin_lock_register() API.
 
-/**
- * struct hwspinlock_device - a device which usually spans numerous hwspinlocks
- * @dev: underlying device, will be used to invoke runtime PM api
- * @ops: platform-specific hwspinlock handlers
- * @base_id: id index of the first lock in this device
- * @num_locks: number of locks in this device
- * @lock: dynamically allocated array of 'struct hwspinlock'
- */
-struct hwspinlock_device {
-	struct device *dev;
-	const struct hwspinlock_ops *ops;
-	int base_id;
-	int num_locks;
-	struct hwspinlock lock[0];
-};
+::
+
+	/**
+	* struct hwspinlock_device - a device which usually spans numerous hwspinlocks
+	* @dev: underlying device, will be used to invoke runtime PM api
+	* @ops: platform-specific hwspinlock handlers
+	* @base_id: id index of the first lock in this device
+	* @num_locks: number of locks in this device
+	* @lock: dynamically allocated array of 'struct hwspinlock'
+	*/
+	struct hwspinlock_device {
+		struct device *dev;
+		const struct hwspinlock_ops *ops;
+		int base_id;
+		int num_locks;
+		struct hwspinlock lock[0];
+	};
 
 struct hwspinlock_device contains an array of hwspinlock structs, each
-of which represents a single hardware lock:
-
-/**
- * struct hwspinlock - this struct represents a single hwspinlock instance
- * @bank: the hwspinlock_device structure which owns this lock
- * @lock: initialized and used by hwspinlock core
- * @priv: private data, owned by the underlying platform-specific hwspinlock drv
- */
-struct hwspinlock {
-	struct hwspinlock_device *bank;
-	spinlock_t lock;
-	void *priv;
-};
+of which represents a single hardware lock::
+
+	/**
+	* struct hwspinlock - this struct represents a single hwspinlock instance
+	* @bank: the hwspinlock_device structure which owns this lock
+	* @lock: initialized and used by hwspinlock core
+	* @priv: private data, owned by the underlying platform-specific hwspinlock drv
+	*/
+	struct hwspinlock {
+		struct hwspinlock_device *bank;
+		spinlock_t lock;
+		void *priv;
+	};
 
 When registering a bank of locks, the hwspinlock driver only needs to
 set the priv members of the locks. The rest of the members are set and
 initialized by the hwspinlock core itself.
 
-6. Implementation callbacks
+Implementation callbacks
+========================
 
-There are three possible callbacks defined in 'struct hwspinlock_ops':
+There are three possible callbacks defined in 'struct hwspinlock_ops'::
 
-struct hwspinlock_ops {
-	int (*trylock)(struct hwspinlock *lock);
-	void (*unlock)(struct hwspinlock *lock);
-	void (*relax)(struct hwspinlock *lock);
-};
+	struct hwspinlock_ops {
+		int (*trylock)(struct hwspinlock *lock);
+		void (*unlock)(struct hwspinlock *lock);
+		void (*relax)(struct hwspinlock *lock);
+	};
 
 The first two callbacks are mandatory:
 
 The ->trylock() callback should make a single attempt to take the lock, and
-return 0 on failure and 1 on success. This callback may _not_ sleep.
+return 0 on failure and 1 on success. This callback may **not** sleep.
 
 The ->unlock() callback releases the lock. It always succeed, and it, too,
-may _not_ sleep.
+may **not** sleep.
 
 The ->relax() callback is optional. It is called by hwspinlock core while
 spinning on a lock, and can be used by the underlying implementation to force
-a delay between two successive invocations of ->trylock(). It may _not_ sleep.
+a delay between two successive invocations of ->trylock(). It may **not** sleep.
-- 
GitLab


From 7e18c07e79546cbaa114e56b409e4f860a9d49cc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 14:08:23 -0300
Subject: [PATCH 1569/1958] intel_txt.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- promote main title one level;
- fix the bulleted list markup;
- use bulletted list markup where needed;
- add whitespaces where needed;
- mark literal blocks;
- remove extra ":" after section titles.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/intel_txt.txt | 63 +++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/Documentation/intel_txt.txt b/Documentation/intel_txt.txt
index 91d89c5407098..d83c1a2122c9c 100644
--- a/Documentation/intel_txt.txt
+++ b/Documentation/intel_txt.txt
@@ -1,4 +1,5 @@
-Intel(R) TXT Overview:
+=====================
+Intel(R) TXT Overview
 =====================
 
 Intel's technology for safer computing, Intel(R) Trusted Execution
@@ -8,9 +9,10 @@ provide the building blocks for creating trusted platforms.
 Intel TXT was formerly known by the code name LaGrande Technology (LT).
 
 Intel TXT in Brief:
-o  Provides dynamic root of trust for measurement (DRTM)
-o  Data protection in case of improper shutdown
-o  Measurement and verification of launched environment
+
+-  Provides dynamic root of trust for measurement (DRTM)
+-  Data protection in case of improper shutdown
+-  Measurement and verification of launched environment
 
 Intel TXT is part of the vPro(TM) brand and is also available some
 non-vPro systems.  It is currently available on desktop systems
@@ -24,16 +26,21 @@ which has been updated for the new released platforms.
 
 Intel TXT has been presented at various events over the past few
 years, some of which are:
-      LinuxTAG 2008:
+
+      - LinuxTAG 2008:
           http://www.linuxtag.org/2008/en/conf/events/vp-donnerstag.html
-      TRUST2008:
+
+      - TRUST2008:
           http://www.trust-conference.eu/downloads/Keynote-Speakers/
           3_David-Grawrock_The-Front-Door-of-Trusted-Computing.pdf
-      IDF, Shanghai:
+
+      - IDF, Shanghai:
           http://www.prcidf.com.cn/index_en.html
-      IDFs 2006, 2007 (I'm not sure if/where they are online)
 
-Trusted Boot Project Overview:
+      - IDFs 2006, 2007
+	  (I'm not sure if/where they are online)
+
+Trusted Boot Project Overview
 =============================
 
 Trusted Boot (tboot) is an open source, pre-kernel/VMM module that
@@ -87,11 +94,12 @@ Intel-provided firmware).
 How Does it Work?
 =================
 
-o  Tboot is an executable that is launched by the bootloader as
+-  Tboot is an executable that is launched by the bootloader as
    the "kernel" (the binary the bootloader executes).
-o  It performs all of the work necessary to determine if the
+-  It performs all of the work necessary to determine if the
    platform supports Intel TXT and, if so, executes the GETSEC[SENTER]
    processor instruction that initiates the dynamic root of trust.
+
    -  If tboot determines that the system does not support Intel TXT
       or is not configured correctly (e.g. the SINIT AC Module was
       incorrect), it will directly launch the kernel with no changes
@@ -99,12 +107,14 @@ o  It performs all of the work necessary to determine if the
    -  Tboot will output various information about its progress to the
       terminal, serial port, and/or an in-memory log; the output
       locations can be configured with a command line switch.
-o  The GETSEC[SENTER] instruction will return control to tboot and
+
+-  The GETSEC[SENTER] instruction will return control to tboot and
    tboot then verifies certain aspects of the environment (e.g. TPM NV
    lock, e820 table does not have invalid entries, etc.).
-o  It will wake the APs from the special sleep state the GETSEC[SENTER]
+-  It will wake the APs from the special sleep state the GETSEC[SENTER]
    instruction had put them in and place them into a wait-for-SIPI
    state.
+
    -  Because the processors will not respond to an INIT or SIPI when
       in the TXT environment, it is necessary to create a small VT-x
       guest for the APs.  When they run in this guest, they will
@@ -112,8 +122,10 @@ o  It will wake the APs from the special sleep state the GETSEC[SENTER]
       VMEXITs, and then disable VT and jump to the SIPI vector.  This
       approach seemed like a better choice than having to insert
       special code into the kernel's MP wakeup sequence.
-o  Tboot then applies an (optional) user-defined launch policy to
+
+-  Tboot then applies an (optional) user-defined launch policy to
    verify the kernel and initrd.
+
    -  This policy is rooted in TPM NV and is described in the tboot
       project.  The tboot project also contains code for tools to
       create and provision the policy.
@@ -121,30 +133,34 @@ o  Tboot then applies an (optional) user-defined launch policy to
       then any kernel will be launched.
    -  Policy action is flexible and can include halting on failures
       or simply logging them and continuing.
-o  Tboot adjusts the e820 table provided by the bootloader to reserve
+
+-  Tboot adjusts the e820 table provided by the bootloader to reserve
    its own location in memory as well as to reserve certain other
    TXT-related regions.
-o  As part of its launch, tboot DMA protects all of RAM (using the
+-  As part of its launch, tboot DMA protects all of RAM (using the
    VT-d PMRs).  Thus, the kernel must be booted with 'intel_iommu=on'
    in order to remove this blanket protection and use VT-d's
    page-level protection.
-o  Tboot will populate a shared page with some data about itself and
+-  Tboot will populate a shared page with some data about itself and
    pass this to the Linux kernel as it transfers control.
+
    -  The location of the shared page is passed via the boot_params
       struct as a physical address.
-o  The kernel will look for the tboot shared page address and, if it
+
+-  The kernel will look for the tboot shared page address and, if it
    exists, map it.
-o  As one of the checks/protections provided by TXT, it makes a copy
+-  As one of the checks/protections provided by TXT, it makes a copy
    of the VT-d DMARs in a DMA-protected region of memory and verifies
    them for correctness.  The VT-d code will detect if the kernel was
    launched with tboot and use this copy instead of the one in the
    ACPI table.
-o  At this point, tboot and TXT are out of the picture until a
+-  At this point, tboot and TXT are out of the picture until a
    shutdown (S<n>)
-o  In order to put a system into any of the sleep states after a TXT
+-  In order to put a system into any of the sleep states after a TXT
    launch, TXT must first be exited.  This is to prevent attacks that
    attempt to crash the system to gain control on reboot and steal
    data left in memory.
+
    -  The kernel will perform all of its sleep preparation and
       populate the shared page with the ACPI data needed to put the
       platform in the desired sleep state.
@@ -172,7 +188,7 @@ o  In order to put a system into any of the sleep states after a TXT
 That's pretty much it for TXT support.
 
 
-Configuring the System:
+Configuring the System
 ======================
 
 This code works with 32bit, 32bit PAE, and 64bit (x86_64) kernels.
@@ -181,7 +197,8 @@ In BIOS, the user must enable:  TPM, TXT, VT-x, VT-d.  Not all BIOSes
 allow these to be individually enabled/disabled and the screens in
 which to find them are BIOS-specific.
 
-grub.conf needs to be modified as follows:
+grub.conf needs to be modified as follows::
+
         title Linux 2.6.29-tip w/ tboot
           root (hd0,0)
                 kernel /tboot.gz logging=serial,vga,memory
-- 
GitLab


From 45d85146269f711b8fbfdda017a033676caf29ab Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 14:13:14 -0300
Subject: [PATCH 1570/1958] Intel-IOMMU.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

This file is almost in the right format. It only needed to
convert a list to bulleted list and to use the right markup
for literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Intel-IOMMU.txt | 37 +++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
index 49585b6e1ea24..9dae6b47e398b 100644
--- a/Documentation/Intel-IOMMU.txt
+++ b/Documentation/Intel-IOMMU.txt
@@ -1,3 +1,4 @@
+===================
 Linux IOMMU Support
 ===================
 
@@ -9,11 +10,11 @@ This guide gives a quick cheat sheet for some basic understanding.
 
 Some Keywords
 
-DMAR - DMA remapping
-DRHD - DMA Remapping Hardware Unit Definition
-RMRR - Reserved memory Region Reporting Structure
-ZLR  - Zero length reads from PCI devices
-IOVA - IO Virtual address.
+- DMAR - DMA remapping
+- DRHD - DMA Remapping Hardware Unit Definition
+- RMRR - Reserved memory Region Reporting Structure
+- ZLR  - Zero length reads from PCI devices
+- IOVA - IO Virtual address.
 
 Basic stuff
 -----------
@@ -33,7 +34,7 @@ devices that need to access these regions. OS is expected to setup
 unity mappings for these regions for these devices to access these regions.
 
 How is IOVA generated?
----------------------
+----------------------
 
 Well behaved drivers call pci_map_*() calls before sending command to device
 that needs to perform DMA. Once DMA is completed and mapping is no longer
@@ -82,14 +83,14 @@ in ACPI.
 ACPI: DMAR (v001 A M I  OEMDMAR  0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
 
 When DMAR is being processed and initialized by ACPI, prints DMAR locations
-and any RMRR's processed.
+and any RMRR's processed::
 
-ACPI DMAR:Host address width 36
-ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
-ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
-ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
-ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
-ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
+	ACPI DMAR:Host address width 36
+	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
+	ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
+	ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
+	ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
+	ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
 
 When DMAR is enabled for use, you will notice..
 
@@ -98,10 +99,12 @@ PCI-DMA: Using DMAR IOMMU
 Fault reporting
 ---------------
 
-DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
-DMAR:[fault reason 05] PTE Write access is not set
-DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
-DMAR:[fault reason 05] PTE Write access is not set
+::
+
+	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+	DMAR:[fault reason 05] PTE Write access is not set
+	DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+	DMAR:[fault reason 05] PTE Write access is not set
 
 TBD
 ----
-- 
GitLab


From 9cf5116d5b10793b5105f962fa3d899b2d6cb5f6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 14:32:35 -0300
Subject: [PATCH 1571/1958] io-mapping.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add a title for the document and for API chapter;
- mark literal blocks;
- Adjust whitespacing.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/io-mapping.txt | 67 ++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/Documentation/io-mapping.txt b/Documentation/io-mapping.txt
index 5ca78426f54c5..a966239f04e48 100644
--- a/Documentation/io-mapping.txt
+++ b/Documentation/io-mapping.txt
@@ -1,66 +1,81 @@
+========================
+The io_mapping functions
+========================
+
+API
+===
+
 The io_mapping functions in linux/io-mapping.h provide an abstraction for
 efficiently mapping small regions of an I/O device to the CPU. The initial
 usage is to support the large graphics aperture on 32-bit processors where
 ioremap_wc cannot be used to statically map the entire aperture to the CPU
 as it would consume too much of the kernel address space.
 
-A mapping object is created during driver initialization using
+A mapping object is created during driver initialization using::
 
 	struct io_mapping *io_mapping_create_wc(unsigned long base,
 						unsigned long size)
 
-		'base' is the bus address of the region to be made
-		mappable, while 'size' indicates how large a mapping region to
-		enable. Both are in bytes.
+'base' is the bus address of the region to be made
+mappable, while 'size' indicates how large a mapping region to
+enable. Both are in bytes.
 
-		This _wc variant provides a mapping which may only be used
-		with the io_mapping_map_atomic_wc or io_mapping_map_wc.
+This _wc variant provides a mapping which may only be used
+with the io_mapping_map_atomic_wc or io_mapping_map_wc.
 
 With this mapping object, individual pages can be mapped either atomically
 or not, depending on the necessary scheduling environment. Of course, atomic
-maps are more efficient:
+maps are more efficient::
 
 	void *io_mapping_map_atomic_wc(struct io_mapping *mapping,
 				       unsigned long offset)
 
-		'offset' is the offset within the defined mapping region.
-		Accessing addresses beyond the region specified in the
-		creation function yields undefined results. Using an offset
-		which is not page aligned yields an undefined result. The
-		return value points to a single page in CPU address space.
+'offset' is the offset within the defined mapping region.
+Accessing addresses beyond the region specified in the
+creation function yields undefined results. Using an offset
+which is not page aligned yields an undefined result. The
+return value points to a single page in CPU address space.
+
+This _wc variant returns a write-combining map to the
+page and may only be used with mappings created by
+io_mapping_create_wc
 
-		This _wc variant returns a write-combining map to the
-		page and may only be used with mappings created by
-		io_mapping_create_wc
+Note that the task may not sleep while holding this page
+mapped.
 
-		Note that the task may not sleep while holding this page
-		mapped.
+::
 
 	void io_mapping_unmap_atomic(void *vaddr)
 
-		'vaddr' must be the value returned by the last
-		io_mapping_map_atomic_wc call. This unmaps the specified
-		page and allows the task to sleep once again.
+'vaddr' must be the value returned by the last
+io_mapping_map_atomic_wc call. This unmaps the specified
+page and allows the task to sleep once again.
 
 If you need to sleep while holding the lock, you can use the non-atomic
 variant, although they may be significantly slower.
 
+::
+
 	void *io_mapping_map_wc(struct io_mapping *mapping,
 				unsigned long offset)
 
-		This works like io_mapping_map_atomic_wc except it allows
-		the task to sleep while holding the page mapped.
+This works like io_mapping_map_atomic_wc except it allows
+the task to sleep while holding the page mapped.
+
+
+::
 
 	void io_mapping_unmap(void *vaddr)
 
-		This works like io_mapping_unmap_atomic, except it is used
-		for pages mapped with io_mapping_map_wc.
+This works like io_mapping_unmap_atomic, except it is used
+for pages mapped with io_mapping_map_wc.
 
-At driver close time, the io_mapping object must be freed:
+At driver close time, the io_mapping object must be freed::
 
 	void io_mapping_free(struct io_mapping *mapping)
 
-Current Implementation:
+Current Implementation
+======================
 
 The initial implementation of these functions uses existing mapping
 mechanisms and so provides only an abstraction layer and no new
-- 
GitLab


From 0e95c85341b7b5be34f999b6023e3df4d03f4977 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 14:45:35 -0300
Subject: [PATCH 1572/1958] io_ordering.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add a title;
- mark literal-blocks as such.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/io_ordering.txt | 62 +++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/Documentation/io_ordering.txt b/Documentation/io_ordering.txt
index 9faae6f26d322..2ab303ce9a0d0 100644
--- a/Documentation/io_ordering.txt
+++ b/Documentation/io_ordering.txt
@@ -1,3 +1,7 @@
+==============================================
+Ordering I/O writes to memory-mapped addresses
+==============================================
+
 On some platforms, so-called memory-mapped I/O is weakly ordered.  On such
 platforms, driver writers are responsible for ensuring that I/O writes to
 memory-mapped addresses on their device arrive in the order intended.  This is
@@ -8,39 +12,39 @@ critical section of code protected by spinlocks.  This would ensure that
 subsequent writes to I/O space arrived only after all prior writes (much like a
 memory barrier op, mb(), only with respect to I/O).
 
-A more concrete example from a hypothetical device driver:
+A more concrete example from a hypothetical device driver::
 
-        ...
-CPU A:  spin_lock_irqsave(&dev_lock, flags)
-CPU A:  val = readl(my_status);
-CPU A:  ...
-CPU A:  writel(newval, ring_ptr);
-CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
-        ...
-CPU B:  spin_lock_irqsave(&dev_lock, flags)
-CPU B:  val = readl(my_status);
-CPU B:  ...
-CPU B:  writel(newval2, ring_ptr);
-CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
-        ...
+		...
+	CPU A:  spin_lock_irqsave(&dev_lock, flags)
+	CPU A:  val = readl(my_status);
+	CPU A:  ...
+	CPU A:  writel(newval, ring_ptr);
+	CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
+		...
+	CPU B:  spin_lock_irqsave(&dev_lock, flags)
+	CPU B:  val = readl(my_status);
+	CPU B:  ...
+	CPU B:  writel(newval2, ring_ptr);
+	CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
+		...
 
 In the case above, the device may receive newval2 before it receives newval,
-which could cause problems.  Fixing it is easy enough though:
+which could cause problems.  Fixing it is easy enough though::
 
-        ...
-CPU A:  spin_lock_irqsave(&dev_lock, flags)
-CPU A:  val = readl(my_status);
-CPU A:  ...
-CPU A:  writel(newval, ring_ptr);
-CPU A:  (void)readl(safe_register); /* maybe a config register? */
-CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
-        ...
-CPU B:  spin_lock_irqsave(&dev_lock, flags)
-CPU B:  val = readl(my_status);
-CPU B:  ...
-CPU B:  writel(newval2, ring_ptr);
-CPU B:  (void)readl(safe_register); /* maybe a config register? */
-CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
+		...
+	CPU A:  spin_lock_irqsave(&dev_lock, flags)
+	CPU A:  val = readl(my_status);
+	CPU A:  ...
+	CPU A:  writel(newval, ring_ptr);
+	CPU A:  (void)readl(safe_register); /* maybe a config register? */
+	CPU A:  spin_unlock_irqrestore(&dev_lock, flags)
+		...
+	CPU B:  spin_lock_irqsave(&dev_lock, flags)
+	CPU B:  val = readl(my_status);
+	CPU B:  ...
+	CPU B:  writel(newval2, ring_ptr);
+	CPU B:  (void)readl(safe_register); /* maybe a config register? */
+	CPU B:  spin_unlock_irqrestore(&dev_lock, flags)
 
 Here, the reads from safe_register will cause the I/O chipset to flush any
 pending writes before actually posting the read to the chipset, preventing
-- 
GitLab


From 378012cf68f457a38a80d1efb938090ccc484e19 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 14:52:53 -0300
Subject: [PATCH 1573/1958] iostats.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- promote main title level;
- mark literal blocks as such;
- Add blank lines to avoid Sphinx errors.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/iostats.txt | 40 ++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
index 65f694f2d1c94..456a5b5e0c53d 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/iostats.txt
@@ -1,5 +1,6 @@
+=====================
 I/O statistics fields
----------------
+=====================
 
 Since 2.4.20 (and some versions before, with patches), and 2.5.45,
 more extensive disk statistics have been introduced to help measure disk
@@ -16,20 +17,20 @@ is mounted on /sys, although of course it may be mounted anywhere.
 Both /proc/diskstats and sysfs use the same source for the information
 and so should not differ.
 
-Here are examples of these different formats:
+Here are examples of these different formats::
 
-2.4:
-   3     0   39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
-   3     1    9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
+   2.4:
+      3     0   39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+      3     1    9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
 
 
-2.6 sysfs:
-   446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
-   35486    38030    38030    38030
+   2.6 sysfs:
+      446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+      35486    38030    38030    38030
 
-2.6 diskstats:
-   3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
-   3    1   hda1 35486 38030 38030 38030
+   2.6 diskstats:
+      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+      3    1   hda1 35486 38030 38030 38030
 
 On 2.4 you might execute "grep 'hda ' /proc/partitions". On 2.6, you have
 a choice of "cat /sys/block/hda/stat" or "grep 'hda ' /proc/diskstats".
@@ -59,30 +60,40 @@ system-wide stats you'll have to find all the devices and sum them all up.
 
 Field  1 -- # of reads completed
     This is the total number of reads completed successfully.
+
 Field  2 -- # of reads merged, field 6 -- # of writes merged
     Reads and writes which are adjacent to each other may be merged for
     efficiency.  Thus two 4K reads may become one 8K read before it is
     ultimately handed to the disk, and so it will be counted (and queued)
     as only one I/O.  This field lets you know how often this was done.
+
 Field  3 -- # of sectors read
     This is the total number of sectors read successfully.
+
 Field  4 -- # of milliseconds spent reading
     This is the total number of milliseconds spent by all reads (as
     measured from __make_request() to end_that_request_last()).
+
 Field  5 -- # of writes completed
     This is the total number of writes completed successfully.
+
 Field  6 -- # of writes merged
     See the description of field 2.
+
 Field  7 -- # of sectors written
     This is the total number of sectors written successfully.
+
 Field  8 -- # of milliseconds spent writing
     This is the total number of milliseconds spent by all writes (as
     measured from __make_request() to end_that_request_last()).
+
 Field  9 -- # of I/Os currently in progress
     The only field that should go to zero. Incremented as requests are
     given to appropriate struct request_queue and decremented as they finish.
+
 Field 10 -- # of milliseconds spent doing I/Os
     This field increases so long as field 9 is nonzero.
+
 Field 11 -- weighted # of milliseconds spent doing I/Os
     This field is incremented at each I/O start, I/O completion, I/O
     merge, or read of these stats by the number of I/Os in progress
@@ -117,11 +128,14 @@ for partitions on 2.6 machines.  This is reflected in the examples above.
 
 Field  1 -- # of reads issued
     This is the total number of reads issued to this partition.
+
 Field  2 -- # of sectors read
     This is the total number of sectors requested to be read from this
     partition.
+
 Field  3 -- # of writes issued
     This is the total number of writes issued to this partition.
+
 Field  4 -- # of sectors written
     This is the total number of sectors requested to be written to
     this partition.
@@ -151,9 +165,9 @@ Additional notes
 
 In 2.6, sysfs is not mounted by default.  If your distribution of
 Linux hasn't added it already, here's the line you'll want to add to
-your /etc/fstab:
+your /etc/fstab::
 
-none /sys sysfs defaults 0 0
+	none /sys sysfs defaults 0 0
 
 
 In 2.6, all disk statistics were removed from /proc/stat.  In 2.4, they
-- 
GitLab


From 877b638ff814205ff256173295bb79a220f9ad0e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:08:22 -0300
Subject: [PATCH 1574/1958] iostats.txt: update it to cover recent Kernels

Everything there that it is said for 2.6 also applies on
current 4.x Kernels. So, update the information there.

While here, use ``foo`` for literals.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/iostats.txt | 44 +++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
index 456a5b5e0c53d..04d394a2e06ce 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/iostats.txt
@@ -4,17 +4,17 @@ I/O statistics fields
 
 Since 2.4.20 (and some versions before, with patches), and 2.5.45,
 more extensive disk statistics have been introduced to help measure disk
-activity. Tools such as sar and iostat typically interpret these and do
+activity. Tools such as ``sar`` and ``iostat`` typically interpret these and do
 the work for you, but in case you are interested in creating your own
 tools, the fields are explained here.
 
 In 2.4 now, the information is found as additional fields in
-/proc/partitions.  In 2.6, the same information is found in two
-places: one is in the file /proc/diskstats, and the other is within
+``/proc/partitions``.  In 2.6 and upper, the same information is found in two
+places: one is in the file ``/proc/diskstats``, and the other is within
 the sysfs file system, which must be mounted in order to obtain
 the information. Throughout this document we'll assume that sysfs
-is mounted on /sys, although of course it may be mounted anywhere.
-Both /proc/diskstats and sysfs use the same source for the information
+is mounted on ``/sys``, although of course it may be mounted anywhere.
+Both ``/proc/diskstats`` and sysfs use the same source for the information
 and so should not differ.
 
 Here are examples of these different formats::
@@ -23,28 +23,28 @@ Here are examples of these different formats::
       3     0   39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
       3     1    9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
 
-
-   2.6 sysfs:
+   2.6+ sysfs:
       446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
       35486    38030    38030    38030
 
-   2.6 diskstats:
+   2.6+ diskstats:
       3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
       3    1   hda1 35486 38030 38030 38030
 
-On 2.4 you might execute "grep 'hda ' /proc/partitions". On 2.6, you have
-a choice of "cat /sys/block/hda/stat" or "grep 'hda ' /proc/diskstats".
+On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
+a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
+
 The advantage of one over the other is that the sysfs choice works well
-if you are watching a known, small set of disks.  /proc/diskstats may
+if you are watching a known, small set of disks.  ``/proc/diskstats`` may
 be a better choice if you are watching a large number of disks because
 you'll avoid the overhead of 50, 100, or 500 or more opens/closes with
 each snapshot of your disk statistics.
 
 In 2.4, the statistics fields are those after the device name. In
 the above example, the first field of statistics would be 446216.
-By contrast, in 2.6 if you look at /sys/block/hda/stat, you'll
+By contrast, in 2.6+ if you look at ``/sys/block/hda/stat``, you'll
 find just the eleven fields, beginning with 446216.  If you look at
-/proc/diskstats, the eleven fields will be preceded by the major and
+``/proc/diskstats``, the eleven fields will be preceded by the major and
 minor device numbers, and device name.  Each of these formats provides
 eleven fields of statistics, each meaning exactly the same things.
 All fields except field 9 are cumulative since boot.  Field 9 should
@@ -108,7 +108,7 @@ introduced when changes collide, so (for instance) adding up all the
 read I/Os issued per partition should equal those made to the disks ...
 but due to the lack of locking it may only be very close.
 
-In 2.6, there are counters for each CPU, which make the lack of locking
+In 2.6+, there are counters for each CPU, which make the lack of locking
 almost a non-issue.  When the statistics are read, the per-CPU counters
 are summed (possibly overflowing the unsigned long variable they are
 summed to) and the result given to the user.  There is no convenient
@@ -117,14 +117,14 @@ user interface for accessing the per-CPU counters themselves.
 Disks vs Partitions
 -------------------
 
-There were significant changes between 2.4 and 2.6 in the I/O subsystem.
+There were significant changes between 2.4 and 2.6+ in the I/O subsystem.
 As a result, some statistic information disappeared. The translation from
 a disk address relative to a partition to the disk address relative to
 the host disk happens much earlier.  All merges and timings now happen
 at the disk level rather than at both the disk and partition level as
-in 2.4.  Consequently, you'll see a different statistics output on 2.6 for
+in 2.4.  Consequently, you'll see a different statistics output on 2.6+ for
 partitions from that for disks.  There are only *four* fields available
-for partitions on 2.6 machines.  This is reflected in the examples above.
+for partitions on 2.6+ machines.  This is reflected in the examples above.
 
 Field  1 -- # of reads issued
     This is the total number of reads issued to this partition.
@@ -163,16 +163,16 @@ to some (probably insignificant) inaccuracy.
 Additional notes
 ----------------
 
-In 2.6, sysfs is not mounted by default.  If your distribution of
+In 2.6+, sysfs is not mounted by default.  If your distribution of
 Linux hasn't added it already, here's the line you'll want to add to
-your /etc/fstab::
+your ``/etc/fstab``::
 
 	none /sys sysfs defaults 0 0
 
 
-In 2.6, all disk statistics were removed from /proc/stat.  In 2.4, they
-appear in both /proc/partitions and /proc/stat, although the ones in
-/proc/stat take a very different format from those in /proc/partitions
+In 2.6+, all disk statistics were removed from ``/proc/stat``.  In 2.4, they
+appear in both ``/proc/partitions`` and ``/proc/stat``, although the ones in
+``/proc/stat`` take a very different format from those in ``/proc/partitions``
 (see proc(5), if your system has it.)
 
 -- ricklind@us.ibm.com
-- 
GitLab


From f5981a5c59a90780138ea844ba3362bb72210808 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:18:08 -0300
Subject: [PATCH 1575/1958] IPMI.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- fix document type;
- add missing markups for subitems;
- mark literal blocks;
- add whitespaces and blank lines where needed;
- use bulleted list markups where neded.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/IPMI.txt | 76 ++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 32 deletions(-)

diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
index 6962cab997efd..aa77a25a09400 100644
--- a/Documentation/IPMI.txt
+++ b/Documentation/IPMI.txt
@@ -1,9 +1,8 @@
+=====================
+The Linux IPMI Driver
+=====================
 
-                          The Linux IPMI Driver
-			  ---------------------
-			      Corey Minyard
-			  <minyard@mvista.com>
-			    <minyard@acm.org>
+:Author: Corey Minyard <minyard@mvista.com> / <minyard@acm.org>
 
 The Intelligent Platform Management Interface, or IPMI, is a
 standard for controlling intelligent devices that monitor a system.
@@ -141,7 +140,7 @@ Addressing
 ----------
 
 The IPMI addressing works much like IP addresses, you have an overlay
-to handle the different address types.  The overlay is:
+to handle the different address types.  The overlay is::
 
   struct ipmi_addr
   {
@@ -153,7 +152,7 @@ to handle the different address types.  The overlay is:
 The addr_type determines what the address really is.  The driver
 currently understands two different types of addresses.
 
-"System Interface" addresses are defined as:
+"System Interface" addresses are defined as::
 
   struct ipmi_system_interface_addr
   {
@@ -166,7 +165,7 @@ straight to the BMC on the current card.  The channel must be
 IPMI_BMC_CHANNEL.
 
 Messages that are destined to go out on the IPMB bus use the
-IPMI_IPMB_ADDR_TYPE address type.  The format is
+IPMI_IPMB_ADDR_TYPE address type.  The format is::
 
   struct ipmi_ipmb_addr
   {
@@ -184,16 +183,16 @@ spec.
 Messages
 --------
 
-Messages are defined as:
+Messages are defined as::
 
-struct ipmi_msg
-{
+  struct ipmi_msg
+  {
 	unsigned char netfn;
 	unsigned char lun;
 	unsigned char cmd;
 	unsigned char *data;
 	int           data_len;
-};
+  };
 
 The driver takes care of adding/stripping the header information.  The
 data portion is just the data to be send (do NOT put addressing info
@@ -208,7 +207,7 @@ block of data, even when receiving messages.  Otherwise the driver
 will have no place to put the message.
 
 Messages coming up from the message handler in kernelland will come in
-as:
+as::
 
   struct ipmi_recv_msg
   {
@@ -246,6 +245,7 @@ and the user should not have to care what type of SMI is below them.
 
 
 Watching For Interfaces
+^^^^^^^^^^^^^^^^^^^^^^^
 
 When your code comes up, the IPMI driver may or may not have detected
 if IPMI devices exist.  So you might have to defer your setup until
@@ -256,6 +256,7 @@ and tell you when they come and go.
 
 
 Creating the User
+^^^^^^^^^^^^^^^^^
 
 To use the message handler, you must first create a user using
 ipmi_create_user.  The interface number specifies which SMI you want
@@ -272,6 +273,7 @@ closing the device automatically destroys the user.
 
 
 Messaging
+^^^^^^^^^
 
 To send a message from kernel-land, the ipmi_request_settime() call does
 pretty much all message handling.  Most of the parameter are
@@ -321,6 +323,7 @@ though, since it is tricky to manage your own buffers.
 
 
 Events and Incoming Commands
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The driver takes care of polling for IPMI events and receiving
 commands (commands are messages that are not responses, they are
@@ -367,7 +370,7 @@ in the system.  It discovers interfaces through a host of different
 methods, depending on the system.
 
 You can specify up to four interfaces on the module load line and
-control some module parameters:
+control some module parameters::
 
   modprobe ipmi_si.o type=<type1>,<type2>....
        ports=<port1>,<port2>... addrs=<addr1>,<addr2>...
@@ -437,7 +440,7 @@ default is one.  Setting to 0 is useful with the hotmod, but is
 obviously only useful for modules.
 
 When compiled into the kernel, the parameters can be specified on the
-kernel command line as:
+kernel command line as::
 
   ipmi_si.type=<type1>,<type2>...
        ipmi_si.ports=<port1>,<port2>... ipmi_si.addrs=<addr1>,<addr2>...
@@ -474,16 +477,22 @@ The driver supports a hot add and remove of interfaces.  This way,
 interfaces can be added or removed after the kernel is up and running.
 This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a
 write-only parameter.  You write a string to this interface.  The string
-has the format:
+has the format::
+
    <op1>[:op2[:op3...]]
-The "op"s are:
+
+The "op"s are::
+
    add|remove,kcs|bt|smic,mem|i/o,<address>[,<opt1>[,<opt2>[,...]]]
-You can specify more than one interface on the line.  The "opt"s are:
+
+You can specify more than one interface on the line.  The "opt"s are::
+
    rsp=<regspacing>
    rsi=<regsize>
    rsh=<regshift>
    irq=<irq>
    ipmb=<ipmb slave addr>
+
 and these have the same meanings as discussed above.  Note that you
 can also use this on the kernel command line for a more compact format
 for specifying an interface.  Note that when removing an interface,
@@ -496,7 +505,7 @@ The SMBus Driver (SSIF)
 The SMBus driver allows up to 4 SMBus devices to be configured in the
 system.  By default, the driver will only register with something it
 finds in DMI or ACPI tables.  You can change this
-at module load time (for a module) with:
+at module load time (for a module) with::
 
   modprobe ipmi_ssif.o
 	addr=<i2caddr1>[,<i2caddr2>[,...]]
@@ -535,7 +544,7 @@ the smb_addr parameter unless you have DMI or ACPI data to tell the
 driver what to use.
 
 When compiled into the kernel, the addresses can be specified on the
-kernel command line as:
+kernel command line as::
 
   ipmb_ssif.addr=<i2caddr1>[,<i2caddr2>[...]]
 	ipmi_ssif.adapter=<adapter1>[,<adapter2>[...]]
@@ -565,9 +574,9 @@ Some users need more detailed information about a device, like where
 the address came from or the raw base device for the IPMI interface.
 You can use the IPMI smi_watcher to catch the IPMI interfaces as they
 come or go, and to grab the information, you can use the function
-ipmi_get_smi_info(), which returns the following structure:
+ipmi_get_smi_info(), which returns the following structure::
 
-struct ipmi_smi_info {
+  struct ipmi_smi_info {
 	enum ipmi_addr_src addr_src;
 	struct device *dev;
 	union {
@@ -575,7 +584,7 @@ struct ipmi_smi_info {
 			void *acpi_handle;
 		} acpi_info;
 	} addr_info;
-};
+  };
 
 Currently special info for only for SI_ACPI address sources is
 returned.  Others may be added as necessary.
@@ -590,7 +599,7 @@ Watchdog
 
 A watchdog timer is provided that implements the Linux-standard
 watchdog timer interface.  It has three module parameters that can be
-used to control it:
+used to control it::
 
   modprobe ipmi_watchdog timeout=<t> pretimeout=<t> action=<action type>
       preaction=<preaction type> preop=<preop type> start_now=x
@@ -635,7 +644,7 @@ watchdog device is closed.  The default value of nowayout is true
 if the CONFIG_WATCHDOG_NOWAYOUT option is enabled, or false if not.
 
 When compiled into the kernel, the kernel command line is available
-for configuring the watchdog:
+for configuring the watchdog::
 
   ipmi_watchdog.timeout=<t> ipmi_watchdog.pretimeout=<t>
 	ipmi_watchdog.action=<action type>
@@ -675,6 +684,7 @@ also get a bunch of OEM events holding the panic string.
 
 
 The field settings of the events are:
+
 * Generator ID: 0x21 (kernel)
 * EvM Rev: 0x03 (this event is formatting in IPMI 1.0 format)
 * Sensor Type: 0x20 (OS critical stop sensor)
@@ -683,18 +693,20 @@ The field settings of the events are:
 * Event Data 1: 0xa1 (Runtime stop in OEM bytes 2 and 3)
 * Event data 2: second byte of panic string
 * Event data 3: third byte of panic string
+
 See the IPMI spec for the details of the event layout.  This event is
 always sent to the local management controller.  It will handle routing
 the message to the right place
 
 Other OEM events have the following format:
-Record ID (bytes 0-1): Set by the SEL.
-Record type (byte 2): 0xf0 (OEM non-timestamped)
-byte 3: The slave address of the card saving the panic
-byte 4: A sequence number (starting at zero)
-The rest of the bytes (11 bytes) are the panic string.  If the panic string
-is longer than 11 bytes, multiple messages will be sent with increasing
-sequence numbers.
+
+* Record ID (bytes 0-1): Set by the SEL.
+* Record type (byte 2): 0xf0 (OEM non-timestamped)
+* byte 3: The slave address of the card saving the panic
+* byte 4: A sequence number (starting at zero)
+  The rest of the bytes (11 bytes) are the panic string.  If the panic string
+  is longer than 11 bytes, multiple messages will be sent with increasing
+  sequence numbers.
 
 Because you cannot send OEM events using the standard interface, this
 function will attempt to find an SEL and add the events there.  It
-- 
GitLab


From ae2f26aa4c31498f9bbcfd2f2798d07b4cb82728 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:22:54 -0300
Subject: [PATCH 1576/1958] IRQ-affinity.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add a title for the document;
- mark literal blocks as such;
- use a bulleted list for changelog.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/IRQ-affinity.txt | 75 ++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt
index 01a675175a367..29da5000836a9 100644
--- a/Documentation/IRQ-affinity.txt
+++ b/Documentation/IRQ-affinity.txt
@@ -1,8 +1,11 @@
+================
+SMP IRQ affinity
+================
+
 ChangeLog:
-	Started by Ingo Molnar <mingo@redhat.com>
-	Update by Max Krasnyansky <maxk@qualcomm.com>
+	- Started by Ingo Molnar <mingo@redhat.com>
+	- Update by Max Krasnyansky <maxk@qualcomm.com>
 
-SMP IRQ affinity
 
 /proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
 which target CPUs are permitted for a given IRQ source.  It's a bitmask
@@ -16,50 +19,52 @@ will be set to the default mask. It can then be changed as described above.
 Default mask is 0xffffffff.
 
 Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting
-it to CPU4-7 (this is an 8-CPU SMP box):
+it to CPU4-7 (this is an 8-CPU SMP box)::
 
-[root@moon 44]# cd /proc/irq/44
-[root@moon 44]# cat smp_affinity
-ffffffff
+	[root@moon 44]# cd /proc/irq/44
+	[root@moon 44]# cat smp_affinity
+	ffffffff
 
-[root@moon 44]# echo 0f > smp_affinity
-[root@moon 44]# cat smp_affinity
-0000000f
-[root@moon 44]# ping -f h
-PING hell (195.4.7.3): 56 data bytes
-...
---- hell ping statistics ---
-6029 packets transmitted, 6027 packets received, 0% packet loss
-round-trip min/avg/max = 0.1/0.1/0.4 ms
-[root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
-           CPU0       CPU1       CPU2       CPU3      CPU4       CPU5        CPU6       CPU7
- 44:       1068       1785       1785       1783         0          0           0         0    IO-APIC-level  eth1
+	[root@moon 44]# echo 0f > smp_affinity
+	[root@moon 44]# cat smp_affinity
+	0000000f
+	[root@moon 44]# ping -f h
+	PING hell (195.4.7.3): 56 data bytes
+	...
+	--- hell ping statistics ---
+	6029 packets transmitted, 6027 packets received, 0% packet loss
+	round-trip min/avg/max = 0.1/0.1/0.4 ms
+	[root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
+		CPU0       CPU1       CPU2       CPU3      CPU4       CPU5        CPU6       CPU7
+	44:       1068       1785       1785       1783         0          0           0         0    IO-APIC-level  eth1
 
 As can be seen from the line above IRQ44 was delivered only to the first four
 processors (0-3).
 Now lets restrict that IRQ to CPU(4-7).
 
-[root@moon 44]# echo f0 > smp_affinity
-[root@moon 44]# cat smp_affinity
-000000f0
-[root@moon 44]# ping -f h
-PING hell (195.4.7.3): 56 data bytes
-..
---- hell ping statistics ---
-2779 packets transmitted, 2777 packets received, 0% packet loss
-round-trip min/avg/max = 0.1/0.5/585.4 ms
-[root@moon 44]# cat /proc/interrupts |  'CPU\|44:'
-           CPU0       CPU1       CPU2       CPU3      CPU4       CPU5        CPU6       CPU7
- 44:       1068       1785       1785       1783      1784       1069        1070       1069   IO-APIC-level  eth1
+::
+
+	[root@moon 44]# echo f0 > smp_affinity
+	[root@moon 44]# cat smp_affinity
+	000000f0
+	[root@moon 44]# ping -f h
+	PING hell (195.4.7.3): 56 data bytes
+	..
+	--- hell ping statistics ---
+	2779 packets transmitted, 2777 packets received, 0% packet loss
+	round-trip min/avg/max = 0.1/0.5/585.4 ms
+	[root@moon 44]# cat /proc/interrupts |  'CPU\|44:'
+		CPU0       CPU1       CPU2       CPU3      CPU4       CPU5        CPU6       CPU7
+	44:       1068       1785       1785       1783      1784       1069        1070       1069   IO-APIC-level  eth1
 
 This time around IRQ44 was delivered only to the last four processors.
 i.e counters for the CPU0-3 did not change.
 
-Here is an example of limiting that same irq (44) to cpus 1024 to 1031:
+Here is an example of limiting that same irq (44) to cpus 1024 to 1031::
 
-[root@moon 44]# echo 1024-1031 > smp_affinity_list
-[root@moon 44]# cat smp_affinity_list
-1024-1031
+	[root@moon 44]# echo 1024-1031 > smp_affinity_list
+	[root@moon 44]# cat smp_affinity_list
+	1024-1031
 
 Note that to do this with a bitmask would require 32 bitmasks of zero
 to follow the pertinent one.
-- 
GitLab


From 1642a1e68a70230c4dd0fb1523e1e886e3f5972a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:29:55 -0300
Subject: [PATCH 1577/1958] IRQ-domain.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use proper markups for titles;
- mark literal blocks as such;
- add blank lines where needed.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/IRQ-domain.txt | 69 +++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 20 deletions(-)

diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt
index 1f246eb25ca54..4a1cd7645d856 100644
--- a/Documentation/IRQ-domain.txt
+++ b/Documentation/IRQ-domain.txt
@@ -1,4 +1,6 @@
-irq_domain interrupt number mapping library
+===============================================
+The irq_domain interrupt number mapping library
+===============================================
 
 The current design of the Linux kernel uses a single large number
 space where each separate IRQ source is assigned a different number.
@@ -36,7 +38,9 @@ irq_domain also implements translation from an abstract irq_fwspec
 structure to hwirq numbers (Device Tree and ACPI GSI so far), and can
 be easily extended to support other IRQ topology data sources.
 
-=== irq_domain usage ===
+irq_domain usage
+================
+
 An interrupt controller driver creates and registers an irq_domain by
 calling one of the irq_domain_add_*() functions (each mapping method
 has a different allocator function, more on that later).  The function
@@ -62,15 +66,21 @@ If the driver has the Linux IRQ number or the irq_data pointer, and
 needs to know the associated hwirq number (such as in the irq_chip
 callbacks) then it can be directly obtained from irq_data->hwirq.
 
-=== Types of irq_domain mappings ===
+Types of irq_domain mappings
+============================
+
 There are several mechanisms available for reverse mapping from hwirq
 to Linux irq, and each mechanism uses a different allocation function.
 Which reverse map type should be used depends on the use case.  Each
 of the reverse map types are described below:
 
-==== Linear ====
-irq_domain_add_linear()
-irq_domain_create_linear()
+Linear
+------
+
+::
+
+	irq_domain_add_linear()
+	irq_domain_create_linear()
 
 The linear reverse map maintains a fixed size table indexed by the
 hwirq number.  When a hwirq is mapped, an irq_desc is allocated for
@@ -89,9 +99,13 @@ accepts a more general abstraction 'struct fwnode_handle'.
 
 The majority of drivers should use the linear map.
 
-==== Tree ====
-irq_domain_add_tree()
-irq_domain_create_tree()
+Tree
+----
+
+::
+
+	irq_domain_add_tree()
+	irq_domain_create_tree()
 
 The irq_domain maintains a radix tree map from hwirq numbers to Linux
 IRQs.  When an hwirq is mapped, an irq_desc is allocated and the
@@ -109,8 +123,12 @@ accepts a more general abstraction 'struct fwnode_handle'.
 
 Very few drivers should need this mapping.
 
-==== No Map ===-
-irq_domain_add_nomap()
+No Map
+------
+
+::
+
+	irq_domain_add_nomap()
 
 The No Map mapping is to be used when the hwirq number is
 programmable in the hardware.  In this case it is best to program the
@@ -121,10 +139,14 @@ Linux IRQ number into the hardware.
 
 Most drivers cannot use this mapping.
 
-==== Legacy ====
-irq_domain_add_simple()
-irq_domain_add_legacy()
-irq_domain_add_legacy_isa()
+Legacy
+------
+
+::
+
+	irq_domain_add_simple()
+	irq_domain_add_legacy()
+	irq_domain_add_legacy_isa()
 
 The Legacy mapping is a special case for drivers that already have a
 range of irq_descs allocated for the hwirqs.  It is used when the
@@ -163,14 +185,17 @@ that the driver using the simple domain call irq_create_mapping()
 before any irq_find_mapping() since the latter will actually work
 for the static IRQ assignment case.
 
-==== Hierarchy IRQ domain ====
+Hierarchy IRQ domain
+--------------------
+
 On some architectures, there may be multiple interrupt controllers
 involved in delivering an interrupt from the device to the target CPU.
-Let's look at a typical interrupt delivering path on x86 platforms:
+Let's look at a typical interrupt delivering path on x86 platforms::
 
-Device --> IOAPIC -> Interrupt remapping Controller -> Local APIC -> CPU
+  Device --> IOAPIC -> Interrupt remapping Controller -> Local APIC -> CPU
 
 There are three interrupt controllers involved:
+
 1) IOAPIC controller
 2) Interrupt remapping controller
 3) Local APIC controller
@@ -180,7 +205,8 @@ hardware architecture, an irq_domain data structure is built for each
 interrupt controller and those irq_domains are organized into hierarchy.
 When building irq_domain hierarchy, the irq_domain near to the device is
 child and the irq_domain near to CPU is parent. So a hierarchy structure
-as below will be built for the example above.
+as below will be built for the example above::
+
 	CPU Vector irq_domain (root irq_domain to manage CPU vectors)
 		^
 		|
@@ -190,6 +216,7 @@ as below will be built for the example above.
 	IOAPIC irq_domain (manage IOAPIC delivery entries/pins)
 
 There are four major interfaces to use hierarchy irq_domain:
+
 1) irq_domain_alloc_irqs(): allocate IRQ descriptors and interrupt
    controller related resources to deliver these interrupts.
 2) irq_domain_free_irqs(): free IRQ descriptors and interrupt controller
@@ -199,7 +226,8 @@ There are four major interfaces to use hierarchy irq_domain:
 4) irq_domain_deactivate_irq(): deactivate interrupt controller hardware
    to stop delivering the interrupt.
 
-Following changes are needed to support hierarchy irq_domain.
+Following changes are needed to support hierarchy irq_domain:
+
 1) a new field 'parent' is added to struct irq_domain; it's used to
    maintain irq_domain hierarchy information.
 2) a new field 'parent_data' is added to struct irq_data; it's used to
@@ -223,6 +251,7 @@ software architecture.
 
 For an interrupt controller driver to support hierarchy irq_domain, it
 needs to:
+
 1) Implement irq_domain_ops.alloc and irq_domain_ops.free
 2) Optionally implement irq_domain_ops.activate and
    irq_domain_ops.deactivate.
-- 
GitLab


From b4282c79204b086276f4278568994f7e1ee449ca Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:35:40 -0300
Subject: [PATCH 1578/1958] irqflags-tracing.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

There isn't much to be done here: just mark the document
title as such and add a :Author:.

While here, use upper case at the beginning of a few paragraphs
that start with lower case.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/irqflags-tracing.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
index f6da05670e16d..bdd208259fb30 100644
--- a/Documentation/irqflags-tracing.txt
+++ b/Documentation/irqflags-tracing.txt
@@ -1,8 +1,10 @@
+=======================
 IRQ-flags state tracing
+=======================
 
-started by Ingo Molnar <mingo@redhat.com>
+:Author: started by Ingo Molnar <mingo@redhat.com>
 
-the "irq-flags tracing" feature "traces" hardirq and softirq state, in
+The "irq-flags tracing" feature "traces" hardirq and softirq state, in
 that it gives interested subsystems an opportunity to be notified of
 every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that
 happens in the kernel.
@@ -14,7 +16,7 @@ CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these
 are locking APIs that are not used in IRQ context. (the one exception
 for rwsems is worked around)
 
-architecture support for this is certainly not in the "trivial"
+Architecture support for this is certainly not in the "trivial"
 category, because lots of lowlevel assembly code deal with irq-flags
 state changes. But an architecture can be irq-flags-tracing enabled in a
 rather straightforward and risk-free manner.
@@ -41,7 +43,7 @@ irq-flags-tracing support:
   excluded from the irq-tracing [and lock validation] mechanism via
   lockdep_off()/lockdep_on().
 
-in general there is no risk from having an incomplete irq-flags-tracing
+In general there is no risk from having an incomplete irq-flags-tracing
 implementation in an architecture: lockdep will detect that and will
 turn itself off. I.e. the lock validator will still be reliable. There
 should be no crashes due to irq-tracing bugs. (except if the assembly
-- 
GitLab


From e7225dc8705a59e102ce31037e35a8cb6fe4056c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:40:07 -0300
Subject: [PATCH 1579/1958] IRQ.txt: add a markup for its title

This simple document only needs a markup for its title to be
using the standard we're adopting for text documents.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/IRQ.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/IRQ.txt b/Documentation/IRQ.txt
index 1011e71750216..4273806a606bb 100644
--- a/Documentation/IRQ.txt
+++ b/Documentation/IRQ.txt
@@ -1,4 +1,6 @@
+===============
 What is an IRQ?
+===============
 
 An IRQ is an interrupt request from a device.
 Currently they can come in over a pin, or over a packet.
-- 
GitLab


From 66b338eabef3255a2c470aa6d75a62a832c4fc4a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:42:12 -0300
Subject: [PATCH 1580/1958] isapnp.txt: promote title level

This simple document only needs to promote its title to be
using the standard we're using for document texts.

Yet, IMHO, it would be worth merging this file with
Documentation/pnp.txt in the future.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/isapnp.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/isapnp.txt b/Documentation/isapnp.txt
index 400d1b5b523dd..8d0840ac847bf 100644
--- a/Documentation/isapnp.txt
+++ b/Documentation/isapnp.txt
@@ -1,3 +1,4 @@
+==========================================================
 ISA Plug & Play support by Jaroslav Kysela <perex@suse.cz>
 ==========================================================
 
-- 
GitLab


From 0685552f2c37531eb2f29faf7918a15b632175c5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 15:47:07 -0300
Subject: [PATCH 1581/1958] isa.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Use the main title standard for this document;
- replace _foo_ by **foo** for emphasis;
- mark literal blocks as such.

Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/isa.txt | 53 ++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/Documentation/isa.txt b/Documentation/isa.txt
index f232c26a40be2..def4a7b690b56 100644
--- a/Documentation/isa.txt
+++ b/Documentation/isa.txt
@@ -1,5 +1,6 @@
+===========
 ISA Drivers
------------
+===========
 
 The following text is adapted from the commit message of the initial
 commit of the ISA bus driver authored by Rene Herman.
@@ -23,17 +24,17 @@ that all device creation has been made internal as well.
 
 The usage model this provides is nice, and has been acked from the ALSA
 side by Takashi Iwai and Jaroslav Kysela. The ALSA driver module_init's
-now (for oldisa-only drivers) become:
+now (for oldisa-only drivers) become::
 
-static int __init alsa_card_foo_init(void)
-{
-	return isa_register_driver(&snd_foo_isa_driver, SNDRV_CARDS);
-}
+	static int __init alsa_card_foo_init(void)
+	{
+		return isa_register_driver(&snd_foo_isa_driver, SNDRV_CARDS);
+	}
 
-static void __exit alsa_card_foo_exit(void)
-{
-	isa_unregister_driver(&snd_foo_isa_driver);
-}
+	static void __exit alsa_card_foo_exit(void)
+	{
+		isa_unregister_driver(&snd_foo_isa_driver);
+	}
 
 Quite like the other bus models therefore. This removes a lot of
 duplicated init code from the ALSA ISA drivers.
@@ -47,11 +48,11 @@ parameter, indicating how many devices to create and call our methods
 with.
 
 The platform_driver callbacks are called with a platform_device param;
-the isa_driver callbacks are being called with a "struct device *dev,
-unsigned int id" pair directly -- with the device creation completely
+the isa_driver callbacks are being called with a ``struct device *dev,
+unsigned int id`` pair directly -- with the device creation completely
 internal to the bus it's much cleaner to not leak isa_dev's by passing
 them in at all. The id is the only thing we ever want other then the
-struct device * anyways, and it makes for nicer code in the callbacks as
+struct device anyways, and it makes for nicer code in the callbacks as
 well.
 
 With this additional .match() callback ISA drivers have all options. If
@@ -75,20 +76,20 @@ This exports only two functions; isa_{,un}register_driver().
 
 isa_register_driver() register's the struct device_driver, and then
 loops over the passed in ndev creating devices and registering them.
-This causes the bus match method to be called for them, which is:
+This causes the bus match method to be called for them, which is::
 
-int isa_bus_match(struct device *dev, struct device_driver *driver)
-{
-          struct isa_driver *isa_driver = to_isa_driver(driver);
+	int isa_bus_match(struct device *dev, struct device_driver *driver)
+	{
+		struct isa_driver *isa_driver = to_isa_driver(driver);
 
-          if (dev->platform_data == isa_driver) {
-                  if (!isa_driver->match ||
-                          isa_driver->match(dev, to_isa_dev(dev)->id))
-                          return 1;
-                  dev->platform_data = NULL;
-          }
-          return 0;
-}
+		if (dev->platform_data == isa_driver) {
+			if (!isa_driver->match ||
+				isa_driver->match(dev, to_isa_dev(dev)->id))
+				return 1;
+			dev->platform_data = NULL;
+		}
+		return 0;
+	}
 
 The first thing this does is check if this device is in fact one of this
 driver's devices by seeing if the device's platform_data pointer is set
@@ -102,7 +103,7 @@ well.
 Then, if the the driver did not provide a .match, it matches. If it did,
 the driver match() method is called to determine a match.
 
-If it did _not_ match, dev->platform_data is reset to indicate this to
+If it did **not** match, dev->platform_data is reset to indicate this to
 isa_register_driver which can then unregister the device again.
 
 If during all this, there's any error, or no devices matched at all
-- 
GitLab


From 7d98c21bd0257ec9f56910c758519bee71767517 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 16:07:16 -0300
Subject: [PATCH 1582/1958] kernel-per-CPU-kthreads.txt: standardize document
 format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Use title markups;
- use "-" for bulletted lists;
- Split Name/Purpose on two lines, in order to make visually
  easier to read (in text format), and to bold the title
  (on ReST output)
- Add blank lines to split bulleted lists;
- use sub-titles for the several kthread softirq types;
- mark one literal var with asterisk as such, in order to
  avoid an error warning on Sphinx.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/kernel-per-CPU-kthreads.txt | 156 +++++++++++++++++-----
 1 file changed, 121 insertions(+), 35 deletions(-)

diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index 2cb7dc5c0e0db..0f00f9c164ac0 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -1,27 +1,29 @@
-REDUCING OS JITTER DUE TO PER-CPU KTHREADS
+==========================================
+Reducing OS jitter due to per-cpu kthreads
+==========================================
 
 This document lists per-CPU kthreads in the Linux kernel and presents
 options to control their OS jitter.  Note that non-per-CPU kthreads are
 not listed here.  To reduce OS jitter from non-per-CPU kthreads, bind
 them to a "housekeeping" CPU dedicated to such work.
 
+References
+==========
 
-REFERENCES
+-	Documentation/IRQ-affinity.txt:  Binding interrupts to sets of CPUs.
 
-o	Documentation/IRQ-affinity.txt:  Binding interrupts to sets of CPUs.
+-	Documentation/cgroup-v1:  Using cgroups to bind tasks to sets of CPUs.
 
-o	Documentation/cgroup-v1:  Using cgroups to bind tasks to sets of CPUs.
-
-o	man taskset:  Using the taskset command to bind tasks to sets
+-	man taskset:  Using the taskset command to bind tasks to sets
 	of CPUs.
 
-o	man sched_setaffinity:  Using the sched_setaffinity() system
+-	man sched_setaffinity:  Using the sched_setaffinity() system
 	call to bind tasks to sets of CPUs.
 
-o	/sys/devices/system/cpu/cpuN/online:  Control CPU N's hotplug state,
+-	/sys/devices/system/cpu/cpuN/online:  Control CPU N's hotplug state,
 	writing "0" to offline and "1" to online.
 
-o	In order to locate kernel-generated OS jitter on CPU N:
+-	In order to locate kernel-generated OS jitter on CPU N:
 
 		cd /sys/kernel/debug/tracing
 		echo 1 > max_graph_depth # Increase the "1" for more detail
@@ -29,12 +31,17 @@ o	In order to locate kernel-generated OS jitter on CPU N:
 		# run workload
 		cat per_cpu/cpuN/trace
 
+kthreads
+========
+
+Name:
+  ehca_comp/%u
 
-KTHREADS
+Purpose:
+  Periodically process Infiniband-related work.
 
-Name: ehca_comp/%u
-Purpose: Periodically process Infiniband-related work.
 To reduce its OS jitter, do any of the following:
+
 1.	Don't use eHCA Infiniband hardware, instead choosing hardware
 	that does not require per-CPU kthreads.  This will prevent these
 	kthreads from being created in the first place.  (This will
@@ -46,26 +53,45 @@ To reduce its OS jitter, do any of the following:
 	provisioned only on selected CPUs.
 
 
-Name: irq/%d-%s
-Purpose: Handle threaded interrupts.
+Name:
+  irq/%d-%s
+
+Purpose:
+  Handle threaded interrupts.
+
 To reduce its OS jitter, do the following:
+
 1.	Use irq affinity to force the irq threads to execute on
 	some other CPU.
 
-Name: kcmtpd_ctr_%d
-Purpose: Handle Bluetooth work.
+Name:
+  kcmtpd_ctr_%d
+
+Purpose:
+  Handle Bluetooth work.
+
 To reduce its OS jitter, do one of the following:
+
 1.	Don't use Bluetooth, in which case these kthreads won't be
 	created in the first place.
 2.	Use irq affinity to force Bluetooth-related interrupts to
 	occur on some other CPU and furthermore initiate all
 	Bluetooth activity on some other CPU.
 
-Name: ksoftirqd/%u
-Purpose: Execute softirq handlers when threaded or when under heavy load.
+Name:
+  ksoftirqd/%u
+
+Purpose:
+  Execute softirq handlers when threaded or when under heavy load.
+
 To reduce its OS jitter, each softirq vector must be handled
 separately as follows:
-TIMER_SOFTIRQ:  Do all of the following:
+
+TIMER_SOFTIRQ
+-------------
+
+Do all of the following:
+
 1.	To the extent possible, keep the CPU out of the kernel when it
 	is non-idle, for example, by avoiding system calls and by forcing
 	both kernel threads and interrupts to execute elsewhere.
@@ -76,34 +102,59 @@ TIMER_SOFTIRQ:  Do all of the following:
 	first one back online.  Once you have onlined the CPUs in question,
 	do not offline any other CPUs, because doing so could force the
 	timer back onto one of the CPUs in question.
-NET_TX_SOFTIRQ and NET_RX_SOFTIRQ:  Do all of the following:
+
+NET_TX_SOFTIRQ and NET_RX_SOFTIRQ
+---------------------------------
+
+Do all of the following:
+
 1.	Force networking interrupts onto other CPUs.
 2.	Initiate any network I/O on other CPUs.
 3.	Once your application has started, prevent CPU-hotplug operations
 	from being initiated from tasks that might run on the CPU to
 	be de-jittered.  (It is OK to force this CPU offline and then
 	bring it back online before you start your application.)
-BLOCK_SOFTIRQ:  Do all of the following:
+
+BLOCK_SOFTIRQ
+-------------
+
+Do all of the following:
+
 1.	Force block-device interrupts onto some other CPU.
 2.	Initiate any block I/O on other CPUs.
 3.	Once your application has started, prevent CPU-hotplug operations
 	from being initiated from tasks that might run on the CPU to
 	be de-jittered.  (It is OK to force this CPU offline and then
 	bring it back online before you start your application.)
-IRQ_POLL_SOFTIRQ:  Do all of the following:
+
+IRQ_POLL_SOFTIRQ
+----------------
+
+Do all of the following:
+
 1.	Force block-device interrupts onto some other CPU.
 2.	Initiate any block I/O and block-I/O polling on other CPUs.
 3.	Once your application has started, prevent CPU-hotplug operations
 	from being initiated from tasks that might run on the CPU to
 	be de-jittered.  (It is OK to force this CPU offline and then
 	bring it back online before you start your application.)
-TASKLET_SOFTIRQ: Do one or more of the following:
+
+TASKLET_SOFTIRQ
+---------------
+
+Do one or more of the following:
+
 1.	Avoid use of drivers that use tasklets.  (Such drivers will contain
 	calls to things like tasklet_schedule().)
 2.	Convert all drivers that you must use from tasklets to workqueues.
 3.	Force interrupts for drivers using tasklets onto other CPUs,
 	and also do I/O involving these drivers on other CPUs.
-SCHED_SOFTIRQ: Do all of the following:
+
+SCHED_SOFTIRQ
+-------------
+
+Do all of the following:
+
 1.	Avoid sending scheduler IPIs to the CPU to be de-jittered,
 	for example, ensure that at most one runnable kthread is present
 	on that CPU.  If a thread that expects to run on the de-jittered
@@ -120,7 +171,12 @@ SCHED_SOFTIRQ: Do all of the following:
 	forcing both kernel threads and interrupts to execute elsewhere.
 	This further reduces the number of scheduler-clock interrupts
 	received by the de-jittered CPU.
-HRTIMER_SOFTIRQ:  Do all of the following:
+
+HRTIMER_SOFTIRQ
+---------------
+
+Do all of the following:
+
 1.	To the extent possible, keep the CPU out of the kernel when it
 	is non-idle.  For example, avoid system calls and force both
 	kernel threads and interrupts to execute elsewhere.
@@ -131,9 +187,15 @@ HRTIMER_SOFTIRQ:  Do all of the following:
 	back online.  Once you have onlined the CPUs in question, do not
 	offline any other CPUs, because doing so could force the timer
 	back onto one of the CPUs in question.
-RCU_SOFTIRQ:  Do at least one of the following:
+
+RCU_SOFTIRQ
+-----------
+
+Do at least one of the following:
+
 1.	Offload callbacks and keep the CPU in either dyntick-idle or
 	adaptive-ticks state by doing all of the following:
+
 	a.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be
 		de-jittered is marked as an adaptive-ticks CPU using the
 		"nohz_full=" boot parameter.  Bind the rcuo kthreads to
@@ -142,8 +204,10 @@ RCU_SOFTIRQ:  Do at least one of the following:
 		when it is non-idle, for example, by avoiding system
 		calls and by forcing both kernel threads and interrupts
 		to execute elsewhere.
+
 2.	Enable RCU to do its processing remotely via dyntick-idle by
 	doing all of the following:
+
 	a.	Build with CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y.
 	b.	Ensure that the CPU goes idle frequently, allowing other
 		CPUs to detect that it has passed through an RCU quiescent
@@ -155,15 +219,20 @@ RCU_SOFTIRQ:  Do at least one of the following:
 		calls and by forcing both kernel threads and interrupts
 		to execute elsewhere.
 
-Name: kworker/%u:%d%s (cpu, id, priority)
-Purpose: Execute workqueue requests
+Name:
+  kworker/%u:%d%s (cpu, id, priority)
+
+Purpose:
+  Execute workqueue requests
+
 To reduce its OS jitter, do any of the following:
+
 1.	Run your workload at a real-time priority, which will allow
 	preempting the kworker daemons.
 2.	A given workqueue can be made visible in the sysfs filesystem
 	by passing the WQ_SYSFS to that workqueue's alloc_workqueue().
 	Such a workqueue can be confined to a given subset of the
-	CPUs using the /sys/devices/virtual/workqueue/*/cpumask sysfs
+	CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs
 	files.	The set of WQ_SYSFS workqueues can be displayed using
 	"ls sys/devices/virtual/workqueue".  That said, the workqueues
 	maintainer would like to caution people against indiscriminately
@@ -173,6 +242,7 @@ To reduce its OS jitter, do any of the following:
 	to remove it, even if its addition was a mistake.
 3.	Do any of the following needed to avoid jitter that your
 	application cannot tolerate:
+
 	a.	Build your kernel with CONFIG_SLUB=y rather than
 		CONFIG_SLAB=y, thus avoiding the slab allocator's periodic
 		use of each CPU's workqueues to run its cache_reap()
@@ -186,6 +256,7 @@ To reduce its OS jitter, do any of the following:
 		be able to build your kernel with CONFIG_CPU_FREQ=n to
 		avoid the CPU-frequency governor periodically running
 		on each CPU, including cs_dbs_timer() and od_dbs_timer().
+
 		WARNING:  Please check your CPU specifications to
 		make sure that this is safe on your particular system.
 	d.	As of v3.18, Christoph Lameter's on-demand vmstat workers
@@ -222,9 +293,14 @@ To reduce its OS jitter, do any of the following:
 		CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
 		avoiding OS jitter from rackmeter_do_timer().
 
-Name: rcuc/%u
-Purpose: Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels.
+Name:
+  rcuc/%u
+
+Purpose:
+  Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels.
+
 To reduce its OS jitter, do at least one of the following:
+
 1.	Build the kernel with CONFIG_PREEMPT=n.  This prevents these
 	kthreads from being created in the first place, and also obviates
 	the need for RCU priority boosting.  This approach is feasible
@@ -244,9 +320,14 @@ To reduce its OS jitter, do at least one of the following:
 	CPU, again preventing the rcuc/%u kthreads from having any work
 	to do.
 
-Name: rcuob/%d, rcuop/%d, and rcuos/%d
-Purpose: Offload RCU callbacks from the corresponding CPU.
+Name:
+  rcuob/%d, rcuop/%d, and rcuos/%d
+
+Purpose:
+  Offload RCU callbacks from the corresponding CPU.
+
 To reduce its OS jitter, do at least one of the following:
+
 1.	Use affinity, cgroups, or other mechanism to force these kthreads
 	to execute on some other CPU.
 2.	Build with CONFIG_RCU_NOCB_CPU=n, which will prevent these
@@ -254,9 +335,14 @@ To reduce its OS jitter, do at least one of the following:
 	note that this will not eliminate OS jitter, but will instead
 	shift it to RCU_SOFTIRQ.
 
-Name: watchdog/%u
-Purpose: Detect software lockups on each CPU.
+Name:
+  watchdog/%u
+
+Purpose:
+  Detect software lockups on each CPU.
+
 To reduce its OS jitter, do at least one of the following:
+
 1.	Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these
 	kthreads from being created in the first place.
 2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
-- 
GitLab


From 7472723305906f3d60e95de1ecf3e31b20c2a854 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 16:25:04 -0300
Subject: [PATCH 1583/1958] kobject.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add markups for titles;
- mark literal blocks as such;
- add needed whitespace/blank lines;
- use :Author: and :Last updated: for authorship.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/kobject.txt | 69 ++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index 1be59a3a521c8..fc9485d790618 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -1,13 +1,13 @@
+=====================================================================
 Everything you never wanted to know about kobjects, ksets, and ktypes
+=====================================================================
 
-Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+:Author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+:Last updated: December 19, 2007
 
 Based on an original article by Jon Corbet for lwn.net written October 1,
 2003 and located at http://lwn.net/Articles/51437/
 
-Last updated December 19, 2007
-
-
 Part of the difficulty in understanding the driver model - and the kobject
 abstraction upon which it is built - is that there is no obvious starting
 place. Dealing with kobjects requires understanding a few different types,
@@ -47,6 +47,7 @@ approach will be taken, so we'll go back to kobjects.
 
 
 Embedding kobjects
+==================
 
 It is rare for kernel code to create a standalone kobject, with one major
 exception explained below.  Instead, kobjects are used to control access to
@@ -65,7 +66,7 @@ their own, but are invariably found embedded in the larger objects of
 interest.)
 
 So, for example, the UIO code in drivers/uio/uio.c has a structure that
-defines the memory region associated with a uio device:
+defines the memory region associated with a uio device::
 
     struct uio_map {
 	struct kobject kobj;
@@ -77,7 +78,7 @@ just a matter of using the kobj member.  Code that works with kobjects will
 often have the opposite problem, however: given a struct kobject pointer,
 what is the pointer to the containing structure?  You must avoid tricks
 (such as assuming that the kobject is at the beginning of the structure)
-and, instead, use the container_of() macro, found in <linux/kernel.h>:
+and, instead, use the container_of() macro, found in <linux/kernel.h>::
 
     container_of(pointer, type, member)
 
@@ -90,13 +91,13 @@ where:
 The return value from container_of() is a pointer to the corresponding
 container type. So, for example, a pointer "kp" to a struct kobject
 embedded *within* a struct uio_map could be converted to a pointer to the
-*containing* uio_map structure with:
+*containing* uio_map structure with::
 
     struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
 
 For convenience, programmers often define a simple macro for "back-casting"
 kobject pointers to the containing type.  Exactly this happens in the
-earlier drivers/uio/uio.c, as you can see here:
+earlier drivers/uio/uio.c, as you can see here::
 
     struct uio_map {
         struct kobject kobj;
@@ -106,23 +107,25 @@ earlier drivers/uio/uio.c, as you can see here:
     #define to_map(map) container_of(map, struct uio_map, kobj)
 
 where the macro argument "map" is a pointer to the struct kobject in
-question.  That macro is subsequently invoked with:
+question.  That macro is subsequently invoked with::
 
     struct uio_map *map = to_map(kobj);
 
 
 Initialization of kobjects
+==========================
 
 Code which creates a kobject must, of course, initialize that object. Some
-of the internal fields are setup with a (mandatory) call to kobject_init():
+of the internal fields are setup with a (mandatory) call to kobject_init()::
 
     void kobject_init(struct kobject *kobj, struct kobj_type *ktype);
 
 The ktype is required for a kobject to be created properly, as every kobject
 must have an associated kobj_type.  After calling kobject_init(), to
-register the kobject with sysfs, the function kobject_add() must be called:
+register the kobject with sysfs, the function kobject_add() must be called::
 
-    int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...);
+    int kobject_add(struct kobject *kobj, struct kobject *parent,
+		    const char *fmt, ...);
 
 This sets up the parent of the kobject and the name for the kobject
 properly.  If the kobject is to be associated with a specific kset,
@@ -133,7 +136,7 @@ kset itself.
 
 As the name of the kobject is set when it is added to the kernel, the name
 of the kobject should never be manipulated directly.  If you must change
-the name of the kobject, call kobject_rename():
+the name of the kobject, call kobject_rename()::
 
     int kobject_rename(struct kobject *kobj, const char *new_name);
 
@@ -146,12 +149,12 @@ is being removed.  If your code needs to call this function, it is
 incorrect and needs to be fixed.
 
 To properly access the name of the kobject, use the function
-kobject_name():
+kobject_name()::
 
     const char *kobject_name(const struct kobject * kobj);
 
 There is a helper function to both initialize and add the kobject to the
-kernel at the same time, called surprisingly enough kobject_init_and_add():
+kernel at the same time, called surprisingly enough kobject_init_and_add()::
 
     int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
                              struct kobject *parent, const char *fmt, ...);
@@ -161,10 +164,11 @@ kobject_add() functions described above.
 
 
 Uevents
+=======
 
 After a kobject has been registered with the kobject core, you need to
 announce to the world that it has been created.  This can be done with a
-call to kobject_uevent():
+call to kobject_uevent()::
 
     int kobject_uevent(struct kobject *kobj, enum kobject_action action);
 
@@ -180,11 +184,12 @@ hand.
 
 
 Reference counts
+================
 
 One of the key functions of a kobject is to serve as a reference counter
 for the object in which it is embedded. As long as references to the object
 exist, the object (and the code which supports it) must continue to exist.
-The low-level functions for manipulating a kobject's reference counts are:
+The low-level functions for manipulating a kobject's reference counts are::
 
     struct kobject *kobject_get(struct kobject *kobj);
     void kobject_put(struct kobject *kobj);
@@ -209,21 +214,24 @@ file Documentation/kref.txt in the Linux kernel source tree.
 
 
 Creating "simple" kobjects
+==========================
 
 Sometimes all that a developer wants is a way to create a simple directory
 in the sysfs hierarchy, and not have to mess with the whole complication of
 ksets, show and store functions, and other details.  This is the one
 exception where a single kobject should be created.  To create such an
-entry, use the function:
+entry, use the function::
 
     struct kobject *kobject_create_and_add(char *name, struct kobject *parent);
 
 This function will create a kobject and place it in sysfs in the location
 underneath the specified parent kobject.  To create simple attributes
-associated with this kobject, use:
+associated with this kobject, use::
 
     int sysfs_create_file(struct kobject *kobj, struct attribute *attr);
-or
+
+or::
+
     int sysfs_create_group(struct kobject *kobj, struct attribute_group *grp);
 
 Both types of attributes used here, with a kobject that has been created
@@ -236,6 +244,7 @@ implementation of a simple kobject and attributes.
 
 
 ktypes and release methods
+==========================
 
 One important thing still missing from the discussion is what happens to a
 kobject when its reference count reaches zero. The code which created the
@@ -257,7 +266,7 @@ is good practice to always use kobject_put() after kobject_init() to avoid
 errors creeping in.
 
 This notification is done through a kobject's release() method. Usually
-such a method has a form like:
+such a method has a form like::
 
     void my_object_release(struct kobject *kobj)
     {
@@ -281,7 +290,7 @@ leak in the kobject core, which makes people unhappy.
 
 Interestingly, the release() method is not stored in the kobject itself;
 instead, it is associated with the ktype. So let us introduce struct
-kobj_type:
+kobj_type::
 
     struct kobj_type {
 	    void (*release)(struct kobject *kobj);
@@ -306,6 +315,7 @@ automatically created for any kobject that is registered with this ktype.
 
 
 ksets
+=====
 
 A kset is merely a collection of kobjects that want to be associated with
 each other.  There is no restriction that they be of the same ktype, but be
@@ -335,13 +345,16 @@ kobject) in their parent.
 
 As a kset contains a kobject within it, it should always be dynamically
 created and never declared statically or on the stack.  To create a new
-kset use:
+kset use::
+
   struct kset *kset_create_and_add(const char *name,
 				   struct kset_uevent_ops *u,
 				   struct kobject *parent);
 
-When you are finished with the kset, call:
+When you are finished with the kset, call::
+
   void kset_unregister(struct kset *kset);
+
 to destroy it.  This removes the kset from sysfs and decrements its reference
 count.  When the reference count goes to zero, the kset will be released.
 Because other references to the kset may still exist, the release may happen
@@ -351,14 +364,14 @@ An example of using a kset can be seen in the
 samples/kobject/kset-example.c file in the kernel tree.
 
 If a kset wishes to control the uevent operations of the kobjects
-associated with it, it can use the struct kset_uevent_ops to handle it:
+associated with it, it can use the struct kset_uevent_ops to handle it::
 
-struct kset_uevent_ops {
+  struct kset_uevent_ops {
         int (*filter)(struct kset *kset, struct kobject *kobj);
         const char *(*name)(struct kset *kset, struct kobject *kobj);
         int (*uevent)(struct kset *kset, struct kobject *kobj,
                       struct kobj_uevent_env *env);
-};
+  };
 
 
 The filter function allows a kset to prevent a uevent from being emitted to
@@ -386,6 +399,7 @@ added below the parent kobject.
 
 
 Kobject removal
+===============
 
 After a kobject has been registered with the kobject core successfully, it
 must be cleaned up when the code is finished with it.  To do that, call
@@ -409,6 +423,7 @@ called, and the objects in the former circle release each other.
 
 
 Example code to copy from
+=========================
 
 For a more complete example of using ksets and kobjects properly, see the
 example programs samples/kobject/{kobject-example.c,kset-example.c},
-- 
GitLab


From a1dac767622c001c72373534e557bd3dcb61434b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 16:51:34 -0300
Subject: [PATCH 1584/1958] kprobes.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- comment the contents;
- add proper markups for titles;
- mark literal blocks as such;
- use :Author: for authorship;
- use the right markups for footnotes;
- escape some literals that would otherwise cause problems;
- fix identation and add blank lines where needed.

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/kprobes.txt | 474 ++++++++++++++++++++++----------------
 1 file changed, 279 insertions(+), 195 deletions(-)

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 1f6d45abfe425..bb5ff6d04bac2 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -1,30 +1,36 @@
-Title	: Kernel Probes (Kprobes)
-Authors	: Jim Keniston <jkenisto@us.ibm.com>
-	: Prasanna S Panchamukhi <prasanna.panchamukhi@gmail.com>
-	: Masami Hiramatsu <mhiramat@redhat.com>
-
-CONTENTS
-
-1. Concepts: Kprobes, Jprobes, Return Probes
-2. Architectures Supported
-3. Configuring Kprobes
-4. API Reference
-5. Kprobes Features and Limitations
-6. Probe Overhead
-7. TODO
-8. Kprobes Example
-9. Jprobes Example
-10. Kretprobes Example
-Appendix A: The kprobes debugfs interface
-Appendix B: The kprobes sysctl interface
-
-1. Concepts: Kprobes, Jprobes, Return Probes
+=======================
+Kernel Probes (Kprobes)
+=======================
+
+:Author: Jim Keniston <jkenisto@us.ibm.com>
+:Author: Prasanna S Panchamukhi <prasanna.panchamukhi@gmail.com>
+:Author: Masami Hiramatsu <mhiramat@redhat.com>
+
+.. CONTENTS
+
+  1. Concepts: Kprobes, Jprobes, Return Probes
+  2. Architectures Supported
+  3. Configuring Kprobes
+  4. API Reference
+  5. Kprobes Features and Limitations
+  6. Probe Overhead
+  7. TODO
+  8. Kprobes Example
+  9. Jprobes Example
+  10. Kretprobes Example
+  Appendix A: The kprobes debugfs interface
+  Appendix B: The kprobes sysctl interface
+
+Concepts: Kprobes, Jprobes, Return Probes
+=========================================
 
 Kprobes enables you to dynamically break into any kernel routine and
 collect debugging and performance information non-disruptively. You
-can trap at almost any kernel code address(*), specifying a handler
+can trap at almost any kernel code address [1]_, specifying a handler
 routine to be invoked when the breakpoint is hit.
-(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist)
+
+.. [1] some parts of the kernel code can not be trapped, see
+       :ref:`kprobes_blacklist`)
 
 There are currently three types of probes: kprobes, jprobes, and
 kretprobes (also called return probes).  A kprobe can be inserted
@@ -40,8 +46,8 @@ registration function such as register_kprobe() specifies where
 the probe is to be inserted and what handler is to be called when
 the probe is hit.
 
-There are also register_/unregister_*probes() functions for batch
-registration/unregistration of a group of *probes. These functions
+There are also ``register_/unregister_*probes()`` functions for batch
+registration/unregistration of a group of ``*probes``. These functions
 can speed up unregistration process when you have to unregister
 a lot of probes at once.
 
@@ -51,9 +57,10 @@ things that you'll need to know in order to make the best use of
 Kprobes -- e.g., the difference between a pre_handler and
 a post_handler, and how to use the maxactive and nmissed fields of
 a kretprobe.  But if you're in a hurry to start using Kprobes, you
-can skip ahead to section 2.
+can skip ahead to :ref:`kprobes_archs_supported`.
 
-1.1 How Does a Kprobe Work?
+How Does a Kprobe Work?
+-----------------------
 
 When a kprobe is registered, Kprobes makes a copy of the probed
 instruction and replaces the first byte(s) of the probed instruction
@@ -75,7 +82,8 @@ After the instruction is single-stepped, Kprobes executes the
 "post_handler," if any, that is associated with the kprobe.
 Execution then continues with the instruction following the probepoint.
 
-1.2 How Does a Jprobe Work?
+How Does a Jprobe Work?
+-----------------------
 
 A jprobe is implemented using a kprobe that is placed on a function's
 entry point.  It employs a simple mirroring principle to allow
@@ -113,9 +121,11 @@ more than eight function arguments, an argument of more than sixteen
 bytes, or more than 64 bytes of argument data, depending on
 architecture).
 
-1.3 Return Probes
+Return Probes
+-------------
 
-1.3.1 How Does a Return Probe Work?
+How Does a Return Probe Work?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 When you call register_kretprobe(), Kprobes establishes a kprobe at
 the entry to the function.  When the probed function is called and this
@@ -150,7 +160,8 @@ zero when the return probe is registered, and is incremented every
 time the probed function is entered but there is no kretprobe_instance
 object available for establishing the return probe.
 
-1.3.2 Kretprobe entry-handler
+Kretprobe entry-handler
+^^^^^^^^^^^^^^^^^^^^^^^
 
 Kretprobes also provides an optional user-specified handler which runs
 on function entry. This handler is specified by setting the entry_handler
@@ -174,7 +185,10 @@ In case probed function is entered but there is no kretprobe_instance
 object available, then in addition to incrementing the nmissed count,
 the user entry_handler invocation is also skipped.
 
-1.4 How Does Jump Optimization Work?
+.. _kprobes_jump_optimization:
+
+How Does Jump Optimization Work?
+--------------------------------
 
 If your kernel is built with CONFIG_OPTPROBES=y (currently this flag
 is automatically set 'y' on x86/x86-64, non-preemptive kernel) and
@@ -182,53 +196,60 @@ the "debug.kprobes_optimization" kernel parameter is set to 1 (see
 sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump
 instruction instead of a breakpoint instruction at each probepoint.
 
-1.4.1 Init a Kprobe
+Init a Kprobe
+^^^^^^^^^^^^^
 
 When a probe is registered, before attempting this optimization,
 Kprobes inserts an ordinary, breakpoint-based kprobe at the specified
 address. So, even if it's not possible to optimize this particular
 probepoint, there'll be a probe there.
 
-1.4.2 Safety Check
+Safety Check
+^^^^^^^^^^^^
 
 Before optimizing a probe, Kprobes performs the following safety checks:
 
 - Kprobes verifies that the region that will be replaced by the jump
-instruction (the "optimized region") lies entirely within one function.
-(A jump instruction is multiple bytes, and so may overlay multiple
-instructions.)
+  instruction (the "optimized region") lies entirely within one function.
+  (A jump instruction is multiple bytes, and so may overlay multiple
+  instructions.)
 
 - Kprobes analyzes the entire function and verifies that there is no
-jump into the optimized region.  Specifically:
+  jump into the optimized region.  Specifically:
+
   - the function contains no indirect jump;
   - the function contains no instruction that causes an exception (since
-  the fixup code triggered by the exception could jump back into the
-  optimized region -- Kprobes checks the exception tables to verify this);
-  and
+    the fixup code triggered by the exception could jump back into the
+    optimized region -- Kprobes checks the exception tables to verify this);
   - there is no near jump to the optimized region (other than to the first
-  byte).
+    byte).
 
 - For each instruction in the optimized region, Kprobes verifies that
-the instruction can be executed out of line.
+  the instruction can be executed out of line.
 
-1.4.3 Preparing Detour Buffer
+Preparing Detour Buffer
+^^^^^^^^^^^^^^^^^^^^^^^
 
 Next, Kprobes prepares a "detour" buffer, which contains the following
 instruction sequence:
+
 - code to push the CPU's registers (emulating a breakpoint trap)
 - a call to the trampoline code which calls user's probe handlers.
 - code to restore registers
 - the instructions from the optimized region
 - a jump back to the original execution path.
 
-1.4.4 Pre-optimization
+Pre-optimization
+^^^^^^^^^^^^^^^^
 
 After preparing the detour buffer, Kprobes verifies that none of the
 following situations exist:
+
 - The probe has either a break_handler (i.e., it's a jprobe) or a
-post_handler.
+  post_handler.
 - Other instructions in the optimized region are probed.
 - The probe is disabled.
+
 In any of the above cases, Kprobes won't start optimizing the probe.
 Since these are temporary situations, Kprobes tries to start
 optimizing it again if the situation is changed.
@@ -240,21 +261,23 @@ Kprobes returns control to the original instruction path by setting
 the CPU's instruction pointer to the copied code in the detour buffer
 -- thus at least avoiding the single-step.
 
-1.4.5 Optimization
+Optimization
+^^^^^^^^^^^^
 
 The Kprobe-optimizer doesn't insert the jump instruction immediately;
 rather, it calls synchronize_sched() for safety first, because it's
 possible for a CPU to be interrupted in the middle of executing the
-optimized region(*).  As you know, synchronize_sched() can ensure
+optimized region [3]_.  As you know, synchronize_sched() can ensure
 that all interruptions that were active when synchronize_sched()
 was called are done, but only if CONFIG_PREEMPT=n.  So, this version
-of kprobe optimization supports only kernels with CONFIG_PREEMPT=n.(**)
+of kprobe optimization supports only kernels with CONFIG_PREEMPT=n [4]_.
 
 After that, the Kprobe-optimizer calls stop_machine() to replace
 the optimized region with a jump instruction to the detour buffer,
 using text_poke_smp().
 
-1.4.6 Unoptimization
+Unoptimization
+^^^^^^^^^^^^^^
 
 When an optimized kprobe is unregistered, disabled, or blocked by
 another kprobe, it will be unoptimized.  If this happens before
@@ -263,15 +286,15 @@ optimized list.  If the optimization has been done, the jump is
 replaced with the original code (except for an int3 breakpoint in
 the first byte) by using text_poke_smp().
 
-(*)Please imagine that the 2nd instruction is interrupted and then
-the optimizer replaces the 2nd instruction with the jump *address*
-while the interrupt handler is running. When the interrupt
-returns to original address, there is no valid instruction,
-and it causes an unexpected result.
+.. [3] Please imagine that the 2nd instruction is interrupted and then
+   the optimizer replaces the 2nd instruction with the jump *address*
+   while the interrupt handler is running. When the interrupt
+   returns to original address, there is no valid instruction,
+   and it causes an unexpected result.
 
-(**)This optimization-safety checking may be replaced with the
-stop-machine method that ksplice uses for supporting a CONFIG_PREEMPT=y
-kernel.
+.. [4] This optimization-safety checking may be replaced with the
+   stop-machine method that ksplice uses for supporting a CONFIG_PREEMPT=y
+   kernel.
 
 NOTE for geeks:
 The jump optimization changes the kprobe's pre_handler behavior.
@@ -280,11 +303,17 @@ path by changing regs->ip and returning 1.  However, when the probe
 is optimized, that modification is ignored.  Thus, if you want to
 tweak the kernel's execution path, you need to suppress optimization,
 using one of the following techniques:
+
 - Specify an empty function for the kprobe's post_handler or break_handler.
- or
+
+or
+
 - Execute 'sysctl -w debug.kprobes_optimization=n'
 
-1.5 Blacklist
+.. _kprobes_blacklist:
+
+Blacklist
+---------
 
 Kprobes can probe most of the kernel except itself. This means
 that there are some functions where kprobes cannot probe. Probing
@@ -297,7 +326,10 @@ to specify a blacklisted function.
 Kprobes checks the given probe address against the blacklist and
 rejects registering it, if the given address is in the blacklist.
 
-2. Architectures Supported
+.. _kprobes_archs_supported:
+
+Architectures Supported
+=======================
 
 Kprobes, jprobes, and return probes are implemented on the following
 architectures:
@@ -312,7 +344,8 @@ architectures:
 - mips
 - s390
 
-3. Configuring Kprobes
+Configuring Kprobes
+===================
 
 When configuring the kernel using make menuconfig/xconfig/oldconfig,
 ensure that CONFIG_KPROBES is set to "y". Under "General setup", look
@@ -331,7 +364,8 @@ it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
 so you can use "objdump -d -l vmlinux" to see the source-to-object
 code mapping.
 
-4. API Reference
+API Reference
+=============
 
 The Kprobes API includes a "register" function and an "unregister"
 function for each type of probe. The API also includes "register_*probes"
@@ -340,10 +374,13 @@ Here are terse, mini-man-page specifications for these functions and
 the associated probe handlers that you'll write. See the files in the
 samples/kprobes/ sub-directory for examples.
 
-4.1 register_kprobe
+register_kprobe
+---------------
 
-#include <linux/kprobes.h>
-int register_kprobe(struct kprobe *kp);
+::
+
+	#include <linux/kprobes.h>
+	int register_kprobe(struct kprobe *kp);
 
 Sets a breakpoint at the address kp->addr.  When the breakpoint is
 hit, Kprobes calls kp->pre_handler.  After the probed instruction
@@ -354,61 +391,68 @@ kp->fault_handler.  Any or all handlers can be NULL. If kp->flags
 is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled,
 so, its handlers aren't hit until calling enable_kprobe(kp).
 
-NOTE:
-1. With the introduction of the "symbol_name" field to struct kprobe,
-the probepoint address resolution will now be taken care of by the kernel.
-The following will now work:
+.. note::
+
+   1. With the introduction of the "symbol_name" field to struct kprobe,
+      the probepoint address resolution will now be taken care of by the kernel.
+      The following will now work::
 
 	kp.symbol_name = "symbol_name";
 
-(64-bit powerpc intricacies such as function descriptors are handled
-transparently)
+      (64-bit powerpc intricacies such as function descriptors are handled
+      transparently)
 
-2. Use the "offset" field of struct kprobe if the offset into the symbol
-to install a probepoint is known. This field is used to calculate the
-probepoint.
+   2. Use the "offset" field of struct kprobe if the offset into the symbol
+      to install a probepoint is known. This field is used to calculate the
+      probepoint.
 
-3. Specify either the kprobe "symbol_name" OR the "addr". If both are
-specified, kprobe registration will fail with -EINVAL.
+   3. Specify either the kprobe "symbol_name" OR the "addr". If both are
+      specified, kprobe registration will fail with -EINVAL.
 
-4. With CISC architectures (such as i386 and x86_64), the kprobes code
-does not validate if the kprobe.addr is at an instruction boundary.
-Use "offset" with caution.
+   4. With CISC architectures (such as i386 and x86_64), the kprobes code
+      does not validate if the kprobe.addr is at an instruction boundary.
+      Use "offset" with caution.
 
 register_kprobe() returns 0 on success, or a negative errno otherwise.
 
-User's pre-handler (kp->pre_handler):
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-int pre_handler(struct kprobe *p, struct pt_regs *regs);
+User's pre-handler (kp->pre_handler)::
+
+	#include <linux/kprobes.h>
+	#include <linux/ptrace.h>
+	int pre_handler(struct kprobe *p, struct pt_regs *regs);
 
 Called with p pointing to the kprobe associated with the breakpoint,
 and regs pointing to the struct containing the registers saved when
 the breakpoint was hit.  Return 0 here unless you're a Kprobes geek.
 
-User's post-handler (kp->post_handler):
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-void post_handler(struct kprobe *p, struct pt_regs *regs,
-	unsigned long flags);
+User's post-handler (kp->post_handler)::
+
+	#include <linux/kprobes.h>
+	#include <linux/ptrace.h>
+	void post_handler(struct kprobe *p, struct pt_regs *regs,
+			  unsigned long flags);
 
 p and regs are as described for the pre_handler.  flags always seems
 to be zero.
 
-User's fault-handler (kp->fault_handler):
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-int fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr);
+User's fault-handler (kp->fault_handler)::
+
+	#include <linux/kprobes.h>
+	#include <linux/ptrace.h>
+	int fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr);
 
 p and regs are as described for the pre_handler.  trapnr is the
 architecture-specific trap number associated with the fault (e.g.,
 on i386, 13 for a general protection fault or 14 for a page fault).
 Returns 1 if it successfully handled the exception.
 
-4.2 register_jprobe
+register_jprobe
+---------------
+
+::
 
-#include <linux/kprobes.h>
-int register_jprobe(struct jprobe *jp)
+	#include <linux/kprobes.h>
+	int register_jprobe(struct jprobe *jp)
 
 Sets a breakpoint at the address jp->kp.addr, which must be the address
 of the first instruction of a function.  When the breakpoint is hit,
@@ -423,10 +467,13 @@ declaration must match.
 
 register_jprobe() returns 0 on success, or a negative errno otherwise.
 
-4.3 register_kretprobe
+register_kretprobe
+------------------
 
-#include <linux/kprobes.h>
-int register_kretprobe(struct kretprobe *rp);
+::
+
+	#include <linux/kprobes.h>
+	int register_kretprobe(struct kretprobe *rp);
 
 Establishes a return probe for the function whose address is
 rp->kp.addr.  When that function returns, Kprobes calls rp->handler.
@@ -436,14 +483,17 @@ register_kretprobe(); see "How Does a Return Probe Work?" for details.
 register_kretprobe() returns 0 on success, or a negative errno
 otherwise.
 
-User's return-probe handler (rp->handler):
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-int kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs);
+User's return-probe handler (rp->handler)::
+
+	#include <linux/kprobes.h>
+	#include <linux/ptrace.h>
+	int kretprobe_handler(struct kretprobe_instance *ri,
+			      struct pt_regs *regs);
 
 regs is as described for kprobe.pre_handler.  ri points to the
 kretprobe_instance object, of which the following fields may be
 of interest:
+
 - ret_addr: the return address
 - rp: points to the corresponding kretprobe object
 - task: points to the corresponding task struct
@@ -456,74 +506,94 @@ the architecture's ABI.
 
 The handler's return value is currently ignored.
 
-4.4 unregister_*probe
+unregister_*probe
+------------------
 
-#include <linux/kprobes.h>
-void unregister_kprobe(struct kprobe *kp);
-void unregister_jprobe(struct jprobe *jp);
-void unregister_kretprobe(struct kretprobe *rp);
+::
+
+	#include <linux/kprobes.h>
+	void unregister_kprobe(struct kprobe *kp);
+	void unregister_jprobe(struct jprobe *jp);
+	void unregister_kretprobe(struct kretprobe *rp);
 
 Removes the specified probe.  The unregister function can be called
 at any time after the probe has been registered.
 
-NOTE:
-If the functions find an incorrect probe (ex. an unregistered probe),
-they clear the addr field of the probe.
+.. note::
+
+   If the functions find an incorrect probe (ex. an unregistered probe),
+   they clear the addr field of the probe.
 
-4.5 register_*probes
+register_*probes
+----------------
 
-#include <linux/kprobes.h>
-int register_kprobes(struct kprobe **kps, int num);
-int register_kretprobes(struct kretprobe **rps, int num);
-int register_jprobes(struct jprobe **jps, int num);
+::
+
+	#include <linux/kprobes.h>
+	int register_kprobes(struct kprobe **kps, int num);
+	int register_kretprobes(struct kretprobe **rps, int num);
+	int register_jprobes(struct jprobe **jps, int num);
 
 Registers each of the num probes in the specified array.  If any
 error occurs during registration, all probes in the array, up to
 the bad probe, are safely unregistered before the register_*probes
 function returns.
-- kps/rps/jps: an array of pointers to *probe data structures
+
+- kps/rps/jps: an array of pointers to ``*probe`` data structures
 - num: the number of the array entries.
 
-NOTE:
-You have to allocate(or define) an array of pointers and set all
-of the array entries before using these functions.
+.. note::
+
+   You have to allocate(or define) an array of pointers and set all
+   of the array entries before using these functions.
+
+unregister_*probes
+------------------
 
-4.6 unregister_*probes
+::
 
-#include <linux/kprobes.h>
-void unregister_kprobes(struct kprobe **kps, int num);
-void unregister_kretprobes(struct kretprobe **rps, int num);
-void unregister_jprobes(struct jprobe **jps, int num);
+	#include <linux/kprobes.h>
+	void unregister_kprobes(struct kprobe **kps, int num);
+	void unregister_kretprobes(struct kretprobe **rps, int num);
+	void unregister_jprobes(struct jprobe **jps, int num);
 
 Removes each of the num probes in the specified array at once.
 
-NOTE:
-If the functions find some incorrect probes (ex. unregistered
-probes) in the specified array, they clear the addr field of those
-incorrect probes. However, other probes in the array are
-unregistered correctly.
+.. note::
 
-4.7 disable_*probe
+   If the functions find some incorrect probes (ex. unregistered
+   probes) in the specified array, they clear the addr field of those
+   incorrect probes. However, other probes in the array are
+   unregistered correctly.
 
-#include <linux/kprobes.h>
-int disable_kprobe(struct kprobe *kp);
-int disable_kretprobe(struct kretprobe *rp);
-int disable_jprobe(struct jprobe *jp);
+disable_*probe
+--------------
 
-Temporarily disables the specified *probe. You can enable it again by using
+::
+
+	#include <linux/kprobes.h>
+	int disable_kprobe(struct kprobe *kp);
+	int disable_kretprobe(struct kretprobe *rp);
+	int disable_jprobe(struct jprobe *jp);
+
+Temporarily disables the specified ``*probe``. You can enable it again by using
 enable_*probe(). You must specify the probe which has been registered.
 
-4.8 enable_*probe
+enable_*probe
+-------------
 
-#include <linux/kprobes.h>
-int enable_kprobe(struct kprobe *kp);
-int enable_kretprobe(struct kretprobe *rp);
-int enable_jprobe(struct jprobe *jp);
+::
 
-Enables *probe which has been disabled by disable_*probe(). You must specify
+	#include <linux/kprobes.h>
+	int enable_kprobe(struct kprobe *kp);
+	int enable_kretprobe(struct kretprobe *rp);
+	int enable_jprobe(struct jprobe *jp);
+
+Enables ``*probe`` which has been disabled by disable_*probe(). You must specify
 the probe which has been registered.
 
-5. Kprobes Features and Limitations
+Kprobes Features and Limitations
+================================
 
 Kprobes allows multiple probes at the same address.  Currently,
 however, there cannot be multiple jprobes on the same function at
@@ -538,7 +608,7 @@ are discussed in this section.
 
 The register_*probe functions will return -EINVAL if you attempt
 to install a probe in the code that implements Kprobes (mostly
-kernel/kprobes.c and arch/*/kernel/kprobes.c, but also functions such
+kernel/kprobes.c and ``arch/*/kernel/kprobes.c``, but also functions such
 as do_page_fault and notifier_call_chain).
 
 If you install a probe in an inline-able function, Kprobes makes
@@ -602,19 +672,21 @@ explain it, we introduce some terminology. Imagine a 3-instruction
 sequence consisting of a two 2-byte instructions and one 3-byte
 instruction.
 
-        IA
-         |
-[-2][-1][0][1][2][3][4][5][6][7]
-        [ins1][ins2][  ins3 ]
-	[<-     DCR       ->]
-	   [<- JTPR ->]
+::
+
+		IA
+		|
+	[-2][-1][0][1][2][3][4][5][6][7]
+		[ins1][ins2][  ins3 ]
+		[<-     DCR       ->]
+		[<- JTPR ->]
 
-ins1: 1st Instruction
-ins2: 2nd Instruction
-ins3: 3rd Instruction
-IA:  Insertion Address
-JTPR: Jump Target Prohibition Region
-DCR: Detoured Code Region
+	ins1: 1st Instruction
+	ins2: 2nd Instruction
+	ins3: 3rd Instruction
+	IA:  Insertion Address
+	JTPR: Jump Target Prohibition Region
+	DCR: Detoured Code Region
 
 The instructions in DCR are copied to the out-of-line buffer
 of the kprobe, because the bytes in DCR are replaced by
@@ -628,7 +700,8 @@ d) DCR must not straddle the border between functions.
 Anyway, these limitations are checked by the in-kernel instruction
 decoder, so you don't need to worry about that.
 
-6. Probe Overhead
+Probe Overhead
+==============
 
 On a typical CPU in use in 2005, a kprobe hit takes 0.5 to 1.0
 microseconds to process.  Specifically, a benchmark that hits the same
@@ -638,70 +711,80 @@ return-probe hit typically takes 50-75% longer than a kprobe hit.
 When you have a return probe set on a function, adding a kprobe at
 the entry to that function adds essentially no overhead.
 
-Here are sample overhead figures (in usec) for different architectures.
-k = kprobe; j = jprobe; r = return probe; kr = kprobe + return probe
-on same function; jr = jprobe + return probe on same function
+Here are sample overhead figures (in usec) for different architectures::
 
-i386: Intel Pentium M, 1495 MHz, 2957.31 bogomips
-k = 0.57 usec; j = 1.00; r = 0.92; kr = 0.99; jr = 1.40
+  k = kprobe; j = jprobe; r = return probe; kr = kprobe + return probe
+  on same function; jr = jprobe + return probe on same function::
 
-x86_64: AMD Opteron 246, 1994 MHz, 3971.48 bogomips
-k = 0.49 usec; j = 0.76; r = 0.80; kr = 0.82; jr = 1.07
+  i386: Intel Pentium M, 1495 MHz, 2957.31 bogomips
+  k = 0.57 usec; j = 1.00; r = 0.92; kr = 0.99; jr = 1.40
 
-ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU)
-k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99
+  x86_64: AMD Opteron 246, 1994 MHz, 3971.48 bogomips
+  k = 0.49 usec; j = 0.76; r = 0.80; kr = 0.82; jr = 1.07
 
-6.1 Optimized Probe Overhead
+  ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU)
+  k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99
+
+Optimized Probe Overhead
+------------------------
 
 Typically, an optimized kprobe hit takes 0.07 to 0.1 microseconds to
-process. Here are sample overhead figures (in usec) for x86 architectures.
-k = unoptimized kprobe, b = boosted (single-step skipped), o = optimized kprobe,
-r = unoptimized kretprobe, rb = boosted kretprobe, ro = optimized kretprobe.
+process. Here are sample overhead figures (in usec) for x86 architectures::
+
+  k = unoptimized kprobe, b = boosted (single-step skipped), o = optimized kprobe,
+  r = unoptimized kretprobe, rb = boosted kretprobe, ro = optimized kretprobe.
 
-i386: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
-k = 0.80 usec; b = 0.33; o = 0.05; r = 1.10; rb = 0.61; ro = 0.33
+  i386: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
+  k = 0.80 usec; b = 0.33; o = 0.05; r = 1.10; rb = 0.61; ro = 0.33
 
-x86-64: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
-k = 0.99 usec; b = 0.43; o = 0.06; r = 1.24; rb = 0.68; ro = 0.30
+  x86-64: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
+  k = 0.99 usec; b = 0.43; o = 0.06; r = 1.24; rb = 0.68; ro = 0.30
 
-7. TODO
+TODO
+====
 
 a. SystemTap (http://sourceware.org/systemtap): Provides a simplified
-programming interface for probe-based instrumentation.  Try it out.
+   programming interface for probe-based instrumentation.  Try it out.
 b. Kernel return probes for sparc64.
 c. Support for other architectures.
 d. User-space probes.
 e. Watchpoint probes (which fire on data references).
 
-8. Kprobes Example
+Kprobes Example
+===============
 
 See samples/kprobes/kprobe_example.c
 
-9. Jprobes Example
+Jprobes Example
+===============
 
 See samples/kprobes/jprobe_example.c
 
-10. Kretprobes Example
+Kretprobes Example
+==================
 
 See samples/kprobes/kretprobe_example.c
 
 For additional information on Kprobes, refer to the following URLs:
-http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
-http://www.redhat.com/magazine/005mar05/features/kprobes/
-http://www-users.cs.umn.edu/~boutcher/kprobes/
-http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
 
+- http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
+- http://www.redhat.com/magazine/005mar05/features/kprobes/
+- http://www-users.cs.umn.edu/~boutcher/kprobes/
+- http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
+
+
+The kprobes debugfs interface
+=============================
 
-Appendix A: The kprobes debugfs interface
 
 With recent kernels (> 2.6.20) the list of registered kprobes is visible
 under the /sys/kernel/debug/kprobes/ directory (assuming debugfs is mounted at //sys/kernel/debug).
 
-/sys/kernel/debug/kprobes/list: Lists all registered probes on the system
+/sys/kernel/debug/kprobes/list: Lists all registered probes on the system::
 
-c015d71a  k  vfs_read+0x0
-c011a316  j  do_fork+0x0
-c03dedc5  r  tcp_v4_rcv+0x0
+	c015d71a  k  vfs_read+0x0
+	c011a316  j  do_fork+0x0
+	c03dedc5  r  tcp_v4_rcv+0x0
 
 The first column provides the kernel address where the probe is inserted.
 The second column identifies the type of probe (k - kprobe, r - kretprobe
@@ -725,17 +808,18 @@ change each probe's disabling state. This means that disabled kprobes (marked
 [DISABLED]) will be not enabled if you turn ON all kprobes by this knob.
 
 
-Appendix B: The kprobes sysctl interface
+The kprobes sysctl interface
+============================
 
 /proc/sys/debug/kprobes-optimization: Turn kprobes optimization ON/OFF.
 
 When CONFIG_OPTPROBES=y, this sysctl interface appears and it provides
 a knob to globally and forcibly turn jump optimization (see section
-1.4) ON or OFF. By default, jump optimization is allowed (ON).
-If you echo "0" to this file or set "debug.kprobes_optimization" to
-0 via sysctl, all optimized probes will be unoptimized, and any new
-probes registered after that will not be optimized.  Note that this
-knob *changes* the optimized state. This means that optimized probes
-(marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be
+:ref:`kprobes_jump_optimization`) ON or OFF. By default, jump optimization
+is allowed (ON). If you echo "0" to this file or set
+"debug.kprobes_optimization" to 0 via sysctl, all optimized probes will be
+unoptimized, and any new probes registered after that will not be optimized.
+ Note that this knob *changes* the optimized state. This means that optimized
+probes (marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be
 removed). If the knob is turned on, they will be optimized again.
 
-- 
GitLab


From d6ac1c7e2fa3bfe20ac078cdc49104babf015ca2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 17:13:21 -0300
Subject: [PATCH 1585/1958] kref.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- add a title for the document and section titles;
- move authorship information to the beginning and use
  :Author:
- mark literal blocks as such and ident them if needed.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/kref.txt | 295 ++++++++++++++++++++++-------------------
 1 file changed, 155 insertions(+), 140 deletions(-)

diff --git a/Documentation/kref.txt b/Documentation/kref.txt
index d26a27ca964d4..3af384156d7e0 100644
--- a/Documentation/kref.txt
+++ b/Documentation/kref.txt
@@ -1,24 +1,42 @@
+===================================================
+Adding reference counters (krefs) to kernel objects
+===================================================
+
+:Author: Corey Minyard <minyard@acm.org>
+:Author: Thomas Hellstrom <thellstrom@vmware.com>
+
+A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
+presentation on krefs, which can be found at:
+
+  - http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
+  - http://www.kroah.com/linux/talks/ols_2004_kref_talk/
+
+Introduction
+============
 
 krefs allow you to add reference counters to your objects.  If you
 have objects that are used in multiple places and passed around, and
 you don't have refcounts, your code is almost certainly broken.  If
 you want refcounts, krefs are the way to go.
 
-To use a kref, add one to your data structures like:
+To use a kref, add one to your data structures like::
 
-struct my_data
-{
+    struct my_data
+    {
 	.
 	.
 	struct kref refcount;
 	.
 	.
-};
+    };
 
 The kref can occur anywhere within the data structure.
 
+Initialization
+==============
+
 You must initialize the kref after you allocate it.  To do this, call
-kref_init as so:
+kref_init as so::
 
      struct my_data *data;
 
@@ -29,18 +47,25 @@ kref_init as so:
 
 This sets the refcount in the kref to 1.
 
+Kref rules
+==========
+
 Once you have an initialized kref, you must follow the following
 rules:
 
 1) If you make a non-temporary copy of a pointer, especially if
    it can be passed to another thread of execution, you must
-   increment the refcount with kref_get() before passing it off:
+   increment the refcount with kref_get() before passing it off::
+
        kref_get(&data->refcount);
+
    If you already have a valid pointer to a kref-ed structure (the
    refcount cannot go to zero) you may do this without a lock.
 
-2) When you are done with a pointer, you must call kref_put():
+2) When you are done with a pointer, you must call kref_put()::
+
        kref_put(&data->refcount, data_release);
+
    If this is the last reference to the pointer, the release
    routine will be called.  If the code never tries to get
    a valid pointer to a kref-ed structure without already
@@ -53,25 +78,25 @@ rules:
    structure must remain valid during the kref_get().
 
 For example, if you allocate some data and then pass it to another
-thread to process:
+thread to process::
 
-void data_release(struct kref *ref)
-{
+    void data_release(struct kref *ref)
+    {
 	struct my_data *data = container_of(ref, struct my_data, refcount);
 	kfree(data);
-}
+    }
 
-void more_data_handling(void *cb_data)
-{
+    void more_data_handling(void *cb_data)
+    {
 	struct my_data *data = cb_data;
 	.
 	. do stuff with data here
 	.
 	kref_put(&data->refcount, data_release);
-}
+    }
 
-int my_data_handler(void)
-{
+    int my_data_handler(void)
+    {
 	int rv = 0;
 	struct my_data *data;
 	struct task_struct *task;
@@ -91,10 +116,10 @@ int my_data_handler(void)
 	.
 	. do stuff with data here
 	.
- out:
+    out:
 	kref_put(&data->refcount, data_release);
 	return rv;
-}
+    }
 
 This way, it doesn't matter what order the two threads handle the
 data, the kref_put() handles knowing when the data is not referenced
@@ -104,7 +129,7 @@ put needs no lock because nothing tries to get the data without
 already holding a pointer.
 
 Note that the "before" in rule 1 is very important.  You should never
-do something like:
+do something like::
 
 	task = kthread_run(more_data_handling, data, "more_data_handling");
 	if (task == ERR_PTR(-ENOMEM)) {
@@ -124,14 +149,14 @@ bad style.  Don't do it.
 There are some situations where you can optimize the gets and puts.
 For instance, if you are done with an object and enqueuing it for
 something else or passing it off to something else, there is no reason
-to do a get then a put:
+to do a get then a put::
 
 	/* Silly extra get and put */
 	kref_get(&obj->ref);
 	enqueue(obj);
 	kref_put(&obj->ref, obj_cleanup);
 
-Just do the enqueue.  A comment about this is always welcome:
+Just do the enqueue.  A comment about this is always welcome::
 
 	enqueue(obj);
 	/* We are done with obj, so we pass our refcount off
@@ -142,109 +167,99 @@ instance, you have a list of items that are each kref-ed, and you wish
 to get the first one.  You can't just pull the first item off the list
 and kref_get() it.  That violates rule 3 because you are not already
 holding a valid pointer.  You must add a mutex (or some other lock).
-For instance:
-
-static DEFINE_MUTEX(mutex);
-static LIST_HEAD(q);
-struct my_data
-{
-	struct kref      refcount;
-	struct list_head link;
-};
-
-static struct my_data *get_entry()
-{
-	struct my_data *entry = NULL;
-	mutex_lock(&mutex);
-	if (!list_empty(&q)) {
-		entry = container_of(q.next, struct my_data, link);
-		kref_get(&entry->refcount);
+For instance::
+
+	static DEFINE_MUTEX(mutex);
+	static LIST_HEAD(q);
+	struct my_data
+	{
+		struct kref      refcount;
+		struct list_head link;
+	};
+
+	static struct my_data *get_entry()
+	{
+		struct my_data *entry = NULL;
+		mutex_lock(&mutex);
+		if (!list_empty(&q)) {
+			entry = container_of(q.next, struct my_data, link);
+			kref_get(&entry->refcount);
+		}
+		mutex_unlock(&mutex);
+		return entry;
 	}
-	mutex_unlock(&mutex);
-	return entry;
-}
 
-static void release_entry(struct kref *ref)
-{
-	struct my_data *entry = container_of(ref, struct my_data, refcount);
+	static void release_entry(struct kref *ref)
+	{
+		struct my_data *entry = container_of(ref, struct my_data, refcount);
 
-	list_del(&entry->link);
-	kfree(entry);
-}
+		list_del(&entry->link);
+		kfree(entry);
+	}
 
-static void put_entry(struct my_data *entry)
-{
-	mutex_lock(&mutex);
-	kref_put(&entry->refcount, release_entry);
-	mutex_unlock(&mutex);
-}
+	static void put_entry(struct my_data *entry)
+	{
+		mutex_lock(&mutex);
+		kref_put(&entry->refcount, release_entry);
+		mutex_unlock(&mutex);
+	}
 
 The kref_put() return value is useful if you do not want to hold the
 lock during the whole release operation.  Say you didn't want to call
 kfree() with the lock held in the example above (since it is kind of
-pointless to do so).  You could use kref_put() as follows:
+pointless to do so).  You could use kref_put() as follows::
 
-static void release_entry(struct kref *ref)
-{
-	/* All work is done after the return from kref_put(). */
-}
+	static void release_entry(struct kref *ref)
+	{
+		/* All work is done after the return from kref_put(). */
+	}
 
-static void put_entry(struct my_data *entry)
-{
-	mutex_lock(&mutex);
-	if (kref_put(&entry->refcount, release_entry)) {
-		list_del(&entry->link);
-		mutex_unlock(&mutex);
-		kfree(entry);
-	} else
-		mutex_unlock(&mutex);
-}
+	static void put_entry(struct my_data *entry)
+	{
+		mutex_lock(&mutex);
+		if (kref_put(&entry->refcount, release_entry)) {
+			list_del(&entry->link);
+			mutex_unlock(&mutex);
+			kfree(entry);
+		} else
+			mutex_unlock(&mutex);
+	}
 
 This is really more useful if you have to call other routines as part
 of the free operations that could take a long time or might claim the
 same lock.  Note that doing everything in the release routine is still
 preferred as it is a little neater.
 
-
-Corey Minyard <minyard@acm.org>
-
-A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
-presentation on krefs, which can be found at:
-  http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
-and:
-  http://www.kroah.com/linux/talks/ols_2004_kref_talk/
-
-
 The above example could also be optimized using kref_get_unless_zero() in
-the following way:
-
-static struct my_data *get_entry()
-{
-	struct my_data *entry = NULL;
-	mutex_lock(&mutex);
-	if (!list_empty(&q)) {
-		entry = container_of(q.next, struct my_data, link);
-		if (!kref_get_unless_zero(&entry->refcount))
-			entry = NULL;
+the following way::
+
+	static struct my_data *get_entry()
+	{
+		struct my_data *entry = NULL;
+		mutex_lock(&mutex);
+		if (!list_empty(&q)) {
+			entry = container_of(q.next, struct my_data, link);
+			if (!kref_get_unless_zero(&entry->refcount))
+				entry = NULL;
+		}
+		mutex_unlock(&mutex);
+		return entry;
 	}
-	mutex_unlock(&mutex);
-	return entry;
-}
 
-static void release_entry(struct kref *ref)
-{
-	struct my_data *entry = container_of(ref, struct my_data, refcount);
+	static void release_entry(struct kref *ref)
+	{
+		struct my_data *entry = container_of(ref, struct my_data, refcount);
 
-	mutex_lock(&mutex);
-	list_del(&entry->link);
-	mutex_unlock(&mutex);
-	kfree(entry);
-}
+		mutex_lock(&mutex);
+		list_del(&entry->link);
+		mutex_unlock(&mutex);
+		kfree(entry);
+	}
 
-static void put_entry(struct my_data *entry)
-{
-	kref_put(&entry->refcount, release_entry);
-}
+	static void put_entry(struct my_data *entry)
+	{
+		kref_put(&entry->refcount, release_entry);
+	}
 
 Which is useful to remove the mutex lock around kref_put() in put_entry(), but
 it's important that kref_get_unless_zero is enclosed in the same critical
@@ -254,51 +269,51 @@ Note that it is illegal to use kref_get_unless_zero without checking its
 return value. If you are sure (by already having a valid pointer) that
 kref_get_unless_zero() will return true, then use kref_get() instead.
 
-The function kref_get_unless_zero also makes it possible to use rcu
-locking for lookups in the above example:
+Krefs and RCU
+=============
 
-struct my_data
-{
-	struct rcu_head rhead;
-	.
-	struct kref refcount;
-	.
-	.
-};
-
-static struct my_data *get_entry_rcu()
-{
-	struct my_data *entry = NULL;
-	rcu_read_lock();
-	if (!list_empty(&q)) {
-		entry = container_of(q.next, struct my_data, link);
-		if (!kref_get_unless_zero(&entry->refcount))
-			entry = NULL;
+The function kref_get_unless_zero also makes it possible to use rcu
+locking for lookups in the above example::
+
+	struct my_data
+	{
+		struct rcu_head rhead;
+		.
+		struct kref refcount;
+		.
+		.
+	};
+
+	static struct my_data *get_entry_rcu()
+	{
+		struct my_data *entry = NULL;
+		rcu_read_lock();
+		if (!list_empty(&q)) {
+			entry = container_of(q.next, struct my_data, link);
+			if (!kref_get_unless_zero(&entry->refcount))
+				entry = NULL;
+		}
+		rcu_read_unlock();
+		return entry;
 	}
-	rcu_read_unlock();
-	return entry;
-}
 
-static void release_entry_rcu(struct kref *ref)
-{
-	struct my_data *entry = container_of(ref, struct my_data, refcount);
+	static void release_entry_rcu(struct kref *ref)
+	{
+		struct my_data *entry = container_of(ref, struct my_data, refcount);
 
-	mutex_lock(&mutex);
-	list_del_rcu(&entry->link);
-	mutex_unlock(&mutex);
-	kfree_rcu(entry, rhead);
-}
+		mutex_lock(&mutex);
+		list_del_rcu(&entry->link);
+		mutex_unlock(&mutex);
+		kfree_rcu(entry, rhead);
+	}
 
-static void put_entry(struct my_data *entry)
-{
-	kref_put(&entry->refcount, release_entry_rcu);
-}
+	static void put_entry(struct my_data *entry)
+	{
+		kref_put(&entry->refcount, release_entry_rcu);
+	}
 
 But note that the struct kref member needs to remain in valid memory for a
 rcu grace period after release_entry_rcu was called. That can be accomplished
 by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
 before using kfree, but note that synchronize_rcu() may sleep for a
 substantial amount of time.
-
-
-Thomas Hellstrom <thellstrom@vmware.com>
-- 
GitLab


From 793c6382026ef66edd1799d3a82e7e46a9fec94a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 17:39:55 -0300
Subject: [PATCH 1586/1958] ldm.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Reformat its title;
- Use :Author: and :Last Updated: for authorship
- Use note markup;
- Reformat table to match ReST standard;
- Use bulleted lists where needed.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ldm.txt | 56 ++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/Documentation/ldm.txt b/Documentation/ldm.txt
index 4f80edd14d0a6..12c571368e73c 100644
--- a/Documentation/ldm.txt
+++ b/Documentation/ldm.txt
@@ -1,9 +1,9 @@
+==========================================
+LDM - Logical Disk Manager (Dynamic Disks)
+==========================================
 
-            LDM - Logical Disk Manager (Dynamic Disks)
-            ------------------------------------------
-
-Originally Written by FlatCap - Richard Russon <ldm@flatcap.org>.
-Last Updated by Anton Altaparmakov on 30 March 2007 for Windows Vista.
+:Author: Originally Written by FlatCap - Richard Russon <ldm@flatcap.org>.
+:Last Updated: Anton Altaparmakov on 30 March 2007 for Windows Vista.
 
 Overview
 --------
@@ -37,24 +37,36 @@ Example
 -------
 
 Below we have a 50MiB disk, divided into seven partitions.
-N.B.  The missing 1MiB at the end of the disk is where the LDM database is
-      stored.
-
-  Device | Offset Bytes  Sectors  MiB | Size   Bytes  Sectors  MiB
-  -------+----------------------------+---------------------------
-  hda    |            0        0    0 |     52428800   102400   50
-  hda1   |     51380224   100352   49 |      1048576     2048    1
-  hda2   |        16384       32    0 |      6979584    13632    6
-  hda3   |      6995968    13664    6 |     10485760    20480   10
-  hda4   |     17481728    34144   16 |      4194304     8192    4
-  hda5   |     21676032    42336   20 |      5242880    10240    5
-  hda6   |     26918912    52576   25 |     10485760    20480   10
-  hda7   |     37404672    73056   35 |     13959168    27264   13
+
+.. note::
+
+   The missing 1MiB at the end of the disk is where the LDM database is
+   stored.
+
++-------++--------------+---------+-----++--------------+---------+----+
+|Device || Offset Bytes | Sectors | MiB || Size   Bytes | Sectors | MiB|
++=======++==============+=========+=====++==============+=========+====+
+|hda    ||            0 |       0 |   0 ||     52428800 |  102400 |  50|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda1   ||     51380224 |  100352 |  49 ||      1048576 |    2048 |   1|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda2   ||        16384 |      32 |   0 ||      6979584 |   13632 |   6|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda3   ||      6995968 |   13664 |   6 ||     10485760 |   20480 |  10|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda4   ||     17481728 |   34144 |  16 ||      4194304 |    8192 |   4|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda5   ||     21676032 |   42336 |  20 ||      5242880 |   10240 |   5|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda6   ||     26918912 |   52576 |  25 ||     10485760 |   20480 |  10|
++-------++--------------+---------+-----++--------------+---------+----+
+|hda7   ||     37404672 |   73056 |  35 ||     13959168 |   27264 |  13|
++-------++--------------+---------+-----++--------------+---------+----+
 
 The LDM Database may not store the partitions in the order that they appear on
 disk, but the driver will sort them.
 
-When Linux boots, you will see something like:
+When Linux boots, you will see something like::
 
   hda: 102400 sectors w/32KiB Cache, CHS=50/64/32
   hda: [LDM] hda1 hda2 hda3 hda4 hda5 hda6 hda7
@@ -65,13 +77,13 @@ Compiling LDM Support
 
 To enable LDM, choose the following two options: 
 
-  "Advanced partition selection" CONFIG_PARTITION_ADVANCED
-  "Windows Logical Disk Manager (Dynamic Disk) support" CONFIG_LDM_PARTITION
+  - "Advanced partition selection" CONFIG_PARTITION_ADVANCED
+  - "Windows Logical Disk Manager (Dynamic Disk) support" CONFIG_LDM_PARTITION
 
 If you believe the driver isn't working as it should, you can enable the extra
 debugging code.  This will produce a LOT of output.  The option is:
 
-  "Windows LDM extra logging" CONFIG_LDM_DEBUG
+  - "Windows LDM extra logging" CONFIG_LDM_DEBUG
 
 N.B. The partition code cannot be compiled as a module.
 
-- 
GitLab


From c926d4d4a4c815bc43155c542a3113afb5832620 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 20:57:08 -0300
Subject: [PATCH 1587/1958] lockup-watchdogs.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx.

This file is almost at ReST format. Just one title needs
to be adjusted, in order to follow the standard.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/lockup-watchdogs.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
index c8b8378513d6e..290840c160af2 100644
--- a/Documentation/lockup-watchdogs.txt
+++ b/Documentation/lockup-watchdogs.txt
@@ -30,7 +30,8 @@ timeout is set through the confusingly named "kernel.panic" sysctl),
 to cause the system to reboot automatically after a specified amount
 of time.
 
-=== Implementation ===
+Implementation
+==============
 
 The soft and hard lockup detectors are built on top of the hrtimer and
 perf subsystems, respectively. A direct consequence of this is that,
-- 
GitLab


From 7b001bff4609b80ad626b429cc069b40a1d5fab3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 21:02:59 -0300
Subject: [PATCH 1588/1958] lzo.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add markups for section titles;
- mark literal blocks;
- use ".. important::" for an important note.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/lzo.txt | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
index 285c54f667791..6fa6a93d09497 100644
--- a/Documentation/lzo.txt
+++ b/Documentation/lzo.txt
@@ -1,8 +1,9 @@
-
+===========================================================
 LZO stream format as understood by Linux's LZO decompressor
 ===========================================================
 
 Introduction
+============
 
   This is not a specification. No specification seems to be publicly available
   for the LZO stream format. This document describes what input format the LZO
@@ -14,12 +15,13 @@ Introduction
   for future bug reports.
 
 Description
+===========
 
   The stream is composed of a series of instructions, operands, and data. The
   instructions consist in a few bits representing an opcode, and bits forming
   the operands for the instruction, whose size and position depend on the
   opcode and on the number of literals copied by previous instruction. The
-  operands are used to indicate :
+  operands are used to indicate:
 
     - a distance when copying data from the dictionary (past output buffer)
     - a length (number of bytes to copy from dictionary)
@@ -38,7 +40,7 @@ Description
   of bits in the operand. If the number of bits isn't enough to represent the
   length, up to 255 may be added in increments by consuming more bytes with a
   rate of at most 255 per extra byte (thus the compression ratio cannot exceed
-  around 255:1). The variable length encoding using #bits is always the same :
+  around 255:1). The variable length encoding using #bits is always the same::
 
        length = byte & ((1 << #bits) - 1)
        if (!length) {
@@ -67,15 +69,19 @@ Description
   instruction may encode this distance (0001HLLL), it takes one LE16 operand
   for the distance, thus requiring 3 bytes.
 
-  IMPORTANT NOTE : in the code some length checks are missing because certain
-  instructions are called under the assumption that a certain number of bytes
-  follow because it has already been guaranteed before parsing the instructions.
-  They just have to "refill" this credit if they consume extra bytes. This is
-  an implementation design choice independent on the algorithm or encoding.
+  .. important::
+
+     In the code some length checks are missing because certain instructions
+     are called under the assumption that a certain number of bytes follow
+     because it has already been guaranteed before parsing the instructions.
+     They just have to "refill" this credit if they consume extra bytes. This
+     is an implementation design choice independent on the algorithm or
+     encoding.
 
 Byte sequences
+==============
 
-  First byte encoding :
+  First byte encoding::
 
       0..17   : follow regular instruction encoding, see below. It is worth
                 noting that codes 16 and 17 will represent a block copy from
@@ -91,7 +97,7 @@ Byte sequences
                 state = 4 [ don't copy extra literals ]
                 skip byte
 
-  Instruction encoding :
+  Instruction encoding::
 
       0 0 0 0 X X X X  (0..15)
         Depends on the number of literals copied by the last instruction.
@@ -156,6 +162,7 @@ Byte sequences
            distance = (H << 3) + D + 1
 
 Authors
+=======
 
   This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
   analysis of the decompression code available in Linux 3.16-rc5. The code is
-- 
GitLab


From ad98211ba4d9b1169a419d2bc8a2e67c8f9c9ede Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 21:06:34 -0300
Subject: [PATCH 1589/1958] mailbox.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add markups for section titles;
- Use :Author: for authorship;
- Mark literal block as such and ident it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/mailbox.txt | 185 ++++++++++++++++++++------------------
 1 file changed, 97 insertions(+), 88 deletions(-)

diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
index 7ed371c852046..0ed95009cc307 100644
--- a/Documentation/mailbox.txt
+++ b/Documentation/mailbox.txt
@@ -1,7 +1,10 @@
-		The Common Mailbox Framework
-		Jassi Brar <jaswinder.singh@linaro.org>
+============================
+The Common Mailbox Framework
+============================
 
- This document aims to help developers write client and controller
+:Author: Jassi Brar <jaswinder.singh@linaro.org>
+
+This document aims to help developers write client and controller
 drivers for the API. But before we start, let us note that the
 client (especially) and controller drivers are likely going to be
 very platform specific because the remote firmware is likely to be
@@ -13,14 +16,17 @@ similar copies of code written for each platform. Having said that,
 nothing prevents the remote f/w to also be Linux based and use the
 same api there. However none of that helps us locally because we only
 ever deal at client's protocol level.
- Some of the choices made during implementation are the result of this
+
+Some of the choices made during implementation are the result of this
 peculiarity of this "common" framework.
 
 
-	Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+Controller Driver (See include/linux/mailbox_controller.h)
+==========================================================
+
 
- Allocate mbox_controller and the array of mbox_chan.
+Allocate mbox_controller and the array of mbox_chan.
 Populate mbox_chan_ops, except peek_data() all are mandatory.
 The controller driver might know a message has been consumed
 by the remote by getting an IRQ or polling some hardware flag
@@ -30,91 +36,94 @@ the controller driver should set via 'txdone_irq' or 'txdone_poll'
 or neither.
 
 
-	Part 2 - Client Driver (See include/linux/mailbox_client.h)
+Client Driver (See include/linux/mailbox_client.h)
+==================================================
 
- The client might want to operate in blocking mode (synchronously
+
+The client might want to operate in blocking mode (synchronously
 send a message through before returning) or non-blocking/async mode (submit
 a message and a callback function to the API and return immediately).
 
-
-struct demo_client {
-	struct mbox_client cl;
-	struct mbox_chan *mbox;
-	struct completion c;
-	bool async;
-	/* ... */
-};
-
-/*
- * This is the handler for data received from remote. The behaviour is purely
- * dependent upon the protocol. This is just an example.
- */
-static void message_from_remote(struct mbox_client *cl, void *mssg)
-{
-	struct demo_client *dc = container_of(cl, struct demo_client, cl);
-	if (dc->async) {
-		if (is_an_ack(mssg)) {
-			/* An ACK to our last sample sent */
-			return; /* Or do something else here */
-		} else { /* A new message from remote */
-			queue_req(mssg);
+::
+
+	struct demo_client {
+		struct mbox_client cl;
+		struct mbox_chan *mbox;
+		struct completion c;
+		bool async;
+		/* ... */
+	};
+
+	/*
+	* This is the handler for data received from remote. The behaviour is purely
+	* dependent upon the protocol. This is just an example.
+	*/
+	static void message_from_remote(struct mbox_client *cl, void *mssg)
+	{
+		struct demo_client *dc = container_of(cl, struct demo_client, cl);
+		if (dc->async) {
+			if (is_an_ack(mssg)) {
+				/* An ACK to our last sample sent */
+				return; /* Or do something else here */
+			} else { /* A new message from remote */
+				queue_req(mssg);
+			}
+		} else {
+			/* Remote f/w sends only ACK packets on this channel */
+			return;
 		}
-	} else {
-		/* Remote f/w sends only ACK packets on this channel */
-		return;
 	}
-}
-
-static void sample_sent(struct mbox_client *cl, void *mssg, int r)
-{
-	struct demo_client *dc = container_of(cl, struct demo_client, cl);
-	complete(&dc->c);
-}
-
-static void client_demo(struct platform_device *pdev)
-{
-	struct demo_client *dc_sync, *dc_async;
-	/* The controller already knows async_pkt and sync_pkt */
-	struct async_pkt ap;
-	struct sync_pkt sp;
-
-	dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
-	dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
-
-	/* Populate non-blocking mode client */
-	dc_async->cl.dev = &pdev->dev;
-	dc_async->cl.rx_callback = message_from_remote;
-	dc_async->cl.tx_done = sample_sent;
-	dc_async->cl.tx_block = false;
-	dc_async->cl.tx_tout = 0; /* doesn't matter here */
-	dc_async->cl.knows_txdone = false; /* depending upon protocol */
-	dc_async->async = true;
-	init_completion(&dc_async->c);
-
-	/* Populate blocking mode client */
-	dc_sync->cl.dev = &pdev->dev;
-	dc_sync->cl.rx_callback = message_from_remote;
-	dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
-	dc_sync->cl.tx_block = true;
-	dc_sync->cl.tx_tout = 500; /* by half a second */
-	dc_sync->cl.knows_txdone = false; /* depending upon protocol */
-	dc_sync->async = false;
-
-	/* ASync mailbox is listed second in 'mboxes' property */
-	dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
-	/* Populate data packet */
-	/* ap.xxx = 123; etc */
-	/* Send async message to remote */
-	mbox_send_message(dc_async->mbox, &ap);
-
-	/* Sync mailbox is listed first in 'mboxes' property */
-	dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
-	/* Populate data packet */
-	/* sp.abc = 123; etc */
-	/* Send message to remote in blocking mode */
-	mbox_send_message(dc_sync->mbox, &sp);
-	/* At this point 'sp' has been sent */
-
-	/* Now wait for async chan to be done */
-	wait_for_completion(&dc_async->c);
-}
+
+	static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+	{
+		struct demo_client *dc = container_of(cl, struct demo_client, cl);
+		complete(&dc->c);
+	}
+
+	static void client_demo(struct platform_device *pdev)
+	{
+		struct demo_client *dc_sync, *dc_async;
+		/* The controller already knows async_pkt and sync_pkt */
+		struct async_pkt ap;
+		struct sync_pkt sp;
+
+		dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+		dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+		/* Populate non-blocking mode client */
+		dc_async->cl.dev = &pdev->dev;
+		dc_async->cl.rx_callback = message_from_remote;
+		dc_async->cl.tx_done = sample_sent;
+		dc_async->cl.tx_block = false;
+		dc_async->cl.tx_tout = 0; /* doesn't matter here */
+		dc_async->cl.knows_txdone = false; /* depending upon protocol */
+		dc_async->async = true;
+		init_completion(&dc_async->c);
+
+		/* Populate blocking mode client */
+		dc_sync->cl.dev = &pdev->dev;
+		dc_sync->cl.rx_callback = message_from_remote;
+		dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+		dc_sync->cl.tx_block = true;
+		dc_sync->cl.tx_tout = 500; /* by half a second */
+		dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+		dc_sync->async = false;
+
+		/* ASync mailbox is listed second in 'mboxes' property */
+		dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+		/* Populate data packet */
+		/* ap.xxx = 123; etc */
+		/* Send async message to remote */
+		mbox_send_message(dc_async->mbox, &ap);
+
+		/* Sync mailbox is listed first in 'mboxes' property */
+		dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+		/* Populate data packet */
+		/* sp.abc = 123; etc */
+		/* Send message to remote in blocking mode */
+		mbox_send_message(dc_sync->mbox, &sp);
+		/* At this point 'sp' has been sent */
+
+		/* Now wait for async chan to be done */
+		wait_for_completion(&dc_async->c);
+	}
-- 
GitLab


From c18c1cce0c1a02a2cc197a4a4c80ac2db7492617 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 22:52:22 -0300
Subject: [PATCH 1590/1958] memory-hotplug.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- use :Created: and :Updated: for the timestamps;
- comment its internal index;
- adjust titles and use proper markup;
- Whitespace fixes;
- Use cross references where needed;
- Use bulleted lists where needed;
- mark literal blocks;
- Use the ReST notation for a table.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/memory-hotplug.txt | 355 +++++++++++++++++--------------
 1 file changed, 194 insertions(+), 161 deletions(-)

diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 5c628e19d6cd4..7f49ebf3ddb23 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -2,43 +2,48 @@
 Memory Hotplug
 ==============
 
-Created:					Jul 28 2007
-Add description of notifier of memory hotplug	Oct 11 2007
+:Created:							Jul 28 2007
+:Updated: Add description of notifier of memory hotplug:	Oct 11 2007
 
 This document is about memory hotplug including how-to-use and current status.
 Because Memory Hotplug is still under development, contents of this text will
 be changed often.
 
-1. Introduction
-  1.1 purpose of memory hotplug
-  1.2. Phases of memory hotplug
-  1.3. Unit of Memory online/offline operation
-2. Kernel Configuration
-3. sysfs files for memory hotplug
-4. Physical memory hot-add phase
-  4.1 Hardware(Firmware) Support
-  4.2 Notify memory hot-add event by hand
-5. Logical Memory hot-add phase
-  5.1. State of memory
-  5.2. How to online memory
-6. Logical memory remove
-  6.1 Memory offline and ZONE_MOVABLE
-  6.2. How to offline memory
-7. Physical memory remove
-8. Memory hotplug event notifier
-9. Future Work List
-
-Note(1): x86_64's has special implementation for memory hotplug.
-         This text does not describe it.
-Note(2): This text assumes that sysfs is mounted at /sys.
+.. CONTENTS
 
+  1. Introduction
+    1.1 purpose of memory hotplug
+    1.2. Phases of memory hotplug
+    1.3. Unit of Memory online/offline operation
+  2. Kernel Configuration
+  3. sysfs files for memory hotplug
+  4. Physical memory hot-add phase
+    4.1 Hardware(Firmware) Support
+    4.2 Notify memory hot-add event by hand
+  5. Logical Memory hot-add phase
+    5.1. State of memory
+    5.2. How to online memory
+  6. Logical memory remove
+    6.1 Memory offline and ZONE_MOVABLE
+    6.2. How to offline memory
+  7. Physical memory remove
+  8. Memory hotplug event notifier
+  9. Future Work List
 
----------------
-1. Introduction
----------------
 
-1.1 purpose of memory hotplug
-------------
+.. note::
+
+    (1) x86_64's has special implementation for memory hotplug.
+        This text does not describe it.
+    (2) This text assumes that sysfs is mounted at /sys.
+
+
+Introduction
+============
+
+purpose of memory hotplug
+-------------------------
+
 Memory Hotplug allows users to increase/decrease the amount of memory.
 Generally, there are two purposes.
 
@@ -53,9 +58,11 @@ hardware which supports memory power management.
 Linux memory hotplug is designed for both purpose.
 
 
-1.2. Phases of memory hotplug
----------------
-There are 2 phases in Memory Hotplug.
+Phases of memory hotplug
+------------------------
+
+There are 2 phases in Memory Hotplug:
+
   1) Physical Memory Hotplug phase
   2) Logical Memory Hotplug phase.
 
@@ -70,7 +77,7 @@ management tables, and makes sysfs files for new memory's operation.
 If firmware supports notification of connection of new memory to OS,
 this phase is triggered automatically. ACPI can notify this event. If not,
 "probe" operation by system administration is used instead.
-(see Section 4.).
+(see :ref:`memory_hotplug_physical_mem`).
 
 Logical Memory Hotplug phase is to change memory state into
 available/unavailable for users. Amount of memory from user's view is
@@ -83,11 +90,12 @@ Logical Memory Hotplug phase is triggered by write of sysfs file by system
 administrator. For the hot-add case, it must be executed after Physical Hotplug
 phase by hand.
 (However, if you writes udev's hotplug scripts for memory hotplug, these
- phases can be execute in seamless way.)
+phases can be execute in seamless way.)
+
 
+Unit of Memory online/offline operation
+---------------------------------------
 
-1.3. Unit of Memory online/offline operation
-------------
 Memory hotplug uses SPARSEMEM memory model which allows memory to be divided
 into chunks of the same size. These chunks are called "sections". The size of
 a memory section is architecture dependent. For example, power uses 16MiB, ia64
@@ -97,46 +105,50 @@ Memory sections are combined into chunks referred to as "memory blocks". The
 size of a memory block is architecture dependent and represents the logical
 unit upon which memory online/offline operations are to be performed. The
 default size of a memory block is the same as memory section size unless an
-architecture specifies otherwise. (see Section 3.)
+architecture specifies otherwise. (see :ref:`memory_hotplug_sysfs_files`.)
 
 To determine the size (in bytes) of a memory block please read this file:
 
 /sys/devices/system/memory/block_size_bytes
 
 
------------------------
-2. Kernel Configuration
------------------------
+Kernel Configuration
+====================
+
 To use memory hotplug feature, kernel must be compiled with following
 config options.
 
-- For all memory hotplug
-    Memory model -> Sparse Memory  (CONFIG_SPARSEMEM)
-    Allow for memory hot-add       (CONFIG_MEMORY_HOTPLUG)
+- For all memory hotplug:
+    - Memory model -> Sparse Memory  (CONFIG_SPARSEMEM)
+    - Allow for memory hot-add       (CONFIG_MEMORY_HOTPLUG)
 
-- To enable memory removal, the following are also necessary
-    Allow for memory hot remove    (CONFIG_MEMORY_HOTREMOVE)
-    Page Migration                 (CONFIG_MIGRATION)
+- To enable memory removal, the following are also necessary:
+    - Allow for memory hot remove    (CONFIG_MEMORY_HOTREMOVE)
+    - Page Migration                 (CONFIG_MIGRATION)
 
-- For ACPI memory hotplug, the following are also necessary
-    Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
-    This option can be kernel module.
+- For ACPI memory hotplug, the following are also necessary:
+    - Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
+    - This option can be kernel module.
 
 - As a related configuration, if your box has a feature of NUMA-node hotplug
   via ACPI, then this option is necessary too.
-    ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
-    (CONFIG_ACPI_CONTAINER).
-    This option can be kernel module too.
 
+    - ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
+      (CONFIG_ACPI_CONTAINER).
+
+     This option can be kernel module too.
+
+
+.. _memory_hotplug_sysfs_files:
+
+sysfs files for memory hotplug
+==============================
 
---------------------------------
-3 sysfs files for memory hotplug
---------------------------------
 All memory blocks have their device information in sysfs.  Each memory block
-is described under /sys/devices/system/memory as
+is described under /sys/devices/system/memory as:
 
-/sys/devices/system/memory/memoryXXX
-(XXX is the memory block id.)
+	/sys/devices/system/memory/memoryXXX
+	(XXX is the memory block id.)
 
 For the memory block covered by the sysfs directory.  It is expected that all
 memory sections in this range are present and no memory holes exist in the
@@ -145,43 +157,53 @@ the existence of one should not affect the hotplug capabilities of the memory
 block.
 
 For example, assume 1GiB memory block size. A device for a memory starting at
-0x100000000 is /sys/device/system/memory/memory4
-(0x100000000 / 1Gib = 4)
+0x100000000 is /sys/device/system/memory/memory4::
+
+	(0x100000000 / 1Gib = 4)
+
 This device covers address range [0x100000000 ... 0x140000000)
 
 Under each memory block, you can see 5 files:
 
-/sys/devices/system/memory/memoryXXX/phys_index
-/sys/devices/system/memory/memoryXXX/phys_device
-/sys/devices/system/memory/memoryXXX/state
-/sys/devices/system/memory/memoryXXX/removable
-/sys/devices/system/memory/memoryXXX/valid_zones
+- /sys/devices/system/memory/memoryXXX/phys_index
+- /sys/devices/system/memory/memoryXXX/phys_device
+- /sys/devices/system/memory/memoryXXX/state
+- /sys/devices/system/memory/memoryXXX/removable
+- /sys/devices/system/memory/memoryXXX/valid_zones
+
+=================== ============================================================
+``phys_index``      read-only and contains memory block id, same as XXX.
+``state``           read-write
+
+                    - at read:  contains online/offline state of memory.
+                    - at write: user can specify "online_kernel",
 
-'phys_index'      : read-only and contains memory block id, same as XXX.
-'state'           : read-write
-                    at read:  contains online/offline state of memory.
-                    at write: user can specify "online_kernel",
                     "online_movable", "online", "offline" command
                     which will be performed on all sections in the block.
-'phys_device'     : read-only: designed to show the name of physical memory
+``phys_device``     read-only: designed to show the name of physical memory
                     device.  This is not well implemented now.
-'removable'       : read-only: contains an integer value indicating
+``removable``       read-only: contains an integer value indicating
                     whether the memory block is removable or not
                     removable.  A value of 1 indicates that the memory
                     block is removable and a value of 0 indicates that
                     it is not removable. A memory block is removable only if
                     every section in the block is removable.
-'valid_zones'     : read-only: designed to show which zones this memory block
+``valid_zones``     read-only: designed to show which zones this memory block
 		    can be onlined to.
-		    The first column shows it's default zone.
+
+		    The first column shows it`s default zone.
+
 		    "memory6/valid_zones: Normal Movable" shows this memoryblock
 		    can be onlined to ZONE_NORMAL by default and to ZONE_MOVABLE
 		    by online_movable.
+
 		    "memory7/valid_zones: Movable Normal" shows this memoryblock
 		    can be onlined to ZONE_MOVABLE by default and to ZONE_NORMAL
 		    by online_kernel.
+=================== ============================================================
+
+.. note::
 
-NOTE:
   These directories/files appear after physical memory hotplug phase.
 
 If CONFIG_NUMA is enabled the memoryXXX/ directories can also be accessed
@@ -193,13 +215,14 @@ For example:
 A backlink will also be created:
 /sys/devices/system/memory/memory9/node0 -> ../../node/node0
 
+.. _memory_hotplug_physical_mem:
+
+Physical memory hot-add phase
+=============================
 
---------------------------------
-4. Physical memory hot-add phase
---------------------------------
+Hardware(Firmware) Support
+--------------------------
 
-4.1 Hardware(Firmware) Support
-------------
 On x86_64/ia64 platform, memory hotplug by ACPI is supported.
 
 In general, the firmware (ACPI) which supports memory hotplug defines
@@ -209,7 +232,8 @@ script. This will be done automatically.
 
 But scripts for memory hotplug are not contained in generic udev package(now).
 You may have to write it by yourself or online/offline memory by hand.
-Please see "How to online memory", "How to offline memory" in this text.
+Please see :ref:`memory_hotplug_how_to_online_memory` and
+:ref:`memory_hotplug_how_to_offline_memory`.
 
 If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
 "PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
@@ -217,8 +241,9 @@ calls hotplug code for all of objects which are defined in it.
 If memory device is found, memory hotplug code will be called.
 
 
-4.2 Notify memory hot-add event by hand
-------------
+Notify memory hot-add event by hand
+-----------------------------------
+
 On some architectures, the firmware may not notify the kernel of a memory
 hotplug event.  Therefore, the memory "probe" interface is supported to
 explicitly notify the kernel.  This interface depends on
@@ -229,45 +254,48 @@ notification.
 Probe interface is located at
 /sys/devices/system/memory/probe
 
-You can tell the physical address of new memory to the kernel by
+You can tell the physical address of new memory to the kernel by::
 
-% echo start_address_of_new_memory > /sys/devices/system/memory/probe
+	% echo start_address_of_new_memory > /sys/devices/system/memory/probe
 
 Then, [start_address_of_new_memory, start_address_of_new_memory +
 memory_block_size] memory range is hot-added. In this case, hotplug script is
 not called (in current implementation). You'll have to online memory by
-yourself.  Please see "How to online memory" in this text.
+yourself.  Please see :ref:`memory_hotplug_how_to_online_memory`.
 
 
-------------------------------
-5. Logical Memory hot-add phase
-------------------------------
+Logical Memory hot-add phase
+============================
 
-5.1. State of memory
-------------
-To see (online/offline) state of a memory block, read 'state' file.
+State of memory
+---------------
+
+To see (online/offline) state of a memory block, read 'state' file::
+
+	% cat /sys/device/system/memory/memoryXXX/state
 
-% cat /sys/device/system/memory/memoryXXX/state
 
+- If the memory block is online, you'll read "online".
+- If the memory block is offline, you'll read "offline".
 
-If the memory block is online, you'll read "online".
-If the memory block is offline, you'll read "offline".
 
+.. _memory_hotplug_how_to_online_memory:
+
+How to online memory
+--------------------
 
-5.2. How to online memory
-------------
 When the memory is hot-added, the kernel decides whether or not to "online"
-it according to the policy which can be read from "auto_online_blocks" file:
+it according to the policy which can be read from "auto_online_blocks" file::
 
-% cat /sys/devices/system/memory/auto_online_blocks
+	% cat /sys/devices/system/memory/auto_online_blocks
 
 The default depends on the CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
 option. If it is disabled the default is "offline" which means the newly added
 memory is not in a ready-to-use state and you have to "online" the newly added
 memory blocks manually. Automatic onlining can be requested by writing "online"
-to "auto_online_blocks" file:
+to "auto_online_blocks" file::
 
-% echo online > /sys/devices/system/memory/auto_online_blocks
+	% echo online > /sys/devices/system/memory/auto_online_blocks
 
 This sets a global policy and impacts all memory blocks that will subsequently
 be hotplugged. Currently offline blocks keep their state. It is possible, under
@@ -277,24 +305,26 @@ online. User space tools can check their "state" files
 
 If the automatic onlining wasn't requested, failed, or some memory block was
 offlined it is possible to change the individual block's state by writing to the
-"state" file:
+"state" file::
 
-% echo online > /sys/devices/system/memory/memoryXXX/state
+	% echo online > /sys/devices/system/memory/memoryXXX/state
 
 This onlining will not change the ZONE type of the target memory block,
 If the memory block doesn't belong to any zone an appropriate kernel zone
 (usually ZONE_NORMAL) will be used unless movable_node kernel command line
 option is specified when ZONE_MOVABLE will be used.
 
-You can explicitly request to associate it with ZONE_MOVABLE by
+You can explicitly request to associate it with ZONE_MOVABLE by::
+
+	% echo online_movable > /sys/devices/system/memory/memoryXXX/state
 
-% echo online_movable > /sys/devices/system/memory/memoryXXX/state
-(NOTE: current limit: this memory block must be adjacent to ZONE_MOVABLE)
+.. note:: current limit: this memory block must be adjacent to ZONE_MOVABLE
 
-Or you can explicitly request a kernel zone (usually ZONE_NORMAL) by:
+Or you can explicitly request a kernel zone (usually ZONE_NORMAL) by::
 
-% echo online_kernel > /sys/devices/system/memory/memoryXXX/state
-(NOTE: current limit: this memory block must be adjacent to ZONE_NORMAL)
+	% echo online_kernel > /sys/devices/system/memory/memoryXXX/state
+
+.. note:: current limit: this memory block must be adjacent to ZONE_NORMAL
 
 An explicit zone onlining can fail (e.g. when the range is already within
 and existing and incompatible zone already).
@@ -306,12 +336,12 @@ This may be changed in future.
 
 
-------------------------
-6. Logical memory remove
-------------------------
+Logical memory remove
+=====================
+
+Memory offline and ZONE_MOVABLE
+-------------------------------
 
-6.1 Memory offline and ZONE_MOVABLE
-------------
 Memory offlining is more complicated than memory online. Because memory offline
 has to make the whole memory block be unused, memory offline can fail if
 the memory block includes memory which cannot be freed.
@@ -336,24 +366,27 @@ Assume the system has "TOTAL" amount of memory at boot time, this boot option
 creates ZONE_MOVABLE as following.
 
 1) When kernelcore=YYYY boot option is used,
-  Size of memory not for movable pages (not for offline) is YYYY.
-  Size of memory for movable pages (for offline) is TOTAL-YYYY.
+   Size of memory not for movable pages (not for offline) is YYYY.
+   Size of memory for movable pages (for offline) is TOTAL-YYYY.
 
 2) When movablecore=ZZZZ boot option is used,
-  Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
-  Size of memory for movable pages (for offline) is ZZZZ.
+   Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
+   Size of memory for movable pages (for offline) is ZZZZ.
+
+.. note::
 
+   Unfortunately, there is no information to show which memory block belongs
+   to ZONE_MOVABLE. This is TBD.
 
-Note: Unfortunately, there is no information to show which memory block belongs
-to ZONE_MOVABLE. This is TBD.
+.. _memory_hotplug_how_to_offline_memory:
 
+How to offline memory
+---------------------
 
-6.2. How to offline memory
-------------
 You can offline a memory block by using the same sysfs interface that was used
-in memory onlining.
+in memory onlining::
 
-% echo offline > /sys/devices/system/memory/memoryXXX/state
+	% echo offline > /sys/devices/system/memory/memoryXXX/state
 
 If offline succeeds, the state of the memory block is changed to be "offline".
 If it fails, some error core (like -EBUSY) will be returned by the kernel.
@@ -367,22 +400,22 @@ able to offline it (or not). (For example, a page is referred to by some kernel
 internal call and released soon.)
 
 Consideration:
-Memory hotplug's design direction is to make the possibility of memory offlining
-higher and to guarantee unplugging memory under any situation. But it needs
-more work. Returning -EBUSY under some situation may be good because the user
-can decide to retry more or not by himself. Currently, memory offlining code
-does some amount of retry with 120 seconds timeout.
+  Memory hotplug's design direction is to make the possibility of memory
+  offlining higher and to guarantee unplugging memory under any situation. But
+  it needs more work. Returning -EBUSY under some situation may be good because
+  the user can decide to retry more or not by himself. Currently, memory
+  offlining code does some amount of retry with 120 seconds timeout.
+
+Physical memory remove
+======================
 
--------------------------
-7. Physical memory remove
--------------------------
 Need more implementation yet....
  - Notification completion of remove works by OS to firmware.
  - Guard from remove if not yet.
 
---------------------------------
-8. Memory hotplug event notifier
---------------------------------
+Memory hotplug event notifier
+=============================
+
 Hotplugging events are sent to a notification queue.
 
 There are six types of notification defined in include/linux/memory.h:
@@ -412,14 +445,14 @@ MEM_CANCEL_OFFLINE
 MEM_OFFLINE
   Generated after offlining memory is complete.
 
-A callback routine can be registered by calling
+A callback routine can be registered by calling::
 
   hotplug_memory_notifier(callback_func, priority)
 
 Callback functions with higher values of priority are called before callback
 functions with lower values.
 
-A callback function must have the following prototype:
+A callback function must have the following prototype::
 
   int callback_func(
     struct notifier_block *self, unsigned long action, void *arg);
@@ -427,27 +460,28 @@ A callback function must have the following prototype:
 The first argument of the callback function (self) is a pointer to the block
 of the notifier chain that points to the callback function itself.
 The second argument (action) is one of the event types described above.
-The third argument (arg) passes a pointer of struct memory_notify.
-
-struct memory_notify {
-       unsigned long start_pfn;
-       unsigned long nr_pages;
-       int status_change_nid_normal;
-       int status_change_nid_high;
-       int status_change_nid;
-}
-
-start_pfn is start_pfn of online/offline memory.
-nr_pages is # of pages of online/offline memory.
-status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
-is (will be) set/clear, if this is -1, then nodemask status is not changed.
-status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
-is (will be) set/clear, if this is -1, then nodemask status is not changed.
-status_change_nid is set node id when N_MEMORY of nodemask is (will be)
-set/clear. It means a new(memoryless) node gets new memory by online and a
-node loses all memory. If this is -1, then nodemask status is not changed.
-If status_changed_nid* >= 0, callback should create/discard structures for the
-node if necessary.
+The third argument (arg) passes a pointer of struct memory_notify::
+
+	struct memory_notify {
+		unsigned long start_pfn;
+		unsigned long nr_pages;
+		int status_change_nid_normal;
+		int status_change_nid_high;
+		int status_change_nid;
+	}
+
+- start_pfn is start_pfn of online/offline memory.
+- nr_pages is # of pages of online/offline memory.
+- status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
+  is (will be) set/clear, if this is -1, then nodemask status is not changed.
+- status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
+  is (will be) set/clear, if this is -1, then nodemask status is not changed.
+- status_change_nid is set node id when N_MEMORY of nodemask is (will be)
+  set/clear. It means a new(memoryless) node gets new memory by online and a
+  node loses all memory. If this is -1, then nodemask status is not changed.
+
+  If status_changed_nid* >= 0, callback should create/discard structures for the
+  node if necessary.
 
 The callback routine shall return one of the values
 NOTIFY_DONE, NOTIFY_OK, NOTIFY_BAD, NOTIFY_STOP
@@ -461,9 +495,9 @@ further processing of the notification queue.
 
 NOTIFY_STOP stops further processing of the notification queue.
 
---------------
-9. Future Work
---------------
+Future Work
+===========
+
   - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
     sysctl or new control file.
   - showing memory block and physical device relationship.
@@ -471,4 +505,3 @@ NOTIFY_STOP stops further processing of the notification queue.
   - support HugeTLB page migration and offlining.
   - memmap removing at memory offline.
   - physical remove memory.
-
-- 
GitLab


From 78b11f40d48e10292ab6c642652f25fb4c95bdcc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 09:43:52 -0300
Subject: [PATCH 1591/1958] men-chameleon-bus.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Adjust identations;
- Remove title numbering;
- mark literal blocks;
- comment its TOC.

Acked-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/men-chameleon-bus.txt | 330 ++++++++++++++--------------
 1 file changed, 171 insertions(+), 159 deletions(-)

diff --git a/Documentation/men-chameleon-bus.txt b/Documentation/men-chameleon-bus.txt
index 30ded732027e2..1b1f048aa7483 100644
--- a/Documentation/men-chameleon-bus.txt
+++ b/Documentation/men-chameleon-bus.txt
@@ -1,163 +1,175 @@
-                               MEN Chameleon Bus
-                               =================
-
-Table of Contents
 =================
-1 Introduction
-    1.1 Scope of this Document
-    1.2 Limitations of the current implementation
-2 Architecture
-    2.1 MEN Chameleon Bus
-    2.2 Carrier Devices
-    2.3 Parser
-3 Resource handling
-    3.1 Memory Resources
-    3.2 IRQs
-4 Writing an MCB driver
-    4.1 The driver structure
-    4.2 Probing and attaching
-    4.3 Initializing the driver
-
-
-1 Introduction
-===============
-  This document describes the architecture and implementation of the MEN
-  Chameleon Bus (called MCB throughout this document).
-
-1.1 Scope of this Document
----------------------------
-  This document is intended to be a short overview of the current
-  implementation and does by no means describe the complete possibilities of MCB
-  based devices.
-
-1.2 Limitations of the current implementation
-----------------------------------------------
-  The current implementation is limited to PCI and PCIe based carrier devices
-  that only use a single memory resource and share the PCI legacy IRQ.  Not
-  implemented are:
-  - Multi-resource MCB devices like the VME Controller or M-Module carrier.
-  - MCB devices that need another MCB device, like SRAM for a DMA Controller's
-    buffer descriptors or a video controller's video memory.
-  - A per-carrier IRQ domain for carrier devices that have one (or more) IRQs
-    per MCB device like PCIe based carriers with MSI or MSI-X support.
-
-2 Architecture
-===============
-  MCB is divided into 3 functional blocks:
-  - The MEN Chameleon Bus itself,
-  - drivers for MCB Carrier Devices and
-  - the parser for the Chameleon table.
-
-2.1 MEN Chameleon Bus
+MEN Chameleon Bus
+=================
+
+.. Table of Contents
+   =================
+   1 Introduction
+       1.1 Scope of this Document
+       1.2 Limitations of the current implementation
+   2 Architecture
+       2.1 MEN Chameleon Bus
+       2.2 Carrier Devices
+       2.3 Parser
+   3 Resource handling
+       3.1 Memory Resources
+       3.2 IRQs
+   4 Writing an MCB driver
+       4.1 The driver structure
+       4.2 Probing and attaching
+       4.3 Initializing the driver
+
+
+Introduction
+============
+
+This document describes the architecture and implementation of the MEN
+Chameleon Bus (called MCB throughout this document).
+
+Scope of this Document
 ----------------------
-   The MEN Chameleon Bus is an artificial bus system that attaches to a so
-   called Chameleon FPGA device found on some hardware produced my MEN Mikro
-   Elektronik GmbH. These devices are multi-function devices implemented in a
-   single FPGA and usually attached via some sort of PCI or PCIe link. Each
-   FPGA contains a header section describing the content of the FPGA. The
-   header lists the device id, PCI BAR, offset from the beginning of the PCI
-   BAR, size in the FPGA, interrupt number and some other properties currently
-   not handled by the MCB implementation.
-
-2.2 Carrier Devices
+
+This document is intended to be a short overview of the current
+implementation and does by no means describe the complete possibilities of MCB
+based devices.
+
+Limitations of the current implementation
+-----------------------------------------
+
+The current implementation is limited to PCI and PCIe based carrier devices
+that only use a single memory resource and share the PCI legacy IRQ.  Not
+implemented are:
+
+- Multi-resource MCB devices like the VME Controller or M-Module carrier.
+- MCB devices that need another MCB device, like SRAM for a DMA Controller's
+  buffer descriptors or a video controller's video memory.
+- A per-carrier IRQ domain for carrier devices that have one (or more) IRQs
+  per MCB device like PCIe based carriers with MSI or MSI-X support.
+
+Architecture
+============
+
+MCB is divided into 3 functional blocks:
+
+- The MEN Chameleon Bus itself,
+- drivers for MCB Carrier Devices and
+- the parser for the Chameleon table.
+
+MEN Chameleon Bus
+-----------------
+
+The MEN Chameleon Bus is an artificial bus system that attaches to a so
+called Chameleon FPGA device found on some hardware produced my MEN Mikro
+Elektronik GmbH. These devices are multi-function devices implemented in a
+single FPGA and usually attached via some sort of PCI or PCIe link. Each
+FPGA contains a header section describing the content of the FPGA. The
+header lists the device id, PCI BAR, offset from the beginning of the PCI
+BAR, size in the FPGA, interrupt number and some other properties currently
+not handled by the MCB implementation.
+
+Carrier Devices
+---------------
+
+A carrier device is just an abstraction for the real world physical bus the
+Chameleon FPGA is attached to. Some IP Core drivers may need to interact with
+properties of the carrier device (like querying the IRQ number of a PCI
+device). To provide abstraction from the real hardware bus, an MCB carrier
+device provides callback methods to translate the driver's MCB function calls
+to hardware related function calls. For example a carrier device may
+implement the get_irq() method which can be translated into a hardware bus
+query for the IRQ number the device should use.
+
+Parser
+------
+
+The parser reads the first 512 bytes of a Chameleon device and parses the
+Chameleon table. Currently the parser only supports the Chameleon v2 variant
+of the Chameleon table but can easily be adopted to support an older or
+possible future variant. While parsing the table's entries new MCB devices
+are allocated and their resources are assigned according to the resource
+assignment in the Chameleon table. After resource assignment is finished, the
+MCB devices are registered at the MCB and thus at the driver core of the
+Linux kernel.
+
+Resource handling
+=================
+
+The current implementation assigns exactly one memory and one IRQ resource
+per MCB device. But this is likely going to change in the future.
+
+Memory Resources
+----------------
+
+Each MCB device has exactly one memory resource, which can be requested from
+the MCB bus. This memory resource is the physical address of the MCB device
+inside the carrier and is intended to be passed to ioremap() and friends. It
+is already requested from the kernel by calling request_mem_region().
+
+IRQs
+----
+
+Each MCB device has exactly one IRQ resource, which can be requested from the
+MCB bus. If a carrier device driver implements the ->get_irq() callback
+method, the IRQ number assigned by the carrier device will be returned,
+otherwise the IRQ number inside the Chameleon table will be returned. This
+number is suitable to be passed to request_irq().
+
+Writing an MCB driver
+=====================
+
+The driver structure
 --------------------
-   A carrier device is just an abstraction for the real world physical bus the
-   Chameleon FPGA is attached to. Some IP Core drivers may need to interact with
-   properties of the carrier device (like querying the IRQ number of a PCI
-   device). To provide abstraction from the real hardware bus, an MCB carrier
-   device provides callback methods to translate the driver's MCB function calls
-   to hardware related function calls. For example a carrier device may
-   implement the get_irq() method which can be translated into a hardware bus
-   query for the IRQ number the device should use.
-
-2.3 Parser
------------
-   The parser reads the first 512 bytes of a Chameleon device and parses the
-   Chameleon table. Currently the parser only supports the Chameleon v2 variant
-   of the Chameleon table but can easily be adopted to support an older or
-   possible future variant. While parsing the table's entries new MCB devices
-   are allocated and their resources are assigned according to the resource
-   assignment in the Chameleon table. After resource assignment is finished, the
-   MCB devices are registered at the MCB and thus at the driver core of the
-   Linux kernel.
-
-3 Resource handling
-====================
-  The current implementation assigns exactly one memory and one IRQ resource
-  per MCB device. But this is likely going to change in the future.
-
-3.1 Memory Resources
+
+Each MCB driver has a structure to identify the device driver as well as
+device ids which identify the IP Core inside the FPGA. The driver structure
+also contains callback methods which get executed on driver probe and
+removal from the system::
+
+	static const struct mcb_device_id foo_ids[] = {
+		{ .device = 0x123 },
+		{ }
+	};
+	MODULE_DEVICE_TABLE(mcb, foo_ids);
+
+	static struct mcb_driver foo_driver = {
+	driver = {
+		.name = "foo-bar",
+		.owner = THIS_MODULE,
+	},
+		.probe = foo_probe,
+		.remove = foo_remove,
+		.id_table = foo_ids,
+	};
+
+Probing and attaching
 ---------------------
-   Each MCB device has exactly one memory resource, which can be requested from
-   the MCB bus. This memory resource is the physical address of the MCB device
-   inside the carrier and is intended to be passed to ioremap() and friends. It
-   is already requested from the kernel by calling request_mem_region().
-
-3.2 IRQs
----------
-   Each MCB device has exactly one IRQ resource, which can be requested from the
-   MCB bus. If a carrier device driver implements the ->get_irq() callback
-   method, the IRQ number assigned by the carrier device will be returned,
-   otherwise the IRQ number inside the Chameleon table will be returned. This
-   number is suitable to be passed to request_irq().
-
-4 Writing an MCB driver
-=======================
-
-4.1 The driver structure
--------------------------
-    Each MCB driver has a structure to identify the device driver as well as
-    device ids which identify the IP Core inside the FPGA. The driver structure
-    also contains callback methods which get executed on driver probe and
-    removal from the system.
-
-
-  static const struct mcb_device_id foo_ids[] = {
-          { .device = 0x123 },
-          { }
-  };
-  MODULE_DEVICE_TABLE(mcb, foo_ids);
-
-  static struct mcb_driver foo_driver = {
-          driver = {
-                  .name = "foo-bar",
-                  .owner = THIS_MODULE,
-          },
-          .probe = foo_probe,
-          .remove = foo_remove,
-          .id_table = foo_ids,
-  };
-
-4.2 Probing and attaching
---------------------------
-   When a driver is loaded and the MCB devices it services are found, the MCB
-   core will call the driver's probe callback method. When the driver is removed
-   from the system, the MCB core will call the driver's remove callback method.
-
-
-  static init foo_probe(struct mcb_device *mdev, const struct mcb_device_id *id);
-  static void foo_remove(struct mcb_device *mdev);
-
-4.3 Initializing the driver
-----------------------------
-   When the kernel is booted or your foo driver module is inserted, you have to
-   perform driver initialization. Usually it is enough to register your driver
-   module at the MCB core.
-
-
-  static int __init foo_init(void)
-  {
-          return mcb_register_driver(&foo_driver);
-  }
-  module_init(foo_init);
-
-  static void __exit foo_exit(void)
-  {
-          mcb_unregister_driver(&foo_driver);
-  }
-  module_exit(foo_exit);
-
-   The module_mcb_driver() macro can be used to reduce the above code.
-
-
-  module_mcb_driver(foo_driver);
+
+When a driver is loaded and the MCB devices it services are found, the MCB
+core will call the driver's probe callback method. When the driver is removed
+from the system, the MCB core will call the driver's remove callback method::
+
+	static init foo_probe(struct mcb_device *mdev, const struct mcb_device_id *id);
+	static void foo_remove(struct mcb_device *mdev);
+
+Initializing the driver
+-----------------------
+
+When the kernel is booted or your foo driver module is inserted, you have to
+perform driver initialization. Usually it is enough to register your driver
+module at the MCB core::
+
+	static int __init foo_init(void)
+	{
+		return mcb_register_driver(&foo_driver);
+	}
+	module_init(foo_init);
+
+	static void __exit foo_exit(void)
+	{
+		mcb_unregister_driver(&foo_driver);
+	}
+	module_exit(foo_exit);
+
+The module_mcb_driver() macro can be used to reduce the above code::
+
+	module_mcb_driver(foo_driver);
-- 
GitLab


From c49e51a531bc01469cd083a71f495171e4a3d067 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 09:48:37 -0300
Subject: [PATCH 1592/1958] nommu-mmap.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Adjust identation for main title;
- fix level for chapter titles;
- use ".. important::" tag for an important note;
- use the right notation for paragraph auto-numbering "(#)";
- Fix footnotes syntax;
- fix one literal var to use the right ReST tag.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/nommu-mmap.txt | 52 +++++++++++++++---------------------
 1 file changed, 22 insertions(+), 30 deletions(-)

diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
index ae57b9ea0d416..39a62ab0f50a7 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/nommu-mmap.txt
@@ -1,6 +1,6 @@
-			 =============================
-			 NO-MMU MEMORY MAPPING SUPPORT
-			 =============================
+=============================
+NO-MMU MEMORY MAPPING SUPPORT
+=============================
 
 The kernel has limited support for memory mapping under no-MMU conditions, such
 as are used in uClinux environments. From the userspace point of view, memory
@@ -16,7 +16,7 @@ the CLONE_VM flag.
 The behaviour is similar between the MMU and no-MMU cases, but not identical;
 and it's also much more restricted in the latter case:
 
- (*) Anonymous mapping, MAP_PRIVATE
+ (#) Anonymous mapping, MAP_PRIVATE
 
 	In the MMU case: VM regions backed by arbitrary pages; copy-on-write
 	across fork.
@@ -24,14 +24,14 @@ and it's also much more restricted in the latter case:
 	In the no-MMU case: VM regions backed by arbitrary contiguous runs of
 	pages.
 
- (*) Anonymous mapping, MAP_SHARED
+ (#) Anonymous mapping, MAP_SHARED
 
 	These behave very much like private mappings, except that they're
 	shared across fork() or clone() without CLONE_VM in the MMU case. Since
 	the no-MMU case doesn't support these, behaviour is identical to
 	MAP_PRIVATE there.
 
- (*) File, MAP_PRIVATE, PROT_READ / PROT_EXEC, !PROT_WRITE
+ (#) File, MAP_PRIVATE, PROT_READ / PROT_EXEC, !PROT_WRITE
 
 	In the MMU case: VM regions backed by pages read from file; changes to
 	the underlying file are reflected in the mapping; copied across fork.
@@ -56,7 +56,7 @@ and it's also much more restricted in the latter case:
 	   are visible in other processes (no MMU protection), but should not
 	   happen.
 
- (*) File, MAP_PRIVATE, PROT_READ / PROT_EXEC, PROT_WRITE
+ (#) File, MAP_PRIVATE, PROT_READ / PROT_EXEC, PROT_WRITE
 
 	In the MMU case: like the non-PROT_WRITE case, except that the pages in
 	question get copied before the write actually happens. From that point
@@ -66,7 +66,7 @@ and it's also much more restricted in the latter case:
 	In the no-MMU case: works much like the non-PROT_WRITE case, except
 	that a copy is always taken and never shared.
 
- (*) Regular file / blockdev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+ (#) Regular file / blockdev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
 
 	In the MMU case: VM regions backed by pages read from file; changes to
 	pages written back to file; writes to file reflected into pages backing
@@ -74,7 +74,7 @@ and it's also much more restricted in the latter case:
 
 	In the no-MMU case: not supported.
 
- (*) Memory backed regular file, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+ (#) Memory backed regular file, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
 
 	In the MMU case: As for ordinary regular files.
 
@@ -85,7 +85,7 @@ and it's also much more restricted in the latter case:
 	as for the MMU case. If the filesystem does not provide any such
 	support, then the mapping request will be denied.
 
- (*) Memory backed blockdev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+ (#) Memory backed blockdev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
 
 	In the MMU case: As for ordinary regular files.
 
@@ -94,7 +94,7 @@ and it's also much more restricted in the latter case:
 	truncate being called. The ramdisk driver could do this if it allocated
 	all its memory as a contiguous array upfront.
 
- (*) Memory backed chardev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+ (#) Memory backed chardev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
 
 	In the MMU case: As for ordinary regular files.
 
@@ -105,21 +105,20 @@ and it's also much more restricted in the latter case:
 	provide any such support, then the mapping request will be denied.
 
 
-============================
 FURTHER NOTES ON NO-MMU MMAP
 ============================
 
- (*) A request for a private mapping of a file may return a buffer that is not
+ (#) A request for a private mapping of a file may return a buffer that is not
      page-aligned.  This is because XIP may take place, and the data may not be
      paged aligned in the backing store.
 
- (*) A request for an anonymous mapping will always be page aligned.  If
+ (#) A request for an anonymous mapping will always be page aligned.  If
      possible the size of the request should be a power of two otherwise some
      of the space may be wasted as the kernel must allocate a power-of-2
      granule but will only discard the excess if appropriately configured as
      this has an effect on fragmentation.
 
- (*) The memory allocated by a request for an anonymous mapping will normally
+ (#) The memory allocated by a request for an anonymous mapping will normally
      be cleared by the kernel before being returned in accordance with the
      Linux man pages (ver 2.22 or later).
 
@@ -145,23 +144,22 @@ FURTHER NOTES ON NO-MMU MMAP
      uClibc uses this to speed up malloc(), and the ELF-FDPIC binfmt uses this
      to allocate the brk and stack region.
 
- (*) A list of all the private copy and anonymous mappings on the system is
+ (#) A list of all the private copy and anonymous mappings on the system is
      visible through /proc/maps in no-MMU mode.
 
- (*) A list of all the mappings in use by a process is visible through
+ (#) A list of all the mappings in use by a process is visible through
      /proc/<pid>/maps in no-MMU mode.
 
- (*) Supplying MAP_FIXED or a requesting a particular mapping address will
+ (#) Supplying MAP_FIXED or a requesting a particular mapping address will
      result in an error.
 
- (*) Files mapped privately usually have to have a read method provided by the
+ (#) Files mapped privately usually have to have a read method provided by the
      driver or filesystem so that the contents can be read into the memory
      allocated if mmap() chooses not to map the backing device directly. An
      error will result if they don't. This is most likely to be encountered
      with character device files, pipes, fifos and sockets.
 
 
-==========================
 INTERPROCESS SHARED MEMORY
 ==========================
 
@@ -170,7 +168,6 @@ mode.  The former through the usual mechanism, the latter through files created
 on ramfs or tmpfs mounts.
 
 
-=======
 FUTEXES
 =======
 
@@ -180,12 +177,11 @@ mappings made by a process or if the mapping in which the address lies does not
 support futexes (such as an I/O chardev mapping).
 
 
-=============
 NO-MMU MREMAP
 =============
 
 The mremap() function is partially supported.  It may change the size of a
-mapping, and may move it[*] if MREMAP_MAYMOVE is specified and if the new size
+mapping, and may move it [#]_ if MREMAP_MAYMOVE is specified and if the new size
 of the mapping exceeds the size of the slab object currently occupied by the
 memory to which the mapping refers, or if a smaller slab object could be used.
 
@@ -200,10 +196,9 @@ a previously mapped object.  It may not be used to create holes in existing
 mappings, move parts of existing mappings or resize parts of mappings.  It must
 act on a complete mapping.
 
-[*] Not currently supported.
+.. [#] Not currently supported.
 
 
-============================================
 PROVIDING SHAREABLE CHARACTER DEVICE SUPPORT
 ============================================
 
@@ -235,7 +230,7 @@ direct the call to the device-specific driver. Under such circumstances, the
 mapping request will be rejected if NOMMU_MAP_COPY is not specified, and a
 copy mapped otherwise.
 
-IMPORTANT NOTE:
+.. important::
 
 	Some types of device may present a different appearance to anyone
 	looking at them in certain modes. Flash chips can be like this; for
@@ -249,7 +244,6 @@ IMPORTANT NOTE:
 	circumstances!
 
 
-==============================================
 PROVIDING SHAREABLE MEMORY-BACKED FILE SUPPORT
 ==============================================
 
@@ -267,7 +261,6 @@ Memory backed devices are indicated by the mapping's backing device info having
 the memory_backed flag set.
 
 
-========================================
 PROVIDING SHAREABLE BLOCK DEVICE SUPPORT
 ========================================
 
@@ -276,7 +269,6 @@ character devices. If there isn't a real device underneath, then the driver
 should allocate sufficient contiguous memory to honour any supported mapping.
 
 
-=================================
 ADJUSTING PAGE TRIMMING BEHAVIOUR
 =================================
 
@@ -288,4 +280,4 @@ allocator.  In order to retain finer-grained control over fragmentation, this
 behaviour can either be disabled completely, or bumped up to a higher page
 watermark where trimming begins.
 
-Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'.
+Page trimming behaviour is configurable via the sysctl ``vm.nr_trim_pages``.
-- 
GitLab


From 853afb719f6e4f3e8387390999717ca2b91862e5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 09:54:22 -0300
Subject: [PATCH 1593/1958] nommu-mmap.txt: don't use all upper case on titles

This file is almost in the standard format we're adopting for
other documentation text files. Yet, it use upper case on
titles.

So, in order to uniform how chapter names, adjust caps on
titles.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/nommu-mmap.txt | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
index 39a62ab0f50a7..69556f0d494b8 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/nommu-mmap.txt
@@ -1,5 +1,5 @@
 =============================
-NO-MMU MEMORY MAPPING SUPPORT
+No-MMU memory mapping support
 =============================
 
 The kernel has limited support for memory mapping under no-MMU conditions, such
@@ -105,7 +105,7 @@ and it's also much more restricted in the latter case:
 	provide any such support, then the mapping request will be denied.
 
 
-FURTHER NOTES ON NO-MMU MMAP
+Further notes on no-MMU MMAP
 ============================
 
  (#) A request for a private mapping of a file may return a buffer that is not
@@ -160,7 +160,7 @@ FURTHER NOTES ON NO-MMU MMAP
      with character device files, pipes, fifos and sockets.
 
 
-INTERPROCESS SHARED MEMORY
+Interprocess shared memory
 ==========================
 
 Both SYSV IPC SHM shared memory and POSIX shared memory is supported in NOMMU
@@ -168,7 +168,7 @@ mode.  The former through the usual mechanism, the latter through files created
 on ramfs or tmpfs mounts.
 
 
-FUTEXES
+Futexes
 =======
 
 Futexes are supported in NOMMU mode if the arch supports them.  An error will
@@ -177,7 +177,7 @@ mappings made by a process or if the mapping in which the address lies does not
 support futexes (such as an I/O chardev mapping).
 
 
-NO-MMU MREMAP
+No-MMU mremap
 =============
 
 The mremap() function is partially supported.  It may change the size of a
@@ -199,7 +199,7 @@ act on a complete mapping.
 .. [#] Not currently supported.
 
 
-PROVIDING SHAREABLE CHARACTER DEVICE SUPPORT
+Providing shareable character device support
 ============================================
 
 To provide shareable character device support, a driver must provide a
@@ -244,7 +244,7 @@ copy mapped otherwise.
 	circumstances!
 
 
-PROVIDING SHAREABLE MEMORY-BACKED FILE SUPPORT
+Providing shareable memory-backed file support
 ==============================================
 
 Provision of shared mappings on memory backed files is similar to the provision
@@ -261,7 +261,7 @@ Memory backed devices are indicated by the mapping's backing device info having
 the memory_backed flag set.
 
 
-PROVIDING SHAREABLE BLOCK DEVICE SUPPORT
+Providing shareable block device support
 ========================================
 
 Provision of shared mappings on block device files is exactly the same as for
@@ -269,7 +269,7 @@ character devices. If there isn't a real device underneath, then the driver
 should allocate sufficient contiguous memory to honour any supported mapping.
 
 
-ADJUSTING PAGE TRIMMING BEHAVIOUR
+Adjusting page trimming behaviour
 =================================
 
 NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages
-- 
GitLab


From e3866726ebe382815e2b5dcce1a767dac9b6c8fa Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 10:00:04 -0300
Subject: [PATCH 1594/1958] ntb.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx.

This file is using some other markup notation (likely, markdown).
Convert it to the adopted standard:

  - Adjust the header level markup;
  - Adjust identation for debugfs files and module parameters.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ntb.txt | 55 +++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt
index 1d9bbabb6c79a..e4771e5c2ad77 100644
--- a/Documentation/ntb.txt
+++ b/Documentation/ntb.txt
@@ -1,4 +1,6 @@
-# NTB Drivers
+===========
+NTB Drivers
+===========
 
 NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects
 the separate memory systems of two computers to the same PCI-Express fabric.
@@ -10,7 +12,8 @@ fixed address.  Doorbell registers provide a way for peers to send interrupt
 events.  Memory windows allow translated read and write access to the peer
 memory.
 
-## NTB Core Driver (ntb)
+NTB Core Driver (ntb)
+=====================
 
 The NTB core driver defines an api wrapping the common feature set, and allows
 clients interested in NTB features to discover NTB the devices supported by
@@ -18,7 +21,8 @@ hardware drivers.  The term "client" is used here to mean an upper layer
 component making use of the NTB api.  The term "driver," or "hardware driver,"
 is used here to mean a driver for a specific vendor and model of NTB hardware.
 
-## NTB Client Drivers
+NTB Client Drivers
+==================
 
 NTB client drivers should register with the NTB core driver.  After
 registering, the client probe and remove functions will be called appropriately
@@ -26,7 +30,8 @@ as ntb hardware, or hardware drivers, are inserted and removed.  The
 registration uses the Linux Device framework, so it should feel familiar to
 anyone who has written a pci driver.
 
-### NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev)
+NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev)
+------------------------------------------------------------------
 
 The primary client for NTB is the Transport client, used in tandem with NTB
 Netdev.  These drivers function together to create a logical link to the peer,
@@ -37,7 +42,8 @@ Transport queue pair.  Network data is copied between socket buffers and the
 Transport queue pair buffer.  The Transport client may be used for other things
 besides Netdev, however no other applications have yet been written.
 
-### NTB Ping Pong Test Client (ntb\_pingpong)
+NTB Ping Pong Test Client (ntb\_pingpong)
+-----------------------------------------
 
 The Ping Pong test client serves as a demonstration to exercise the doorbell
 and scratchpad registers of NTB hardware, and as an example simple NTB client.
@@ -64,7 +70,8 @@ Module Parameters:
 * dyndbg - It is suggested to specify dyndbg=+p when loading this module, and
 	then to observe debugging output on the console.
 
-### NTB Tool Test Client (ntb\_tool)
+NTB Tool Test Client (ntb\_tool)
+--------------------------------
 
 The Tool test client serves for debugging, primarily, ntb hardware and drivers.
 The Tool provides access through debugfs for reading, setting, and clearing the
@@ -74,48 +81,60 @@ The Tool does not currently have any module parameters.
 
 Debugfs Files:
 
-* *debugfs*/ntb\_tool/*hw*/ - A directory in debugfs will be created for each
+* *debugfs*/ntb\_tool/*hw*/
+	A directory in debugfs will be created for each
 	NTB device probed by the tool.  This directory is shortened to *hw*
 	below.
-* *hw*/db - This file is used to read, set, and clear the local doorbell.  Not
+* *hw*/db
+	This file is used to read, set, and clear the local doorbell.  Not
 	all operations may be supported by all hardware.  To read the doorbell,
 	read the file.  To set the doorbell, write `s` followed by the bits to
 	set (eg: `echo 's 0x0101' > db`).  To clear the doorbell, write `c`
 	followed by the bits to clear.
-* *hw*/mask - This file is used to read, set, and clear the local doorbell mask.
+* *hw*/mask
+	This file is used to read, set, and clear the local doorbell mask.
 	See *db* for details.
-* *hw*/peer\_db - This file is used to read, set, and clear the peer doorbell.
+* *hw*/peer\_db
+	This file is used to read, set, and clear the peer doorbell.
 	See *db* for details.
-* *hw*/peer\_mask - This file is used to read, set, and clear the peer doorbell
+* *hw*/peer\_mask
+	This file is used to read, set, and clear the peer doorbell
 	mask.  See *db* for details.
-* *hw*/spad - This file is used to read and write local scratchpads.  To read
+* *hw*/spad
+	This file is used to read and write local scratchpads.  To read
 	the values of all scratchpads, read the file.  To write values, write a
 	series of pairs of scratchpad number and value
 	(eg: `echo '4 0x123 7 0xabc' > spad`
 	# to set scratchpads `4` and `7` to `0x123` and `0xabc`, respectively).
-* *hw*/peer\_spad - This file is used to read and write peer scratchpads.  See
+* *hw*/peer\_spad
+	This file is used to read and write peer scratchpads.  See
 	*spad* for details.
 
-## NTB Hardware Drivers
+NTB Hardware Drivers
+====================
 
 NTB hardware drivers should register devices with the NTB core driver.  After
 registering, clients probe and remove functions will be called.
 
-### NTB Intel Hardware Driver (ntb\_hw\_intel)
+NTB Intel Hardware Driver (ntb\_hw\_intel)
+------------------------------------------
 
 The Intel hardware driver supports NTB on Xeon and Atom CPUs.
 
 Module Parameters:
 
-* b2b\_mw\_idx - If the peer ntb is to be accessed via a memory window, then use
+* b2b\_mw\_idx
+	If the peer ntb is to be accessed via a memory window, then use
 	this memory window to access the peer ntb.  A value of zero or positive
 	starts from the first mw idx, and a negative value starts from the last
 	mw idx.  Both sides MUST set the same value here!  The default value is
 	`-1`.
-* b2b\_mw\_share - If the peer ntb is to be accessed via a memory window, and if
+* b2b\_mw\_share
+	If the peer ntb is to be accessed via a memory window, and if
 	the memory window is large enough, still allow the client to use the
 	second half of the memory window for address translation to the peer.
-* xeon\_b2b\_usd\_bar2\_addr64 - If using B2B topology on Xeon hardware, use
+* xeon\_b2b\_usd\_bar2\_addr64
+	If using B2B topology on Xeon hardware, use
 	this 64 bit address on the bus between the NTB devices for the window
 	at BAR2, on the upstream side of the link.
 * xeon\_b2b\_usd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*.
-- 
GitLab


From e4a5c33ed2084d000b968e1847a7d341551f75ec Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 10:04:34 -0300
Subject: [PATCH 1595/1958] numastat.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark the document title;
- mark the table as such.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/numastat.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/numastat.txt b/Documentation/numastat.txt
index 520327790d543..aaf1667489f87 100644
--- a/Documentation/numastat.txt
+++ b/Documentation/numastat.txt
@@ -1,10 +1,12 @@
-
+===============================
 Numa policy hit/miss statistics
+===============================
 
 /sys/devices/system/node/node*/numastat
 
 All units are pages. Hugepages have separate counters.
 
+=============== ============================================================
 numa_hit	A process wanted to allocate memory from this node,
 		and succeeded.
 
@@ -20,6 +22,7 @@ other_node	A process ran on this node and got memory from another node.
 
 interleave_hit 	Interleaving wanted to allocate from this node
 		and succeeded.
+=============== ============================================================
 
 For easier reading you can use the numastat utility from the numactl package
 (http://oss.sgi.com/projects/libnuma/). Note that it only works
-- 
GitLab


From 7576b2b98dc9aa8a3ce8921df4a5fbb354269ed8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 10:06:48 -0300
Subject: [PATCH 1596/1958] padata.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark document title;
- mark literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/padata.txt | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/Documentation/padata.txt b/Documentation/padata.txt
index 7ddfe216a0aa7..b103d0c820004 100644
--- a/Documentation/padata.txt
+++ b/Documentation/padata.txt
@@ -1,5 +1,8 @@
+=======================================
 The padata parallel execution mechanism
-Last updated for 2.6.36
+=======================================
+
+:Last updated: for 2.6.36
 
 Padata is a mechanism by which the kernel can farm work out to be done in
 parallel on multiple CPUs while retaining the ordering of tasks.  It was
@@ -9,7 +12,7 @@ those packets.  The crypto developers made a point of writing padata in a
 sufficiently general fashion that it could be put to other uses as well.
 
 The first step in using padata is to set up a padata_instance structure for
-overall control of how tasks are to be run:
+overall control of how tasks are to be run::
 
     #include <linux/padata.h>
 
@@ -24,7 +27,7 @@ The workqueue wq is where the work will actually be done; it should be
 a multithreaded queue, naturally.
 
 To allocate a padata instance with the cpu_possible_mask for both
-cpumasks this helper function can be used:
+cpumasks this helper function can be used::
 
     struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq);
 
@@ -36,7 +39,7 @@ it is legal to supply a cpumask to padata that contains offline CPUs.
 Once an offline CPU in the user supplied cpumask comes online, padata
 is going to use it.
 
-There are functions for enabling and disabling the instance:
+There are functions for enabling and disabling the instance::
 
     int padata_start(struct padata_instance *pinst);
     void padata_stop(struct padata_instance *pinst);
@@ -48,7 +51,7 @@ padata cpumask contains no active CPU (flag not set).
 padata_stop clears the flag and blocks until the padata instance
 is unused.
 
-The list of CPUs to be used can be adjusted with these functions:
+The list of CPUs to be used can be adjusted with these functions::
 
     int padata_set_cpumasks(struct padata_instance *pinst,
 			    cpumask_var_t pcpumask,
@@ -71,12 +74,12 @@ padata_add_cpu/padata_remove_cpu are used. cpu specifies the CPU to add or
 remove and mask is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL.
 
 If a user is interested in padata cpumask changes, he can register to
-the padata cpumask change notifier:
+the padata cpumask change notifier::
 
     int padata_register_cpumask_notifier(struct padata_instance *pinst,
 					 struct notifier_block *nblock);
 
-To unregister from that notifier:
+To unregister from that notifier::
 
     int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
 					   struct notifier_block *nblock);
@@ -84,7 +87,7 @@ To unregister from that notifier:
 The padata cpumask change notifier notifies about changes of the usable
 cpumasks, i.e. the subset of active CPUs in the user supplied cpumask.
 
-Padata calls the notifier chain with:
+Padata calls the notifier chain with::
 
     blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
 				 notification_mask,
@@ -95,7 +98,7 @@ is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL and cpumask is a pointer
 to a struct padata_cpumask that contains the new cpumask information.
 
 Actually submitting work to the padata instance requires the creation of a
-padata_priv structure:
+padata_priv structure::
 
     struct padata_priv {
         /* Other stuff here... */
@@ -110,7 +113,7 @@ parallel() and serial() functions should be provided.  Those functions will
 be called in the process of getting the work done as we will see
 momentarily.
 
-The submission of work is done with:
+The submission of work is done with::
 
     int padata_do_parallel(struct padata_instance *pinst,
 		           struct padata_priv *padata, int cb_cpu);
@@ -138,7 +141,7 @@ need not be completed during this call, but, if parallel() leaves work
 outstanding, it should be prepared to be called again with a new job before
 the previous one completes.  When a task does complete, parallel() (or
 whatever function actually finishes the job) should inform padata of the
-fact with a call to:
+fact with a call to::
 
     void padata_do_serial(struct padata_priv *padata);
 
@@ -151,7 +154,7 @@ pains to ensure that tasks are completed in the order in which they were
 submitted.
 
 The one remaining function in the padata API should be called to clean up
-when a padata instance is no longer needed:
+when a padata instance is no longer needed::
 
     void padata_free(struct padata_instance *pinst);
 
-- 
GitLab


From be9d0411f1608ad62c2334d3a289a68e4259e48c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 11:21:45 -0300
Subject: [PATCH 1597/1958] parport-lowlevel.txt: standardize document format

Each text file under Documentation follows a different
format. This one uses a man-page like approach.

Change its representation to be closer to the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Mark titles;
- Mark literals and literal blocks;
- Adjust identation.

Still, the best would be to move its contents to kernel-docs.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/parport-lowlevel.txt | 1321 ++++++++++++++++++----------
 1 file changed, 841 insertions(+), 480 deletions(-)

diff --git a/Documentation/parport-lowlevel.txt b/Documentation/parport-lowlevel.txt
index 120eb20dbb091..0633d70ffda7f 100644
--- a/Documentation/parport-lowlevel.txt
+++ b/Documentation/parport-lowlevel.txt
@@ -1,11 +1,12 @@
+===============================
 PARPORT interface documentation
--------------------------------
+===============================
 
-Time-stamp: <2000-02-24 13:30:20 twaugh>
+:Time-stamp: <2000-02-24 13:30:20 twaugh>
 
 Described here are the following functions:
 
-Global functions:
+Global functions::
   parport_register_driver
   parport_unregister_driver
   parport_enumerate
@@ -31,7 +32,8 @@ Global functions:
   parport_set_timeout
 
 Port functions (can be overridden by low-level drivers):
-  SPP:
+
+  SPP::
     port->ops->read_data
     port->ops->write_data
     port->ops->read_status
@@ -43,23 +45,23 @@ Port functions (can be overridden by low-level drivers):
     port->ops->data_forward
     port->ops->data_reverse
 
-  EPP:
+  EPP::
     port->ops->epp_write_data
     port->ops->epp_read_data
     port->ops->epp_write_addr
     port->ops->epp_read_addr
 
-  ECP:
+  ECP::
     port->ops->ecp_write_data
     port->ops->ecp_read_data
     port->ops->ecp_write_addr
 
-  Other:
+  Other::
     port->ops->nibble_read_data
     port->ops->byte_read_data
     port->ops->compat_write_data
 
-The parport subsystem comprises 'parport' (the core port-sharing
+The parport subsystem comprises ``parport`` (the core port-sharing
 code), and a variety of low-level drivers that actually do the port
 accesses.  Each low-level driver handles a particular style of port
 (PC, Amiga, and so on).
@@ -70,14 +72,14 @@ into global functions and port functions.
 The global functions are mostly for communicating between the device
 driver and the parport subsystem: acquiring a list of available ports,
 claiming a port for exclusive use, and so on.  They also include
-'generic' functions for doing standard things that will work on any
+``generic`` functions for doing standard things that will work on any
 IEEE 1284-capable architecture.
 
 The port functions are provided by the low-level drivers, although the
-core parport module provides generic 'defaults' for some routines.
+core parport module provides generic ``defaults`` for some routines.
 The port functions can be split into three groups: SPP, EPP, and ECP.
 
-SPP (Standard Parallel Port) functions modify so-called 'SPP'
+SPP (Standard Parallel Port) functions modify so-called ``SPP``
 registers: data, status, and control.  The hardware may not actually
 have registers exactly like that, but the PC does and this interface is
 modelled after common PC implementations.  Other low-level drivers may
@@ -95,58 +97,63 @@ to cope with peripherals that only tenuously support IEEE 1284, a
 low-level driver specific function is provided, for altering 'fudge
 factors'.
 
-GLOBAL FUNCTIONS
-----------------
+Global functions
+================
 
 parport_register_driver - register a device driver with parport
------------------------
+---------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-struct parport_driver {
-	const char *name;
-	void (*attach) (struct parport *);
-	void (*detach) (struct parport *);
-	struct parport_driver *next;
-};
-int parport_register_driver (struct parport_driver *driver);
+	struct parport_driver {
+		const char *name;
+		void (*attach) (struct parport *);
+		void (*detach) (struct parport *);
+		struct parport_driver *next;
+	};
+	int parport_register_driver (struct parport_driver *driver);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 In order to be notified about parallel ports when they are detected,
 parport_register_driver should be called.  Your driver will
 immediately be notified of all ports that have already been detected,
 and of each new port as low-level drivers are loaded.
 
-A 'struct parport_driver' contains the textual name of your driver,
+A ``struct parport_driver`` contains the textual name of your driver,
 a pointer to a function to handle new ports, and a pointer to a
 function to handle ports going away due to a low-level driver
 unloading.  Ports will only be detached if they are not being used
 (i.e. there are no devices registered on them).
 
-The visible parts of the 'struct parport *' argument given to
-attach/detach are:
-
-struct parport
-{
-	struct parport *next; /* next parport in list */
-	const char *name;     /* port's name */
-	unsigned int modes;   /* bitfield of hardware modes */
-	struct parport_device_info probe_info;
-			      /* IEEE1284 info */
-	int number;           /* parport index */
-	struct parport_operations *ops;
-	...
-};
+The visible parts of the ``struct parport *`` argument given to
+attach/detach are::
+
+	struct parport
+	{
+		struct parport *next; /* next parport in list */
+		const char *name;     /* port's name */
+		unsigned int modes;   /* bitfield of hardware modes */
+		struct parport_device_info probe_info;
+				/* IEEE1284 info */
+		int number;           /* parport index */
+		struct parport_operations *ops;
+		...
+	};
 
 There are other members of the structure, but they should not be
 touched.
 
-The 'modes' member summarises the capabilities of the underlying
+The ``modes`` member summarises the capabilities of the underlying
 hardware.  It consists of flags which may be bitwise-ored together:
 
+  ============================= ===============================================
   PARPORT_MODE_PCSPP		IBM PC registers are available,
 				i.e. functions that act on data,
 				control and status registers are
@@ -169,297 +176,351 @@ hardware.  It consists of flags which may be bitwise-ored together:
 				GFP_DMA flag with kmalloc) to the
 				low-level driver in order to take
 				advantage of it.
+  ============================= ===============================================
 
-There may be other flags in 'modes' as well.
+There may be other flags in ``modes`` as well.
 
-The contents of 'modes' is advisory only.  For example, if the
-hardware is capable of DMA, and PARPORT_MODE_DMA is in 'modes', it
+The contents of ``modes`` is advisory only.  For example, if the
+hardware is capable of DMA, and PARPORT_MODE_DMA is in ``modes``, it
 doesn't necessarily mean that DMA will always be used when possible.
 Similarly, hardware that is capable of assisting ECP transfers won't
 necessarily be used.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 Zero on success, otherwise an error code.
 
 ERRORS
+^^^^^^
 
 None. (Can it fail? Why return int?)
 
 EXAMPLE
+^^^^^^^
 
-static void lp_attach (struct parport *port)
-{
-	...
-	private = kmalloc (...);
-	dev[count++] = parport_register_device (...);
-	...
-}
+::
 
-static void lp_detach (struct parport *port)
-{
-	...
-}
+	static void lp_attach (struct parport *port)
+	{
+		...
+		private = kmalloc (...);
+		dev[count++] = parport_register_device (...);
+		...
+	}
 
-static struct parport_driver lp_driver = {
-	"lp",
-	lp_attach,
-	lp_detach,
-	NULL /* always put NULL here */
-};
+	static void lp_detach (struct parport *port)
+	{
+		...
+	}
 
-int lp_init (void)
-{
-	...
-	if (parport_register_driver (&lp_driver)) {
-		/* Failed; nothing we can do. */
-		return -EIO;
+	static struct parport_driver lp_driver = {
+		"lp",
+		lp_attach,
+		lp_detach,
+		NULL /* always put NULL here */
+	};
+
+	int lp_init (void)
+	{
+		...
+		if (parport_register_driver (&lp_driver)) {
+			/* Failed; nothing we can do. */
+			return -EIO;
+		}
+		...
 	}
-	...
-}
+
 
 SEE ALSO
+^^^^^^^^
 
 parport_unregister_driver, parport_register_device, parport_enumerate
-
+
+
+
 parport_unregister_driver - tell parport to forget about this driver
--------------------------
+--------------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_driver {
-	const char *name;
-	void (*attach) (struct parport *);
-	void (*detach) (struct parport *);
-	struct parport_driver *next;
-};
-void parport_unregister_driver (struct parport_driver *driver);
+	#include <linux/parport.h>
+
+	struct parport_driver {
+		const char *name;
+		void (*attach) (struct parport *);
+		void (*detach) (struct parport *);
+		struct parport_driver *next;
+	};
+	void parport_unregister_driver (struct parport_driver *driver);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 This tells parport not to notify the device driver of new ports or of
 ports going away.  Registered devices belonging to that driver are NOT
 unregistered: parport_unregister_device must be used for each one.
 
 EXAMPLE
+^^^^^^^
 
-void cleanup_module (void)
-{
-	...
-	/* Stop notifications. */
-	parport_unregister_driver (&lp_driver);
+::
 
-	/* Unregister devices. */
-	for (i = 0; i < NUM_DEVS; i++)
-		parport_unregister_device (dev[i]);
-	...
-}
+	void cleanup_module (void)
+	{
+		...
+		/* Stop notifications. */
+		parport_unregister_driver (&lp_driver);
+
+		/* Unregister devices. */
+		for (i = 0; i < NUM_DEVS; i++)
+			parport_unregister_device (dev[i]);
+		...
+	}
 
 SEE ALSO
+^^^^^^^^
 
 parport_register_driver, parport_enumerate
-
+
+
+
 parport_enumerate - retrieve a list of parallel ports (DEPRECATED)
------------------
+------------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport *parport_enumerate (void);
+	#include <linux/parport.h>
+
+	struct parport *parport_enumerate (void);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Retrieve the first of a list of valid parallel ports for this machine.
-Successive parallel ports can be found using the 'struct parport
-*next' element of the 'struct parport *' that is returned.  If 'next'
+Successive parallel ports can be found using the ``struct parport
+*next`` element of the ``struct parport *`` that is returned.  If ``next``
 is NULL, there are no more parallel ports in the list.  The number of
 ports in the list will not exceed PARPORT_MAX.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
-A 'struct parport *' describing a valid parallel port for the machine,
+A ``struct parport *`` describing a valid parallel port for the machine,
 or NULL if there are none.
 
 ERRORS
+^^^^^^
 
 This function can return NULL to indicate that there are no parallel
 ports to use.
 
 EXAMPLE
+^^^^^^^
+
+::
 
-int detect_device (void)
-{
-	struct parport *port;
+	int detect_device (void)
+	{
+		struct parport *port;
+
+		for (port = parport_enumerate ();
+		port != NULL;
+		port = port->next) {
+			/* Try to detect a device on the port... */
+			...
+		}
+		}
 
-	for (port = parport_enumerate ();
-	     port != NULL;
-	     port = port->next) {
-		/* Try to detect a device on the port... */
 		...
-             }
 	}
 
-	...
-}
-
 NOTES
+^^^^^
 
 parport_enumerate is deprecated; parport_register_driver should be
 used instead.
 
 SEE ALSO
+^^^^^^^^
 
 parport_register_driver, parport_unregister_driver
-
+
+
+
 parport_register_device - register to use a port
------------------------
+------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-typedef int (*preempt_func) (void *handle);
-typedef void (*wakeup_func) (void *handle);
-typedef int (*irq_func) (int irq, void *handle, struct pt_regs *);
+	#include <linux/parport.h>
 
-struct pardevice *parport_register_device(struct parport *port,
-                                          const char *name,
-                                          preempt_func preempt,
-                                          wakeup_func wakeup,
-                                          irq_func irq,
-                                          int flags,
-                                          void *handle);
+	typedef int (*preempt_func) (void *handle);
+	typedef void (*wakeup_func) (void *handle);
+	typedef int (*irq_func) (int irq, void *handle, struct pt_regs *);
+
+	struct pardevice *parport_register_device(struct parport *port,
+						  const char *name,
+						  preempt_func preempt,
+						  wakeup_func wakeup,
+						  irq_func irq,
+						  int flags,
+						  void *handle);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Use this function to register your device driver on a parallel port
-('port').  Once you have done that, you will be able to use
+(``port``).  Once you have done that, you will be able to use
 parport_claim and parport_release in order to use the port.
 
-The ('name') argument is the name of the device that appears in /proc
+The (``name``) argument is the name of the device that appears in /proc
 filesystem. The string must be valid for the whole lifetime of the
 device (until parport_unregister_device is called).
 
 This function will register three callbacks into your driver:
-'preempt', 'wakeup' and 'irq'.  Each of these may be NULL in order to
+``preempt``, ``wakeup`` and ``irq``.  Each of these may be NULL in order to
 indicate that you do not want a callback.
 
-When the 'preempt' function is called, it is because another driver
-wishes to use the parallel port.  The 'preempt' function should return
+When the ``preempt`` function is called, it is because another driver
+wishes to use the parallel port.  The ``preempt`` function should return
 non-zero if the parallel port cannot be released yet -- if zero is
 returned, the port is lost to another driver and the port must be
 re-claimed before use.
 
-The 'wakeup' function is called once another driver has released the
+The ``wakeup`` function is called once another driver has released the
 port and no other driver has yet claimed it.  You can claim the
-parallel port from within the 'wakeup' function (in which case the
+parallel port from within the ``wakeup`` function (in which case the
 claim is guaranteed to succeed), or choose not to if you don't need it
 now.
 
 If an interrupt occurs on the parallel port your driver has claimed,
-the 'irq' function will be called. (Write something about shared
+the ``irq`` function will be called. (Write something about shared
 interrupts here.)
 
-The 'handle' is a pointer to driver-specific data, and is passed to
+The ``handle`` is a pointer to driver-specific data, and is passed to
 the callback functions.
 
-'flags' may be a bitwise combination of the following flags:
+``flags`` may be a bitwise combination of the following flags:
 
+  ===================== =================================================
         Flag            Meaning
+  ===================== =================================================
   PARPORT_DEV_EXCL	The device cannot share the parallel port at all.
 			Use this only when absolutely necessary.
+  ===================== =================================================
 
 The typedefs are not actually defined -- they are only shown in order
 to make the function prototype more readable.
 
-The visible parts of the returned 'struct pardevice' are:
+The visible parts of the returned ``struct pardevice`` are::
 
-struct pardevice {
-	struct parport *port;	/* Associated port */
-	void *private;		/* Device driver's 'handle' */
-	...
-};
+	struct pardevice {
+		struct parport *port;	/* Associated port */
+		void *private;		/* Device driver's 'handle' */
+		...
+	};
 
 RETURN VALUE
+^^^^^^^^^^^^
 
-A 'struct pardevice *': a handle to the registered parallel port
+A ``struct pardevice *``: a handle to the registered parallel port
 device that can be used for parport_claim, parport_release, etc.
 
 ERRORS
+^^^^^^
 
 A return value of NULL indicates that there was a problem registering
 a device on that port.
 
 EXAMPLE
+^^^^^^^
+
+::
+
+	static int preempt (void *handle)
+	{
+		if (busy_right_now)
+			return 1;
+
+		must_reclaim_port = 1;
+		return 0;
+	}
+
+	static void wakeup (void *handle)
+	{
+		struct toaster *private = handle;
+		struct pardevice *dev = private->dev;
+		if (!dev) return; /* avoid races */
+
+		if (want_port)
+			parport_claim (dev);
+	}
+
+	static int toaster_detect (struct toaster *private, struct parport *port)
+	{
+		private->dev = parport_register_device (port, "toaster", preempt,
+							wakeup, NULL, 0,
+							private);
+		if (!private->dev)
+			/* Couldn't register with parport. */
+			return -EIO;
 
-static int preempt (void *handle)
-{
-	if (busy_right_now)
-		return 1;
-
-	must_reclaim_port = 1;
-	return 0;
-}
-
-static void wakeup (void *handle)
-{
-	struct toaster *private = handle;
-	struct pardevice *dev = private->dev;
-	if (!dev) return; /* avoid races */
-
-	if (want_port)
-		parport_claim (dev);
-}
-
-static int toaster_detect (struct toaster *private, struct parport *port)
-{
-	private->dev = parport_register_device (port, "toaster", preempt,
-					        wakeup, NULL, 0,
-						private);
-	if (!private->dev)
-		/* Couldn't register with parport. */
-		return -EIO;
-
-	must_reclaim_port = 0;
-	busy_right_now = 1;
-	parport_claim_or_block (private->dev);
-	...
-	/* Don't need the port while the toaster warms up. */
-	busy_right_now = 0;
-	...
-	busy_right_now = 1;
-	if (must_reclaim_port) {
-		parport_claim_or_block (private->dev);
 		must_reclaim_port = 0;
+		busy_right_now = 1;
+		parport_claim_or_block (private->dev);
+		...
+		/* Don't need the port while the toaster warms up. */
+		busy_right_now = 0;
+		...
+		busy_right_now = 1;
+		if (must_reclaim_port) {
+			parport_claim_or_block (private->dev);
+			must_reclaim_port = 0;
+		}
+		...
 	}
-	...
-}
 
 SEE ALSO
+^^^^^^^^
 
 parport_unregister_device, parport_claim
+
+
 
 parport_unregister_device - finish using a port
--------------------------
+-----------------------------------------------
 
 SYNPOPSIS
 
-#include <linux/parport.h>
+::
+
+	#include <linux/parport.h>
 
-void parport_unregister_device (struct pardevice *dev);
+	void parport_unregister_device (struct pardevice *dev);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 This function is the opposite of parport_register_device.  After using
-parport_unregister_device, 'dev' is no longer a valid device handle.
+parport_unregister_device, ``dev`` is no longer a valid device handle.
 
 You should not unregister a device that is currently claimed, although
 if you do it will be released automatically.
 
 EXAMPLE
+^^^^^^^
+
+::
 
 	...
 	kfree (dev->private); /* before we lose the pointer */
@@ -467,460 +528,602 @@ EXAMPLE
 	...
 
 SEE ALSO
+^^^^^^^^
+
 
 parport_unregister_driver
 
 parport_claim, parport_claim_or_block - claim the parallel port for a device
--------------------------------------
+----------------------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-int parport_claim (struct pardevice *dev);
-int parport_claim_or_block (struct pardevice *dev);
+	int parport_claim (struct pardevice *dev);
+	int parport_claim_or_block (struct pardevice *dev);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 These functions attempt to gain control of the parallel port on which
-'dev' is registered.  'parport_claim' does not block, but
-'parport_claim_or_block' may do. (Put something here about blocking
+``dev`` is registered.  ``parport_claim`` does not block, but
+``parport_claim_or_block`` may do. (Put something here about blocking
 interruptibly or non-interruptibly.)
 
 You should not try to claim a port that you have already claimed.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 A return value of zero indicates that the port was successfully
 claimed, and the caller now has possession of the parallel port.
 
-If 'parport_claim_or_block' blocks before returning successfully, the
+If ``parport_claim_or_block`` blocks before returning successfully, the
 return value is positive.
 
 ERRORS
+^^^^^^
 
+========== ==========================================================
   -EAGAIN  The port is unavailable at the moment, but another attempt
            to claim it may succeed.
+========== ==========================================================
 
 SEE ALSO
+^^^^^^^^
+
 
 parport_release
 
 parport_release - release the parallel port
----------------
+-------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-void parport_release (struct pardevice *dev);
+	void parport_release (struct pardevice *dev);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Once a parallel port device has been claimed, it can be released using
-'parport_release'.  It cannot fail, but you should not release a
+``parport_release``.  It cannot fail, but you should not release a
 device that you do not have possession of.
 
 EXAMPLE
+^^^^^^^
 
-static size_t write (struct pardevice *dev, const void *buf,
-		     size_t len)
-{
-	...
-	written = dev->port->ops->write_ecp_data (dev->port, buf,
-						  len);
-	parport_release (dev);
-	...
-}
+::
+
+	static size_t write (struct pardevice *dev, const void *buf,
+			size_t len)
+	{
+		...
+		written = dev->port->ops->write_ecp_data (dev->port, buf,
+							len);
+		parport_release (dev);
+		...
+	}
 
 
 SEE ALSO
+^^^^^^^^
 
 change_mode, parport_claim, parport_claim_or_block, parport_yield
-
+
+
+
 parport_yield, parport_yield_blocking - temporarily release a parallel port
--------------------------------------
+---------------------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-int parport_yield (struct pardevice *dev)
-int parport_yield_blocking (struct pardevice *dev);
+	int parport_yield (struct pardevice *dev)
+	int parport_yield_blocking (struct pardevice *dev);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 When a driver has control of a parallel port, it may allow another
-driver to temporarily 'borrow' it.  'parport_yield' does not block;
-'parport_yield_blocking' may do.
+driver to temporarily ``borrow`` it.  ``parport_yield`` does not block;
+``parport_yield_blocking`` may do.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 A return value of zero indicates that the caller still owns the port
 and the call did not block.
 
-A positive return value from 'parport_yield_blocking' indicates that
+A positive return value from ``parport_yield_blocking`` indicates that
 the caller still owns the port and the call blocked.
 
 A return value of -EAGAIN indicates that the caller no longer owns the
 port, and it must be re-claimed before use.
 
 ERRORS
+^^^^^^
 
+========= ==========================================================
   -EAGAIN  Ownership of the parallel port was given away.
+========= ==========================================================
 
 SEE ALSO
+^^^^^^^^
 
 parport_release
+
+
 
 parport_wait_peripheral - wait for status lines, up to 35ms
------------------------
+-----------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-int parport_wait_peripheral (struct parport *port,
-			     unsigned char mask,
-			     unsigned char val);
+	int parport_wait_peripheral (struct parport *port,
+				     unsigned char mask,
+				     unsigned char val);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Wait for the status lines in mask to match the values in val.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
+======== ==========================================================
  -EINTR  a signal is pending
       0  the status lines in mask have values in val
       1  timed out while waiting (35ms elapsed)
+======== ==========================================================
 
 SEE ALSO
+^^^^^^^^
 
 parport_poll_peripheral
+
+
 
 parport_poll_peripheral - wait for status lines, in usec
------------------------
+--------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-int parport_poll_peripheral (struct parport *port,
-			     unsigned char mask,
-			     unsigned char val,
-			     int usec);
+	int parport_poll_peripheral (struct parport *port,
+				     unsigned char mask,
+				     unsigned char val,
+				     int usec);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Wait for the status lines in mask to match the values in val.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
+======== ==========================================================
  -EINTR  a signal is pending
       0  the status lines in mask have values in val
       1  timed out while waiting (usec microseconds have elapsed)
+======== ==========================================================
 
 SEE ALSO
+^^^^^^^^
 
 parport_wait_peripheral
-
+
+
+
 parport_wait_event - wait for an event on a port
-------------------
+------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-int parport_wait_event (struct parport *port, signed long timeout)
+	#include <linux/parport.h>
+
+	int parport_wait_event (struct parport *port, signed long timeout)
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Wait for an event (e.g. interrupt) on a port.  The timeout is in
 jiffies.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
+======= ==========================================================
       0  success
      <0  error (exit as soon as possible)
      >0  timed out
-
+======= ==========================================================
+
 parport_negotiate - perform IEEE 1284 negotiation
------------------
+-------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-int parport_negotiate (struct parport *, int mode);
+	int parport_negotiate (struct parport *, int mode);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Perform IEEE 1284 negotiation.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
+======= ==========================================================
      0  handshake OK; IEEE 1284 peripheral and mode available
     -1  handshake failed; peripheral not compliant (or none present)
      1  handshake OK; IEEE 1284 peripheral present but mode not
         available
+======= ==========================================================
 
 SEE ALSO
+^^^^^^^^
 
 parport_read, parport_write
-
+
+
+
 parport_read - read data from device
-------------
+------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-ssize_t parport_read (struct parport *, void *buf, size_t len);
+	ssize_t parport_read (struct parport *, void *buf, size_t len);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Read data from device in current IEEE 1284 transfer mode.  This only
 works for modes that support reverse data transfer.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 If negative, an error code; otherwise the number of bytes transferred.
 
 SEE ALSO
+^^^^^^^^
 
 parport_write, parport_negotiate
-
+
+
+
 parport_write - write data to device
--------------
+------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-ssize_t parport_write (struct parport *, const void *buf, size_t len);
+	ssize_t parport_write (struct parport *, const void *buf, size_t len);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Write data to device in current IEEE 1284 transfer mode.  This only
 works for modes that support forward data transfer.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 If negative, an error code; otherwise the number of bytes transferred.
 
 SEE ALSO
+^^^^^^^^
 
 parport_read, parport_negotiate
+
+
 
 parport_open - register device for particular device number
-------------
+-----------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct pardevice *parport_open (int devnum, const char *name,
-			        int (*pf) (void *),
-				void (*kf) (void *),
-				void (*irqf) (int, void *,
-					      struct pt_regs *),
-				int flags, void *handle);
+	#include <linux/parport.h>
+
+	struct pardevice *parport_open (int devnum, const char *name,
+				        int (*pf) (void *),
+					void (*kf) (void *),
+					void (*irqf) (int, void *,
+						      struct pt_regs *),
+					int flags, void *handle);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 This is like parport_register_device but takes a device number instead
 of a pointer to a struct parport.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 See parport_register_device.  If no device is associated with devnum,
 NULL is returned.
 
 SEE ALSO
+^^^^^^^^
 
 parport_register_device
-
+
+
+
 parport_close - unregister device for particular device number
--------------
+--------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-void parport_close (struct pardevice *dev);
+	void parport_close (struct pardevice *dev);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 This is the equivalent of parport_unregister_device for parport_open.
 
 SEE ALSO
+^^^^^^^^
 
 parport_unregister_device, parport_open
-
+
+
+
 parport_device_id - obtain IEEE 1284 Device ID
------------------
+----------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-ssize_t parport_device_id (int devnum, char *buffer, size_t len);
+	ssize_t parport_device_id (int devnum, char *buffer, size_t len);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Obtains the IEEE 1284 Device ID associated with a given device.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 If negative, an error code; otherwise, the number of bytes of buffer
 that contain the device ID.  The format of the device ID is as
-follows:
+follows::
 
-[length][ID]
+	[length][ID]
 
 The first two bytes indicate the inclusive length of the entire Device
 ID, and are in big-endian order.  The ID is a sequence of pairs of the
-form:
+form::
 
-key:value;
+	key:value;
 
 NOTES
+^^^^^
 
 Many devices have ill-formed IEEE 1284 Device IDs.
 
 SEE ALSO
+^^^^^^^^
 
 parport_find_class, parport_find_device
-
+
+
+
 parport_device_coords - convert device number to device coordinates
-------------------
+-------------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-int parport_device_coords (int devnum, int *parport, int *mux,
-			   int *daisy);
+	int parport_device_coords (int devnum, int *parport, int *mux,
+				   int *daisy);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Convert between device number (zero-based) and device coordinates
 (port, multiplexor, daisy chain address).
 
 RETURN VALUE
+^^^^^^^^^^^^
 
-Zero on success, in which case the coordinates are (*parport, *mux,
-*daisy).
+Zero on success, in which case the coordinates are (``*parport``, ``*mux``,
+``*daisy``).
 
 SEE ALSO
+^^^^^^^^
 
 parport_open, parport_device_id
-
+
+
+
 parport_find_class - find a device by its class
-------------------
+-----------------------------------------------
 
 SYNOPSIS
-
-#include <linux/parport.h>
-
-typedef enum {
-	PARPORT_CLASS_LEGACY = 0,       /* Non-IEEE1284 device */
-	PARPORT_CLASS_PRINTER,
-	PARPORT_CLASS_MODEM,
-	PARPORT_CLASS_NET,
-	PARPORT_CLASS_HDC,              /* Hard disk controller */
-	PARPORT_CLASS_PCMCIA,
-	PARPORT_CLASS_MEDIA,            /* Multimedia device */
-	PARPORT_CLASS_FDC,              /* Floppy disk controller */
-	PARPORT_CLASS_PORTS,
-	PARPORT_CLASS_SCANNER,
-	PARPORT_CLASS_DIGCAM,
-	PARPORT_CLASS_OTHER,            /* Anything else */
-	PARPORT_CLASS_UNSPEC,           /* No CLS field in ID */
-	PARPORT_CLASS_SCSIADAPTER
-} parport_device_class;
-
-int parport_find_class (parport_device_class cls, int from);
+^^^^^^^^
+
+::
+
+	#include <linux/parport.h>
+
+	typedef enum {
+		PARPORT_CLASS_LEGACY = 0,       /* Non-IEEE1284 device */
+		PARPORT_CLASS_PRINTER,
+		PARPORT_CLASS_MODEM,
+		PARPORT_CLASS_NET,
+		PARPORT_CLASS_HDC,              /* Hard disk controller */
+		PARPORT_CLASS_PCMCIA,
+		PARPORT_CLASS_MEDIA,            /* Multimedia device */
+		PARPORT_CLASS_FDC,              /* Floppy disk controller */
+		PARPORT_CLASS_PORTS,
+		PARPORT_CLASS_SCANNER,
+		PARPORT_CLASS_DIGCAM,
+		PARPORT_CLASS_OTHER,            /* Anything else */
+		PARPORT_CLASS_UNSPEC,           /* No CLS field in ID */
+		PARPORT_CLASS_SCSIADAPTER
+	} parport_device_class;
+
+	int parport_find_class (parport_device_class cls, int from);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Find a device by class.  The search starts from device number from+1.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The device number of the next device in that class, or -1 if no such
 device exists.
 
 NOTES
+^^^^^
 
-Example usage:
+Example usage::
 
-int devnum = -1;
-while ((devnum = parport_find_class (PARPORT_CLASS_DIGCAM, devnum)) != -1) {
-    struct pardevice *dev = parport_open (devnum, ...);
-    ...
-}
+	int devnum = -1;
+	while ((devnum = parport_find_class (PARPORT_CLASS_DIGCAM, devnum)) != -1) {
+		struct pardevice *dev = parport_open (devnum, ...);
+		...
+	}
 
 SEE ALSO
+^^^^^^^^
 
 parport_find_device, parport_open, parport_device_id
-
+
+
+
 parport_find_device - find a device by its class
-------------------
+------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-int parport_find_device (const char *mfg, const char *mdl, int from);
+	#include <linux/parport.h>
+
+	int parport_find_device (const char *mfg, const char *mdl, int from);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Find a device by vendor and model.  The search starts from device
 number from+1.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The device number of the next device matching the specifications, or
 -1 if no such device exists.
 
 NOTES
+^^^^^
 
-Example usage:
+Example usage::
 
-int devnum = -1;
-while ((devnum = parport_find_device ("IOMEGA", "ZIP+", devnum)) != -1) {
-    struct pardevice *dev = parport_open (devnum, ...);
-    ...
-}
+	int devnum = -1;
+	while ((devnum = parport_find_device ("IOMEGA", "ZIP+", devnum)) != -1) {
+		struct pardevice *dev = parport_open (devnum, ...);
+		...
+	}
 
 SEE ALSO
+^^^^^^^^
 
 parport_find_class, parport_open, parport_device_id
+
+
 
 parport_set_timeout - set the inactivity timeout
--------------------
+------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
+
+::
 
-#include <linux/parport.h>
+	#include <linux/parport.h>
 
-long parport_set_timeout (struct pardevice *dev, long inactivity);
+	long parport_set_timeout (struct pardevice *dev, long inactivity);
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Set the inactivity timeout, in jiffies, for a registered device.  The
 previous timeout is returned.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The previous timeout, in jiffies.
 
 NOTES
+^^^^^
 
 Some of the port->ops functions for a parport may take time, owing to
 delays at the peripheral.  After the peripheral has not responded for
-'inactivity' jiffies, a timeout will occur and the blocking function
+``inactivity`` jiffies, a timeout will occur and the blocking function
 will return.
 
 A timeout of 0 jiffies is a special case: the function must do as much
@@ -932,29 +1135,37 @@ Once set for a registered device, the timeout will remain at the set
 value until set again.
 
 SEE ALSO
+^^^^^^^^
 
 port->ops->xxx_read/write_yyy
-
+
+
+
+
 PORT FUNCTIONS
---------------
+==============
 
 The functions in the port->ops structure (struct parport_operations)
 are provided by the low-level driver responsible for that port.
 
 port->ops->read_data - read the data register
---------------------
+---------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	unsigned char (*read_data) (struct parport *port);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*read_data) (struct parport *port);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 If port->modes contains the PARPORT_MODE_TRISTATE flag and the
 PARPORT_CONTROL_DIRECTION bit in the control register is set, this
@@ -964,45 +1175,59 @@ not set, the return value _may_ be the last value written to the data
 register.  Otherwise the return value is undefined.
 
 SEE ALSO
+^^^^^^^^
 
 write_data, read_status, write_control
+
+
 
 port->ops->write_data - write the data register
----------------------
+-----------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	void (*write_data) (struct parport *port, unsigned char d);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*write_data) (struct parport *port, unsigned char d);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Writes to the data register.  May have side-effects (a STROBE pulse,
 for instance).
 
 SEE ALSO
+^^^^^^^^
 
 read_data, read_status, write_control
+
+
 
 port->ops->read_status - read the status register
-----------------------
+-------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	unsigned char (*read_status) (struct parport *port);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*read_status) (struct parport *port);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Reads from the status register.  This is a bitmask:
 
@@ -1015,76 +1240,98 @@ Reads from the status register.  This is a bitmask:
 There may be other bits set.
 
 SEE ALSO
+^^^^^^^^
 
 read_data, write_data, write_control
+
+
 
 port->ops->read_control - read the control register
------------------------
+---------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	unsigned char (*read_control) (struct parport *port);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*read_control) (struct parport *port);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Returns the last value written to the control register (either from
 write_control or frob_control).  No port access is performed.
 
 SEE ALSO
+^^^^^^^^
 
 read_data, write_data, read_status, write_control
+
+
 
 port->ops->write_control - write the control register
-------------------------
+-----------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	void (*write_control) (struct parport *port, unsigned char s);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*write_control) (struct parport *port, unsigned char s);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
-Writes to the control register. This is a bitmask:
-                          _______
-- PARPORT_CONTROL_STROBE (nStrobe)
-                          _______
-- PARPORT_CONTROL_AUTOFD (nAutoFd)
-                        _____
-- PARPORT_CONTROL_INIT (nInit)
-                          _________
-- PARPORT_CONTROL_SELECT (nSelectIn)
+Writes to the control register. This is a bitmask::
+
+				  _______
+	- PARPORT_CONTROL_STROBE (nStrobe)
+				  _______
+	- PARPORT_CONTROL_AUTOFD (nAutoFd)
+				_____
+	- PARPORT_CONTROL_INIT (nInit)
+				  _________
+	- PARPORT_CONTROL_SELECT (nSelectIn)
 
 SEE ALSO
+^^^^^^^^
 
 read_data, write_data, read_status, frob_control
+
+
 
 port->ops->frob_control - write control register bits
------------------------
+-----------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	unsigned char (*frob_control) (struct parport *port,
-				       unsigned char mask,
-				       unsigned char val);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		unsigned char (*frob_control) (struct parport *port,
+					unsigned char mask,
+					unsigned char val);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 This is equivalent to reading from the control register, masking out
 the bits in mask, exclusive-or'ing with the bits in val, and writing
@@ -1095,23 +1342,30 @@ of its contents is maintained, so frob_control is in fact only one
 port access.
 
 SEE ALSO
+^^^^^^^^
 
 read_data, write_data, read_status, write_control
+
+
 
 port->ops->enable_irq - enable interrupt generation
----------------------
+---------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	void (*enable_irq) (struct parport *port);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*enable_irq) (struct parport *port);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 The parallel port hardware is instructed to generate interrupts at
 appropriate moments, although those moments are
@@ -1119,353 +1373,460 @@ architecture-specific.  For the PC architecture, interrupts are
 commonly generated on the rising edge of nAck.
 
 SEE ALSO
+^^^^^^^^
 
 disable_irq
+
+
 
 port->ops->disable_irq - disable interrupt generation
-----------------------
+-----------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	void (*disable_irq) (struct parport *port);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*disable_irq) (struct parport *port);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 The parallel port hardware is instructed not to generate interrupts.
 The interrupt itself is not masked.
 
 SEE ALSO
+^^^^^^^^
 
 enable_irq
 
+
+
 port->ops->data_forward - enable data drivers
------------------------
+---------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	void (*data_forward) (struct parport *port);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*data_forward) (struct parport *port);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Enables the data line drivers, for 8-bit host-to-peripheral
 communications.
 
 SEE ALSO
+^^^^^^^^
 
 data_reverse
+
+
 
 port->ops->data_reverse - tristate the buffer
------------------------
+---------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	void (*data_reverse) (struct parport *port);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		void (*data_reverse) (struct parport *port);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Places the data bus in a high impedance state, if port->modes has the
 PARPORT_MODE_TRISTATE bit set.
 
 SEE ALSO
+^^^^^^^^
 
 data_forward
-
+
+
+
 port->ops->epp_write_data - write EPP data
--------------------------
+------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*epp_write_data) (struct parport *port, const void *buf,
-				  size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_write_data) (struct parport *port, const void *buf,
+					size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Writes data in EPP mode, and returns the number of bytes written.
 
-The 'flags' parameter may be one or more of the following,
+The ``flags`` parameter may be one or more of the following,
 bitwise-or'ed together:
 
+======================= =================================================
 PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
 			32-bit registers.  However, if a transfer
 			times out, the return value may be unreliable.
+======================= =================================================
 
 SEE ALSO
+^^^^^^^^
 
 epp_read_data, epp_write_addr, epp_read_addr
+
+
 
 port->ops->epp_read_data - read EPP data
-------------------------
+----------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*epp_read_data) (struct parport *port, void *buf,
-				 size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_read_data) (struct parport *port, void *buf,
+					size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Reads data in EPP mode, and returns the number of bytes read.
 
-The 'flags' parameter may be one or more of the following,
+The ``flags`` parameter may be one or more of the following,
 bitwise-or'ed together:
 
+======================= =================================================
 PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
 			32-bit registers.  However, if a transfer
 			times out, the return value may be unreliable.
+======================= =================================================
 
 SEE ALSO
+^^^^^^^^
 
 epp_write_data, epp_write_addr, epp_read_addr
-
+
+
+
 port->ops->epp_write_addr - write EPP address
--------------------------
+---------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*epp_write_addr) (struct parport *port,
-				  const void *buf, size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_write_addr) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Writes EPP addresses (8 bits each), and returns the number written.
 
-The 'flags' parameter may be one or more of the following,
+The ``flags`` parameter may be one or more of the following,
 bitwise-or'ed together:
 
+======================= =================================================
 PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
 			32-bit registers.  However, if a transfer
 			times out, the return value may be unreliable.
+======================= =================================================
 
 (Does PARPORT_EPP_FAST make sense for this function?)
 
 SEE ALSO
+^^^^^^^^
 
 epp_write_data, epp_read_data, epp_read_addr
+
+
 
 port->ops->epp_read_addr - read EPP address
-------------------------
+-------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*epp_read_addr) (struct parport *port, void *buf,
-				 size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*epp_read_addr) (struct parport *port, void *buf,
+					size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
 Reads EPP addresses (8 bits each), and returns the number read.
 
-The 'flags' parameter may be one or more of the following,
+The ``flags`` parameter may be one or more of the following,
 bitwise-or'ed together:
 
+======================= =================================================
 PARPORT_EPP_FAST	Use fast transfers. Some chips provide 16-bit and
 			32-bit registers.  However, if a transfer
 			times out, the return value may be unreliable.
+======================= =================================================
 
 (Does PARPORT_EPP_FAST make sense for this function?)
 
 SEE ALSO
+^^^^^^^^
 
 epp_write_data, epp_read_data, epp_write_addr
+
+
 
 port->ops->ecp_write_data - write a block of ECP data
--------------------------
+-----------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*ecp_write_data) (struct parport *port,
-				  const void *buf, size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*ecp_write_data) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
-Writes a block of ECP data.  The 'flags' parameter is ignored.
+Writes a block of ECP data.  The ``flags`` parameter is ignored.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The number of bytes written.
 
 SEE ALSO
+^^^^^^^^
 
 ecp_read_data, ecp_write_addr
 
+
+
 port->ops->ecp_read_data - read a block of ECP data
-------------------------
+---------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*ecp_read_data) (struct parport *port,
-				 void *buf, size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*ecp_read_data) (struct parport *port,
+					void *buf, size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
-Reads a block of ECP data.  The 'flags' parameter is ignored.
+Reads a block of ECP data.  The ``flags`` parameter is ignored.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The number of bytes read.  NB. There may be more unread data in a
 FIFO.  Is there a way of stunning the FIFO to prevent this?
 
 SEE ALSO
+^^^^^^^^
 
 ecp_write_block, ecp_write_addr
-
+
+
+
 port->ops->ecp_write_addr - write a block of ECP addresses
--------------------------
+----------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*ecp_write_addr) (struct parport *port,
-				  const void *buf, size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*ecp_write_addr) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
-Writes a block of ECP addresses.  The 'flags' parameter is ignored.
+Writes a block of ECP addresses.  The ``flags`` parameter is ignored.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The number of bytes written.
 
 NOTES
+^^^^^
 
 This may use a FIFO, and if so shall not return until the FIFO is empty.
 
 SEE ALSO
+^^^^^^^^
 
 ecp_read_data, ecp_write_data
-
+
+
+
 port->ops->nibble_read_data - read a block of data in nibble mode
----------------------------
+-----------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*nibble_read_data) (struct parport *port,
-				    void *buf, size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*nibble_read_data) (struct parport *port,
+					void *buf, size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
-Reads a block of data in nibble mode.  The 'flags' parameter is ignored.
+Reads a block of data in nibble mode.  The ``flags`` parameter is ignored.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The number of whole bytes read.
 
 SEE ALSO
+^^^^^^^^
 
 byte_read_data, compat_write_data
+
+
 
 port->ops->byte_read_data - read a block of data in byte mode
--------------------------
+-------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*byte_read_data) (struct parport *port,
-				  void *buf, size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*byte_read_data) (struct parport *port,
+					void *buf, size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
-Reads a block of data in byte mode.  The 'flags' parameter is ignored.
+Reads a block of data in byte mode.  The ``flags`` parameter is ignored.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The number of bytes read.
 
 SEE ALSO
+^^^^^^^^
 
 nibble_read_data, compat_write_data
+
+
 
 port->ops->compat_write_data - write a block of data in compatibility mode
-----------------------------
+--------------------------------------------------------------------------
 
 SYNOPSIS
+^^^^^^^^
 
-#include <linux/parport.h>
+::
 
-struct parport_operations {
-	...
-	size_t (*compat_write_data) (struct parport *port,
-				     const void *buf, size_t len, int flags);
-	...
-};
+	#include <linux/parport.h>
+
+	struct parport_operations {
+		...
+		size_t (*compat_write_data) (struct parport *port,
+					const void *buf, size_t len, int flags);
+		...
+	};
 
 DESCRIPTION
+^^^^^^^^^^^
 
-Writes a block of data in compatibility mode.  The 'flags' parameter
+Writes a block of data in compatibility mode.  The ``flags`` parameter
 is ignored.
 
 RETURN VALUE
+^^^^^^^^^^^^
 
 The number of bytes written.
 
 SEE ALSO
+^^^^^^^^
 
 nibble_read_data, byte_read_data
-- 
GitLab


From c437c3a405f01ceab5d0251e2cb9698edb991160 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 10:14:12 -0300
Subject: [PATCH 1598/1958] percpu-rw-semaphore.txt: standardize document
 format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

This document is already adopting the standard format,
with a single exception: we're using this convention
for the document title:
	===
	foo
	===

So, adjust the title of this document to follow the
standard.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/percpu-rw-semaphore.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/percpu-rw-semaphore.txt b/Documentation/percpu-rw-semaphore.txt
index 7d3c82431909d..247de64108557 100644
--- a/Documentation/percpu-rw-semaphore.txt
+++ b/Documentation/percpu-rw-semaphore.txt
@@ -1,5 +1,6 @@
+====================
 Percpu rw semaphores
---------------------
+====================
 
 Percpu rw semaphores is a new read-write semaphore design that is
 optimized for locking for reading.
-- 
GitLab


From 5426a2cc70be499ea3bc67985c0608c7ac62fe6c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 10:20:19 -0300
Subject: [PATCH 1599/1958] phy.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark titles;
- use :Author: for authorship;
- mark literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/phy.txt | 106 ++++++++++++++++++++++++++----------------
 1 file changed, 67 insertions(+), 39 deletions(-)

diff --git a/Documentation/phy.txt b/Documentation/phy.txt
index 383cdd863f083..457c3e0f86d62 100644
--- a/Documentation/phy.txt
+++ b/Documentation/phy.txt
@@ -1,10 +1,14 @@
-			    PHY SUBSYSTEM
-		  Kishon Vijay Abraham I <kishon@ti.com>
+=============
+PHY subsystem
+=============
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
 
 This document explains the Generic PHY Framework along with the APIs provided,
 and how-to-use.
 
-1. Introduction
+Introduction
+============
 
 *PHY* is the abbreviation for physical layer. It is used to connect a device
 to the physical medium e.g., the USB controller has a PHY to provide functions
@@ -21,7 +25,8 @@ better code maintainability.
 This framework will be of use only to devices that use external PHY (PHY
 functionality is not embedded within the controller).
 
-2. Registering/Unregistering the PHY provider
+Registering/Unregistering the PHY provider
+==========================================
 
 PHY provider refers to an entity that implements one or more PHY instances.
 For the simple case where the PHY provider implements only a single instance of
@@ -30,11 +35,14 @@ of_phy_simple_xlate. If the PHY provider implements multiple instances, it
 should provide its own implementation of of_xlate. of_xlate is used only for
 dt boot case.
 
-#define of_phy_provider_register(dev, xlate)    \
-        __of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate))
+::
+
+	#define of_phy_provider_register(dev, xlate)    \
+		__of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate))
 
-#define devm_of_phy_provider_register(dev, xlate)       \
-        __devm_of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate))
+	#define devm_of_phy_provider_register(dev, xlate)       \
+		__devm_of_phy_provider_register((dev), NULL, THIS_MODULE,
+						(xlate))
 
 of_phy_provider_register and devm_of_phy_provider_register macros can be used to
 register the phy_provider and it takes device and of_xlate as
@@ -47,28 +55,35 @@ nodes within extra levels for context and extensibility, in which case the low
 level of_phy_provider_register_full() and devm_of_phy_provider_register_full()
 macros can be used to override the node containing the children.
 
-#define of_phy_provider_register_full(dev, children, xlate) \
-	__of_phy_provider_register(dev, children, THIS_MODULE, xlate)
+::
+
+	#define of_phy_provider_register_full(dev, children, xlate) \
+		__of_phy_provider_register(dev, children, THIS_MODULE, xlate)
 
-#define devm_of_phy_provider_register_full(dev, children, xlate) \
-	__devm_of_phy_provider_register_full(dev, children, THIS_MODULE, xlate)
+	#define devm_of_phy_provider_register_full(dev, children, xlate) \
+		__devm_of_phy_provider_register_full(dev, children,
+						     THIS_MODULE, xlate)
 
-void devm_of_phy_provider_unregister(struct device *dev,
-	struct phy_provider *phy_provider);
-void of_phy_provider_unregister(struct phy_provider *phy_provider);
+	void devm_of_phy_provider_unregister(struct device *dev,
+		struct phy_provider *phy_provider);
+	void of_phy_provider_unregister(struct phy_provider *phy_provider);
 
 devm_of_phy_provider_unregister and of_phy_provider_unregister can be used to
 unregister the PHY.
 
-3. Creating the PHY
+Creating the PHY
+================
 
 The PHY driver should create the PHY in order for other peripheral controllers
 to make use of it. The PHY framework provides 2 APIs to create the PHY.
 
-struct phy *phy_create(struct device *dev, struct device_node *node,
-		       const struct phy_ops *ops);
-struct phy *devm_phy_create(struct device *dev, struct device_node *node,
-			    const struct phy_ops *ops);
+::
+
+	struct phy *phy_create(struct device *dev, struct device_node *node,
+			       const struct phy_ops *ops);
+	struct phy *devm_phy_create(struct device *dev,
+				    struct device_node *node,
+				    const struct phy_ops *ops);
 
 The PHY drivers can use one of the above 2 APIs to create the PHY by passing
 the device pointer and phy ops.
@@ -84,12 +99,16 @@ phy_ops to get back the private data.
 Before the controller can make use of the PHY, it has to get a reference to
 it. This framework provides the following APIs to get a reference to the PHY.
 
-struct phy *phy_get(struct device *dev, const char *string);
-struct phy *phy_optional_get(struct device *dev, const char *string);
-struct phy *devm_phy_get(struct device *dev, const char *string);
-struct phy *devm_phy_optional_get(struct device *dev, const char *string);
-struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np,
-				     int index);
+::
+
+	struct phy *phy_get(struct device *dev, const char *string);
+	struct phy *phy_optional_get(struct device *dev, const char *string);
+	struct phy *devm_phy_get(struct device *dev, const char *string);
+	struct phy *devm_phy_optional_get(struct device *dev,
+					  const char *string);
+	struct phy *devm_of_phy_get_by_index(struct device *dev,
+					     struct device_node *np,
+					     int index);
 
 phy_get, phy_optional_get, devm_phy_get and devm_phy_optional_get can
 be used to get the PHY. In the case of dt boot, the string arguments
@@ -111,30 +130,35 @@ the phy_init() and phy_exit() calls, and phy_power_on() and
 phy_power_off() calls are all NOP when applied to a NULL phy. The NULL
 phy is useful in devices for handling optional phy devices.
 
-5. Releasing a reference to the PHY
+Releasing a reference to the PHY
+================================
 
 When the controller no longer needs the PHY, it has to release the reference
 to the PHY it has obtained using the APIs mentioned in the above section. The
 PHY framework provides 2 APIs to release a reference to the PHY.
 
-void phy_put(struct phy *phy);
-void devm_phy_put(struct device *dev, struct phy *phy);
+::
+
+	void phy_put(struct phy *phy);
+	void devm_phy_put(struct device *dev, struct phy *phy);
 
 Both these APIs are used to release a reference to the PHY and devm_phy_put
 destroys the devres associated with this PHY.
 
-6. Destroying the PHY
+Destroying the PHY
+==================
 
 When the driver that created the PHY is unloaded, it should destroy the PHY it
-created using one of the following 2 APIs.
+created using one of the following 2 APIs::
 
-void phy_destroy(struct phy *phy);
-void devm_phy_destroy(struct device *dev, struct phy *phy);
+	void phy_destroy(struct phy *phy);
+	void devm_phy_destroy(struct device *dev, struct phy *phy);
 
 Both these APIs destroy the PHY and devm_phy_destroy destroys the devres
 associated with this PHY.
 
-7. PM Runtime
+PM Runtime
+==========
 
 This subsystem is pm runtime enabled. So while creating the PHY,
 pm_runtime_enable of the phy device created by this subsystem is called and
@@ -150,7 +174,8 @@ There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync,
 phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and
 phy_pm_runtime_forbid for performing PM operations.
 
-8. PHY Mappings
+PHY Mappings
+============
 
 In order to get reference to a PHY without help from DeviceTree, the framework
 offers lookups which can be compared to clkdev that allow clk structures to be
@@ -158,12 +183,15 @@ bound to devices. A lookup can be made be made during runtime when a handle to
 the struct phy already exists.
 
 The framework offers the following API for registering and unregistering the
-lookups.
+lookups::
 
-int phy_create_lookup(struct phy *phy, const char *con_id, const char *dev_id);
-void phy_remove_lookup(struct phy *phy, const char *con_id, const char *dev_id);
+	int phy_create_lookup(struct phy *phy, const char *con_id,
+			      const char *dev_id);
+	void phy_remove_lookup(struct phy *phy, const char *con_id,
+			       const char *dev_id);
 
-9. DeviceTree Binding
+DeviceTree Binding
+==================
 
 The documentation for PHY dt binding can be found @
 Documentation/devicetree/bindings/phy/phy-bindings.txt
-- 
GitLab


From 5da98b8230fb998b0731b2ba3893461ff238a297 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 10:23:05 -0300
Subject: [PATCH 1600/1958] pi-futex.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx.

This document requires just minor adjustments to match
the standard documentation style:

- promote document name;
- remove extra collons on some chapter titles;
- use "-" for a bulleted list.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/pi-futex.txt | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/Documentation/pi-futex.txt b/Documentation/pi-futex.txt
index 9a5bc8651c292..aafddbee73774 100644
--- a/Documentation/pi-futex.txt
+++ b/Documentation/pi-futex.txt
@@ -1,5 +1,6 @@
+======================
 Lightweight PI-futexes
-----------------------
+======================
 
 We are calling them lightweight for 3 reasons:
 
@@ -25,8 +26,8 @@ determinism and well-bound latencies. Even in the worst-case, PI will
 improve the statistical distribution of locking related application
 delays.
 
-The longer reply:
------------------
+The longer reply
+----------------
 
 Firstly, sharing locks between multiple tasks is a common programming
 technique that often cannot be replaced with lockless algorithms. As we
@@ -71,8 +72,8 @@ deterministic execution of the high-prio task: any medium-priority task
 could preempt the low-prio task while it holds the shared lock and
 executes the critical section, and could delay it indefinitely.
 
-Implementation:
----------------
+Implementation
+--------------
 
 As mentioned before, the userspace fastpath of PI-enabled pthread
 mutexes involves no kernel work at all - they behave quite similarly to
@@ -83,8 +84,8 @@ entering the kernel.
 
 To handle the slowpath, we have added two new futex ops:
 
-  FUTEX_LOCK_PI
-  FUTEX_UNLOCK_PI
+  - FUTEX_LOCK_PI
+  - FUTEX_UNLOCK_PI
 
 If the lock-acquire fastpath fails, [i.e. an atomic transition from 0 to
 TID fails], then FUTEX_LOCK_PI is called. The kernel does all the
-- 
GitLab


From 9a4aa7bfce3764b1795ce283b52808b72aad1a66 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 11:23:58 -0300
Subject: [PATCH 1601/1958] pnp.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Use a markup for document title;
- use :Author: and :Last updated: for authorship;
- adjust whitespaces where needed;
- mark literal blocks;
- fix a few subtitle markups.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/pnp.txt | 343 +++++++++++++++++++++++-------------------
 1 file changed, 192 insertions(+), 151 deletions(-)

diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt
index 763e4659bf186..bab2d10631f00 100644
--- a/Documentation/pnp.txt
+++ b/Documentation/pnp.txt
@@ -1,98 +1,118 @@
+=================================
 Linux Plug and Play Documentation
-by Adam Belay <ambx1@neo.rr.com>
-last updated: Oct. 16, 2002
----------------------------------------------------------------------------------------
+=================================
 
+:Author: Adam Belay <ambx1@neo.rr.com>
+:Last updated: Oct. 16, 2002
 
 
 Overview
 --------
-	Plug and Play provides a means of detecting and setting resources for legacy or
+
+Plug and Play provides a means of detecting and setting resources for legacy or
 otherwise unconfigurable devices.  The Linux Plug and Play Layer provides these 
 services to compatible drivers.
 
 
-
 The User Interface
 ------------------
-	The Linux Plug and Play user interface provides a means to activate PnP devices
+
+The Linux Plug and Play user interface provides a means to activate PnP devices
 for legacy and user level drivers that do not support Linux Plug and Play.  The 
 user interface is integrated into sysfs.
 
 In addition to the standard sysfs file the following are created in each
 device's directory:
-id - displays a list of support EISA IDs
-options - displays possible resource configurations
-resources - displays currently allocated resources and allows resource changes
+- id - displays a list of support EISA IDs
+- options - displays possible resource configurations
+- resources - displays currently allocated resources and allows resource changes
 
--activating a device
+activating a device
+^^^^^^^^^^^^^^^^^^^
 
-#echo "auto" > resources
+::
+
+	# echo "auto" > resources
 
 this will invoke the automatic resource config system to activate the device
 
--manually activating a device
+manually activating a device
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+	# echo "manual <depnum> <mode>" > resources
 
-#echo "manual <depnum> <mode>" > resources
-<depnum> - the configuration number
-<mode> - static or dynamic
-		static = for next boot
-		dynamic = now
+	<depnum> - the configuration number
+	<mode> - static or dynamic
+		 static = for next boot
+		 dynamic = now
 
--disabling a device
+disabling a device
+^^^^^^^^^^^^^^^^^^
 
-#echo "disable" > resources
+::
+
+	# echo "disable" > resources
 
 
 EXAMPLE:
 
 Suppose you need to activate the floppy disk controller.
-1.) change to the proper directory, in my case it is 
-/driver/bus/pnp/devices/00:0f
-# cd /driver/bus/pnp/devices/00:0f
-# cat name
-PC standard floppy disk controller
-
-2.) check if the device is already active
-# cat resources
-DISABLED
-
-- Notice the string "DISABLED".  This means the device is not active.
-
-3.) check the device's possible configurations (optional)
-# cat options
-Dependent: 01 - Priority acceptable
-    port 0x3f0-0x3f0, align 0x7, size 0x6, 16-bit address decoding
-    port 0x3f7-0x3f7, align 0x0, size 0x1, 16-bit address decoding
-    irq 6
-    dma 2 8-bit compatible
-Dependent: 02 - Priority acceptable
-    port 0x370-0x370, align 0x7, size 0x6, 16-bit address decoding
-    port 0x377-0x377, align 0x0, size 0x1, 16-bit address decoding
-    irq 6
-    dma 2 8-bit compatible
-
-4.) now activate the device
-# echo "auto" > resources
-
-5.) finally check if the device is active
-# cat resources
-io 0x3f0-0x3f5
-io 0x3f7-0x3f7
-irq 6
-dma 2
-
-also there are a series of kernel parameters:
-pnp_reserve_irq=irq1[,irq2] ....
-pnp_reserve_dma=dma1[,dma2] ....
-pnp_reserve_io=io1,size1[,io2,size2] ....
-pnp_reserve_mem=mem1,size1[,mem2,size2] ....
+
+1. change to the proper directory, in my case it is
+   /driver/bus/pnp/devices/00:0f::
+
+	# cd /driver/bus/pnp/devices/00:0f
+	# cat name
+	PC standard floppy disk controller
+
+2. check if the device is already active::
+
+	# cat resources
+	DISABLED
+
+  - Notice the string "DISABLED".  This means the device is not active.
+
+3. check the device's possible configurations (optional)::
+
+	# cat options
+	Dependent: 01 - Priority acceptable
+	    port 0x3f0-0x3f0, align 0x7, size 0x6, 16-bit address decoding
+	    port 0x3f7-0x3f7, align 0x0, size 0x1, 16-bit address decoding
+	    irq 6
+	    dma 2 8-bit compatible
+	Dependent: 02 - Priority acceptable
+	    port 0x370-0x370, align 0x7, size 0x6, 16-bit address decoding
+	    port 0x377-0x377, align 0x0, size 0x1, 16-bit address decoding
+	    irq 6
+	    dma 2 8-bit compatible
+
+4. now activate the device::
+
+	# echo "auto" > resources
+
+5. finally check if the device is active::
+
+	# cat resources
+	io 0x3f0-0x3f5
+	io 0x3f7-0x3f7
+	irq 6
+	dma 2
+
+also there are a series of kernel parameters::
+
+	pnp_reserve_irq=irq1[,irq2] ....
+	pnp_reserve_dma=dma1[,dma2] ....
+	pnp_reserve_io=io1,size1[,io2,size2] ....
+	pnp_reserve_mem=mem1,size1[,mem2,size2] ....
 
 
 The Unified Plug and Play Layer
 -------------------------------
-	All Plug and Play drivers, protocols, and services meet at a central location 
+
+All Plug and Play drivers, protocols, and services meet at a central location
 called the Plug and Play Layer.  This layer is responsible for the exchange of 
 information between PnP drivers and PnP protocols.  Thus it automatically 
 forwards commands to the proper protocol.  This makes writing PnP drivers 
@@ -101,64 +121,73 @@ significantly easier.
 The following functions are available from the Plug and Play Layer:
 
 pnp_get_protocol
-- increments the number of uses by one
+  increments the number of uses by one
 
 pnp_put_protocol
-- deincrements the number of uses by one
+  deincrements the number of uses by one
 
 pnp_register_protocol
-- use this to register a new PnP protocol
+  use this to register a new PnP protocol
 
 pnp_unregister_protocol
-- use this function to remove a PnP protocol from the Plug and Play Layer
+  use this function to remove a PnP protocol from the Plug and Play Layer
 
 pnp_register_driver
-- adds a PnP driver to the Plug and Play Layer
-- this includes driver model integration
-- returns zero for success or a negative error number for failure; count
+  adds a PnP driver to the Plug and Play Layer
+
+  this includes driver model integration
+  returns zero for success or a negative error number for failure; count
   calls to the .add() method if you need to know how many devices bind to
   the driver
 
 pnp_unregister_driver
-- removes a PnP driver from the Plug and Play Layer
+  removes a PnP driver from the Plug and Play Layer
 
 
 Plug and Play Protocols
 -----------------------
-	This section contains information for PnP protocol developers.
+
+This section contains information for PnP protocol developers.
 
 The following Protocols are currently available in the computing world:
-- PNPBIOS: used for system devices such as serial and parallel ports.
-- ISAPNP: provides PnP support for the ISA bus
-- ACPI: among its many uses, ACPI provides information about system level 
-devices.
+
+- PNPBIOS:
+    used for system devices such as serial and parallel ports.
+- ISAPNP:
+    provides PnP support for the ISA bus
+- ACPI:
+    among its many uses, ACPI provides information about system level
+    devices.
+
 It is meant to replace the PNPBIOS.  It is not currently supported by Linux
 Plug and Play but it is planned to be in the near future.
 
 
 Requirements for a Linux PnP protocol:
-1.) the protocol must use EISA IDs
-2.) the protocol must inform the PnP Layer of a device's current configuration
+1. the protocol must use EISA IDs
+2. the protocol must inform the PnP Layer of a device's current configuration
+
 - the ability to set resources is optional but preferred.
 
 The following are PnP protocol related functions:
 
 pnp_add_device
-- use this function to add a PnP device to the PnP layer
-- only call this function when all wanted values are set in the pnp_dev 
-structure
+  use this function to add a PnP device to the PnP layer
+
+  only call this function when all wanted values are set in the pnp_dev
+  structure
 
 pnp_init_device
-- call this to initialize the PnP structure
+  call this to initialize the PnP structure
 
 pnp_remove_device
-- call this to remove a device from the Plug and Play Layer.
-- it will fail if the device is still in use.
-- automatically will free mem used by the device and related structures
+  call this to remove a device from the Plug and Play Layer.
+  it will fail if the device is still in use.
+  automatically will free mem used by the device and related structures
 
 pnp_add_id
-- adds an EISA ID to the list of supported IDs for the specified device
+  adds an EISA ID to the list of supported IDs for the specified device
 
 For more information consult the source of a protocol such as
 /drivers/pnp/pnpbios/core.c.
@@ -167,85 +196,97 @@ For more information consult the source of a protocol such as
 
 Linux Plug and Play Drivers
 ---------------------------
-	This section contains information for Linux PnP driver developers.
+
+This section contains information for Linux PnP driver developers.
 
 The New Way
-...........
-1.) first make a list of supported EISA IDS
-ex:
-static const struct pnp_id pnp_dev_table[] = {
-	/* Standard LPT Printer Port */
-	{.id = "PNP0400", .driver_data = 0},
-	/* ECP Printer Port */
-	{.id = "PNP0401", .driver_data = 0},
-	{.id = ""}
-};
-
-Please note that the character 'X' can be used as a wild card in the function
-portion (last four characters).
-ex:
+^^^^^^^^^^^
+
+1. first make a list of supported EISA IDS
+
+   ex::
+
+	static const struct pnp_id pnp_dev_table[] = {
+		/* Standard LPT Printer Port */
+		{.id = "PNP0400", .driver_data = 0},
+		/* ECP Printer Port */
+		{.id = "PNP0401", .driver_data = 0},
+		{.id = ""}
+	};
+
+   Please note that the character 'X' can be used as a wild card in the function
+   portion (last four characters).
+
+   ex::
+
 	/* Unknown PnP modems */
 	{	"PNPCXXX",		UNKNOWN_DEV	},
 
-Supported PnP card IDs can optionally be defined.
-ex:
-static const struct pnp_id pnp_card_table[] = {
-	{	"ANYDEVS",		0	},
-	{	"",			0	}
-};
-
-2.) Optionally define probe and remove functions.  It may make sense not to 
-define these functions if the driver already has a reliable method of detecting
-the resources, such as the parport_pc driver.
-ex:
-static int
-serial_pnp_probe(struct pnp_dev * dev, const struct pnp_id *card_id, const 
-                 struct pnp_id *dev_id)
-{
-. . .
-
-ex:
-static void serial_pnp_remove(struct pnp_dev * dev)
-{
-. . .
-
-consult /drivers/serial/8250_pnp.c for more information.
-
-3.) create a driver structure
-ex:
-
-static struct pnp_driver serial_pnp_driver = {
-	.name		= "serial",
-	.card_id_table	= pnp_card_table,
-	.id_table	= pnp_dev_table,
-	.probe		= serial_pnp_probe,
-	.remove		= serial_pnp_remove,
-};
-
-* name and id_table cannot be NULL.
-
-4.) register the driver
-ex:
-
-static int __init serial8250_pnp_init(void)
-{
-	return pnp_register_driver(&serial_pnp_driver);
-}
+   Supported PnP card IDs can optionally be defined.
+   ex::
+
+	static const struct pnp_id pnp_card_table[] = {
+		{	"ANYDEVS",		0	},
+		{	"",			0	}
+	};
+
+2. Optionally define probe and remove functions.  It may make sense not to
+   define these functions if the driver already has a reliable method of detecting
+   the resources, such as the parport_pc driver.
+
+   ex::
+
+	static int
+	serial_pnp_probe(struct pnp_dev * dev, const struct pnp_id *card_id, const
+			struct pnp_id *dev_id)
+	{
+	. . .
+
+   ex::
+
+	static void serial_pnp_remove(struct pnp_dev * dev)
+	{
+	. . .
+
+   consult /drivers/serial/8250_pnp.c for more information.
+
+3. create a driver structure
+
+   ex::
+
+	static struct pnp_driver serial_pnp_driver = {
+		.name		= "serial",
+		.card_id_table	= pnp_card_table,
+		.id_table	= pnp_dev_table,
+		.probe		= serial_pnp_probe,
+		.remove		= serial_pnp_remove,
+	};
+
+   * name and id_table cannot be NULL.
+
+4. register the driver
+
+   ex::
+
+	static int __init serial8250_pnp_init(void)
+	{
+		return pnp_register_driver(&serial_pnp_driver);
+	}
 
 The Old Way
-...........
+^^^^^^^^^^^
 
 A series of compatibility functions have been created to make it easy to convert
 ISAPNP drivers.  They should serve as a temporary solution only.
 
-They are as follows:
+They are as follows::
 
-struct pnp_card *pnp_find_card(unsigned short vendor,
-				 unsigned short device,
-				 struct pnp_card *from)
+	struct pnp_card *pnp_find_card(unsigned short vendor,
+				       unsigned short device,
+				       struct pnp_card *from)
 
-struct pnp_dev *pnp_find_dev(struct pnp_card *card,
-				unsigned short vendor,
-				unsigned short function,
-				struct pnp_dev *from)
+	struct pnp_dev *pnp_find_dev(struct pnp_card *card,
+				     unsigned short vendor,
+				     unsigned short function,
+				     struct pnp_dev *from)
 
-- 
GitLab


From 9cc07df4b548fce9f29aaf27e51b8b5ccefa2cd9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 21:58:47 -0300
Subject: [PATCH 1602/1958] preempt-locking.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark titles;
- mark literal blocks;
- adjust identation where needed;
- use :Author: for authorship.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/preempt-locking.txt | 40 +++++++++++++++++++------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/Documentation/preempt-locking.txt b/Documentation/preempt-locking.txt
index e89ce6624af2f..c945062be66c2 100644
--- a/Documentation/preempt-locking.txt
+++ b/Documentation/preempt-locking.txt
@@ -1,10 +1,13 @@
-		  Proper Locking Under a Preemptible Kernel:
-		       Keeping Kernel Code Preempt-Safe
-			 Robert Love <rml@tech9.net>
-			  Last Updated: 28 Aug 2002
+===========================================================================
+Proper Locking Under a Preemptible Kernel: Keeping Kernel Code Preempt-Safe
+===========================================================================
 
+:Author: Robert Love <rml@tech9.net>
+:Last Updated: 28 Aug 2002
 
-INTRODUCTION
+
+Introduction
+============
 
 
 A preemptible kernel creates new locking issues.  The issues are the same as
@@ -17,9 +20,10 @@ requires protecting these situations.
  
 
 RULE #1: Per-CPU data structures need explicit protection
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 
-Two similar problems arise. An example code snippet:
+Two similar problems arise. An example code snippet::
 
 	struct this_needs_locking tux[NR_CPUS];
 	tux[smp_processor_id()] = some_value;
@@ -35,6 +39,7 @@ You can also use put_cpu() and get_cpu(), which will disable preemption.
 
 
 RULE #2: CPU state must be protected.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 
 Under preemption, the state of the CPU must be protected.  This is arch-
@@ -52,6 +57,7 @@ However, fpu__restore() must be called with preemption disabled.
 
 
 RULE #3: Lock acquire and release must be performed by same task
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 
 A lock acquired in one task must be released by the same task.  This
@@ -61,17 +67,20 @@ like this, acquire and release the task in the same code path and
 have the caller wait on an event by the other task.
 
 
-SOLUTION
+Solution
+========
 
 
 Data protection under preemption is achieved by disabling preemption for the
 duration of the critical region.
 
-preempt_enable()		decrement the preempt counter
-preempt_disable()		increment the preempt counter
-preempt_enable_no_resched()	decrement, but do not immediately preempt
-preempt_check_resched()		if needed, reschedule
-preempt_count()			return the preempt counter
+::
+
+  preempt_enable()		decrement the preempt counter
+  preempt_disable()		increment the preempt counter
+  preempt_enable_no_resched()	decrement, but do not immediately preempt
+  preempt_check_resched()	if needed, reschedule
+  preempt_count()		return the preempt counter
 
 The functions are nestable.  In other words, you can call preempt_disable
 n-times in a code path, and preemption will not be reenabled until the n-th
@@ -89,7 +98,7 @@ So use this implicit preemption-disabling property only if you know that the
 affected codepath does not do any of this. Best policy is to use this only for
 small, atomic code that you wrote and which calls no complex functions.
 
-Example:
+Example::
 
 	cpucache_t *cc; /* this is per-CPU */
 	preempt_disable();
@@ -102,7 +111,7 @@ Example:
 	return 0;
 
 Notice how the preemption statements must encompass every reference of the
-critical variables.  Another example:
+critical variables.  Another example::
 
 	int buf[NR_CPUS];
 	set_cpu_val(buf);
@@ -114,7 +123,8 @@ This code is not preempt-safe, but see how easily we can fix it by simply
 moving the spin_lock up two lines.
 
 
-PREVENTING PREEMPTION USING INTERRUPT DISABLING
+Preventing preemption using interrupt disabling
+===============================================
 
 
 It is possible to prevent a preemption event using local_irq_disable and
-- 
GitLab


From 3b033380cb8dea6f00503d6d30c8f1e5c571f565 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Tue, 16 May 2017 22:27:11 -0300
Subject: [PATCH 1603/1958] printk-formats.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- add a title for the document;
- add markups for section titles;
- move authorship to the beginning and use :Author:;
- use right markup for tables;
- mark literals and literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/printk-formats.txt | 384 ++++++++++++++++++-------------
 1 file changed, 229 insertions(+), 155 deletions(-)

diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 619cdffa5d44a..65ea5915178b4 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -1,5 +1,18 @@
-If variable is of Type,		use printk format specifier:
----------------------------------------------------------
+=========================================
+How to get printk format specifiers right
+=========================================
+
+:Author: Randy Dunlap <rdunlap@infradead.org>
+:Author: Andrew Murray <amurray@mpc-data.co.uk>
+
+
+Integer types
+=============
+
+::
+
+	If variable is of Type,		use printk format specifier:
+	------------------------------------------------------------
 		int			%d or %x
 		unsigned int		%u or %x
 		long			%ld or %lx
@@ -13,25 +26,29 @@ If variable is of Type,		use printk format specifier:
 		s64			%lld or %llx
 		u64			%llu or %llx
 
-If <type> is dependent on a config option for its size (e.g., sector_t,
-blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
-format specifier of its largest possible type and explicitly cast to it.
-Example:
+If <type> is dependent on a config option for its size (e.g., ``sector_t``,
+``blkcnt_t``) or is architecture-dependent for its size (e.g., ``tcflag_t``),
+use a format specifier of its largest possible type and explicitly cast to it.
+
+Example::
 
 	printk("test: sector number/total blocks: %llu/%llu\n",
 		(unsigned long long)sector, (unsigned long long)blockcount);
 
-Reminder: sizeof() result is of type size_t.
+Reminder: ``sizeof()`` result is of type ``size_t``.
 
-The kernel's printf does not support %n. For obvious reasons, floating
-point formats (%e, %f, %g, %a) are also not recognized. Use of any
+The kernel's printf does not support ``%n``. For obvious reasons, floating
+point formats (``%e, %f, %g, %a``) are also not recognized. Use of any
 unsupported specifier or length qualifier results in a WARN and early
 return from vsnprintf.
 
 Raw pointer value SHOULD be printed with %p. The kernel supports
 the following extended format specifiers for pointer types:
 
-Symbols/Function Pointers:
+Symbols/Function Pointers
+=========================
+
+::
 
 	%pF	versatile_init+0x0/0x110
 	%pf	versatile_init
@@ -41,99 +58,122 @@ Symbols/Function Pointers:
 	%ps	versatile_init
 	%pB	prev_fn_of_versatile_init+0x88/0x88
 
-	For printing symbols and function pointers. The 'S' and 's' specifiers
-	result in the symbol name with ('S') or without ('s') offsets. Where
-	this is used on a kernel without KALLSYMS - the symbol address is
-	printed instead.
+For printing symbols and function pointers. The ``S`` and ``s`` specifiers
+result in the symbol name with (``S``) or without (``s``) offsets. Where
+this is used on a kernel without KALLSYMS - the symbol address is
+printed instead.
+
+The ``B`` specifier results in the symbol name with offsets and should be
+used when printing stack backtraces. The specifier takes into
+consideration the effect of compiler optimisations which may occur
+when tail-call``s are used and marked with the noreturn GCC attribute.
 
-	The 'B' specifier results in the symbol name with offsets and should be
-	used when printing stack backtraces. The specifier takes into
-	consideration the effect of compiler optimisations which may occur
-	when tail-call's are used and marked with the noreturn GCC attribute.
+On ia64, ppc64 and parisc64 architectures function pointers are
+actually function descriptors which must first be resolved. The ``F`` and
+``f`` specifiers perform this resolution and then provide the same
+functionality as the ``S`` and ``s`` specifiers.
 
-	On ia64, ppc64 and parisc64 architectures function pointers are
-	actually function descriptors which must first be resolved. The 'F' and
-	'f' specifiers perform this resolution and then provide the same
-	functionality as the 'S' and 's' specifiers.
+Kernel Pointers
+===============
 
-Kernel Pointers:
+::
 
 	%pK	0x01234567 or 0x0123456789abcdef
 
-	For printing kernel pointers which should be hidden from unprivileged
-	users. The behaviour of %pK depends on the kptr_restrict sysctl - see
-	Documentation/sysctl/kernel.txt for more details.
+For printing kernel pointers which should be hidden from unprivileged
+users. The behaviour of ``%pK`` depends on the ``kptr_restrict sysctl`` - see
+Documentation/sysctl/kernel.txt for more details.
+
+Struct Resources
+================
 
-Struct Resources:
+::
 
 	%pr	[mem 0x60000000-0x6fffffff flags 0x2200] or
 		[mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
 	%pR	[mem 0x60000000-0x6fffffff pref] or
 		[mem 0x0000000060000000-0x000000006fffffff pref]
 
-	For printing struct resources. The 'R' and 'r' specifiers result in a
-	printed resource with ('R') or without ('r') a decoded flags member.
-	Passed by reference.
+For printing struct resources. The ``R`` and ``r`` specifiers result in a
+printed resource with (``R``) or without (``r``) a decoded flags member.
+Passed by reference.
+
+Physical addresses types ``phys_addr_t``
+========================================
 
-Physical addresses types phys_addr_t:
+::
 
 	%pa[p]	0x01234567 or 0x0123456789abcdef
 
-	For printing a phys_addr_t type (and its derivatives, such as
-	resource_size_t) which can vary based on build options, regardless of
-	the width of the CPU data path. Passed by reference.
+For printing a ``phys_addr_t`` type (and its derivatives, such as
+``resource_size_t``) which can vary based on build options, regardless of
+the width of the CPU data path. Passed by reference.
 
-DMA addresses types dma_addr_t:
+DMA addresses types ``dma_addr_t``
+==================================
+
+::
 
 	%pad	0x01234567 or 0x0123456789abcdef
 
-	For printing a dma_addr_t type which can vary based on build options,
-	regardless of the width of the CPU data path. Passed by reference.
+For printing a ``dma_addr_t`` type which can vary based on build options,
+regardless of the width of the CPU data path. Passed by reference.
+
+Raw buffer as an escaped string
+===============================
 
-Raw buffer as an escaped string:
+::
 
 	%*pE[achnops]
 
-	For printing raw buffer as an escaped string. For the following buffer
+For printing raw buffer as an escaped string. For the following buffer::
 
 		1b 62 20 5c 43 07 22 90 0d 5d
 
-	few examples show how the conversion would be done (the result string
-	without surrounding quotes):
+few examples show how the conversion would be done (the result string
+without surrounding quotes)::
 
 		%*pE		"\eb \C\a"\220\r]"
 		%*pEhp		"\x1bb \C\x07"\x90\x0d]"
 		%*pEa		"\e\142\040\\\103\a\042\220\r\135"
 
-	The conversion rules are applied according to an optional combination
-	of flags (see string_escape_mem() kernel documentation for the
-	details):
-		a - ESCAPE_ANY
-		c - ESCAPE_SPECIAL
-		h - ESCAPE_HEX
-		n - ESCAPE_NULL
-		o - ESCAPE_OCTAL
-		p - ESCAPE_NP
-		s - ESCAPE_SPACE
-	By default ESCAPE_ANY_NP is used.
+The conversion rules are applied according to an optional combination
+of flags (see :c:func:`string_escape_mem` kernel documentation for the
+details):
+
+	- ``a`` - ESCAPE_ANY
+	- ``c`` - ESCAPE_SPECIAL
+	- ``h`` - ESCAPE_HEX
+	- ``n`` - ESCAPE_NULL
+	- ``o`` - ESCAPE_OCTAL
+	- ``p`` - ESCAPE_NP
+	- ``s`` - ESCAPE_SPACE
 
-	ESCAPE_ANY_NP is the sane choice for many cases, in particularly for
-	printing SSIDs.
+By default ESCAPE_ANY_NP is used.
 
-	If field width is omitted the 1 byte only will be escaped.
+ESCAPE_ANY_NP is the sane choice for many cases, in particularly for
+printing SSIDs.
 
-Raw buffer as a hex string:
+If field width is omitted the 1 byte only will be escaped.
+
+Raw buffer as a hex string
+==========================
+
+::
 
 	%*ph	00 01 02  ...  3f
 	%*phC	00:01:02: ... :3f
 	%*phD	00-01-02- ... -3f
 	%*phN	000102 ... 3f
 
-	For printing a small buffers (up to 64 bytes long) as a hex string with
-	certain separator. For the larger buffers consider to use
-	print_hex_dump().
+For printing a small buffers (up to 64 bytes long) as a hex string with
+certain separator. For the larger buffers consider to use
+:c:func:`print_hex_dump`.
+
+MAC/FDDI addresses
+==================
 
-MAC/FDDI addresses:
+::
 
 	%pM	00:01:02:03:04:05
 	%pMR	05:04:03:02:01:00
@@ -141,53 +181,62 @@ MAC/FDDI addresses:
 	%pm	000102030405
 	%pmR	050403020100
 
-	For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm'
-	specifiers result in a printed address with ('M') or without ('m') byte
-	separators. The default byte separator is the colon (':').
+For printing 6-byte MAC/FDDI addresses in hex notation. The ``M`` and ``m``
+specifiers result in a printed address with (``M``) or without (``m``) byte
+separators. The default byte separator is the colon (``:``).
 
-	Where FDDI addresses are concerned the 'F' specifier can be used after
-	the 'M' specifier to use dash ('-') separators instead of the default
-	separator.
+Where FDDI addresses are concerned the ``F`` specifier can be used after
+the ``M`` specifier to use dash (``-``) separators instead of the default
+separator.
 
-	For Bluetooth addresses the 'R' specifier shall be used after the 'M'
-	specifier to use reversed byte order suitable for visual interpretation
-	of Bluetooth addresses which are in the little endian order.
+For Bluetooth addresses the ``R`` specifier shall be used after the ``M``
+specifier to use reversed byte order suitable for visual interpretation
+of Bluetooth addresses which are in the little endian order.
 
-	Passed by reference.
+Passed by reference.
+
+IPv4 addresses
+==============
 
-IPv4 addresses:
+::
 
 	%pI4	1.2.3.4
 	%pi4	001.002.003.004
 	%p[Ii]4[hnbl]
 
-	For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4'
-	specifiers result in a printed address with ('i4') or without ('I4')
-	leading zeros.
+For printing IPv4 dot-separated decimal addresses. The ``I4`` and ``i4``
+specifiers result in a printed address with (``i4``) or without (``I4``)
+leading zeros.
 
-	The additional 'h', 'n', 'b', and 'l' specifiers are used to specify
-	host, network, big or little endian order addresses respectively. Where
-	no specifier is provided the default network/big endian order is used.
+The additional ``h``, ``n``, ``b``, and ``l`` specifiers are used to specify
+host, network, big or little endian order addresses respectively. Where
+no specifier is provided the default network/big endian order is used.
 
-	Passed by reference.
+Passed by reference.
 
-IPv6 addresses:
+IPv6 addresses
+==============
+
+::
 
 	%pI6	0001:0002:0003:0004:0005:0006:0007:0008
 	%pi6	00010002000300040005000600070008
 	%pI6c	1:2:3:4:5:6:7:8
 
-	For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6'
-	specifiers result in a printed address with ('I6') or without ('i6')
-	colon-separators. Leading zeros are always used.
+For printing IPv6 network-order 16-bit hex addresses. The ``I6`` and ``i6``
+specifiers result in a printed address with (``I6``) or without (``i6``)
+colon-separators. Leading zeros are always used.
 
-	The additional 'c' specifier can be used with the 'I' specifier to
-	print a compressed IPv6 address as described by
-	http://tools.ietf.org/html/rfc5952
+The additional ``c`` specifier can be used with the ``I`` specifier to
+print a compressed IPv6 address as described by
+http://tools.ietf.org/html/rfc5952
 
-	Passed by reference.
+Passed by reference.
 
-IPv4/IPv6 addresses (generic, with port, flowinfo, scope):
+IPv4/IPv6 addresses (generic, with port, flowinfo, scope)
+=========================================================
+
+::
 
 	%pIS	1.2.3.4		or 0001:0002:0003:0004:0005:0006:0007:0008
 	%piS	001.002.003.004	or 00010002000300040005000600070008
@@ -195,87 +244,103 @@ IPv4/IPv6 addresses (generic, with port, flowinfo, scope):
 	%pISpc	1.2.3.4:12345	or [1:2:3:4:5:6:7:8]:12345
 	%p[Ii]S[pfschnbl]
 
-	For printing an IP address without the need to distinguish whether it's
-	of type AF_INET or AF_INET6, a pointer to a valid 'struct sockaddr',
-	specified through 'IS' or 'iS', can be passed to this format specifier.
+For printing an IP address without the need to distinguish whether it``s
+of type AF_INET or AF_INET6, a pointer to a valid ``struct sockaddr``,
+specified through ``IS`` or ``iS``, can be passed to this format specifier.
 
-	The additional 'p', 'f', and 's' specifiers are used to specify port
-	(IPv4, IPv6), flowinfo (IPv6) and scope (IPv6). Ports have a ':' prefix,
-	flowinfo a '/' and scope a '%', each followed by the actual value.
+The additional ``p``, ``f``, and ``s`` specifiers are used to specify port
+(IPv4, IPv6), flowinfo (IPv6) and scope (IPv6). Ports have a ``:`` prefix,
+flowinfo a ``/`` and scope a ``%``, each followed by the actual value.
 
-	In case of an IPv6 address the compressed IPv6 address as described by
-	http://tools.ietf.org/html/rfc5952 is being used if the additional
-	specifier 'c' is given. The IPv6 address is surrounded by '[', ']' in
-	case of additional specifiers 'p', 'f' or 's' as suggested by
-	https://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-07
+In case of an IPv6 address the compressed IPv6 address as described by
+http://tools.ietf.org/html/rfc5952 is being used if the additional
+specifier ``c`` is given. The IPv6 address is surrounded by ``[``, ``]`` in
+case of additional specifiers ``p``, ``f`` or ``s`` as suggested by
+https://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-07
 
-	In case of IPv4 addresses, the additional 'h', 'n', 'b', and 'l'
-	specifiers can be used as well and are ignored in case of an IPv6
-	address.
+In case of IPv4 addresses, the additional ``h``, ``n``, ``b``, and ``l``
+specifiers can be used as well and are ignored in case of an IPv6
+address.
 
-	Passed by reference.
+Passed by reference.
 
-	Further examples:
+Further examples::
 
 	%pISfc		1.2.3.4		or [1:2:3:4:5:6:7:8]/123456789
 	%pISsc		1.2.3.4		or [1:2:3:4:5:6:7:8]%1234567890
 	%pISpfc		1.2.3.4:12345	or [1:2:3:4:5:6:7:8]:12345/123456789
 
-UUID/GUID addresses:
+UUID/GUID addresses
+===================
+
+::
 
 	%pUb	00010203-0405-0607-0809-0a0b0c0d0e0f
 	%pUB	00010203-0405-0607-0809-0A0B0C0D0E0F
 	%pUl	03020100-0504-0706-0809-0a0b0c0e0e0f
 	%pUL	03020100-0504-0706-0809-0A0B0C0E0E0F
 
-	For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L',
-	'b' and 'B' specifiers are used to specify a little endian order in
-	lower ('l') or upper case ('L') hex characters - and big endian order
-	in lower ('b') or upper case ('B') hex characters.
+For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L',
+'b' and 'B' specifiers are used to specify a little endian order in
+lower ('l') or upper case ('L') hex characters - and big endian order
+in lower ('b') or upper case ('B') hex characters.
 
-	Where no additional specifiers are used the default big endian
-	order with lower case hex characters will be printed.
+Where no additional specifiers are used the default big endian
+order with lower case hex characters will be printed.
 
-	Passed by reference.
+Passed by reference.
+
+dentry names
+============
 
-dentry names:
+::
 
 	%pd{,2,3,4}
 	%pD{,2,3,4}
 
-	For printing dentry name; if we race with d_move(), the name might be
-	a mix of old and new ones, but it won't oops.  %pd dentry is a safer
-	equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
-	n last components.  %pD does the same thing for struct file.
+For printing dentry name; if we race with :c:func:`d_move`, the name might be
+a mix of old and new ones, but it won't oops.  ``%pd`` dentry is a safer
+equivalent of ``%s`` ``dentry->d_name.name`` we used to use, ``%pd<n>`` prints
+``n`` last components.  ``%pD`` does the same thing for struct file.
 
-	Passed by reference.
+Passed by reference.
 
-block_device names:
+block_device names
+==================
+
+::
 
 	%pg	sda, sda1 or loop0p1
 
-	For printing name of block_device pointers.
+For printing name of block_device pointers.
+
+struct va_format
+================
 
-struct va_format:
+::
 
 	%pV
 
-	For printing struct va_format structures. These contain a format string
-	and va_list as follows:
+For printing struct va_format structures. These contain a format string
+and va_list as follows::
 
 	struct va_format {
 		const char *fmt;
 		va_list *va;
 	};
 
-	Implements a "recursive vsnprintf".
+Implements a "recursive vsnprintf".
 
-	Do not use this feature without some mechanism to verify the
-	correctness of the format string and va_list arguments.
+Do not use this feature without some mechanism to verify the
+correctness of the format string and va_list arguments.
 
-	Passed by reference.
+Passed by reference.
+
+kobjects
+========
+
+::
 
-kobjects:
 	%pO
 
 	Base specifier for kobject based structs. Must be followed with
@@ -311,61 +376,70 @@ kobjects:
 
 	Passed by reference.
 
-struct clk:
+
+struct clk
+==========
+
+::
 
 	%pC	pll1
 	%pCn	pll1
 	%pCr	1560000000
 
-	For printing struct clk structures. '%pC' and '%pCn' print the name
-	(Common Clock Framework) or address (legacy clock framework) of the
-	structure; '%pCr' prints the current clock rate.
+For printing struct clk structures. ``%pC`` and ``%pCn`` print the name
+(Common Clock Framework) or address (legacy clock framework) of the
+structure; ``%pCr`` prints the current clock rate.
 
-	Passed by reference.
+Passed by reference.
 
-bitmap and its derivatives such as cpumask and nodemask:
+bitmap and its derivatives such as cpumask and nodemask
+=======================================================
+
+::
 
 	%*pb	0779
 	%*pbl	0,3-6,8-10
 
-	For printing bitmap and its derivatives such as cpumask and nodemask,
-	%*pb output the bitmap with field width as the number of bits and %*pbl
-	output the bitmap as range list with field width as the number of bits.
+For printing bitmap and its derivatives such as cpumask and nodemask,
+``%*pb`` output the bitmap with field width as the number of bits and ``%*pbl``
+output the bitmap as range list with field width as the number of bits.
 
-	Passed by reference.
+Passed by reference.
+
+Flags bitfields such as page flags, gfp_flags
+=============================================
 
-Flags bitfields such as page flags, gfp_flags:
+::
 
 	%pGp	referenced|uptodate|lru|active|private
 	%pGg	GFP_USER|GFP_DMA32|GFP_NOWARN
 	%pGv	read|exec|mayread|maywrite|mayexec|denywrite
 
-	For printing flags bitfields as a collection of symbolic constants that
-	would construct the value. The type of flags is given by the third
-	character. Currently supported are [p]age flags, [v]ma_flags (both
-	expect unsigned long *) and [g]fp_flags (expects gfp_t *). The flag
-	names and print order depends on the particular	type.
+For printing flags bitfields as a collection of symbolic constants that
+would construct the value. The type of flags is given by the third
+character. Currently supported are [p]age flags, [v]ma_flags (both
+expect ``unsigned long *``) and [g]fp_flags (expects ``gfp_t *``). The flag
+names and print order depends on the particular	type.
 
-	Note that this format should not be used directly in TP_printk() part
-	of a tracepoint. Instead, use the show_*_flags() functions from
-	<trace/events/mmflags.h>.
+Note that this format should not be used directly in :c:func:`TP_printk()` part
+of a tracepoint. Instead, use the ``show_*_flags()`` functions from
+<trace/events/mmflags.h>.
 
-	Passed by reference.
+Passed by reference.
+
+Network device features
+=======================
 
-Network device features:
+::
 
 	%pNF	0x000000000000c000
 
-	For printing netdev_features_t.
+For printing netdev_features_t.
 
-	Passed by reference.
+Passed by reference.
 
-If you add other %p extensions, please extend lib/test_printf.c with
+If you add other ``%p`` extensions, please extend lib/test_printf.c with
 one or more test cases, if at all feasible.
 
 
 Thank you for your cooperation and attention.
-
-
-By Randy Dunlap <rdunlap@infradead.org> and
-Andrew Murray <amurray@mpc-data.co.uk>
-- 
GitLab


From ce0f95a501b201f857909738e201729048d41be7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 06:18:13 -0300
Subject: [PATCH 1604/1958] rbtree.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Mark document title;
- Use :Author: for authorship;
- mark a sub-section title as such;
- mark literal blocks;
- adjust identation where needed.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/rbtree.txt | 88 +++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 42 deletions(-)

diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index b9d9cc57be189..b8a8c70b0188d 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -1,7 +1,10 @@
+=================================
 Red-black Trees (rbtree) in Linux
-January 18, 2007
-Rob Landley <rob@landley.net>
-=============================
+=================================
+
+
+:Date: January 18, 2007
+:Author: Rob Landley <rob@landley.net>
 
 What are red-black trees, and what are they for?
 ------------------------------------------------
@@ -56,7 +59,7 @@ user of the rbtree code.
 Creating a new rbtree
 ---------------------
 
-Data nodes in an rbtree tree are structures containing a struct rb_node member:
+Data nodes in an rbtree tree are structures containing a struct rb_node member::
 
   struct mytype {
   	struct rb_node node;
@@ -78,7 +81,7 @@ Searching for a value in an rbtree
 Writing a search function for your tree is fairly straightforward: start at the
 root, compare each value, and follow the left or right branch as necessary.
 
-Example:
+Example::
 
   struct mytype *my_search(struct rb_root *root, char *string)
   {
@@ -110,7 +113,7 @@ The search for insertion differs from the previous search by finding the
 location of the pointer on which to graft the new node.  The new node also
 needs a link to its parent node for rebalancing purposes.
 
-Example:
+Example::
 
   int my_insert(struct rb_root *root, struct mytype *data)
   {
@@ -140,11 +143,11 @@ Example:
 Removing or replacing existing data in an rbtree
 ------------------------------------------------
 
-To remove an existing node from a tree, call:
+To remove an existing node from a tree, call::
 
   void rb_erase(struct rb_node *victim, struct rb_root *tree);
 
-Example:
+Example::
 
   struct mytype *data = mysearch(&mytree, "walrus");
 
@@ -153,7 +156,7 @@ Example:
   	myfree(data);
   }
 
-To replace an existing node in a tree with a new one with the same key, call:
+To replace an existing node in a tree with a new one with the same key, call::
 
   void rb_replace_node(struct rb_node *old, struct rb_node *new,
   			struct rb_root *tree);
@@ -166,7 +169,7 @@ Iterating through the elements stored in an rbtree (in sort order)
 
 Four functions are provided for iterating through an rbtree's contents in
 sorted order.  These work on arbitrary trees, and should not need to be
-modified or wrapped (except for locking purposes):
+modified or wrapped (except for locking purposes)::
 
   struct rb_node *rb_first(struct rb_root *tree);
   struct rb_node *rb_last(struct rb_root *tree);
@@ -184,7 +187,7 @@ which the containing data structure may be accessed with the container_of()
 macro, and individual members may be accessed directly via
 rb_entry(node, type, member).
 
-Example:
+Example::
 
   struct rb_node *node;
   for (node = rb_first(&mytree); node; node = rb_next(node))
@@ -241,7 +244,8 @@ user should have a single rb_erase_augmented() call site in order to limit
 compiled code size.
 
 
-Sample usage:
+Sample usage
+^^^^^^^^^^^^
 
 Interval tree is an example of augmented rb tree. Reference -
 "Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein.
@@ -259,12 +263,12 @@ This "extra information" stored in each node is the maximum hi
 information can be maintained at each node just be looking at the node
 and its immediate children. And this will be used in O(log n) lookup
 for lowest match (lowest start address among all possible matches)
-with something like:
+with something like::
 
-struct interval_tree_node *
-interval_tree_first_match(struct rb_root *root,
-			  unsigned long start, unsigned long last)
-{
+  struct interval_tree_node *
+  interval_tree_first_match(struct rb_root *root,
+			    unsigned long start, unsigned long last)
+  {
 	struct interval_tree_node *node;
 
 	if (!root->rb_node)
@@ -301,13 +305,13 @@ interval_tree_first_match(struct rb_root *root,
 		}
 		return NULL;	/* No match */
 	}
-}
+  }
 
-Insertion/removal are defined using the following augmented callbacks:
+Insertion/removal are defined using the following augmented callbacks::
 
-static inline unsigned long
-compute_subtree_last(struct interval_tree_node *node)
-{
+  static inline unsigned long
+  compute_subtree_last(struct interval_tree_node *node)
+  {
 	unsigned long max = node->last, subtree_last;
 	if (node->rb.rb_left) {
 		subtree_last = rb_entry(node->rb.rb_left,
@@ -322,10 +326,10 @@ compute_subtree_last(struct interval_tree_node *node)
 			max = subtree_last;
 	}
 	return max;
-}
+  }
 
-static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
-{
+  static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+  {
 	while (rb != stop) {
 		struct interval_tree_node *node =
 			rb_entry(rb, struct interval_tree_node, rb);
@@ -335,20 +339,20 @@ static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
 		node->__subtree_last = subtree_last;
 		rb = rb_parent(&node->rb);
 	}
-}
+  }
 
-static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
-{
+  static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+  {
 	struct interval_tree_node *old =
 		rb_entry(rb_old, struct interval_tree_node, rb);
 	struct interval_tree_node *new =
 		rb_entry(rb_new, struct interval_tree_node, rb);
 
 	new->__subtree_last = old->__subtree_last;
-}
+  }
 
-static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
-{
+  static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+  {
 	struct interval_tree_node *old =
 		rb_entry(rb_old, struct interval_tree_node, rb);
 	struct interval_tree_node *new =
@@ -356,15 +360,15 @@ static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
 
 	new->__subtree_last = old->__subtree_last;
 	old->__subtree_last = compute_subtree_last(old);
-}
+  }
 
-static const struct rb_augment_callbacks augment_callbacks = {
+  static const struct rb_augment_callbacks augment_callbacks = {
 	augment_propagate, augment_copy, augment_rotate
-};
+  };
 
-void interval_tree_insert(struct interval_tree_node *node,
-			  struct rb_root *root)
-{
+  void interval_tree_insert(struct interval_tree_node *node,
+			    struct rb_root *root)
+  {
 	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
 	unsigned long start = node->start, last = node->last;
 	struct interval_tree_node *parent;
@@ -383,10 +387,10 @@ void interval_tree_insert(struct interval_tree_node *node,
 	node->__subtree_last = last;
 	rb_link_node(&node->rb, rb_parent, link);
 	rb_insert_augmented(&node->rb, root, &augment_callbacks);
-}
+  }
 
-void interval_tree_remove(struct interval_tree_node *node,
-			  struct rb_root *root)
-{
+  void interval_tree_remove(struct interval_tree_node *node,
+			    struct rb_root *root)
+  {
 	rb_erase_augmented(&node->rb, root, &augment_callbacks);
-}
+  }
-- 
GitLab


From 620b470bb41c9620875f9b7e3fe2d70a7602a6b6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 06:31:37 -0300
Subject: [PATCH 1605/1958] remoteproc.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark document and section titles;
- adjust identation;
- mark literal blocks

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/remoteproc.txt | 328 ++++++++++++++++++++---------------
 1 file changed, 189 insertions(+), 139 deletions(-)

diff --git a/Documentation/remoteproc.txt b/Documentation/remoteproc.txt
index f075974823513..77fb03acdbb4c 100644
--- a/Documentation/remoteproc.txt
+++ b/Documentation/remoteproc.txt
@@ -1,6 +1,9 @@
+==========================
 Remote Processor Framework
+==========================
 
-1. Introduction
+Introduction
+============
 
 Modern SoCs typically have heterogeneous remote processor devices in asymmetric
 multiprocessing (AMP) configurations, which may be running different instances
@@ -26,44 +29,62 @@ remoteproc will add those devices. This makes it possible to reuse the
 existing virtio drivers with remote processor backends at a minimal development
 cost.
 
-2. User API
+User API
+========
+
+::
 
   int rproc_boot(struct rproc *rproc)
-    - Boot a remote processor (i.e. load its firmware, power it on, ...).
-      If the remote processor is already powered on, this function immediately
-      returns (successfully).
-      Returns 0 on success, and an appropriate error value otherwise.
-      Note: to use this function you should already have a valid rproc
-      handle. There are several ways to achieve that cleanly (devres, pdata,
-      the way remoteproc_rpmsg.c does this, or, if this becomes prevalent, we
-      might also consider using dev_archdata for this).
+
+Boot a remote processor (i.e. load its firmware, power it on, ...).
+
+If the remote processor is already powered on, this function immediately
+returns (successfully).
+
+Returns 0 on success, and an appropriate error value otherwise.
+Note: to use this function you should already have a valid rproc
+handle. There are several ways to achieve that cleanly (devres, pdata,
+the way remoteproc_rpmsg.c does this, or, if this becomes prevalent, we
+might also consider using dev_archdata for this).
+
+::
 
   void rproc_shutdown(struct rproc *rproc)
-    - Power off a remote processor (previously booted with rproc_boot()).
-      In case @rproc is still being used by an additional user(s), then
-      this function will just decrement the power refcount and exit,
-      without really powering off the device.
-      Every call to rproc_boot() must (eventually) be accompanied by a call
-      to rproc_shutdown(). Calling rproc_shutdown() redundantly is a bug.
-      Notes:
-      - we're not decrementing the rproc's refcount, only the power refcount.
-        which means that the @rproc handle stays valid even after
-        rproc_shutdown() returns, and users can still use it with a subsequent
-        rproc_boot(), if needed.
+
+Power off a remote processor (previously booted with rproc_boot()).
+In case @rproc is still being used by an additional user(s), then
+this function will just decrement the power refcount and exit,
+without really powering off the device.
+
+Every call to rproc_boot() must (eventually) be accompanied by a call
+to rproc_shutdown(). Calling rproc_shutdown() redundantly is a bug.
+
+.. note::
+
+  we're not decrementing the rproc's refcount, only the power refcount.
+  which means that the @rproc handle stays valid even after
+  rproc_shutdown() returns, and users can still use it with a subsequent
+  rproc_boot(), if needed.
+
+::
 
   struct rproc *rproc_get_by_phandle(phandle phandle)
-    - Find an rproc handle using a device tree phandle. Returns the rproc
-      handle on success, and NULL on failure. This function increments
-      the remote processor's refcount, so always use rproc_put() to
-      decrement it back once rproc isn't needed anymore.
 
-3. Typical usage
+Find an rproc handle using a device tree phandle. Returns the rproc
+handle on success, and NULL on failure. This function increments
+the remote processor's refcount, so always use rproc_put() to
+decrement it back once rproc isn't needed anymore.
+
+Typical usage
+=============
 
-#include <linux/remoteproc.h>
+::
 
-/* in case we were given a valid 'rproc' handle */
-int dummy_rproc_example(struct rproc *my_rproc)
-{
+  #include <linux/remoteproc.h>
+
+  /* in case we were given a valid 'rproc' handle */
+  int dummy_rproc_example(struct rproc *my_rproc)
+  {
 	int ret;
 
 	/* let's power on and boot our remote processor */
@@ -80,84 +101,111 @@ int dummy_rproc_example(struct rproc *my_rproc)
 
 	/* let's shut it down now */
 	rproc_shutdown(my_rproc);
-}
+  }
+
+API for implementors
+====================
 
-4. API for implementors
+::
 
   struct rproc *rproc_alloc(struct device *dev, const char *name,
 				const struct rproc_ops *ops,
 				const char *firmware, int len)
-    - Allocate a new remote processor handle, but don't register
-      it yet. Required parameters are the underlying device, the
-      name of this remote processor, platform-specific ops handlers,
-      the name of the firmware to boot this rproc with, and the
-      length of private data needed by the allocating rproc driver (in bytes).
-
-      This function should be used by rproc implementations during
-      initialization of the remote processor.
-      After creating an rproc handle using this function, and when ready,
-      implementations should then call rproc_add() to complete
-      the registration of the remote processor.
-      On success, the new rproc is returned, and on failure, NULL.
-
-      Note: _never_ directly deallocate @rproc, even if it was not registered
-      yet. Instead, when you need to unroll rproc_alloc(), use rproc_free().
+
+Allocate a new remote processor handle, but don't register
+it yet. Required parameters are the underlying device, the
+name of this remote processor, platform-specific ops handlers,
+the name of the firmware to boot this rproc with, and the
+length of private data needed by the allocating rproc driver (in bytes).
+
+This function should be used by rproc implementations during
+initialization of the remote processor.
+
+After creating an rproc handle using this function, and when ready,
+implementations should then call rproc_add() to complete
+the registration of the remote processor.
+
+On success, the new rproc is returned, and on failure, NULL.
+
+.. note::
+
+  **never** directly deallocate @rproc, even if it was not registered
+  yet. Instead, when you need to unroll rproc_alloc(), use rproc_free().
+
+::
 
   void rproc_free(struct rproc *rproc)
-    - Free an rproc handle that was allocated by rproc_alloc.
-      This function essentially unrolls rproc_alloc(), by decrementing the
-      rproc's refcount. It doesn't directly free rproc; that would happen
-      only if there are no other references to rproc and its refcount now
-      dropped to zero.
+
+Free an rproc handle that was allocated by rproc_alloc.
+
+This function essentially unrolls rproc_alloc(), by decrementing the
+rproc's refcount. It doesn't directly free rproc; that would happen
+only if there are no other references to rproc and its refcount now
+dropped to zero.
+
+::
 
   int rproc_add(struct rproc *rproc)
-    - Register @rproc with the remoteproc framework, after it has been
-      allocated with rproc_alloc().
-      This is called by the platform-specific rproc implementation, whenever
-      a new remote processor device is probed.
-      Returns 0 on success and an appropriate error code otherwise.
-      Note: this function initiates an asynchronous firmware loading
-      context, which will look for virtio devices supported by the rproc's
-      firmware.
-      If found, those virtio devices will be created and added, so as a result
-      of registering this remote processor, additional virtio drivers might get
-      probed.
+
+Register @rproc with the remoteproc framework, after it has been
+allocated with rproc_alloc().
+
+This is called by the platform-specific rproc implementation, whenever
+a new remote processor device is probed.
+
+Returns 0 on success and an appropriate error code otherwise.
+Note: this function initiates an asynchronous firmware loading
+context, which will look for virtio devices supported by the rproc's
+firmware.
+
+If found, those virtio devices will be created and added, so as a result
+of registering this remote processor, additional virtio drivers might get
+probed.
+
+::
 
   int rproc_del(struct rproc *rproc)
-    - Unroll rproc_add().
-      This function should be called when the platform specific rproc
-      implementation decides to remove the rproc device. it should
-      _only_ be called if a previous invocation of rproc_add()
-      has completed successfully.
 
-      After rproc_del() returns, @rproc is still valid, and its
-      last refcount should be decremented by calling rproc_free().
+Unroll rproc_add().
+
+This function should be called when the platform specific rproc
+implementation decides to remove the rproc device. it should
+_only_ be called if a previous invocation of rproc_add()
+has completed successfully.
 
-      Returns 0 on success and -EINVAL if @rproc isn't valid.
+After rproc_del() returns, @rproc is still valid, and its
+last refcount should be decremented by calling rproc_free().
+
+Returns 0 on success and -EINVAL if @rproc isn't valid.
+
+::
 
   void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
-    - Report a crash in a remoteproc
-      This function must be called every time a crash is detected by the
-      platform specific rproc implementation. This should not be called from a
-      non-remoteproc driver. This function can be called from atomic/interrupt
-      context.
 
-5. Implementation callbacks
+Report a crash in a remoteproc
+
+This function must be called every time a crash is detected by the
+platform specific rproc implementation. This should not be called from a
+non-remoteproc driver. This function can be called from atomic/interrupt
+context.
+
+Implementation callbacks
+========================
 
 These callbacks should be provided by platform-specific remoteproc
-drivers:
-
-/**
- * struct rproc_ops - platform-specific device handlers
- * @start:	power on the device and boot it
- * @stop:	power off the device
- * @kick:	kick a virtqueue (virtqueue id given as a parameter)
- */
-struct rproc_ops {
+drivers::
+
+  /**
+   * struct rproc_ops - platform-specific device handlers
+   * @start:	power on the device and boot it
+   * @stop:	power off the device
+   * @kick:	kick a virtqueue (virtqueue id given as a parameter)
+   */
+  struct rproc_ops {
 	int (*start)(struct rproc *rproc);
 	int (*stop)(struct rproc *rproc);
 	void (*kick)(struct rproc *rproc, int vqid);
-};
+  };
 
 Every remoteproc implementation should at least provide the ->start and ->stop
 handlers. If rpmsg/virtio functionality is also desired, then the ->kick handler
@@ -179,7 +227,8 @@ the exact virtqueue index to look in is optional: it is easy (and not
 too expensive) to go through the existing virtqueues and look for new buffers
 in the used rings.
 
-6. Binary Firmware Structure
+Binary Firmware Structure
+=========================
 
 At this point remoteproc only supports ELF32 firmware binaries. However,
 it is quite expected that other platforms/devices which we'd want to
@@ -207,43 +256,43 @@ resource entries that publish the existence of supported features
 or configurations by the remote processor, such as trace buffers and
 supported virtio devices (and their configurations).
 
-The resource table begins with this header:
-
-/**
- * struct resource_table - firmware resource table header
- * @ver: version number
- * @num: number of resource entries
- * @reserved: reserved (must be zero)
- * @offset: array of offsets pointing at the various resource entries
- *
- * The header of the resource table, as expressed by this structure,
- * contains a version number (should we need to change this format in the
- * future), the number of available resource entries, and their offsets
- * in the table.
- */
-struct resource_table {
+The resource table begins with this header::
+
+  /**
+   * struct resource_table - firmware resource table header
+   * @ver: version number
+   * @num: number of resource entries
+   * @reserved: reserved (must be zero)
+   * @offset: array of offsets pointing at the various resource entries
+   *
+   * The header of the resource table, as expressed by this structure,
+   * contains a version number (should we need to change this format in the
+   * future), the number of available resource entries, and their offsets
+   * in the table.
+   */
+  struct resource_table {
 	u32 ver;
 	u32 num;
 	u32 reserved[2];
 	u32 offset[0];
-} __packed;
+  } __packed;
 
 Immediately following this header are the resource entries themselves,
-each of which begins with the following resource entry header:
-
-/**
- * struct fw_rsc_hdr - firmware resource entry header
- * @type: resource type
- * @data: resource data
- *
- * Every resource entry begins with a 'struct fw_rsc_hdr' header providing
- * its @type. The content of the entry itself will immediately follow
- * this header, and it should be parsed according to the resource type.
- */
-struct fw_rsc_hdr {
+each of which begins with the following resource entry header::
+
+  /**
+   * struct fw_rsc_hdr - firmware resource entry header
+   * @type: resource type
+   * @data: resource data
+   *
+   * Every resource entry begins with a 'struct fw_rsc_hdr' header providing
+   * its @type. The content of the entry itself will immediately follow
+   * this header, and it should be parsed according to the resource type.
+   */
+  struct fw_rsc_hdr {
 	u32 type;
 	u8 data[0];
-} __packed;
+  } __packed;
 
 Some resources entries are mere announcements, where the host is informed
 of specific remoteproc configuration. Other entries require the host to
@@ -252,32 +301,32 @@ is expected, where the firmware requests a resource, and once allocated,
 the host should provide back its details (e.g. address of an allocated
 memory region).
 
-Here are the various resource types that are currently supported:
-
-/**
- * enum fw_resource_type - types of resource entries
- *
- * @RSC_CARVEOUT:   request for allocation of a physically contiguous
- *		    memory region.
- * @RSC_DEVMEM:     request to iommu_map a memory-based peripheral.
- * @RSC_TRACE:	    announces the availability of a trace buffer into which
- *		    the remote processor will be writing logs.
- * @RSC_VDEV:       declare support for a virtio device, and serve as its
- *		    virtio header.
- * @RSC_LAST:       just keep this one at the end
- *
- * Please note that these values are used as indices to the rproc_handle_rsc
- * lookup table, so please keep them sane. Moreover, @RSC_LAST is used to
- * check the validity of an index before the lookup table is accessed, so
- * please update it as needed.
- */
-enum fw_resource_type {
+Here are the various resource types that are currently supported::
+
+  /**
+   * enum fw_resource_type - types of resource entries
+   *
+   * @RSC_CARVEOUT:   request for allocation of a physically contiguous
+   *		    memory region.
+   * @RSC_DEVMEM:     request to iommu_map a memory-based peripheral.
+   * @RSC_TRACE:	    announces the availability of a trace buffer into which
+   *		    the remote processor will be writing logs.
+   * @RSC_VDEV:       declare support for a virtio device, and serve as its
+   *		    virtio header.
+   * @RSC_LAST:       just keep this one at the end
+   *
+   * Please note that these values are used as indices to the rproc_handle_rsc
+   * lookup table, so please keep them sane. Moreover, @RSC_LAST is used to
+   * check the validity of an index before the lookup table is accessed, so
+   * please update it as needed.
+   */
+  enum fw_resource_type {
 	RSC_CARVEOUT	= 0,
 	RSC_DEVMEM	= 1,
 	RSC_TRACE	= 2,
 	RSC_VDEV	= 3,
 	RSC_LAST	= 4,
-};
+  };
 
 For more details regarding a specific resource type, please see its
 dedicated structure in include/linux/remoteproc.h.
@@ -286,7 +335,8 @@ We also expect that platform-specific resource entries will show up
 at some point. When that happens, we could easily add a new RSC_PLATFORM
 type, and hand those resources to the platform-specific rproc driver to handle.
 
-7. Virtio and remoteproc
+Virtio and remoteproc
+=====================
 
 The firmware should provide remoteproc information about virtio devices
 that it supports, and their configurations: a RSC_VDEV resource entry
-- 
GitLab


From 317a8455f18383bf339c3ff0e461c5d9fdcb0b3d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 06:38:02 -0300
Subject: [PATCH 1606/1958] rfkill.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark titles;
- comment contents index;
- mark literal blocks;
- adjust identation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/rfkill.txt | 43 ++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 8c174063b3f0c..a289285d2412e 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -1,13 +1,13 @@
+===============================
 rfkill - RF kill switch support
 ===============================
 
-1. Introduction
-2. Implementation details
-3. Kernel API
-4. Userspace support
 
+.. contents::
+   :depth: 2
 
-1. Introduction
+Introduction
+============
 
 The rfkill subsystem provides a generic interface to disabling any radio
 transmitter in the system. When a transmitter is blocked, it shall not
@@ -21,17 +21,24 @@ aircraft.
 The rfkill subsystem has a concept of "hard" and "soft" block, which
 differ little in their meaning (block == transmitters off) but rather in
 whether they can be changed or not:
- - hard block: read-only radio block that cannot be overridden by software
- - soft block: writable radio block (need not be readable) that is set by
-               the system software.
+
+ - hard block
+	read-only radio block that cannot be overridden by software
+
+ - soft block
+	writable radio block (need not be readable) that is set by
+        the system software.
 
 The rfkill subsystem has two parameters, rfkill.default_state and
-rfkill.master_switch_mode, which are documented in admin-guide/kernel-parameters.rst.
+rfkill.master_switch_mode, which are documented in
+admin-guide/kernel-parameters.rst.
 
 
-2. Implementation details
+Implementation details
+======================
 
 The rfkill subsystem is composed of three main components:
+
  * the rfkill core,
  * the deprecated rfkill-input module (an input layer handler, being
    replaced by userspace policy code) and
@@ -55,7 +62,8 @@ use the return value of rfkill_set_hw_state() unless the hardware actually
 keeps track of soft and hard block separately.
 
 
-3. Kernel API
+Kernel API
+==========
 
 
 Drivers for radio transmitters normally implement an rfkill driver.
@@ -69,7 +77,7 @@ For some platforms, it is possible that the hardware state changes during
 suspend/hibernation, in which case it will be necessary to update the rfkill
 core with the current state is at resume time.
 
-To create an rfkill driver, driver's Kconfig needs to have
+To create an rfkill driver, driver's Kconfig needs to have::
 
 	depends on RFKILL || !RFKILL
 
@@ -87,7 +95,8 @@ RFKill provides per-switch LED triggers, which can be used to drive LEDs
 according to the switch state (LED_FULL when blocked, LED_OFF otherwise).
 
 
-5. Userspace support
+Userspace support
+=================
 
 The recommended userspace interface to use is /dev/rfkill, which is a misc
 character device that allows userspace to obtain and set the state of rfkill
@@ -112,11 +121,11 @@ rfkill core framework.
 Additionally, each rfkill device is registered in sysfs and emits uevents.
 
 rfkill devices issue uevents (with an action of "change"), with the following
-environment variables set:
+environment variables set::
 
-RFKILL_NAME
-RFKILL_STATE
-RFKILL_TYPE
+	RFKILL_NAME
+	RFKILL_STATE
+	RFKILL_TYPE
 
 The contents of these variables corresponds to the "name", "state" and
 "type" sysfs files explained above.
-- 
GitLab


From ce3a966931a45065306b9a1840f22bfccbffe590 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 06:42:36 -0300
Subject: [PATCH 1607/1958] robust-futex-ABI.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- promote document title;
- use :Author: for authorship;
- mark literal blocks;
- add blank lines.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/robust-futex-ABI.txt | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
index 16eb314f56cc4..8a5d34abf7264 100644
--- a/Documentation/robust-futex-ABI.txt
+++ b/Documentation/robust-futex-ABI.txt
@@ -1,7 +1,9 @@
-Started by Paul Jackson <pj@sgi.com>
-
+====================
 The robust futex ABI
---------------------
+====================
+
+:Author: Started by Paul Jackson <pj@sgi.com>
+
 
 Robust_futexes provide a mechanism that is used in addition to normal
 futexes, for kernel assist of cleanup of held locks on task exit.
@@ -32,7 +34,7 @@ probably causing deadlock or other such failure of the other threads
 waiting on the same locks.
 
 A thread that anticipates possibly using robust_futexes should first
-issue the system call:
+issue the system call::
 
     asmlinkage long
     sys_set_robust_list(struct robust_list_head __user *head, size_t len);
@@ -91,7 +93,7 @@ that lock using the futex mechanism.
 When a thread has invoked the above system call to indicate it
 anticipates using robust_futexes, the kernel stores the passed in 'head'
 pointer for that task.  The task may retrieve that value later on by
-using the system call:
+using the system call::
 
     asmlinkage long
     sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
@@ -135,6 +137,7 @@ manipulating this list), the user code must observe the following
 protocol on 'lock entry' insertion and removal:
 
 On insertion:
+
  1) set the 'list_op_pending' word to the address of the 'lock entry'
     to be inserted,
  2) acquire the futex lock,
@@ -143,6 +146,7 @@ On insertion:
  4) clear the 'list_op_pending' word.
 
 On removal:
+
  1) set the 'list_op_pending' word to the address of the 'lock entry'
     to be removed,
  2) remove the lock entry for this lock from the 'head' list,
-- 
GitLab


From 773810d30e7d81b8308fea272ed2ec3d832a6018 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 06:46:19 -0300
Subject: [PATCH 1608/1958] robust-futexes.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- add a title for the document;
- mark literal blocks;

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/robust-futexes.txt | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
index 61c22d608759e..6c42c75103eb6 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/robust-futexes.txt
@@ -1,4 +1,8 @@
-Started by: Ingo Molnar <mingo@redhat.com>
+========================================
+A description of what robust futexes are
+========================================
+
+:Started by: Ingo Molnar <mingo@redhat.com>
 
 Background
 ----------
@@ -163,7 +167,7 @@ Implementation details
 ----------------------
 
 The patch adds two new syscalls: one to register the userspace list, and
-one to query the registered list pointer:
+one to query the registered list pointer::
 
  asmlinkage long
  sys_set_robust_list(struct robust_list_head __user *head,
@@ -185,7 +189,7 @@ straightforward. The kernel doesn't have any internal distinction between
 robust and normal futexes.
 
 If a futex is found to be held at exit time, the kernel sets the
-following bit of the futex word:
+following bit of the futex word::
 
 	#define FUTEX_OWNER_DIED        0x40000000
 
@@ -193,7 +197,7 @@ and wakes up the next futex waiter (if any). User-space does the rest of
 the cleanup.
 
 Otherwise, robust futexes are acquired by glibc by putting the TID into
-the futex field atomically. Waiters set the FUTEX_WAITERS bit:
+the futex field atomically. Waiters set the FUTEX_WAITERS bit::
 
 	#define FUTEX_WAITERS           0x80000000
 
-- 
GitLab


From af3137f1322b2a25ce24da131675b090fbb9fa7e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 06:53:44 -0300
Subject: [PATCH 1609/1958] rpmsg.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark document and chapter titles;
- mark notes;
- mark literal blocks;
- adjust identation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/rpmsg.txt | 348 +++++++++++++++++++++++-----------------
 1 file changed, 204 insertions(+), 144 deletions(-)

diff --git a/Documentation/rpmsg.txt b/Documentation/rpmsg.txt
index a95e36a43288f..24b7a9e1a5f99 100644
--- a/Documentation/rpmsg.txt
+++ b/Documentation/rpmsg.txt
@@ -1,10 +1,15 @@
+============================================
 Remote Processor Messaging (rpmsg) Framework
+============================================
 
-Note: this document describes the rpmsg bus and how to write rpmsg drivers.
-To learn how to add rpmsg support for new platforms, check out remoteproc.txt
-(also a resident of Documentation/).
+.. note::
 
-1. Introduction
+  This document describes the rpmsg bus and how to write rpmsg drivers.
+  To learn how to add rpmsg support for new platforms, check out remoteproc.txt
+  (also a resident of Documentation/).
+
+Introduction
+============
 
 Modern SoCs typically employ heterogeneous remote processor devices in
 asymmetric multiprocessing (AMP) configurations, which may be running
@@ -58,170 +63,222 @@ to their destination address (this is done by invoking the driver's rx handler
 with the payload of the inbound message).
 
 
-2. User API
+User API
+========
+
+::
 
   int rpmsg_send(struct rpmsg_channel *rpdev, void *data, int len);
-   - sends a message across to the remote processor on a given channel.
-     The caller should specify the channel, the data it wants to send,
-     and its length (in bytes). The message will be sent on the specified
-     channel, i.e. its source and destination address fields will be
-     set to the channel's src and dst addresses.
-
-     In case there are no TX buffers available, the function will block until
-     one becomes available (i.e. until the remote processor consumes
-     a tx buffer and puts it back on virtio's used descriptor ring),
-     or a timeout of 15 seconds elapses. When the latter happens,
-     -ERESTARTSYS is returned.
-     The function can only be called from a process context (for now).
-     Returns 0 on success and an appropriate error value on failure.
+
+sends a message across to the remote processor on a given channel.
+The caller should specify the channel, the data it wants to send,
+and its length (in bytes). The message will be sent on the specified
+channel, i.e. its source and destination address fields will be
+set to the channel's src and dst addresses.
+
+In case there are no TX buffers available, the function will block until
+one becomes available (i.e. until the remote processor consumes
+a tx buffer and puts it back on virtio's used descriptor ring),
+or a timeout of 15 seconds elapses. When the latter happens,
+-ERESTARTSYS is returned.
+
+The function can only be called from a process context (for now).
+Returns 0 on success and an appropriate error value on failure.
+
+::
 
   int rpmsg_sendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst);
-   - sends a message across to the remote processor on a given channel,
-     to a destination address provided by the caller.
-     The caller should specify the channel, the data it wants to send,
-     its length (in bytes), and an explicit destination address.
-     The message will then be sent to the remote processor to which the
-     channel belongs, using the channel's src address, and the user-provided
-     dst address (thus the channel's dst address will be ignored).
-
-     In case there are no TX buffers available, the function will block until
-     one becomes available (i.e. until the remote processor consumes
-     a tx buffer and puts it back on virtio's used descriptor ring),
-     or a timeout of 15 seconds elapses. When the latter happens,
-     -ERESTARTSYS is returned.
-     The function can only be called from a process context (for now).
-     Returns 0 on success and an appropriate error value on failure.
+
+sends a message across to the remote processor on a given channel,
+to a destination address provided by the caller.
+
+The caller should specify the channel, the data it wants to send,
+its length (in bytes), and an explicit destination address.
+
+The message will then be sent to the remote processor to which the
+channel belongs, using the channel's src address, and the user-provided
+dst address (thus the channel's dst address will be ignored).
+
+In case there are no TX buffers available, the function will block until
+one becomes available (i.e. until the remote processor consumes
+a tx buffer and puts it back on virtio's used descriptor ring),
+or a timeout of 15 seconds elapses. When the latter happens,
+-ERESTARTSYS is returned.
+
+The function can only be called from a process context (for now).
+Returns 0 on success and an appropriate error value on failure.
+
+::
 
   int rpmsg_send_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst,
 							void *data, int len);
-   - sends a message across to the remote processor, using the src and dst
-     addresses provided by the user.
-     The caller should specify the channel, the data it wants to send,
-     its length (in bytes), and explicit source and destination addresses.
-     The message will then be sent to the remote processor to which the
-     channel belongs, but the channel's src and dst addresses will be
-     ignored (and the user-provided addresses will be used instead).
-
-     In case there are no TX buffers available, the function will block until
-     one becomes available (i.e. until the remote processor consumes
-     a tx buffer and puts it back on virtio's used descriptor ring),
-     or a timeout of 15 seconds elapses. When the latter happens,
-     -ERESTARTSYS is returned.
-     The function can only be called from a process context (for now).
-     Returns 0 on success and an appropriate error value on failure.
+
+
+sends a message across to the remote processor, using the src and dst
+addresses provided by the user.
+
+The caller should specify the channel, the data it wants to send,
+its length (in bytes), and explicit source and destination addresses.
+The message will then be sent to the remote processor to which the
+channel belongs, but the channel's src and dst addresses will be
+ignored (and the user-provided addresses will be used instead).
+
+In case there are no TX buffers available, the function will block until
+one becomes available (i.e. until the remote processor consumes
+a tx buffer and puts it back on virtio's used descriptor ring),
+or a timeout of 15 seconds elapses. When the latter happens,
+-ERESTARTSYS is returned.
+
+The function can only be called from a process context (for now).
+Returns 0 on success and an appropriate error value on failure.
+
+::
 
   int rpmsg_trysend(struct rpmsg_channel *rpdev, void *data, int len);
-   - sends a message across to the remote processor on a given channel.
-     The caller should specify the channel, the data it wants to send,
-     and its length (in bytes). The message will be sent on the specified
-     channel, i.e. its source and destination address fields will be
-     set to the channel's src and dst addresses.
 
-     In case there are no TX buffers available, the function will immediately
-     return -ENOMEM without waiting until one becomes available.
-     The function can only be called from a process context (for now).
-     Returns 0 on success and an appropriate error value on failure.
+sends a message across to the remote processor on a given channel.
+The caller should specify the channel, the data it wants to send,
+and its length (in bytes). The message will be sent on the specified
+channel, i.e. its source and destination address fields will be
+set to the channel's src and dst addresses.
+
+In case there are no TX buffers available, the function will immediately
+return -ENOMEM without waiting until one becomes available.
+
+The function can only be called from a process context (for now).
+Returns 0 on success and an appropriate error value on failure.
+
+::
 
   int rpmsg_trysendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst)
-   - sends a message across to the remote processor on a given channel,
-     to a destination address provided by the user.
-     The user should specify the channel, the data it wants to send,
-     its length (in bytes), and an explicit destination address.
-     The message will then be sent to the remote processor to which the
-     channel belongs, using the channel's src address, and the user-provided
-     dst address (thus the channel's dst address will be ignored).
-
-     In case there are no TX buffers available, the function will immediately
-     return -ENOMEM without waiting until one becomes available.
-     The function can only be called from a process context (for now).
-     Returns 0 on success and an appropriate error value on failure.
+
+
+sends a message across to the remote processor on a given channel,
+to a destination address provided by the user.
+
+The user should specify the channel, the data it wants to send,
+its length (in bytes), and an explicit destination address.
+
+The message will then be sent to the remote processor to which the
+channel belongs, using the channel's src address, and the user-provided
+dst address (thus the channel's dst address will be ignored).
+
+In case there are no TX buffers available, the function will immediately
+return -ENOMEM without waiting until one becomes available.
+
+The function can only be called from a process context (for now).
+Returns 0 on success and an appropriate error value on failure.
+
+::
 
   int rpmsg_trysend_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst,
 							void *data, int len);
-   - sends a message across to the remote processor, using source and
-     destination addresses provided by the user.
-     The user should specify the channel, the data it wants to send,
-     its length (in bytes), and explicit source and destination addresses.
-     The message will then be sent to the remote processor to which the
-     channel belongs, but the channel's src and dst addresses will be
-     ignored (and the user-provided addresses will be used instead).
-
-     In case there are no TX buffers available, the function will immediately
-     return -ENOMEM without waiting until one becomes available.
-     The function can only be called from a process context (for now).
-     Returns 0 on success and an appropriate error value on failure.
+
+
+sends a message across to the remote processor, using source and
+destination addresses provided by the user.
+
+The user should specify the channel, the data it wants to send,
+its length (in bytes), and explicit source and destination addresses.
+The message will then be sent to the remote processor to which the
+channel belongs, but the channel's src and dst addresses will be
+ignored (and the user-provided addresses will be used instead).
+
+In case there are no TX buffers available, the function will immediately
+return -ENOMEM without waiting until one becomes available.
+
+The function can only be called from a process context (for now).
+Returns 0 on success and an appropriate error value on failure.
+
+::
 
   struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_channel *rpdev,
 		void (*cb)(struct rpmsg_channel *, void *, int, void *, u32),
 		void *priv, u32 addr);
-   - every rpmsg address in the system is bound to an rx callback (so when
-     inbound messages arrive, they are dispatched by the rpmsg bus using the
-     appropriate callback handler) by means of an rpmsg_endpoint struct.
-
-     This function allows drivers to create such an endpoint, and by that,
-     bind a callback, and possibly some private data too, to an rpmsg address
-     (either one that is known in advance, or one that will be dynamically
-     assigned for them).
-
-     Simple rpmsg drivers need not call rpmsg_create_ept, because an endpoint
-     is already created for them when they are probed by the rpmsg bus
-     (using the rx callback they provide when they registered to the rpmsg bus).
-
-     So things should just work for simple drivers: they already have an
-     endpoint, their rx callback is bound to their rpmsg address, and when
-     relevant inbound messages arrive (i.e. messages which their dst address
-     equals to the src address of their rpmsg channel), the driver's handler
-     is invoked to process it.
-
-     That said, more complicated drivers might do need to allocate
-     additional rpmsg addresses, and bind them to different rx callbacks.
-     To accomplish that, those drivers need to call this function.
-     Drivers should provide their channel (so the new endpoint would bind
-     to the same remote processor their channel belongs to), an rx callback
-     function, an optional private data (which is provided back when the
-     rx callback is invoked), and an address they want to bind with the
-     callback. If addr is RPMSG_ADDR_ANY, then rpmsg_create_ept will
-     dynamically assign them an available rpmsg address (drivers should have
-     a very good reason why not to always use RPMSG_ADDR_ANY here).
-
-     Returns a pointer to the endpoint on success, or NULL on error.
+
+every rpmsg address in the system is bound to an rx callback (so when
+inbound messages arrive, they are dispatched by the rpmsg bus using the
+appropriate callback handler) by means of an rpmsg_endpoint struct.
+
+This function allows drivers to create such an endpoint, and by that,
+bind a callback, and possibly some private data too, to an rpmsg address
+(either one that is known in advance, or one that will be dynamically
+assigned for them).
+
+Simple rpmsg drivers need not call rpmsg_create_ept, because an endpoint
+is already created for them when they are probed by the rpmsg bus
+(using the rx callback they provide when they registered to the rpmsg bus).
+
+So things should just work for simple drivers: they already have an
+endpoint, their rx callback is bound to their rpmsg address, and when
+relevant inbound messages arrive (i.e. messages which their dst address
+equals to the src address of their rpmsg channel), the driver's handler
+is invoked to process it.
+
+That said, more complicated drivers might do need to allocate
+additional rpmsg addresses, and bind them to different rx callbacks.
+To accomplish that, those drivers need to call this function.
+Drivers should provide their channel (so the new endpoint would bind
+to the same remote processor their channel belongs to), an rx callback
+function, an optional private data (which is provided back when the
+rx callback is invoked), and an address they want to bind with the
+callback. If addr is RPMSG_ADDR_ANY, then rpmsg_create_ept will
+dynamically assign them an available rpmsg address (drivers should have
+a very good reason why not to always use RPMSG_ADDR_ANY here).
+
+Returns a pointer to the endpoint on success, or NULL on error.
+
+::
 
   void rpmsg_destroy_ept(struct rpmsg_endpoint *ept);
-   - destroys an existing rpmsg endpoint. user should provide a pointer
-     to an rpmsg endpoint that was previously created with rpmsg_create_ept().
+
+
+destroys an existing rpmsg endpoint. user should provide a pointer
+to an rpmsg endpoint that was previously created with rpmsg_create_ept().
+
+::
 
   int register_rpmsg_driver(struct rpmsg_driver *rpdrv);
-   - registers an rpmsg driver with the rpmsg bus. user should provide
-     a pointer to an rpmsg_driver struct, which contains the driver's
-     ->probe() and ->remove() functions, an rx callback, and an id_table
-     specifying the names of the channels this driver is interested to
-     be probed with.
+
+
+registers an rpmsg driver with the rpmsg bus. user should provide
+a pointer to an rpmsg_driver struct, which contains the driver's
+->probe() and ->remove() functions, an rx callback, and an id_table
+specifying the names of the channels this driver is interested to
+be probed with.
+
+::
 
   void unregister_rpmsg_driver(struct rpmsg_driver *rpdrv);
-   - unregisters an rpmsg driver from the rpmsg bus. user should provide
-     a pointer to a previously-registered rpmsg_driver struct.
-     Returns 0 on success, and an appropriate error value on failure.
 
 
-3. Typical usage
+unregisters an rpmsg driver from the rpmsg bus. user should provide
+a pointer to a previously-registered rpmsg_driver struct.
+Returns 0 on success, and an appropriate error value on failure.
+
+
+Typical usage
+=============
 
 The following is a simple rpmsg driver, that sends an "hello!" message
 on probe(), and whenever it receives an incoming message, it dumps its
 content to the console.
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/rpmsg.h>
+::
+
+  #include <linux/kernel.h>
+  #include <linux/module.h>
+  #include <linux/rpmsg.h>
 
-static void rpmsg_sample_cb(struct rpmsg_channel *rpdev, void *data, int len,
+  static void rpmsg_sample_cb(struct rpmsg_channel *rpdev, void *data, int len,
 						void *priv, u32 src)
-{
+  {
 	print_hex_dump(KERN_INFO, "incoming message:", DUMP_PREFIX_NONE,
 						16, 1, data, len, true);
-}
+  }
 
-static int rpmsg_sample_probe(struct rpmsg_channel *rpdev)
-{
+  static int rpmsg_sample_probe(struct rpmsg_channel *rpdev)
+  {
 	int err;
 
 	dev_info(&rpdev->dev, "chnl: 0x%x -> 0x%x\n", rpdev->src, rpdev->dst);
@@ -234,32 +291,35 @@ static int rpmsg_sample_probe(struct rpmsg_channel *rpdev)
 	}
 
 	return 0;
-}
+  }
 
-static void rpmsg_sample_remove(struct rpmsg_channel *rpdev)
-{
+  static void rpmsg_sample_remove(struct rpmsg_channel *rpdev)
+  {
 	dev_info(&rpdev->dev, "rpmsg sample client driver is removed\n");
-}
+  }
 
-static struct rpmsg_device_id rpmsg_driver_sample_id_table[] = {
+  static struct rpmsg_device_id rpmsg_driver_sample_id_table[] = {
 	{ .name	= "rpmsg-client-sample" },
 	{ },
-};
-MODULE_DEVICE_TABLE(rpmsg, rpmsg_driver_sample_id_table);
+  };
+  MODULE_DEVICE_TABLE(rpmsg, rpmsg_driver_sample_id_table);
 
-static struct rpmsg_driver rpmsg_sample_client = {
+  static struct rpmsg_driver rpmsg_sample_client = {
 	.drv.name	= KBUILD_MODNAME,
 	.id_table	= rpmsg_driver_sample_id_table,
 	.probe		= rpmsg_sample_probe,
 	.callback	= rpmsg_sample_cb,
 	.remove		= rpmsg_sample_remove,
-};
-module_rpmsg_driver(rpmsg_sample_client);
+  };
+  module_rpmsg_driver(rpmsg_sample_client);
+
+.. note::
 
-Note: a similar sample which can be built and loaded can be found
-in samples/rpmsg/.
+   a similar sample which can be built and loaded can be found
+   in samples/rpmsg/.
 
-4. Allocations of rpmsg channels:
+Allocations of rpmsg channels
+=============================
 
 At this point we only support dynamic allocations of rpmsg channels.
 
-- 
GitLab


From 2273194d1e236c6a8fab5b856ece8d478937b7b2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 07:45:57 -0300
Subject: [PATCH 1610/1958] SAK.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- mark document title;
- use :Author: and :Date: for authorship;
- adjust notation for literals and bold;
- mark literal blocks;
- adjust identation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/SAK.txt | 65 ++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/Documentation/SAK.txt b/Documentation/SAK.txt
index 74be14679ed89..260e1d3687bdc 100644
--- a/Documentation/SAK.txt
+++ b/Documentation/SAK.txt
@@ -1,5 +1,9 @@
-Linux 2.4.2 Secure Attention Key (SAK) handling
-18 March 2001, Andrew Morton
+=========================================
+Linux Secure Attention Key (SAK) handling
+=========================================
+
+:Date: 18 March 2001
+:Author: Andrew Morton
 
 An operating system's Secure Attention Key is a security tool which is
 provided as protection against trojan password capturing programs.  It
@@ -13,7 +17,7 @@ this sequence.  It is only available if the kernel was compiled with
 sysrq support.
 
 The proper way of generating a SAK is to define the key sequence using
-`loadkeys'.  This will work whether or not sysrq support is compiled
+``loadkeys``.  This will work whether or not sysrq support is compiled
 into the kernel.
 
 SAK works correctly when the keyboard is in raw mode.  This means that
@@ -25,64 +29,63 @@ What key sequence should you use? Well, CTRL-ALT-DEL is used to reboot
 the machine.  CTRL-ALT-BACKSPACE is magical to the X server.  We'll
 choose CTRL-ALT-PAUSE.
 
-In your rc.sysinit (or rc.local) file, add the command
+In your rc.sysinit (or rc.local) file, add the command::
 
 	echo "control alt keycode 101 = SAK" | /bin/loadkeys
 
 And that's it!  Only the superuser may reprogram the SAK key.
 
 
-NOTES
-=====
+.. note::
 
-1: Linux SAK is said to be not a "true SAK" as is required by
-   systems which implement C2 level security.  This author does not
-   know why.
+  1. Linux SAK is said to be not a "true SAK" as is required by
+     systems which implement C2 level security.  This author does not
+     know why.
 
 
-2: On the PC keyboard, SAK kills all applications which have
-   /dev/console opened.
+  2. On the PC keyboard, SAK kills all applications which have
+     /dev/console opened.
 
-   Unfortunately this includes a number of things which you don't
-   actually want killed.  This is because these applications are
-   incorrectly holding /dev/console open.  Be sure to complain to your
-   Linux distributor about this!
+     Unfortunately this includes a number of things which you don't
+     actually want killed.  This is because these applications are
+     incorrectly holding /dev/console open.  Be sure to complain to your
+     Linux distributor about this!
 
-   You can identify processes which will be killed by SAK with the
-   command
+     You can identify processes which will be killed by SAK with the
+     command::
 
 	# ls -l /proc/[0-9]*/fd/* | grep console
 	l-wx------    1 root     root           64 Mar 18 00:46 /proc/579/fd/0 -> /dev/console
 
-   Then:
+     Then::
 
 	# ps aux|grep 579
 	root       579  0.0  0.1  1088  436 ?        S    00:43   0:00 gpm -t ps/2
 
-   So `gpm' will be killed by SAK.  This is a bug in gpm.  It should
-   be closing standard input.  You can work around this by finding the
-   initscript which launches gpm and changing it thusly:
+     So ``gpm`` will be killed by SAK.  This is a bug in gpm.  It should
+     be closing standard input.  You can work around this by finding the
+     initscript which launches gpm and changing it thusly:
 
-   Old:
+     Old::
 
 	daemon gpm
 
-   New:
+     New::
 
 	daemon gpm < /dev/null
 
-   Vixie cron also seems to have this problem, and needs the same treatment.
+     Vixie cron also seems to have this problem, and needs the same treatment.
 
-   Also, one prominent Linux distribution has the following three
-   lines in its rc.sysinit and rc scripts:
+     Also, one prominent Linux distribution has the following three
+     lines in its rc.sysinit and rc scripts::
 
 	exec 3<&0
 	exec 4>&1
 	exec 5>&2
 
-   These commands cause *all* daemons which are launched by the
-   initscripts to have file descriptors 3, 4 and 5 attached to
-   /dev/console.  So SAK kills them all.  A workaround is to simply
-   delete these lines, but this may cause system management
-   applications to malfunction - test everything well.
+     These commands cause **all** daemons which are launched by the
+     initscripts to have file descriptors 3, 4 and 5 attached to
+     /dev/console.  So SAK kills them all.  A workaround is to simply
+     delete these lines, but this may cause system management
+     applications to malfunction - test everything well.
 
-- 
GitLab


From 53708b874820aeab86b90ab6deef7a6d9a8d4a3d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 07:50:05 -0300
Subject: [PATCH 1611/1958] sgi-ioc4.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx.

In this case, just adding a title is enough.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sgi-ioc4.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/sgi-ioc4.txt b/Documentation/sgi-ioc4.txt
index 876c96ae38dba..72709222d3c0f 100644
--- a/Documentation/sgi-ioc4.txt
+++ b/Documentation/sgi-ioc4.txt
@@ -1,3 +1,7 @@
+====================================
+SGI IOC4 PCI (multi function) device
+====================================
+
 The SGI IOC4 PCI device is a bit of a strange beast, so some notes on
 it are in order.
 
-- 
GitLab


From 9135bf4dcb437fa5bac908c5d3bff7894138a157 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 07:55:32 -0300
Subject: [PATCH 1612/1958] siphash.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Mark titles;
- Mark literal blocks;
- Use :Author: for authorship;
- Don't sumerate chapters;
- Adjust identation.

NOTE:

This file has actually two documents inside it, the first
one describing siphash, the second one describing halfsiphash.

It is likely a good idea to split them when it gets moved to
security/ (which is where it probably belongs).

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/siphash.txt | 164 +++++++++++++++++++++-----------------
 1 file changed, 89 insertions(+), 75 deletions(-)

diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt
index 908d348ff7776..9965821ab3334 100644
--- a/Documentation/siphash.txt
+++ b/Documentation/siphash.txt
@@ -1,6 +1,8 @@
-         SipHash - a short input PRF
------------------------------------------------
-Written by Jason A. Donenfeld <jason@zx2c4.com>
+===========================
+SipHash - a short input PRF
+===========================
+
+:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
 
 SipHash is a cryptographically secure PRF -- a keyed hash function -- that
 performs very well for short inputs, hence the name. It was designed by
@@ -13,58 +15,61 @@ an input buffer or several input integers. It spits out an integer that is
 indistinguishable from random. You may then use that integer as part of secure
 sequence numbers, secure cookies, or mask it off for use in a hash table.
 
-1. Generating a key
+Generating a key
+================
 
 Keys should always be generated from a cryptographically secure source of
-random numbers, either using get_random_bytes or get_random_once:
+random numbers, either using get_random_bytes or get_random_once::
 
-siphash_key_t key;
-get_random_bytes(&key, sizeof(key));
+	siphash_key_t key;
+	get_random_bytes(&key, sizeof(key));
 
 If you're not deriving your key from here, you're doing it wrong.
 
-2. Using the functions
+Using the functions
+===================
 
 There are two variants of the function, one that takes a list of integers, and
-one that takes a buffer:
+one that takes a buffer::
 
-u64 siphash(const void *data, size_t len, const siphash_key_t *key);
+	u64 siphash(const void *data, size_t len, const siphash_key_t *key);
 
-And:
+And::
 
-u64 siphash_1u64(u64, const siphash_key_t *key);
-u64 siphash_2u64(u64, u64, const siphash_key_t *key);
-u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key);
-u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key);
-u64 siphash_1u32(u32, const siphash_key_t *key);
-u64 siphash_2u32(u32, u32, const siphash_key_t *key);
-u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key);
-u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key);
+	u64 siphash_1u64(u64, const siphash_key_t *key);
+	u64 siphash_2u64(u64, u64, const siphash_key_t *key);
+	u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key);
+	u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key);
+	u64 siphash_1u32(u32, const siphash_key_t *key);
+	u64 siphash_2u32(u32, u32, const siphash_key_t *key);
+	u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key);
+	u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key);
 
 If you pass the generic siphash function something of a constant length, it
 will constant fold at compile-time and automatically choose one of the
 optimized functions.
 
-3. Hashtable key function usage:
+Hashtable key function usage::
 
-struct some_hashtable {
-	DECLARE_HASHTABLE(hashtable, 8);
-	siphash_key_t key;
-};
+	struct some_hashtable {
+		DECLARE_HASHTABLE(hashtable, 8);
+		siphash_key_t key;
+	};
 
-void init_hashtable(struct some_hashtable *table)
-{
-	get_random_bytes(&table->key, sizeof(table->key));
-}
+	void init_hashtable(struct some_hashtable *table)
+	{
+		get_random_bytes(&table->key, sizeof(table->key));
+	}
 
-static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
-{
-	return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
-}
+	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
+	{
+		return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
+	}
 
 You may then iterate like usual over the returned hash bucket.
 
-4. Security
+Security
+========
 
 SipHash has a very high security margin, with its 128-bit key. So long as the
 key is kept secret, it is impossible for an attacker to guess the outputs of
@@ -73,7 +78,8 @@ is significant.
 
 Linux implements the "2-4" variant of SipHash.
 
-5. Struct-passing Pitfalls
+Struct-passing Pitfalls
+=======================
 
 Often times the XuY functions will not be large enough, and instead you'll
 want to pass a pre-filled struct to siphash. When doing this, it's important
@@ -81,30 +87,32 @@ to always ensure the struct has no padding holes. The easiest way to do this
 is to simply arrange the members of the struct in descending order of size,
 and to use offsetendof() instead of sizeof() for getting the size. For
 performance reasons, if possible, it's probably a good thing to align the
-struct to the right boundary. Here's an example:
-
-const struct {
-	struct in6_addr saddr;
-	u32 counter;
-	u16 dport;
-} __aligned(SIPHASH_ALIGNMENT) combined = {
-	.saddr = *(struct in6_addr *)saddr,
-	.counter = counter,
-	.dport = dport
-};
-u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
-
-6. Resources
+struct to the right boundary. Here's an example::
+
+	const struct {
+		struct in6_addr saddr;
+		u32 counter;
+		u16 dport;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *(struct in6_addr *)saddr,
+		.counter = counter,
+		.dport = dport
+	};
+	u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
+
+Resources
+=========
 
 Read the SipHash paper if you're interested in learning more:
 https://131002.net/siphash/siphash.pdf
 
+-------------------------------------------------------------------------------
 
-~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
-
+===============================================
 HalfSipHash - SipHash's insecure younger cousin
------------------------------------------------
-Written by Jason A. Donenfeld <jason@zx2c4.com>
+===============================================
+
+:Author: Written by Jason A. Donenfeld <jason@zx2c4.com>
 
 On the off-chance that SipHash is not fast enough for your needs, you might be
 able to justify using HalfSipHash, a terrifying but potentially useful
@@ -120,7 +128,8 @@ then when you can be absolutely certain that the outputs will never be
 transmitted out of the kernel. This is only remotely useful over `jhash` as a
 means of mitigating hashtable flooding denial of service attacks.
 
-1. Generating a key
+Generating a key
+================
 
 Keys should always be generated from a cryptographically secure source of
 random numbers, either using get_random_bytes or get_random_once:
@@ -130,44 +139,49 @@ get_random_bytes(&key, sizeof(key));
 
 If you're not deriving your key from here, you're doing it wrong.
 
-2. Using the functions
+Using the functions
+===================
 
 There are two variants of the function, one that takes a list of integers, and
-one that takes a buffer:
+one that takes a buffer::
 
-u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key);
+	u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key);
 
-And:
+And::
 
-u32 hsiphash_1u32(u32, const hsiphash_key_t *key);
-u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key);
-u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key);
-u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key);
+	u32 hsiphash_1u32(u32, const hsiphash_key_t *key);
+	u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key);
+	u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key);
+	u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key);
 
 If you pass the generic hsiphash function something of a constant length, it
 will constant fold at compile-time and automatically choose one of the
 optimized functions.
 
-3. Hashtable key function usage:
+Hashtable key function usage
+============================
+
+::
 
-struct some_hashtable {
-	DECLARE_HASHTABLE(hashtable, 8);
-	hsiphash_key_t key;
-};
+	struct some_hashtable {
+		DECLARE_HASHTABLE(hashtable, 8);
+		hsiphash_key_t key;
+	};
 
-void init_hashtable(struct some_hashtable *table)
-{
-	get_random_bytes(&table->key, sizeof(table->key));
-}
+	void init_hashtable(struct some_hashtable *table)
+	{
+		get_random_bytes(&table->key, sizeof(table->key));
+	}
 
-static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
-{
-	return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
-}
+	static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
+	{
+		return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
+	}
 
 You may then iterate like usual over the returned hash bucket.
 
-4. Performance
+Performance
+===========
 
 HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
 this will not be a problem, as the hashtable lookup isn't the bottleneck. And
-- 
GitLab


From d1bb1d04f5c2d4e2cc03d8bc425699241ca4739e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 08:01:45 -0300
Subject: [PATCH 1613/1958] SM501.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- adjust document title;
- use the copyright markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/SM501.txt | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Documentation/SM501.txt b/Documentation/SM501.txt
index 561826f820935..882507453ba4e 100644
--- a/Documentation/SM501.txt
+++ b/Documentation/SM501.txt
@@ -1,7 +1,10 @@
-			SM501 Driver
-			============
+.. include:: <isonum.txt>
 
-Copyright 2006, 2007 Simtec Electronics
+============
+SM501 Driver
+============
+
+:Copyright: |copy| 2006, 2007 Simtec Electronics
 
 The Silicon Motion SM501 multimedia companion chip is a multifunction device
 which may provide numerous interfaces including USB host controller USB gadget,
-- 
GitLab


From c6d289d0ccf411b82196195318888901b7828b73 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 08:03:50 -0300
Subject: [PATCH 1614/1958] smsc_ece1099.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Add a title for the document.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/smsc_ece1099.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/smsc_ece1099.txt b/Documentation/smsc_ece1099.txt
index 6b492e82b43d9..079277421eaf8 100644
--- a/Documentation/smsc_ece1099.txt
+++ b/Documentation/smsc_ece1099.txt
@@ -1,3 +1,7 @@
+=================================================
+Msc Keyboard Scan Expansion/GPIO Expansion device
+=================================================
+
 What is smsc-ece1099?
 ----------------------
 
-- 
GitLab


From 603699bbfb786411f869da9a87d11fc2f3374afd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 08:19:34 -0300
Subject: [PATCH 1615/1958] static-keys.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- Mark titles;
- Add a warning mark;
- Mark literals and literal blocks;
- Adjust identation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/static-keys.txt | 207 ++++++++++++++++++----------------
 1 file changed, 108 insertions(+), 99 deletions(-)

diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index ef419fd0897f0..b83dfa1c06029 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -1,30 +1,34 @@
-			Static Keys
-			-----------
+===========
+Static Keys
+===========
 
-DEPRECATED API:
+.. warning::
 
-The use of 'struct static_key' directly, is now DEPRECATED. In addition
-static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following:
+   DEPRECATED API:
 
-struct static_key false = STATIC_KEY_INIT_FALSE;
-struct static_key true = STATIC_KEY_INIT_TRUE;
-static_key_true()
-static_key_false()
+   The use of 'struct static_key' directly, is now DEPRECATED. In addition
+   static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following::
 
-The updated API replacements are:
+	struct static_key false = STATIC_KEY_INIT_FALSE;
+	struct static_key true = STATIC_KEY_INIT_TRUE;
+	static_key_true()
+	static_key_false()
 
-DEFINE_STATIC_KEY_TRUE(key);
-DEFINE_STATIC_KEY_FALSE(key);
-DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
-DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
-static_branch_likely()
-static_branch_unlikely()
+   The updated API replacements are::
 
-0) Abstract
+	DEFINE_STATIC_KEY_TRUE(key);
+	DEFINE_STATIC_KEY_FALSE(key);
+	DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
+	DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
+	static_branch_likely()
+	static_branch_unlikely()
+
+Abstract
+========
 
 Static keys allows the inclusion of seldom used features in
 performance-sensitive fast-path kernel code, via a GCC feature and a code
-patching technique. A quick example:
+patching technique. A quick example::
 
 	DEFINE_STATIC_KEY_FALSE(key);
 
@@ -45,7 +49,8 @@ The static_branch_unlikely() branch will be generated into the code with as litt
 impact to the likely code path as possible.
 
 
-1) Motivation
+Motivation
+==========
 
 
 Currently, tracepoints are implemented using a conditional branch. The
@@ -60,7 +65,8 @@ possible. Although tracepoints are the original motivation for this work, other
 kernel code paths should be able to make use of the static keys facility.
 
 
-2) Solution
+Solution
+========
 
 
 gcc (v4.5) adds a new 'asm goto' statement that allows branching to a label:
@@ -71,7 +77,7 @@ Using the 'asm goto', we can create branches that are either taken or not taken
 by default, without the need to check memory. Then, at run-time, we can patch
 the branch site to change the branch direction.
 
-For example, if we have a simple branch that is disabled by default:
+For example, if we have a simple branch that is disabled by default::
 
 	if (static_branch_unlikely(&key))
 		printk("I am the true branch\n");
@@ -87,14 +93,15 @@ optimization.
 This lowlevel patching mechanism is called 'jump label patching', and it gives
 the basis for the static keys facility.
 
-3) Static key label API, usage and examples:
+Static key label API, usage and examples
+========================================
 
 
-In order to make use of this optimization you must first define a key:
+In order to make use of this optimization you must first define a key::
 
 	DEFINE_STATIC_KEY_TRUE(key);
 
-or:
+or::
 
 	DEFINE_STATIC_KEY_FALSE(key);
 
@@ -102,14 +109,14 @@ or:
 The key must be global, that is, it can't be allocated on the stack or dynamically
 allocated at run-time.
 
-The key is then used in code as:
+The key is then used in code as::
 
         if (static_branch_unlikely(&key))
                 do unlikely code
         else
                 do likely code
 
-Or:
+Or::
 
         if (static_branch_likely(&key))
                 do likely code
@@ -120,15 +127,15 @@ Keys defined via DEFINE_STATIC_KEY_TRUE(), or DEFINE_STATIC_KEY_FALSE, may
 be used in either static_branch_likely() or static_branch_unlikely()
 statements.
 
-Branch(es) can be set true via:
+Branch(es) can be set true via::
 
-static_branch_enable(&key);
+	static_branch_enable(&key);
 
-or false via:
+or false via::
 
-static_branch_disable(&key);
+	static_branch_disable(&key);
 
-The branch(es) can then be switched via reference counts:
+The branch(es) can then be switched via reference counts::
 
 	static_branch_inc(&key);
 	...
@@ -142,11 +149,11 @@ static_branch_inc(), will change the branch back to true. Likewise, if the
 key is initialized false, a 'static_branch_inc()', will change the branch to
 true. And then a 'static_branch_dec()', will again make the branch false.
 
-Where an array of keys is required, it can be defined as:
+Where an array of keys is required, it can be defined as::
 
 	DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
 
-or:
+or::
 
 	DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
 
@@ -159,96 +166,98 @@ simply fall back to a traditional, load, test, and jump sequence. Also, the
 struct jump_entry table must be at least 4-byte aligned because the
 static_key->entry field makes use of the two least significant bits.
 
-* select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
-
-* #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
+* ``select HAVE_ARCH_JUMP_LABEL``,
+    see: arch/x86/Kconfig
 
-* __always_inline bool arch_static_branch(struct static_key *key, bool branch), see:
-					arch/x86/include/asm/jump_label.h
+* ``#define JUMP_LABEL_NOP_SIZE``,
+    see: arch/x86/include/asm/jump_label.h
 
-* __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch),
-					see: arch/x86/include/asm/jump_label.h
+* ``__always_inline bool arch_static_branch(struct static_key *key, bool branch)``,
+    see: arch/x86/include/asm/jump_label.h
 
-* void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
-					see: arch/x86/kernel/jump_label.c
+* ``__always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)``,
+    see: arch/x86/include/asm/jump_label.h
 
-* __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type),
-					see: arch/x86/kernel/jump_label.c
+* ``void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type)``,
+    see: arch/x86/kernel/jump_label.c
 
+* ``__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type)``,
+    see: arch/x86/kernel/jump_label.c
 
-* struct jump_entry, see: arch/x86/include/asm/jump_label.h
+* ``struct jump_entry``,
+    see: arch/x86/include/asm/jump_label.h
 
 
 5) Static keys / jump label analysis, results (x86_64):
 
 
 As an example, let's add the following branch to 'getppid()', such that the
-system call now looks like:
+system call now looks like::
 
-SYSCALL_DEFINE0(getppid)
-{
+  SYSCALL_DEFINE0(getppid)
+  {
         int pid;
 
-+       if (static_branch_unlikely(&key))
-+               printk("I am the true branch\n");
+  +     if (static_branch_unlikely(&key))
+  +             printk("I am the true branch\n");
 
         rcu_read_lock();
         pid = task_tgid_vnr(rcu_dereference(current->real_parent));
         rcu_read_unlock();
 
         return pid;
-}
-
-The resulting instructions with jump labels generated by GCC is:
-
-ffffffff81044290 <sys_getppid>:
-ffffffff81044290:       55                      push   %rbp
-ffffffff81044291:       48 89 e5                mov    %rsp,%rbp
-ffffffff81044294:       e9 00 00 00 00          jmpq   ffffffff81044299 <sys_getppid+0x9>
-ffffffff81044299:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
-ffffffff810442a0:       00 00
-ffffffff810442a2:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
-ffffffff810442a9:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
-ffffffff810442b0:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
-ffffffff810442b7:       e8 f4 d9 00 00          callq  ffffffff81051cb0 <pid_vnr>
-ffffffff810442bc:       5d                      pop    %rbp
-ffffffff810442bd:       48 98                   cltq
-ffffffff810442bf:       c3                      retq
-ffffffff810442c0:       48 c7 c7 e3 54 98 81    mov    $0xffffffff819854e3,%rdi
-ffffffff810442c7:       31 c0                   xor    %eax,%eax
-ffffffff810442c9:       e8 71 13 6d 00          callq  ffffffff8171563f <printk>
-ffffffff810442ce:       eb c9                   jmp    ffffffff81044299 <sys_getppid+0x9>
-
-Without the jump label optimization it looks like:
-
-ffffffff810441f0 <sys_getppid>:
-ffffffff810441f0:       8b 05 8a 52 d8 00       mov    0xd8528a(%rip),%eax        # ffffffff81dc9480 <key>
-ffffffff810441f6:       55                      push   %rbp
-ffffffff810441f7:       48 89 e5                mov    %rsp,%rbp
-ffffffff810441fa:       85 c0                   test   %eax,%eax
-ffffffff810441fc:       75 27                   jne    ffffffff81044225 <sys_getppid+0x35>
-ffffffff810441fe:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
-ffffffff81044205:       00 00
-ffffffff81044207:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
-ffffffff8104420e:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
-ffffffff81044215:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
-ffffffff8104421c:       e8 2f da 00 00          callq  ffffffff81051c50 <pid_vnr>
-ffffffff81044221:       5d                      pop    %rbp
-ffffffff81044222:       48 98                   cltq
-ffffffff81044224:       c3                      retq
-ffffffff81044225:       48 c7 c7 13 53 98 81    mov    $0xffffffff81985313,%rdi
-ffffffff8104422c:       31 c0                   xor    %eax,%eax
-ffffffff8104422e:       e8 60 0f 6d 00          callq  ffffffff81715193 <printk>
-ffffffff81044233:       eb c9                   jmp    ffffffff810441fe <sys_getppid+0xe>
-ffffffff81044235:       66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
-ffffffff8104423c:       00 00 00 00
+  }
+
+The resulting instructions with jump labels generated by GCC is::
+
+  ffffffff81044290 <sys_getppid>:
+  ffffffff81044290:       55                      push   %rbp
+  ffffffff81044291:       48 89 e5                mov    %rsp,%rbp
+  ffffffff81044294:       e9 00 00 00 00          jmpq   ffffffff81044299 <sys_getppid+0x9>
+  ffffffff81044299:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
+  ffffffff810442a0:       00 00
+  ffffffff810442a2:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
+  ffffffff810442a9:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
+  ffffffff810442b0:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
+  ffffffff810442b7:       e8 f4 d9 00 00          callq  ffffffff81051cb0 <pid_vnr>
+  ffffffff810442bc:       5d                      pop    %rbp
+  ffffffff810442bd:       48 98                   cltq
+  ffffffff810442bf:       c3                      retq
+  ffffffff810442c0:       48 c7 c7 e3 54 98 81    mov    $0xffffffff819854e3,%rdi
+  ffffffff810442c7:       31 c0                   xor    %eax,%eax
+  ffffffff810442c9:       e8 71 13 6d 00          callq  ffffffff8171563f <printk>
+  ffffffff810442ce:       eb c9                   jmp    ffffffff81044299 <sys_getppid+0x9>
+
+Without the jump label optimization it looks like::
+
+  ffffffff810441f0 <sys_getppid>:
+  ffffffff810441f0:       8b 05 8a 52 d8 00       mov    0xd8528a(%rip),%eax        # ffffffff81dc9480 <key>
+  ffffffff810441f6:       55                      push   %rbp
+  ffffffff810441f7:       48 89 e5                mov    %rsp,%rbp
+  ffffffff810441fa:       85 c0                   test   %eax,%eax
+  ffffffff810441fc:       75 27                   jne    ffffffff81044225 <sys_getppid+0x35>
+  ffffffff810441fe:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
+  ffffffff81044205:       00 00
+  ffffffff81044207:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
+  ffffffff8104420e:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
+  ffffffff81044215:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
+  ffffffff8104421c:       e8 2f da 00 00          callq  ffffffff81051c50 <pid_vnr>
+  ffffffff81044221:       5d                      pop    %rbp
+  ffffffff81044222:       48 98                   cltq
+  ffffffff81044224:       c3                      retq
+  ffffffff81044225:       48 c7 c7 13 53 98 81    mov    $0xffffffff81985313,%rdi
+  ffffffff8104422c:       31 c0                   xor    %eax,%eax
+  ffffffff8104422e:       e8 60 0f 6d 00          callq  ffffffff81715193 <printk>
+  ffffffff81044233:       eb c9                   jmp    ffffffff810441fe <sys_getppid+0xe>
+  ffffffff81044235:       66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
+  ffffffff8104423c:       00 00 00 00
 
 Thus, the disable jump label case adds a 'mov', 'test' and 'jne' instruction
 vs. the jump label case just has a 'no-op' or 'jmp 0'. (The jmp 0, is patched
 to a 5 byte atomic no-op instruction at boot-time.) Thus, the disabled jump
-label case adds:
+label case adds::
 
-6 (mov) + 2 (test) + 2 (jne) = 10 - 5 (5 byte jump 0) = 5 addition bytes.
+  6 (mov) + 2 (test) + 2 (jne) = 10 - 5 (5 byte jump 0) = 5 addition bytes.
 
 If we then include the padding bytes, the jump label code saves, 16 total bytes
 of instruction memory for this small function. In this case the non-jump label
@@ -262,7 +271,7 @@ Since there are a number of static key API uses in the scheduler paths,
 'pipe-test' (also known as 'perf bench sched pipe') can be used to show the
 performance improvement. Testing done on 3.3.0-rc2:
 
-jump label disabled:
+jump label disabled::
 
  Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
 
@@ -279,7 +288,7 @@ jump label disabled:
 
        1.601607384 seconds time elapsed                                          ( +-  0.07% )
 
-jump label enabled:
+jump label enabled::
 
  Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
 
-- 
GitLab


From aa4d520358ed03ee5c2f54ef66e42f971839e62e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:00:17 -0300
Subject: [PATCH 1616/1958] svga.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- Use standard notation for titles;
- Use the note mark;
- mark literal blocks;
- adjust identation;
- mark the table.

Acked-By: Martin Mares <mj@ucw.cz>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/svga.txt | 146 ++++++++++++++++++++++-------------------
 1 file changed, 80 insertions(+), 66 deletions(-)

diff --git a/Documentation/svga.txt b/Documentation/svga.txt
index cd66ec836e4f4..119f1515b1acb 100644
--- a/Documentation/svga.txt
+++ b/Documentation/svga.txt
@@ -1,24 +1,31 @@
-		       Video Mode Selection Support 2.13
-		    (c) 1995--1999 Martin Mares, <mj@ucw.cz>
---------------------------------------------------------------------------------
+.. include:: <isonum.txt>
 
-1. Intro
-~~~~~~~~
-   This small document describes the "Video Mode Selection" feature which
+=================================
+Video Mode Selection Support 2.13
+=================================
+
+:Copyright: |copy| 1995--1999 Martin Mares, <mj@ucw.cz>
+
+Intro
+~~~~~
+
+This small document describes the "Video Mode Selection" feature which
 allows the use of various special video modes supported by the video BIOS. Due
 to usage of the BIOS, the selection is limited to boot time (before the
 kernel decompression starts) and works only on 80X86 machines.
 
-   **  Short intro for the impatient: Just use vga=ask for the first time,
-   **  enter `scan' on the video mode prompt, pick the mode you want to use,
-   **  remember its mode ID (the four-digit hexadecimal number) and then
-   **  set the vga parameter to this number (converted to decimal first).
+.. note::
 
-   The video mode to be used is selected by a kernel parameter which can be
+   Short intro for the impatient: Just use vga=ask for the first time,
+   enter ``scan`` on the video mode prompt, pick the mode you want to use,
+   remember its mode ID (the four-digit hexadecimal number) and then
+   set the vga parameter to this number (converted to decimal first).
+
+The video mode to be used is selected by a kernel parameter which can be
 specified in the kernel Makefile (the SVGA_MODE=... line) or by the "vga=..."
 option of LILO (or some other boot loader you use) or by the "vidmode" utility
 (present in standard Linux utility packages). You can use the following values
-of this parameter:
+of this parameter::
 
    NORMAL_VGA - Standard 80x25 mode available on all display adapters.
 
@@ -37,77 +44,79 @@ of this parameter:
       for exact meaning of the ID). Warning: rdev and LILO don't support
       hexadecimal numbers -- you have to convert it to decimal manually.
 
-2. Menu
-~~~~~~~
-   The ASK_VGA mode causes the kernel to offer a video mode menu upon
+Menu
+~~~~
+
+The ASK_VGA mode causes the kernel to offer a video mode menu upon
 bootup. It displays a "Press <RETURN> to see video modes available, <SPACE>
 to continue or wait 30 secs" message. If you press <RETURN>, you enter the
 menu, if you press <SPACE> or wait 30 seconds, the kernel will boot up in
 the standard 80x25 mode.
 
-   The menu looks like:
+The menu looks like::
 
-Video adapter: <name-of-detected-video-adapter>
-Mode:    COLSxROWS:
-0  0F00  80x25
-1  0F01  80x50
-2  0F02  80x43
-3  0F03  80x26
-....
-Enter mode number or `scan': <flashing-cursor-here>
+	Video adapter: <name-of-detected-video-adapter>
+	Mode:    COLSxROWS:
+	0  0F00  80x25
+	1  0F01  80x50
+	2  0F02  80x43
+	3  0F03  80x26
+	....
+	Enter mode number or ``scan``: <flashing-cursor-here>
 
-   <name-of-detected-video-adapter> tells what video adapter did Linux detect
+<name-of-detected-video-adapter> tells what video adapter did Linux detect
 -- it's either a generic adapter name (MDA, CGA, HGC, EGA, VGA, VESA VGA [a VGA
 with VESA-compliant BIOS]) or a chipset name (e.g., Trident). Direct detection
 of chipsets is turned off by default (see CONFIG_VIDEO_SVGA in chapter 4 to see
 how to enable it if you really want) as it's inherently unreliable due to
 absolutely insane PC design.
 
-   "0  0F00  80x25" means that the first menu item (the menu items are numbered
+"0  0F00  80x25" means that the first menu item (the menu items are numbered
 from "0" to "9" and from "a" to "z") is a 80x25 mode with ID=0x0f00 (see the
 next section for a description of mode IDs).
 
-   <flashing-cursor-here> encourages you to enter the item number or mode ID
+<flashing-cursor-here> encourages you to enter the item number or mode ID
 you wish to set and press <RETURN>. If the computer complains something about
 "Unknown mode ID", it is trying to tell you that it isn't possible to set such
 a mode. It's also possible to press only <RETURN> which leaves the current mode.
 
-   The mode list usually contains a few basic modes and some VESA modes.  In
+The mode list usually contains a few basic modes and some VESA modes.  In
 case your chipset has been detected, some chipset-specific modes are shown as
 well (some of these might be missing or unusable on your machine as different
 BIOSes are often shipped with the same card and the mode numbers depend purely
 on the VGA BIOS).
 
-   The modes displayed on the menu are partially sorted: The list starts with
+The modes displayed on the menu are partially sorted: The list starts with
 the standard modes (80x25 and 80x50) followed by "special" modes (80x28 and
 80x43), local modes (if the local modes feature is enabled), VESA modes and
 finally SVGA modes for the auto-detected adapter.
 
-   If you are not happy with the mode list offered (e.g., if you think your card
+If you are not happy with the mode list offered (e.g., if you think your card
 is able to do more), you can enter "scan" instead of item number / mode ID.  The
 program will try to ask the BIOS for all possible video mode numbers and test
 what happens then. The screen will be probably flashing wildly for some time and
 strange noises will be heard from inside the monitor and so on and then, really
 all consistent video modes supported by your BIOS will appear (plus maybe some
-`ghost modes'). If you are afraid this could damage your monitor, don't use this
-function.
+``ghost modes``). If you are afraid this could damage your monitor, don't use
+this function.
 
-   After scanning, the mode ordering is a bit different: the auto-detected SVGA
-modes are not listed at all and the modes revealed by `scan' are shown before
+After scanning, the mode ordering is a bit different: the auto-detected SVGA
+modes are not listed at all and the modes revealed by ``scan`` are shown before
 all VESA modes.
 
-3. Mode IDs
-~~~~~~~~~~~
-   Because of the complexity of all the video stuff, the video mode IDs
+Mode IDs
+~~~~~~~~
+
+Because of the complexity of all the video stuff, the video mode IDs
 used here are also a bit complex. A video mode ID is a 16-bit number usually
 expressed in a hexadecimal notation (starting with "0x"). You can set a mode
 by entering its mode directly if you know it even if it isn't shown on the menu.
 
-The ID numbers can be divided to three regions:
+The ID numbers can be divided to those regions::
 
    0x0000 to 0x00ff - menu item references. 0x0000 is the first item. Don't use
 	outside the menu as this can change from boot to boot (especially if you
-	have used the `scan' feature).
+	have used the ``scan`` feature).
 
    0x0100 to 0x017f - standard BIOS modes. The ID is a BIOS video mode number
 	(as presented to INT 10, function 00) increased by 0x0100.
@@ -142,53 +151,54 @@ The ID numbers can be divided to three regions:
 	0xffff	equivalent to 0x0f00 (standard 80x25)
 	0xfffe	equivalent to 0x0f01 (EGA 80x43 or VGA 80x50)
 
-   If you add 0x8000 to the mode ID, the program will try to recalculate
+If you add 0x8000 to the mode ID, the program will try to recalculate
 vertical display timing according to mode parameters, which can be used to
 eliminate some annoying bugs of certain VGA BIOSes (usually those used for
 cards with S3 chipsets and old Cirrus Logic BIOSes) -- mainly extra lines at the
 end of the display.
 
-4. Options
-~~~~~~~~~~
-   Some options can be set in the source text (in arch/i386/boot/video.S).
+Options
+~~~~~~~
+
+Some options can be set in the source text (in arch/i386/boot/video.S).
 All of them are simple #define's -- change them to #undef's when you want to
 switch them off. Currently supported:
 
-   CONFIG_VIDEO_SVGA - enables autodetection of SVGA cards. This is switched
+CONFIG_VIDEO_SVGA - enables autodetection of SVGA cards. This is switched
 off by default as it's a bit unreliable due to terribly bad PC design. If you
-really want to have the adapter autodetected (maybe in case the `scan' feature
+really want to have the adapter autodetected (maybe in case the ``scan`` feature
 doesn't work on your machine), switch this on and don't cry if the results
 are not completely sane. In case you really need this feature, please drop me
 a mail as I think of removing it some day.
 
-   CONFIG_VIDEO_VESA - enables autodetection of VESA modes. If it doesn't work
+CONFIG_VIDEO_VESA - enables autodetection of VESA modes. If it doesn't work
 on your machine (or displays a "Error: Scanning of VESA modes failed" message),
 you can switch it off and report as a bug.
 
-   CONFIG_VIDEO_COMPACT - enables compacting of the video mode list. If there
+CONFIG_VIDEO_COMPACT - enables compacting of the video mode list. If there
 are more modes with the same screen size, only the first one is kept (see above
 for more info on mode ordering). However, in very strange cases it's possible
 that the first "version" of the mode doesn't work although some of the others
 do -- in this case turn this switch off to see the rest.
 
-   CONFIG_VIDEO_RETAIN - enables retaining of screen contents when switching
+CONFIG_VIDEO_RETAIN - enables retaining of screen contents when switching
 video modes. Works only with some boot loaders which leave enough room for the
 buffer. (If you have old LILO, you can adjust heap_end_ptr and loadflags
 in setup.S, but it's better to upgrade the boot loader...)
 
-   CONFIG_VIDEO_LOCAL - enables inclusion of "local modes" in the list. The
+CONFIG_VIDEO_LOCAL - enables inclusion of "local modes" in the list. The
 local modes are added automatically to the beginning of the list not depending
 on hardware configuration. The local modes are listed in the source text after
 the "local_mode_table:" line. The comment before this line describes the format
 of the table (which also includes a video card name to be displayed on the
 top of the menu).
 
-   CONFIG_VIDEO_400_HACK - force setting of 400 scan lines for standard VGA
+CONFIG_VIDEO_400_HACK - force setting of 400 scan lines for standard VGA
 modes. This option is intended to be used on certain buggy BIOSes which draw
 some useless logo using font download and then fail to reset the correct mode.
 Don't use unless needed as it forces resetting the video card.
 
-   CONFIG_VIDEO_GFX_HACK - includes special hack for setting of graphics modes
+CONFIG_VIDEO_GFX_HACK - includes special hack for setting of graphics modes
 to be used later by special drivers (e.g., 800x600 on IBM ThinkPad -- see
 ftp://ftp.phys.keio.ac.jp/pub/XFree86/800x600/XF86Configs/XF86Config.IBM_TP560).
 Allows to set _any_ BIOS mode including graphic ones and forcing specific
@@ -196,33 +206,36 @@ text screen resolution instead of peeking it from BIOS variables. Don't use
 unless you think you know what you're doing. To activate this setup, use
 mode number 0x0f08 (see section 3).
 
-5. Still doesn't work?
-~~~~~~~~~~~~~~~~~~~~~~
-   When the mode detection doesn't work (e.g., the mode list is incorrect or
+Still doesn't work?
+~~~~~~~~~~~~~~~~~~~
+
+When the mode detection doesn't work (e.g., the mode list is incorrect or
 the machine hangs instead of displaying the menu), try to switch off some of
 the configuration options listed in section 4. If it fails, you can still use
 your kernel with the video mode set directly via the kernel parameter.
 
-   In either case, please send me a bug report containing what _exactly_
+In either case, please send me a bug report containing what _exactly_
 happens and how do the configuration switches affect the behaviour of the bug.
 
-   If you start Linux from M$-DOS, you might also use some DOS tools for
+If you start Linux from M$-DOS, you might also use some DOS tools for
 video mode setting. In this case, you must specify the 0x0f04 mode ("leave
 current settings") to Linux, because if you don't and you use any non-standard
 mode, Linux will switch to 80x25 automatically.
 
-   If you set some extended mode and there's one or more extra lines on the
+If you set some extended mode and there's one or more extra lines on the
 bottom of the display containing already scrolled-out text, your VGA BIOS
 contains the most common video BIOS bug called "incorrect vertical display
 end setting". Adding 0x8000 to the mode ID might fix the problem. Unfortunately,
 this must be done manually -- no autodetection mechanisms are available.
 
-   If you have a VGA card and your display still looks as on EGA, your BIOS
+If you have a VGA card and your display still looks as on EGA, your BIOS
 is probably broken and you need to set the CONFIG_VIDEO_400_HACK switch to
 force setting of the correct mode.
 
-6. History
-~~~~~~~~~~
+History
+~~~~~~~
+
+=============== ================================================================
 1.0 (??-Nov-95)	First version supporting all adapters supported by the old
 		setup.S + Cirrus Logic 54XX. Present in some 1.3.4? kernels
 		and then removed due to instability on some machines.
@@ -260,17 +273,18 @@ force setting of the correct mode.
 		  original version written by hhanemaa@cs.ruu.nl, patched by
 		  Jeff Chua, rewritten by me).
 		- Screen store/restore fixed.
-2.8 (14-Apr-96)	- Previous release was not compilable without CONFIG_VIDEO_SVGA.
+2.8 (14-Apr-96) - Previous release was not compilable without CONFIG_VIDEO_SVGA.
 		- Better recognition of text modes during mode scan.
 2.9 (12-May-96)	- Ignored VESA modes 0x80 - 0xff (more VESA BIOS bugs!)
-2.10 (11-Nov-96)- The whole thing made optional.
+2.10(11-Nov-96) - The whole thing made optional.
 		- Added the CONFIG_VIDEO_400_HACK switch.
 		- Added the CONFIG_VIDEO_GFX_HACK switch.
 		- Code cleanup.
-2.11 (03-May-97)- Yet another cleanup, now including also the documentation.
-		- Direct testing of SVGA adapters turned off by default, `scan'
+2.11(03-May-97) - Yet another cleanup, now including also the documentation.
+		- Direct testing of SVGA adapters turned off by default, ``scan``
 		  offered explicitly on the prompt line.
 		- Removed the doc section describing adding of new probing
 		  functions as I try to get rid of _all_ hardware probing here.
-2.12 (25-May-98)- Added support for VESA frame buffer graphics.
-2.13 (14-May-99)- Minor documentation fixes.
+2.12(25-May-98) Added support for VESA frame buffer graphics.
+2.13(14-May-99) Minor documentation fixes.
+=============== ================================================================
-- 
GitLab


From 79ab3b0d21ea1ac48ce0e6b44997dd0a8c8f72e6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:10:48 -0300
Subject: [PATCH 1617/1958] this_cpu_ops.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- promote document title one level;
- mark literal blocks;
- move authorship to the beginning of the file and use markups.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/this_cpu_ops.txt | 49 +++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/Documentation/this_cpu_ops.txt b/Documentation/this_cpu_ops.txt
index 2cbf71975381d..5cb8b883ae832 100644
--- a/Documentation/this_cpu_ops.txt
+++ b/Documentation/this_cpu_ops.txt
@@ -1,5 +1,9 @@
+===================
 this_cpu operations
--------------------
+===================
+
+:Author: Christoph Lameter, August 4th, 2014
+:Author: Pranith Kumar, Aug 2nd, 2014
 
 this_cpu operations are a way of optimizing access to per cpu
 variables associated with the *currently* executing processor. This is
@@ -39,7 +43,7 @@ operations.
 
 The following this_cpu() operations with implied preemption protection
 are defined. These operations can be used without worrying about
-preemption and interrupts.
+preemption and interrupts::
 
 	this_cpu_read(pcp)
 	this_cpu_write(pcp, val)
@@ -67,14 +71,14 @@ to relocate a per cpu relative address to the proper per cpu area for
 the processor. So the relocation to the per cpu base is encoded in the
 instruction via a segment register prefix.
 
-For example:
+For example::
 
 	DEFINE_PER_CPU(int, x);
 	int z;
 
 	z = this_cpu_read(x);
 
-results in a single instruction
+results in a single instruction::
 
 	mov ax, gs:[x]
 
@@ -84,16 +88,16 @@ this_cpu_ops such sequence also required preempt disable/enable to
 prevent the kernel from moving the thread to a different processor
 while the calculation is performed.
 
-Consider the following this_cpu operation:
+Consider the following this_cpu operation::
 
 	this_cpu_inc(x)
 
-The above results in the following single instruction (no lock prefix!)
+The above results in the following single instruction (no lock prefix!)::
 
 	inc gs:[x]
 
 instead of the following operations required if there is no segment
-register:
+register::
 
 	int *y;
 	int cpu;
@@ -121,8 +125,10 @@ has to be paid for this optimization is the need to add up the per cpu
 counters when the value of a counter is needed.
 
 
-Special operations:
--------------------
+Special operations
+------------------
+
+::
 
 	y = this_cpu_ptr(&x)
 
@@ -153,11 +159,15 @@ Therefore the use of x or &x outside of the context of per cpu
 operations is invalid and will generally be treated like a NULL
 pointer dereference.
 
+::
+
 	DEFINE_PER_CPU(int, x);
 
 In the context of per cpu operations the above implies that x is a per
 cpu variable. Most this_cpu operations take a cpu variable.
 
+::
+
 	int __percpu *p = &x;
 
 &x and hence p is the *offset* of a per cpu variable. this_cpu_ptr()
@@ -168,7 +178,7 @@ strange.
 Operations on a field of a per cpu structure
 --------------------------------------------
 
-Let's say we have a percpu structure
+Let's say we have a percpu structure::
 
 	struct s {
 		int n,m;
@@ -177,14 +187,14 @@ Let's say we have a percpu structure
 	DEFINE_PER_CPU(struct s, p);
 
 
-Operations on these fields are straightforward
+Operations on these fields are straightforward::
 
 	this_cpu_inc(p.m)
 
 	z = this_cpu_cmpxchg(p.m, 0, 1);
 
 
-If we have an offset to struct s:
+If we have an offset to struct s::
 
 	struct s __percpu *ps = &p;
 
@@ -194,7 +204,7 @@ If we have an offset to struct s:
 
 
 The calculation of the pointer may require the use of this_cpu_ptr()
-if we do not make use of this_cpu ops later to manipulate fields:
+if we do not make use of this_cpu ops later to manipulate fields::
 
 	struct s *pp;
 
@@ -206,7 +216,7 @@ if we do not make use of this_cpu ops later to manipulate fields:
 
 
 Variants of this_cpu ops
--------------------------
+------------------------
 
 this_cpu ops are interrupt safe. Some architectures do not support
 these per cpu local operations. In that case the operation must be
@@ -222,7 +232,7 @@ preemption. If a per cpu variable is not used in an interrupt context
 and the scheduler cannot preempt, then they are safe. If any interrupts
 still occur while an operation is in progress and if the interrupt too
 modifies the variable, then RMW actions can not be guaranteed to be
-safe.
+safe::
 
 	__this_cpu_read(pcp)
 	__this_cpu_write(pcp, val)
@@ -279,7 +289,7 @@ unless absolutely necessary. Please consider using an IPI to wake up
 the remote CPU and perform the update to its per cpu area.
 
 To access per-cpu data structure remotely, typically the per_cpu_ptr()
-function is used:
+function is used::
 
 
 	DEFINE_PER_CPU(struct data, datap);
@@ -289,7 +299,7 @@ function is used:
 This makes it explicit that we are getting ready to access a percpu
 area remotely.
 
-You can also do the following to convert the datap offset to an address
+You can also do the following to convert the datap offset to an address::
 
 	struct data *p = this_cpu_ptr(&datap);
 
@@ -305,7 +315,7 @@ the following scenario that occurs because two per cpu variables
 share a cache-line but the relaxed synchronization is applied to
 only one process updating the cache-line.
 
-Consider the following example
+Consider the following example::
 
 
 	struct test {
@@ -327,6 +337,3 @@ mind that a remote write will evict the cache line from the processor
 that most likely will access it. If the processor wakes up and finds a
 missing local cache line of a per cpu area, its performance and hence
 the wake up times will be affected.
-
-Christoph Lameter, August 4th, 2014
-Pranith Kumar, Aug 2nd, 2014
-- 
GitLab


From c6ebaf6bdb10469910afdcfc6af5c26846883e98 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:16:19 -0300
Subject: [PATCH 1618/1958] unaligned-memory-access.txt: standardize document
 format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- promote document title one level;
- use markups for authorship and put it at the beginning;
- mark literal blocks;
- adjust identation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/unaligned-memory-access.txt | 57 ++++++++++++-----------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
index 3f76c0c379206..51b4ff0315865 100644
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -1,6 +1,15 @@
+=========================
 UNALIGNED MEMORY ACCESSES
 =========================
 
+:Author: Daniel Drake <dsd@gentoo.org>,
+:Author: Johannes Berg <johannes@sipsolutions.net>
+
+:With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
+  Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
+  Vadim Lobanov
+
+
 Linux runs on a wide variety of architectures which have varying behaviour
 when it comes to memory access. This document presents some details about
 unaligned accesses, why you need to write code that doesn't cause them,
@@ -73,7 +82,7 @@ memory addresses of certain variables, etc.
 
 Fortunately things are not too complex, as in most cases, the compiler
 ensures that things will work for you. For example, take the following
-structure:
+structure::
 
 	struct foo {
 		u16 field1;
@@ -106,7 +115,7 @@ On a related topic, with the above considerations in mind you may observe
 that you could reorder the fields in the structure in order to place fields
 where padding would otherwise be inserted, and hence reduce the overall
 resident memory size of structure instances. The optimal layout of the
-above example is:
+above example is::
 
 	struct foo {
 		u32 field2;
@@ -139,21 +148,21 @@ Code that causes unaligned access
 With the above in mind, let's move onto a real life example of a function
 that can cause an unaligned memory access. The following function taken
 from include/linux/etherdevice.h is an optimized routine to compare two
-ethernet MAC addresses for equality.
+ethernet MAC addresses for equality::
 
-bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
-{
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+  bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
+  {
+  #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
 		   ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
 
 	return fold == 0;
-#else
+  #else
 	const u16 *a = (const u16 *)addr1;
 	const u16 *b = (const u16 *)addr2;
 	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
-#endif
-}
+  #endif
+  }
 
 In the above function, when the hardware has efficient unaligned access
 capability, there is no issue with this code.  But when the hardware isn't
@@ -171,7 +180,8 @@ as it is a decent optimization for the cases when you can ensure alignment,
 which is true almost all of the time in ethernet networking context.
 
 
-Here is another example of some code that could cause unaligned accesses:
+Here is another example of some code that could cause unaligned accesses::
+
 	void myfunc(u8 *data, u32 value)
 	{
 		[...]
@@ -184,6 +194,7 @@ to an address that is not evenly divisible by 4.
 
 In summary, the 2 main scenarios where you may run into unaligned access
 problems involve:
+
  1. Casting variables to types of different lengths
  2. Pointer arithmetic followed by access to at least 2 bytes of data
 
@@ -195,7 +206,7 @@ The easiest way to avoid unaligned access is to use the get_unaligned() and
 put_unaligned() macros provided by the <asm/unaligned.h> header file.
 
 Going back to an earlier example of code that potentially causes unaligned
-access:
+access::
 
 	void myfunc(u8 *data, u32 value)
 	{
@@ -204,7 +215,7 @@ access:
 		[...]
 	}
 
-To avoid the unaligned memory access, you would rewrite it as follows:
+To avoid the unaligned memory access, you would rewrite it as follows::
 
 	void myfunc(u8 *data, u32 value)
 	{
@@ -215,7 +226,7 @@ To avoid the unaligned memory access, you would rewrite it as follows:
 	}
 
 The get_unaligned() macro works similarly. Assuming 'data' is a pointer to
-memory and you wish to avoid unaligned access, its usage is as follows:
+memory and you wish to avoid unaligned access, its usage is as follows::
 
 	u32 value = get_unaligned((u32 *) data);
 
@@ -245,18 +256,10 @@ For some ethernet hardware that cannot DMA to unaligned addresses like
 4*n+2 or non-ethernet hardware, this can be a problem, and it is then
 required to copy the incoming frame into an aligned buffer. Because this is
 unnecessary on architectures that can do unaligned accesses, the code can be
-made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so:
-
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	skb = original skb
-#else
-	skb = copy skb
-#endif
-
---
-Authors: Daniel Drake <dsd@gentoo.org>,
-         Johannes Berg <johannes@sipsolutions.net>
-With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
-Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
-Vadim Lobanov
+made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so::
 
+	#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+		skb = original skb
+	#else
+		skb = copy skb
+	#endif
-- 
GitLab


From 2a26ed8e4afff2bb48c044dc3ad69da19d66debf Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:26:06 -0300
Subject: [PATCH 1619/1958] vfio-mediated-device.txt: standardize document
 format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

In this specific document, the title, copyright and authorship
are added as if it were a C file!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- convert document preambule to the proper format;
- mark literal blocks;
- adjust identation;
- use numbered lists for references.

Reviewed by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/vfio-mediated-device.txt | 266 +++++++++++++------------
 1 file changed, 137 insertions(+), 129 deletions(-)

diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt
index e5e57b40f8af7..1b39503465320 100644
--- a/Documentation/vfio-mediated-device.txt
+++ b/Documentation/vfio-mediated-device.txt
@@ -1,14 +1,17 @@
-/*
- * VFIO Mediated devices
- *
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
- *     Author: Neo Jia <cjia@nvidia.com>
- *             Kirti Wankhede <kwankhede@nvidia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+.. include:: <isonum.txt>
+
+=====================
+VFIO Mediated devices
+=====================
+
+:Copyright: |copy| 2016, NVIDIA CORPORATION. All rights reserved.
+:Author: Neo Jia <cjia@nvidia.com>
+:Author: Kirti Wankhede <kwankhede@nvidia.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License version 2 as
+published by the Free Software Foundation.
+
 
 Virtual Function I/O (VFIO) Mediated devices[1]
 ===============================================
@@ -42,7 +45,7 @@ removes it from a VFIO group.
 
 The following high-level block diagram shows the main components and interfaces
 in the VFIO mediated driver framework. The diagram shows NVIDIA, Intel, and IBM
-devices as examples, as these devices are the first devices to use this module.
+devices as examples, as these devices are the first devices to use this module::
 
      +---------------+
      |               |
@@ -91,7 +94,7 @@ Registration Interface for a Mediated Bus Driver
 ------------------------------------------------
 
 The registration interface for a mediated bus driver provides the following
-structure to represent a mediated device's driver:
+structure to represent a mediated device's driver::
 
      /*
       * struct mdev_driver [2] - Mediated device's driver
@@ -110,14 +113,14 @@ structure to represent a mediated device's driver:
 A mediated bus driver for mdev should use this structure in the function calls
 to register and unregister itself with the core driver:
 
-* Register:
+* Register::
 
-  extern int  mdev_register_driver(struct mdev_driver *drv,
+    extern int  mdev_register_driver(struct mdev_driver *drv,
 				   struct module *owner);
 
-* Unregister:
+* Unregister::
 
-  extern void mdev_unregister_driver(struct mdev_driver *drv);
+    extern void mdev_unregister_driver(struct mdev_driver *drv);
 
 The mediated bus driver is responsible for adding mediated devices to the VFIO
 group when devices are bound to the driver and removing mediated devices from
@@ -152,15 +155,15 @@ The callbacks in the mdev_parent_ops structure are as follows:
 * mmap: mmap emulation callback
 
 A driver should use the mdev_parent_ops structure in the function call to
-register itself with the mdev core driver:
+register itself with the mdev core driver::
 
-extern int  mdev_register_device(struct device *dev,
-                                 const struct mdev_parent_ops *ops);
+	extern int  mdev_register_device(struct device *dev,
+	                                 const struct mdev_parent_ops *ops);
 
 However, the mdev_parent_ops structure is not required in the function call
-that a driver should use to unregister itself with the mdev core driver:
+that a driver should use to unregister itself with the mdev core driver::
 
-extern void mdev_unregister_device(struct device *dev);
+	extern void mdev_unregister_device(struct device *dev);
 
 
 Mediated Device Management Interface Through sysfs
@@ -183,30 +186,32 @@ with the mdev core driver.
 Directories and files under the sysfs for Each Physical Device
 --------------------------------------------------------------
 
-|- [parent physical device]
-|--- Vendor-specific-attributes [optional]
-|--- [mdev_supported_types]
-|     |--- [<type-id>]
-|     |   |--- create
-|     |   |--- name
-|     |   |--- available_instances
-|     |   |--- device_api
-|     |   |--- description
-|     |   |--- [devices]
-|     |--- [<type-id>]
-|     |   |--- create
-|     |   |--- name
-|     |   |--- available_instances
-|     |   |--- device_api
-|     |   |--- description
-|     |   |--- [devices]
-|     |--- [<type-id>]
-|          |--- create
-|          |--- name
-|          |--- available_instances
-|          |--- device_api
-|          |--- description
-|          |--- [devices]
+::
+
+  |- [parent physical device]
+  |--- Vendor-specific-attributes [optional]
+  |--- [mdev_supported_types]
+  |     |--- [<type-id>]
+  |     |   |--- create
+  |     |   |--- name
+  |     |   |--- available_instances
+  |     |   |--- device_api
+  |     |   |--- description
+  |     |   |--- [devices]
+  |     |--- [<type-id>]
+  |     |   |--- create
+  |     |   |--- name
+  |     |   |--- available_instances
+  |     |   |--- device_api
+  |     |   |--- description
+  |     |   |--- [devices]
+  |     |--- [<type-id>]
+  |          |--- create
+  |          |--- name
+  |          |--- available_instances
+  |          |--- device_api
+  |          |--- description
+  |          |--- [devices]
 
 * [mdev_supported_types]
 
@@ -219,12 +224,12 @@ Directories and files under the sysfs for Each Physical Device
 
   The [<type-id>] name is created by adding the device driver string as a prefix
   to the string provided by the vendor driver. This format of this name is as
-  follows:
+  follows::
 
 	sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name);
 
   (or using mdev_parent_dev(mdev) to arrive at the parent device outside
-   of the core mdev code)
+  of the core mdev code)
 
 * device_api
 
@@ -239,7 +244,7 @@ Directories and files under the sysfs for Each Physical Device
 * [device]
 
   This directory contains links to the devices of type <type-id> that have been
-created.
+  created.
 
 * name
 
@@ -253,21 +258,25 @@ created.
 Directories and Files Under the sysfs for Each mdev Device
 ----------------------------------------------------------
 
-|- [parent phy device]
-|--- [$MDEV_UUID]
+::
+
+  |- [parent phy device]
+  |--- [$MDEV_UUID]
          |--- remove
          |--- mdev_type {link to its type}
          |--- vendor-specific-attributes [optional]
 
 * remove (write only)
+
 Writing '1' to the 'remove' file destroys the mdev device. The vendor driver can
 fail the remove() callback if that device is active and the vendor driver
 doesn't support hot unplug.
 
-Example:
+Example::
+
 	# echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove
 
-Mediated device Hot plug:
+Mediated device Hot plug
 ------------------------
 
 Mediated devices can be created and assigned at runtime. The procedure to hot
@@ -277,13 +286,13 @@ Translation APIs for Mediated Devices
 =====================================
 
 The following APIs are provided for translating user pfn to host pfn in a VFIO
-driver:
+driver::
 
-extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
-                          int npage, int prot, unsigned long *phys_pfn);
+	extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+				  int npage, int prot, unsigned long *phys_pfn);
 
-extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
-                            int npage);
+	extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
+				    int npage);
 
 These functions call back into the back-end IOMMU module by using the pin_pages
 and unpin_pages callbacks of the struct vfio_iommu_driver_ops[4]. Currently
@@ -304,81 +313,80 @@ card.
 
    This step creates a dummy device, /sys/devices/virtual/mtty/mtty/
 
-   Files in this device directory in sysfs are similar to the following:
-
-   # tree /sys/devices/virtual/mtty/mtty/
-      /sys/devices/virtual/mtty/mtty/
-      |-- mdev_supported_types
-      |   |-- mtty-1
-      |   |   |-- available_instances
-      |   |   |-- create
-      |   |   |-- device_api
-      |   |   |-- devices
-      |   |   `-- name
-      |   `-- mtty-2
-      |       |-- available_instances
-      |       |-- create
-      |       |-- device_api
-      |       |-- devices
-      |       `-- name
-      |-- mtty_dev
-      |   `-- sample_mtty_dev
-      |-- power
-      |   |-- autosuspend_delay_ms
-      |   |-- control
-      |   |-- runtime_active_time
-      |   |-- runtime_status
-      |   `-- runtime_suspended_time
-      |-- subsystem -> ../../../../class/mtty
-      `-- uevent
+   Files in this device directory in sysfs are similar to the following::
+
+     # tree /sys/devices/virtual/mtty/mtty/
+        /sys/devices/virtual/mtty/mtty/
+        |-- mdev_supported_types
+        |   |-- mtty-1
+        |   |   |-- available_instances
+        |   |   |-- create
+        |   |   |-- device_api
+        |   |   |-- devices
+        |   |   `-- name
+        |   `-- mtty-2
+        |       |-- available_instances
+        |       |-- create
+        |       |-- device_api
+        |       |-- devices
+        |       `-- name
+        |-- mtty_dev
+        |   `-- sample_mtty_dev
+        |-- power
+        |   |-- autosuspend_delay_ms
+        |   |-- control
+        |   |-- runtime_active_time
+        |   |-- runtime_status
+        |   `-- runtime_suspended_time
+        |-- subsystem -> ../../../../class/mtty
+        `-- uevent
 
 2. Create a mediated device by using the dummy device that you created in the
-   previous step.
+   previous step::
 
-   # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" >	\
+     # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" >	\
               /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create
 
-3. Add parameters to qemu-kvm.
+3. Add parameters to qemu-kvm::
 
-   -device vfio-pci,\
-    sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
+     -device vfio-pci,\
+      sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
 
 4. Boot the VM.
 
    In the Linux guest VM, with no hardware on the host, the device appears
-   as  follows:
-
-   # lspci -s 00:05.0 -xxvv
-   00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
-           Subsystem: Device 4348:3253
-           Physical Slot: 5
-           Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
-   Stepping- SERR- FastB2B- DisINTx-
-           Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
-   <TAbort- <MAbort- >SERR- <PERR- INTx-
-           Interrupt: pin A routed to IRQ 10
-           Region 0: I/O ports at c150 [size=8]
-           Region 1: I/O ports at c158 [size=8]
-           Kernel driver in use: serial
-   00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
-   10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
-   20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
-   30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
-
-   In the Linux guest VM, dmesg output for the device is as follows:
-
-   serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ
-10
-   0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
-   0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
-
-
-5. In the Linux guest VM, check the serial ports.
-
-   # setserial -g /dev/ttyS*
-   /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
-   /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
-   /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
+   as  follows::
+
+     # lspci -s 00:05.0 -xxvv
+     00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
+             Subsystem: Device 4348:3253
+             Physical Slot: 5
+             Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
+     Stepping- SERR- FastB2B- DisINTx-
+             Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
+     <TAbort- <MAbort- >SERR- <PERR- INTx-
+             Interrupt: pin A routed to IRQ 10
+             Region 0: I/O ports at c150 [size=8]
+             Region 1: I/O ports at c158 [size=8]
+             Kernel driver in use: serial
+     00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
+     10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
+     20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
+     30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
+
+     In the Linux guest VM, dmesg output for the device is as follows:
+
+     serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10
+     0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
+     0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
+
+
+5. In the Linux guest VM, check the serial ports::
+
+     # setserial -g /dev/ttyS*
+     /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
+     /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
+     /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
 
 6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or
    /dev/ttyS2 with hardware flow control disabled.
@@ -388,14 +396,14 @@ card.
 
    Data is loop backed from hosts mtty driver.
 
-8. Destroy the mediated device that you created.
+8. Destroy the mediated device that you created::
 
-   # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove
+     # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove
 
 References
 ==========
 
-[1] See Documentation/vfio.txt for more information on VFIO.
-[2] struct mdev_driver in include/linux/mdev.h
-[3] struct mdev_parent_ops in include/linux/mdev.h
-[4] struct vfio_iommu_driver_ops in include/linux/vfio.h
+1. See Documentation/vfio.txt for more information on VFIO.
+2. struct mdev_driver in include/linux/mdev.h
+3. struct mdev_parent_ops in include/linux/mdev.h
+4. struct vfio_iommu_driver_ops in include/linux/vfio.h
-- 
GitLab


From c6f4d41338a78bcc3ddcc4e00f5de63c8ee2ad20 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:38:00 -0300
Subject: [PATCH 1620/1958] vfio.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- adjust title marks;
- use footnote marks;
- mark literal blocks;
- adjust identation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/vfio.txt | 281 +++++++++++++++++++++--------------------
 1 file changed, 144 insertions(+), 137 deletions(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 1dd3fddfd3a1e..ef6a5111eaa1c 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -1,5 +1,7 @@
-VFIO - "Virtual Function I/O"[1]
--------------------------------------------------------------------------------
+==================================
+VFIO - "Virtual Function I/O" [1]_
+==================================
+
 Many modern system now provide DMA and interrupt remapping facilities
 to help ensure I/O devices behave within the boundaries they've been
 allotted.  This includes x86 hardware with AMD-Vi and Intel VT-d,
@@ -7,14 +9,14 @@ POWER systems with Partitionable Endpoints (PEs) and embedded PowerPC
 systems such as Freescale PAMU.  The VFIO driver is an IOMMU/device
 agnostic framework for exposing direct device access to userspace, in
 a secure, IOMMU protected environment.  In other words, this allows
-safe[2], non-privileged, userspace drivers.
+safe [2]_, non-privileged, userspace drivers.
 
 Why do we want that?  Virtual machines often make use of direct device
 access ("device assignment") when configured for the highest possible
 I/O performance.  From a device and host perspective, this simply
 turns the VM into a userspace driver, with the benefits of
 significantly reduced latency, higher bandwidth, and direct use of
-bare-metal device drivers[3].
+bare-metal device drivers [3]_.
 
 Some applications, particularly in the high performance computing
 field, also benefit from low-overhead, direct device access from
@@ -31,7 +33,7 @@ KVM PCI specific device assignment code as well as provide a more
 secure, more featureful userspace driver environment than UIO.
 
 Groups, Devices, and IOMMUs
--------------------------------------------------------------------------------
+---------------------------
 
 Devices are the main target of any I/O driver.  Devices typically
 create a programming interface made up of I/O access, interrupts,
@@ -114,40 +116,40 @@ well as mechanisms for describing and registering interrupt
 notifications.
 
 VFIO Usage Example
--------------------------------------------------------------------------------
+------------------
 
-Assume user wants to access PCI device 0000:06:0d.0
+Assume user wants to access PCI device 0000:06:0d.0::
 
-$ readlink /sys/bus/pci/devices/0000:06:0d.0/iommu_group
-../../../../kernel/iommu_groups/26
+	$ readlink /sys/bus/pci/devices/0000:06:0d.0/iommu_group
+	../../../../kernel/iommu_groups/26
 
 This device is therefore in IOMMU group 26.  This device is on the
 pci bus, therefore the user will make use of vfio-pci to manage the
-group:
+group::
 
-# modprobe vfio-pci
+	# modprobe vfio-pci
 
 Binding this device to the vfio-pci driver creates the VFIO group
-character devices for this group:
+character devices for this group::
 
-$ lspci -n -s 0000:06:0d.0
-06:0d.0 0401: 1102:0002 (rev 08)
-# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
-# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
+	$ lspci -n -s 0000:06:0d.0
+	06:0d.0 0401: 1102:0002 (rev 08)
+	# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
+	# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
 
 Now we need to look at what other devices are in the group to free
-it for use by VFIO:
-
-$ ls -l /sys/bus/pci/devices/0000:06:0d.0/iommu_group/devices
-total 0
-lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:00:1e.0 ->
-	../../../../devices/pci0000:00/0000:00:1e.0
-lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.0 ->
-	../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.0
-lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.1 ->
-	../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.1
-
-This device is behind a PCIe-to-PCI bridge[4], therefore we also
+it for use by VFIO::
+
+	$ ls -l /sys/bus/pci/devices/0000:06:0d.0/iommu_group/devices
+	total 0
+	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:00:1e.0 ->
+		../../../../devices/pci0000:00/0000:00:1e.0
+	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.0 ->
+		../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.0
+	lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.1 ->
+		../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.1
+
+This device is behind a PCIe-to-PCI bridge [4]_, therefore we also
 need to add device 0000:06:0d.1 to the group following the same
 procedure as above.  Device 0000:00:1e.0 is a bridge that does
 not currently have a host driver, therefore it's not required to
@@ -157,12 +159,12 @@ support PCI bridges).
 The final step is to provide the user with access to the group if
 unprivileged operation is desired (note that /dev/vfio/vfio provides
 no capabilities on its own and is therefore expected to be set to
-mode 0666 by the system).
+mode 0666 by the system)::
 
-# chown user:user /dev/vfio/26
+	# chown user:user /dev/vfio/26
 
 The user now has full access to all the devices and the iommu for this
-group and can access them as follows:
+group and can access them as follows::
 
 	int container, group, device, i;
 	struct vfio_group_status group_status =
@@ -248,31 +250,31 @@ VFIO bus driver API
 VFIO bus drivers, such as vfio-pci make use of only a few interfaces
 into VFIO core.  When devices are bound and unbound to the driver,
 the driver should call vfio_add_group_dev() and vfio_del_group_dev()
-respectively:
+respectively::
 
-extern int vfio_add_group_dev(struct iommu_group *iommu_group,
-                              struct device *dev,
-                              const struct vfio_device_ops *ops,
-                              void *device_data);
+	extern int vfio_add_group_dev(struct iommu_group *iommu_group,
+	                              struct device *dev,
+				      const struct vfio_device_ops *ops,
+				      void *device_data);
 
-extern void *vfio_del_group_dev(struct device *dev);
+	extern void *vfio_del_group_dev(struct device *dev);
 
 vfio_add_group_dev() indicates to the core to begin tracking the
 specified iommu_group and register the specified dev as owned by
 a VFIO bus driver.  The driver provides an ops structure for callbacks
-similar to a file operations structure:
-
-struct vfio_device_ops {
-	int	(*open)(void *device_data);
-	void	(*release)(void *device_data);
-	ssize_t	(*read)(void *device_data, char __user *buf,
-			size_t count, loff_t *ppos);
-	ssize_t	(*write)(void *device_data, const char __user *buf,
-			 size_t size, loff_t *ppos);
-	long	(*ioctl)(void *device_data, unsigned int cmd,
-			 unsigned long arg);
-	int	(*mmap)(void *device_data, struct vm_area_struct *vma);
-};
+similar to a file operations structure::
+
+	struct vfio_device_ops {
+		int	(*open)(void *device_data);
+		void	(*release)(void *device_data);
+		ssize_t	(*read)(void *device_data, char __user *buf,
+				size_t count, loff_t *ppos);
+		ssize_t	(*write)(void *device_data, const char __user *buf,
+				 size_t size, loff_t *ppos);
+		long	(*ioctl)(void *device_data, unsigned int cmd,
+				 unsigned long arg);
+		int	(*mmap)(void *device_data, struct vm_area_struct *vma);
+	};
 
 Each function is passed the device_data that was originally registered
 in the vfio_add_group_dev() call above.  This allows the bus driver
@@ -285,50 +287,55 @@ own VFIO_DEVICE_GET_REGION_INFO ioctl.
 
 
 PPC64 sPAPR implementation note
--------------------------------------------------------------------------------
+-------------------------------
 
 This implementation has some specifics:
 
 1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per
-container is supported as an IOMMU table is allocated at the boot time,
-one table per a IOMMU group which is a Partitionable Endpoint (PE)
-(PE is often a PCI domain but not always).
-Newer systems (POWER8 with IODA2) have improved hardware design which allows
-to remove this limitation and have multiple IOMMU groups per a VFIO container.
+   container is supported as an IOMMU table is allocated at the boot time,
+   one table per a IOMMU group which is a Partitionable Endpoint (PE)
+   (PE is often a PCI domain but not always).
+
+   Newer systems (POWER8 with IODA2) have improved hardware design which allows
+   to remove this limitation and have multiple IOMMU groups per a VFIO
+   container.
 
 2) The hardware supports so called DMA windows - the PCI address range
-within which DMA transfer is allowed, any attempt to access address space
-out of the window leads to the whole PE isolation.
+   within which DMA transfer is allowed, any attempt to access address space
+   out of the window leads to the whole PE isolation.
 
 3) PPC64 guests are paravirtualized but not fully emulated. There is an API
-to map/unmap pages for DMA, and it normally maps 1..32 pages per call and
-currently there is no way to reduce the number of calls. In order to make things
-faster, the map/unmap handling has been implemented in real mode which provides
-an excellent performance which has limitations such as inability to do
-locked pages accounting in real time.
+   to map/unmap pages for DMA, and it normally maps 1..32 pages per call and
+   currently there is no way to reduce the number of calls. In order to make
+   things faster, the map/unmap handling has been implemented in real mode
+   which provides an excellent performance which has limitations such as
+   inability to do locked pages accounting in real time.
 
 4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
-subtree that can be treated as a unit for the purposes of partitioning and
-error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
-function of a multi-function IOA, or multiple IOAs (possibly including switch
-and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
-and recover from them via EEH RTAS services, which works on the basis of
-additional ioctl commands.
+   subtree that can be treated as a unit for the purposes of partitioning and
+   error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+   function of a multi-function IOA, or multiple IOAs (possibly including
+   switch and bridge structures above the multiple IOAs). PPC64 guests detect
+   PCI errors and recover from them via EEH RTAS services, which works on the
+   basis of additional ioctl commands.
 
-So 4 additional ioctls have been added:
+   So 4 additional ioctls have been added:
 
-	VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
-		of the DMA window on the PCI bus.
+	VFIO_IOMMU_SPAPR_TCE_GET_INFO
+		returns the size and the start of the DMA window on the PCI bus.
 
-	VFIO_IOMMU_ENABLE - enables the container. The locked pages accounting
+	VFIO_IOMMU_ENABLE
+		enables the container. The locked pages accounting
 		is done at this point. This lets user first to know what
 		the DMA window is and adjust rlimit before doing any real job.
 
-	VFIO_IOMMU_DISABLE - disables the container.
+	VFIO_IOMMU_DISABLE
+		disables the container.
 
-	VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
+	VFIO_EEH_PE_OP
+		provides an API for EEH setup, error detection and recovery.
 
-The code flow from the example above should be slightly changed:
+   The code flow from the example above should be slightly changed::
 
 	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
 
@@ -442,73 +449,73 @@ The code flow from the example above should be slightly changed:
 	....
 
 5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/
-VFIO_IOMMU_DISABLE and implements 2 new ioctls:
-VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY
-(which are unsupported in v1 IOMMU).
+   VFIO_IOMMU_DISABLE and implements 2 new ioctls:
+   VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY
+   (which are unsupported in v1 IOMMU).
 
-PPC64 paravirtualized guests generate a lot of map/unmap requests,
-and the handling of those includes pinning/unpinning pages and updating
-mm::locked_vm counter to make sure we do not exceed the rlimit.
-The v2 IOMMU splits accounting and pinning into separate operations:
+   PPC64 paravirtualized guests generate a lot of map/unmap requests,
+   and the handling of those includes pinning/unpinning pages and updating
+   mm::locked_vm counter to make sure we do not exceed the rlimit.
+   The v2 IOMMU splits accounting and pinning into separate operations:
 
-- VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls
-receive a user space address and size of the block to be pinned.
-Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to
-be called with the exact address and size used for registering
-the memory block. The userspace is not expected to call these often.
-The ranges are stored in a linked list in a VFIO container.
+   - VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls
+     receive a user space address and size of the block to be pinned.
+     Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to
+     be called with the exact address and size used for registering
+     the memory block. The userspace is not expected to call these often.
+     The ranges are stored in a linked list in a VFIO container.
 
-- VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual
-IOMMU table and do not do pinning; instead these check that the userspace
-address is from pre-registered range.
+   - VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual
+     IOMMU table and do not do pinning; instead these check that the userspace
+     address is from pre-registered range.
 
-This separation helps in optimizing DMA for guests.
+   This separation helps in optimizing DMA for guests.
 
 6) sPAPR specification allows guests to have an additional DMA window(s) on
-a PCI bus with a variable page size. Two ioctls have been added to support
-this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
-The platform has to support the functionality or error will be returned to
-the userspace. The existing hardware supports up to 2 DMA windows, one is
-2GB long, uses 4K pages and called "default 32bit window"; the other can
-be as big as entire RAM, use different page size, it is optional - guests
-create those in run-time if the guest driver supports 64bit DMA.
-
-VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
-a number of TCE table levels (if a TCE table is going to be big enough and
-the kernel may not be able to allocate enough of physically contiguous memory).
-It creates a new window in the available slot and returns the bus address where
-the new window starts. Due to hardware limitation, the user space cannot choose
-the location of DMA windows.
-
-VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
-and removes it.
+   a PCI bus with a variable page size. Two ioctls have been added to support
+   this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
+   The platform has to support the functionality or error will be returned to
+   the userspace. The existing hardware supports up to 2 DMA windows, one is
+   2GB long, uses 4K pages and called "default 32bit window"; the other can
+   be as big as entire RAM, use different page size, it is optional - guests
+   create those in run-time if the guest driver supports 64bit DMA.
+
+   VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
+   a number of TCE table levels (if a TCE table is going to be big enough and
+   the kernel may not be able to allocate enough of physically contiguous
+   memory). It creates a new window in the available slot and returns the bus
+   address where the new window starts. Due to hardware limitation, the user
+   space cannot choose the location of DMA windows.
+
+   VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
+   and removes it.
 
 -------------------------------------------------------------------------------
 
-[1] VFIO was originally an acronym for "Virtual Function I/O" in its
-initial implementation by Tom Lyon while as Cisco.  We've since
-outgrown the acronym, but it's catchy.
-
-[2] "safe" also depends upon a device being "well behaved".  It's
-possible for multi-function devices to have backdoors between
-functions and even for single function devices to have alternative
-access to things like PCI config space through MMIO registers.  To
-guard against the former we can include additional precautions in the
-IOMMU driver to group multi-function PCI devices together
-(iommu=group_mf).  The latter we can't prevent, but the IOMMU should
-still provide isolation.  For PCI, SR-IOV Virtual Functions are the
-best indicator of "well behaved", as these are designed for
-virtualization usage models.
-
-[3] As always there are trade-offs to virtual machine device
-assignment that are beyond the scope of VFIO.  It's expected that
-future IOMMU technologies will reduce some, but maybe not all, of
-these trade-offs.
-
-[4] In this case the device is below a PCI bridge, so transactions
-from either function of the device are indistinguishable to the iommu:
-
--[0000:00]-+-1e.0-[06]--+-0d.0
-                        \-0d.1
-
-00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90)
+.. [1] VFIO was originally an acronym for "Virtual Function I/O" in its
+   initial implementation by Tom Lyon while as Cisco.  We've since
+   outgrown the acronym, but it's catchy.
+
+.. [2] "safe" also depends upon a device being "well behaved".  It's
+   possible for multi-function devices to have backdoors between
+   functions and even for single function devices to have alternative
+   access to things like PCI config space through MMIO registers.  To
+   guard against the former we can include additional precautions in the
+   IOMMU driver to group multi-function PCI devices together
+   (iommu=group_mf).  The latter we can't prevent, but the IOMMU should
+   still provide isolation.  For PCI, SR-IOV Virtual Functions are the
+   best indicator of "well behaved", as these are designed for
+   virtualization usage models.
+
+.. [3] As always there are trade-offs to virtual machine device
+   assignment that are beyond the scope of VFIO.  It's expected that
+   future IOMMU technologies will reduce some, but maybe not all, of
+   these trade-offs.
+
+.. [4] In this case the device is below a PCI bridge, so transactions
+   from either function of the device are indistinguishable to the iommu::
+
+	-[0000:00]-+-1e.0-[06]--+-0d.0
+				\-0d.1
+
+	00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90)
-- 
GitLab


From 05a07392c30cd38b466c7d4a1ab3979ce1fe3cb2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:45:31 -0300
Subject: [PATCH 1621/1958] xillybus.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- Adjust indentation;
- Mark authorship;
- Comment internal contents table;
- Mark literal blocks;
- Don't use all-upercase titles.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/xillybus.txt | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/Documentation/xillybus.txt b/Documentation/xillybus.txt
index 1660145b9969f..2446ee303c095 100644
--- a/Documentation/xillybus.txt
+++ b/Documentation/xillybus.txt
@@ -1,12 +1,11 @@
+==========================================
+Xillybus driver for generic FPGA interface
+==========================================
 
-               ==========================================
-               Xillybus driver for generic FPGA interface
-               ==========================================
+:Author: Eli Billauer, Xillybus Ltd. (http://xillybus.com)
+:Email:  eli.billauer@gmail.com or as advertised on Xillybus' site.
 
-Author: Eli Billauer, Xillybus Ltd. (http://xillybus.com)
-Email:  eli.billauer@gmail.com or as advertised on Xillybus' site.
-
-Contents:
+.. Contents:
 
  - Introduction
   -- Background
@@ -17,7 +16,7 @@ Contents:
   -- Synchronization
   -- Seekable pipes
 
-- Internals
+ - Internals
   -- Source code organization
   -- Pipe attributes
   -- Host never reads from the FPGA
@@ -29,7 +28,7 @@ Contents:
   -- The "nonempty" message (supporting poll)
 
 
-INTRODUCTION
+Introduction
 ============
 
 Background
@@ -105,7 +104,7 @@ driver is used to work out of the box with any Xillybus IP core.
 The data structure just mentioned should not be confused with PCI's
 configuration space or the Flattened Device Tree.
 
-USAGE
+Usage
 =====
 
 User interface
@@ -117,11 +116,11 @@ names of these files depend on the IP core that is loaded in the FPGA (see
 Probing below). To communicate with the FPGA, open the device file that
 corresponds to the hardware FIFO you want to send data or receive data from,
 and use plain write() or read() calls, just like with a regular pipe. In
-particular, it makes perfect sense to go:
+particular, it makes perfect sense to go::
 
-$ cat mydata > /dev/xillybus_thisfifo
+	$ cat mydata > /dev/xillybus_thisfifo
 
-$ cat /dev/xillybus_thatfifo > hisdata
+	$ cat /dev/xillybus_thatfifo > hisdata
 
 possibly pressing CTRL-C as some stage, even though the xillybus_* pipes have
 the capability to send an EOF (but may not use it).
@@ -178,7 +177,7 @@ the attached memory is done by seeking to the desired address, and calling
 read() or write() as required.
 
 
-INTERNALS
+Internals
 =========
 
 Source code organization
@@ -365,7 +364,7 @@ into that page. It can be shown that all pages requested from the kernel
 (except possibly for the last) are 100% utilized this way.
 
 The "nonempty" message (supporting poll)
----------------------------------------
+----------------------------------------
 
 In order to support the "poll" method (and hence select() ), there is a small
 catch regarding the FPGA to host direction: The FPGA may have filled a DMA
-- 
GitLab


From 29c8c4ac9525b15457266a8fb8bb59f366f5d65b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:54:22 -0300
Subject: [PATCH 1622/1958] xz.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:
- Use marks for titles;
- Adjust indentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/xz.txt | 200 ++++++++++++++++++++++---------------------
 1 file changed, 103 insertions(+), 97 deletions(-)

diff --git a/Documentation/xz.txt b/Documentation/xz.txt
index 2cf3e2608de32..b2220d03aa509 100644
--- a/Documentation/xz.txt
+++ b/Documentation/xz.txt
@@ -1,121 +1,127 @@
-
+============================
 XZ data compression in Linux
 ============================
 
 Introduction
+============
 
-    XZ is a general purpose data compression format with high compression
-    ratio and relatively fast decompression. The primary compression
-    algorithm (filter) is LZMA2. Additional filters can be used to improve
-    compression ratio even further. E.g. Branch/Call/Jump (BCJ) filters
-    improve compression ratio of executable data.
+XZ is a general purpose data compression format with high compression
+ratio and relatively fast decompression. The primary compression
+algorithm (filter) is LZMA2. Additional filters can be used to improve
+compression ratio even further. E.g. Branch/Call/Jump (BCJ) filters
+improve compression ratio of executable data.
 
-    The XZ decompressor in Linux is called XZ Embedded. It supports
-    the LZMA2 filter and optionally also BCJ filters. CRC32 is supported
-    for integrity checking. The home page of XZ Embedded is at
-    <http://tukaani.org/xz/embedded.html>, where you can find the
-    latest version and also information about using the code outside
-    the Linux kernel.
+The XZ decompressor in Linux is called XZ Embedded. It supports
+the LZMA2 filter and optionally also BCJ filters. CRC32 is supported
+for integrity checking. The home page of XZ Embedded is at
+<http://tukaani.org/xz/embedded.html>, where you can find the
+latest version and also information about using the code outside
+the Linux kernel.
 
-    For userspace, XZ Utils provide a zlib-like compression library
-    and a gzip-like command line tool. XZ Utils can be downloaded from
-    <http://tukaani.org/xz/>.
+For userspace, XZ Utils provide a zlib-like compression library
+and a gzip-like command line tool. XZ Utils can be downloaded from
+<http://tukaani.org/xz/>.
 
 XZ related components in the kernel
-
-    The xz_dec module provides XZ decompressor with single-call (buffer
-    to buffer) and multi-call (stateful) APIs. The usage of the xz_dec
-    module is documented in include/linux/xz.h.
-
-    The xz_dec_test module is for testing xz_dec. xz_dec_test is not
-    useful unless you are hacking the XZ decompressor. xz_dec_test
-    allocates a char device major dynamically to which one can write
-    .xz files from userspace. The decompressed output is thrown away.
-    Keep an eye on dmesg to see diagnostics printed by xz_dec_test.
-    See the xz_dec_test source code for the details.
-
-    For decompressing the kernel image, initramfs, and initrd, there
-    is a wrapper function in lib/decompress_unxz.c. Its API is the
-    same as in other decompress_*.c files, which is defined in
-    include/linux/decompress/generic.h.
-
-    scripts/xz_wrap.sh is a wrapper for the xz command line tool found
-    from XZ Utils. The wrapper sets compression options to values suitable
-    for compressing the kernel image.
-
-    For kernel makefiles, two commands are provided for use with
-    $(call if_needed). The kernel image should be compressed with
-    $(call if_needed,xzkern) which will use a BCJ filter and a big LZMA2
-    dictionary. It will also append a four-byte trailer containing the
-    uncompressed size of the file, which is needed by the boot code.
-    Other things should be compressed with $(call if_needed,xzmisc)
-    which will use no BCJ filter and 1 MiB LZMA2 dictionary.
+===================================
+
+The xz_dec module provides XZ decompressor with single-call (buffer
+to buffer) and multi-call (stateful) APIs. The usage of the xz_dec
+module is documented in include/linux/xz.h.
+
+The xz_dec_test module is for testing xz_dec. xz_dec_test is not
+useful unless you are hacking the XZ decompressor. xz_dec_test
+allocates a char device major dynamically to which one can write
+.xz files from userspace. The decompressed output is thrown away.
+Keep an eye on dmesg to see diagnostics printed by xz_dec_test.
+See the xz_dec_test source code for the details.
+
+For decompressing the kernel image, initramfs, and initrd, there
+is a wrapper function in lib/decompress_unxz.c. Its API is the
+same as in other decompress_*.c files, which is defined in
+include/linux/decompress/generic.h.
+
+scripts/xz_wrap.sh is a wrapper for the xz command line tool found
+from XZ Utils. The wrapper sets compression options to values suitable
+for compressing the kernel image.
+
+For kernel makefiles, two commands are provided for use with
+$(call if_needed). The kernel image should be compressed with
+$(call if_needed,xzkern) which will use a BCJ filter and a big LZMA2
+dictionary. It will also append a four-byte trailer containing the
+uncompressed size of the file, which is needed by the boot code.
+Other things should be compressed with $(call if_needed,xzmisc)
+which will use no BCJ filter and 1 MiB LZMA2 dictionary.
 
 Notes on compression options
+============================
 
-    Since the XZ Embedded supports only streams with no integrity check or
-    CRC32, make sure that you don't use some other integrity check type
-    when encoding files that are supposed to be decoded by the kernel. With
-    liblzma, you need to use either LZMA_CHECK_NONE or LZMA_CHECK_CRC32
-    when encoding. With the xz command line tool, use --check=none or
-    --check=crc32.
-
-    Using CRC32 is strongly recommended unless there is some other layer
-    which will verify the integrity of the uncompressed data anyway.
-    Double checking the integrity would probably be waste of CPU cycles.
-    Note that the headers will always have a CRC32 which will be validated
-    by the decoder; you can only change the integrity check type (or
-    disable it) for the actual uncompressed data.
-
-    In userspace, LZMA2 is typically used with dictionary sizes of several
-    megabytes. The decoder needs to have the dictionary in RAM, thus big
-    dictionaries cannot be used for files that are intended to be decoded
-    by the kernel. 1 MiB is probably the maximum reasonable dictionary
-    size for in-kernel use (maybe more is OK for initramfs). The presets
-    in XZ Utils may not be optimal when creating files for the kernel,
-    so don't hesitate to use custom settings. Example:
-
-        xz --check=crc32 --lzma2=dict=512KiB inputfile
-
-    An exception to above dictionary size limitation is when the decoder
-    is used in single-call mode. Decompressing the kernel itself is an
-    example of this situation. In single-call mode, the memory usage
-    doesn't depend on the dictionary size, and it is perfectly fine to
-    use a big dictionary: for maximum compression, the dictionary should
-    be at least as big as the uncompressed data itself.
+Since the XZ Embedded supports only streams with no integrity check or
+CRC32, make sure that you don't use some other integrity check type
+when encoding files that are supposed to be decoded by the kernel. With
+liblzma, you need to use either LZMA_CHECK_NONE or LZMA_CHECK_CRC32
+when encoding. With the xz command line tool, use --check=none or
+--check=crc32.
+
+Using CRC32 is strongly recommended unless there is some other layer
+which will verify the integrity of the uncompressed data anyway.
+Double checking the integrity would probably be waste of CPU cycles.
+Note that the headers will always have a CRC32 which will be validated
+by the decoder; you can only change the integrity check type (or
+disable it) for the actual uncompressed data.
+
+In userspace, LZMA2 is typically used with dictionary sizes of several
+megabytes. The decoder needs to have the dictionary in RAM, thus big
+dictionaries cannot be used for files that are intended to be decoded
+by the kernel. 1 MiB is probably the maximum reasonable dictionary
+size for in-kernel use (maybe more is OK for initramfs). The presets
+in XZ Utils may not be optimal when creating files for the kernel,
+so don't hesitate to use custom settings. Example::
+
+	xz --check=crc32 --lzma2=dict=512KiB inputfile
+
+An exception to above dictionary size limitation is when the decoder
+is used in single-call mode. Decompressing the kernel itself is an
+example of this situation. In single-call mode, the memory usage
+doesn't depend on the dictionary size, and it is perfectly fine to
+use a big dictionary: for maximum compression, the dictionary should
+be at least as big as the uncompressed data itself.
 
 Future plans
+============
 
-    Creating a limited XZ encoder may be considered if people think it is
-    useful. LZMA2 is slower to compress than e.g. Deflate or LZO even at
-    the fastest settings, so it isn't clear if LZMA2 encoder is wanted
-    into the kernel.
+Creating a limited XZ encoder may be considered if people think it is
+useful. LZMA2 is slower to compress than e.g. Deflate or LZO even at
+the fastest settings, so it isn't clear if LZMA2 encoder is wanted
+into the kernel.
 
-    Support for limited random-access reading is planned for the
-    decompression code. I don't know if it could have any use in the
-    kernel, but I know that it would be useful in some embedded projects
-    outside the Linux kernel.
+Support for limited random-access reading is planned for the
+decompression code. I don't know if it could have any use in the
+kernel, but I know that it would be useful in some embedded projects
+outside the Linux kernel.
 
 Conformance to the .xz file format specification
+================================================
 
-    There are a couple of corner cases where things have been simplified
-    at expense of detecting errors as early as possible. These should not
-    matter in practice all, since they don't cause security issues. But
-    it is good to know this if testing the code e.g. with the test files
-    from XZ Utils.
+There are a couple of corner cases where things have been simplified
+at expense of detecting errors as early as possible. These should not
+matter in practice all, since they don't cause security issues. But
+it is good to know this if testing the code e.g. with the test files
+from XZ Utils.
 
 Reporting bugs
+==============
 
-    Before reporting a bug, please check that it's not fixed already
-    at upstream. See <http://tukaani.org/xz/embedded.html> to get the
-    latest code.
+Before reporting a bug, please check that it's not fixed already
+at upstream. See <http://tukaani.org/xz/embedded.html> to get the
+latest code.
 
-    Report bugs to <lasse.collin@tukaani.org> or visit #tukaani on
-    Freenode and talk to Larhzu. I don't actively read LKML or other
-    kernel-related mailing lists, so if there's something I should know,
-    you should email to me personally or use IRC.
+Report bugs to <lasse.collin@tukaani.org> or visit #tukaani on
+Freenode and talk to Larhzu. I don't actively read LKML or other
+kernel-related mailing lists, so if there's something I should know,
+you should email to me personally or use IRC.
 
-    Don't bother Igor Pavlov with questions about the XZ implementation
-    in the kernel or about XZ Utils. While these two implementations
-    include essential code that is directly based on Igor Pavlov's code,
-    these implementations aren't maintained nor supported by him.
+Don't bother Igor Pavlov with questions about the XZ implementation
+in the kernel or about XZ Utils. While these two implementations
+include essential code that is directly based on Igor Pavlov's code,
+these implementations aren't maintained nor supported by him.
-- 
GitLab


From 998ff0b5796d53af66a5604d6261f9385be7b6ae Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 17 May 2017 09:55:46 -0300
Subject: [PATCH 1623/1958] zorro.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Use right marks for titles;
- Use authorship marks;
- Mark literals and literal blocks;
- Use autonumbered list for references.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/zorro.txt | 59 +++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/Documentation/zorro.txt b/Documentation/zorro.txt
index d530971beb003..664072b017e3f 100644
--- a/Documentation/zorro.txt
+++ b/Documentation/zorro.txt
@@ -1,12 +1,13 @@
-		Writing Device Drivers for Zorro Devices
-		----------------------------------------
+========================================
+Writing Device Drivers for Zorro Devices
+========================================
 
-Written by Geert Uytterhoeven <geert@linux-m68k.org>
-Last revised: September 5, 2003
+:Author: Written by Geert Uytterhoeven <geert@linux-m68k.org>
+:Last revised: September 5, 2003
 
 
-1. Introduction
----------------
+Introduction
+------------
 
 The Zorro bus is the bus used in the Amiga family of computers. Thanks to
 AutoConfig(tm), it's 100% Plug-and-Play.
@@ -20,12 +21,12 @@ There are two types of Zorro buses, Zorro II and Zorro III:
     with Zorro II. The Zorro III address space lies outside the first 16 MB.
 
 
-2. Probing for Zorro Devices
-----------------------------
+Probing for Zorro Devices
+-------------------------
 
-Zorro devices are found by calling `zorro_find_device()', which returns a
-pointer to the `next' Zorro device with the specified Zorro ID. A probe loop
-for the board with Zorro ID `ZORRO_PROD_xxx' looks like:
+Zorro devices are found by calling ``zorro_find_device()``, which returns a
+pointer to the ``next`` Zorro device with the specified Zorro ID. A probe loop
+for the board with Zorro ID ``ZORRO_PROD_xxx`` looks like::
 
     struct zorro_dev *z = NULL;
 
@@ -35,8 +36,8 @@ for the board with Zorro ID `ZORRO_PROD_xxx' looks like:
 	...
     }
 
-`ZORRO_WILDCARD' acts as a wildcard and finds any Zorro device. If your driver
-supports different types of boards, you can use a construct like:
+``ZORRO_WILDCARD`` acts as a wildcard and finds any Zorro device. If your driver
+supports different types of boards, you can use a construct like::
 
     struct zorro_dev *z = NULL;
 
@@ -49,24 +50,24 @@ supports different types of boards, you can use a construct like:
     }
 
 
-3. Zorro Resources
-------------------
+Zorro Resources
+---------------
 
 Before you can access a Zorro device's registers, you have to make sure it's
 not yet in use. This is done using the I/O memory space resource management
-functions:
+functions::
 
     request_mem_region()
     release_mem_region()
 
-Shortcuts to claim the whole device's address space are provided as well:
+Shortcuts to claim the whole device's address space are provided as well::
 
     zorro_request_device
     zorro_release_device
 
 
-4. Accessing the Zorro Address Space
-------------------------------------
+Accessing the Zorro Address Space
+---------------------------------
 
 The address regions in the Zorro device resources are Zorro bus address
 regions. Due to the identity bus-physical address mapping on the Zorro bus,
@@ -78,26 +79,26 @@ The treatment of these regions depends on the type of Zorro space:
     explicitly using z_ioremap().
     
     Conversion from bus/physical Zorro II addresses to kernel virtual addresses
-    and vice versa is done using:
+    and vice versa is done using::
 
 	virt_addr = ZTWO_VADDR(bus_addr);
 	bus_addr = ZTWO_PADDR(virt_addr);
 
   - Zorro III address space must be mapped explicitly using z_ioremap() first
-    before it can be accessed:
+    before it can be accessed::
  
 	virt_addr = z_ioremap(bus_addr, size);
 	...
 	z_iounmap(virt_addr);
 
 
-5. References
--------------
+References
+----------
 
-linux/include/linux/zorro.h
-linux/include/uapi/linux/zorro.h
-linux/include/uapi/linux/zorro_ids.h
-linux/arch/m68k/include/asm/zorro.h
-linux/drivers/zorro
-/proc/bus/zorro
+#. linux/include/linux/zorro.h
+#. linux/include/uapi/linux/zorro.h
+#. linux/include/uapi/linux/zorro_ids.h
+#. linux/arch/m68k/include/asm/zorro.h
+#. linux/drivers/zorro
+#. /proc/bus/zorro
 
-- 
GitLab


From 58ef0e5bb30b72ce3ddc59ea2a02073b03f454bc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Wed, 17 May 2017 14:25:05 -0300
Subject: [PATCH 1624/1958] dell_rbu.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx.

Currently, the document is completely unformatted. Add
titles, do indentation, mark literal blocks.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dell_rbu.txt | 81 ++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 25 deletions(-)

diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index d262e22bddec0..0fdb6aa2704c5 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -1,18 +1,30 @@
-Purpose:
-Demonstrate the usage of the new open sourced rbu (Remote BIOS Update) driver
+=============================================================
+Usage of the new open sourced rbu (Remote BIOS Update) driver
+=============================================================
+
+Purpose
+=======
+
+Document demonstrating the use of the Dell Remote BIOS Update driver.
 for updating BIOS images on Dell servers and desktops.
 
-Scope:
+Scope
+=====
+
 This document discusses the functionality of the rbu driver only.
 It does not cover the support needed from applications to enable the BIOS to
 update itself with the image downloaded in to the memory.
 
-Overview:
+Overview
+========
+
 This driver works with Dell OpenManage or Dell Update Packages for updating
 the BIOS on Dell servers (starting from servers sold since 1999), desktops
 and notebooks (starting from those sold in 2005).
+
 Please go to  http://support.dell.com register and you can find info on
 OpenManage and Dell Update packages (DUP).
+
 Libsmbios can also be used to update BIOS on Dell systems go to
 http://linux.dell.com/libsmbios/ for details.
 
@@ -22,6 +34,7 @@ of physical pages having the BIOS image. In case of packetized the app
 using the driver breaks the image in to packets of fixed sizes and the driver
 would place each packet in contiguous physical memory. The driver also
 maintains a link list of packets for reading them back.
+
 If the dell_rbu driver is unloaded all the allocated memory is freed.
 
 The rbu driver needs to have an application (as mentioned above)which will
@@ -30,28 +43,33 @@ inform the BIOS to enable the update in the next system reboot.
 The user should not unload the rbu driver after downloading the BIOS image
 or updating.
 
-The driver load creates the following directories under the /sys file system.
-/sys/class/firmware/dell_rbu/loading
-/sys/class/firmware/dell_rbu/data
-/sys/devices/platform/dell_rbu/image_type
-/sys/devices/platform/dell_rbu/data
-/sys/devices/platform/dell_rbu/packet_size
+The driver load creates the following directories under the /sys file system::
+
+	/sys/class/firmware/dell_rbu/loading
+	/sys/class/firmware/dell_rbu/data
+	/sys/devices/platform/dell_rbu/image_type
+	/sys/devices/platform/dell_rbu/data
+	/sys/devices/platform/dell_rbu/packet_size
 
 The driver supports two types of update mechanism; monolithic and packetized.
 These update mechanism depends upon the BIOS currently running on the system.
 Most of the Dell systems support a monolithic update where the BIOS image is
 copied to a single contiguous block of physical memory.
+
 In case of packet mechanism the single memory can be broken in smaller chunks
 of contiguous memory and the BIOS image is scattered in these packets.
 
 By default the driver uses monolithic memory for the update type. This can be
 changed to packets during the driver load time by specifying the load
-parameter image_type=packet.  This can also be changed later as below
-echo packet > /sys/devices/platform/dell_rbu/image_type
+parameter image_type=packet.  This can also be changed later as below::
+
+	echo packet > /sys/devices/platform/dell_rbu/image_type
 
 In packet update mode the packet size has to be given before any packets can
-be downloaded. It is done as below
-echo XXXX > /sys/devices/platform/dell_rbu/packet_size
+be downloaded. It is done as below::
+
+	echo XXXX > /sys/devices/platform/dell_rbu/packet_size
+
 In the packet update mechanism, the user needs to create a new file having
 packets of data arranged back to back. It can be done as follows
 The user creates packets header, gets the chunk of the BIOS image and
@@ -60,41 +78,54 @@ added together should match the specified packet_size. This makes one
 packet, the user needs to create more such packets out of the entire BIOS
 image file and then arrange all these packets back to back in to one single
 file.
+
 This file is then copied to /sys/class/firmware/dell_rbu/data.
 Once this file gets to the driver, the driver extracts packet_size data from
 the file and spreads it across the physical memory in contiguous packet_sized
 space.
+
 This method makes sure that all the packets get to the driver in a single operation.
 
 In monolithic update the user simply get the BIOS image (.hdr file) and copies
 to the data file as is without any change to the BIOS image itself.
 
 Do the steps below to download the BIOS image.
+
 1) echo 1 > /sys/class/firmware/dell_rbu/loading
 2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
 3) echo 0 > /sys/class/firmware/dell_rbu/loading
 
 The /sys/class/firmware/dell_rbu/ entries will remain till the following is
 done.
-echo -1 > /sys/class/firmware/dell_rbu/loading
+
+::
+
+	echo -1 > /sys/class/firmware/dell_rbu/loading
+
 Until this step is completed the driver cannot be unloaded.
+
 Also echoing either mono, packet or init in to image_type will free up the
 memory allocated by the driver.
 
 If a user by accident executes steps 1 and 3 above without executing step 2;
 it will make the /sys/class/firmware/dell_rbu/ entries disappear.
-The entries can be recreated by doing the following
-echo init > /sys/devices/platform/dell_rbu/image_type
-NOTE: echoing init in image_type does not change it original value.
+
+The entries can be recreated by doing the following::
+
+	echo init > /sys/devices/platform/dell_rbu/image_type
+
+.. note:: echoing init in image_type does not change it original value.
 
 Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
 read back the image downloaded.
 
-NOTE:
-This driver requires a patch for firmware_class.c which has the modified
-request_firmware_nowait function.
-Also after updating the BIOS image a user mode application needs to execute
-code which sends the BIOS update request to the BIOS. So on the next reboot
-the BIOS knows about the new image downloaded and it updates itself.
-Also don't unload the rbu driver if the image has to be updated.
+.. note::
+
+   This driver requires a patch for firmware_class.c which has the modified
+   request_firmware_nowait function.
+
+   Also after updating the BIOS image a user mode application needs to execute
+   code which sends the BIOS update request to the BIOS. So on the next reboot
+   the BIOS knows about the new image downloaded and it updates itself.
+   Also don't unload the rbu driver if the image has to be updated.
 
-- 
GitLab


From 633b11bee40c37fed3e9384f04a889af32fb8cf1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Sun, 14 May 2017 08:48:40 -0300
Subject: [PATCH 1625/1958] cgroup-v2.txt: standardize document format

Each text file under Documentation follows a different
format. Some doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- Comment the internal index;
- Use :Date: and :Author: for authorship;
- Mark titles;
- Mark literal blocks;
- Adjust witespaces;
- Mark notes;
- Use table notation for the existing tables.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cgroup-v2.txt | 460 +++++++++++++++++++-----------------
 1 file changed, 239 insertions(+), 221 deletions(-)

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index e6101976e0f18..bde1771035671 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -1,7 +1,9 @@
-
+================
 Control Group v2
+================
 
-October, 2015		Tejun Heo <tj@kernel.org>
+:Date: October, 2015
+:Author: Tejun Heo <tj@kernel.org>
 
 This is the authoritative documentation on the design, interface and
 conventions of cgroup v2.  It describes all userland-visible aspects
@@ -9,70 +11,72 @@ of cgroup including core and specific controller behaviors.  All
 future changes must be reflected in this document.  Documentation for
 v1 is available under Documentation/cgroup-v1/.
 
-CONTENTS
-
-1. Introduction
-  1-1. Terminology
-  1-2. What is cgroup?
-2. Basic Operations
-  2-1. Mounting
-  2-2. Organizing Processes
-  2-3. [Un]populated Notification
-  2-4. Controlling Controllers
-    2-4-1. Enabling and Disabling
-    2-4-2. Top-down Constraint
-    2-4-3. No Internal Process Constraint
-  2-5. Delegation
-    2-5-1. Model of Delegation
-    2-5-2. Delegation Containment
-  2-6. Guidelines
-    2-6-1. Organize Once and Control
-    2-6-2. Avoid Name Collisions
-3. Resource Distribution Models
-  3-1. Weights
-  3-2. Limits
-  3-3. Protections
-  3-4. Allocations
-4. Interface Files
-  4-1. Format
-  4-2. Conventions
-  4-3. Core Interface Files
-5. Controllers
-  5-1. CPU
-    5-1-1. CPU Interface Files
-  5-2. Memory
-    5-2-1. Memory Interface Files
-    5-2-2. Usage Guidelines
-    5-2-3. Memory Ownership
-  5-3. IO
-    5-3-1. IO Interface Files
-    5-3-2. Writeback
-  5-4. PID
-    5-4-1. PID Interface Files
-  5-5. RDMA
-    5-5-1. RDMA Interface Files
-  5-6. Misc
-    5-6-1. perf_event
-6. Namespace
-  6-1. Basics
-  6-2. The Root and Views
-  6-3. Migration and setns(2)
-  6-4. Interaction with Other Namespaces
-P. Information on Kernel Programming
-  P-1. Filesystem Support for Writeback
-D. Deprecated v1 Core Features
-R. Issues with v1 and Rationales for v2
-  R-1. Multiple Hierarchies
-  R-2. Thread Granularity
-  R-3. Competition Between Inner Nodes and Threads
-  R-4. Other Interface Issues
-  R-5. Controller Issues and Remedies
-    R-5-1. Memory
-
-
-1. Introduction
-
-1-1. Terminology
+.. CONTENTS
+
+   1. Introduction
+     1-1. Terminology
+     1-2. What is cgroup?
+   2. Basic Operations
+     2-1. Mounting
+     2-2. Organizing Processes
+     2-3. [Un]populated Notification
+     2-4. Controlling Controllers
+       2-4-1. Enabling and Disabling
+       2-4-2. Top-down Constraint
+       2-4-3. No Internal Process Constraint
+     2-5. Delegation
+       2-5-1. Model of Delegation
+       2-5-2. Delegation Containment
+     2-6. Guidelines
+       2-6-1. Organize Once and Control
+       2-6-2. Avoid Name Collisions
+   3. Resource Distribution Models
+     3-1. Weights
+     3-2. Limits
+     3-3. Protections
+     3-4. Allocations
+   4. Interface Files
+     4-1. Format
+     4-2. Conventions
+     4-3. Core Interface Files
+   5. Controllers
+     5-1. CPU
+       5-1-1. CPU Interface Files
+     5-2. Memory
+       5-2-1. Memory Interface Files
+       5-2-2. Usage Guidelines
+       5-2-3. Memory Ownership
+     5-3. IO
+       5-3-1. IO Interface Files
+       5-3-2. Writeback
+     5-4. PID
+       5-4-1. PID Interface Files
+     5-5. RDMA
+       5-5-1. RDMA Interface Files
+     5-6. Misc
+       5-6-1. perf_event
+   6. Namespace
+     6-1. Basics
+     6-2. The Root and Views
+     6-3. Migration and setns(2)
+     6-4. Interaction with Other Namespaces
+   P. Information on Kernel Programming
+     P-1. Filesystem Support for Writeback
+   D. Deprecated v1 Core Features
+   R. Issues with v1 and Rationales for v2
+     R-1. Multiple Hierarchies
+     R-2. Thread Granularity
+     R-3. Competition Between Inner Nodes and Threads
+     R-4. Other Interface Issues
+     R-5. Controller Issues and Remedies
+       R-5-1. Memory
+
+
+Introduction
+============
+
+Terminology
+-----------
 
 "cgroup" stands for "control group" and is never capitalized.  The
 singular form is used to designate the whole feature and also as a
@@ -80,7 +84,8 @@ qualifier as in "cgroup controllers".  When explicitly referring to
 multiple individual control groups, the plural form "cgroups" is used.
 
 
-1-2. What is cgroup?
+What is cgroup?
+---------------
 
 cgroup is a mechanism to organize processes hierarchically and
 distribute system resources along the hierarchy in a controlled and
@@ -110,12 +115,14 @@ restrictions set closer to the root in the hierarchy can not be
 overridden from further away.
 
 
-2. Basic Operations
+Basic Operations
+================
 
-2-1. Mounting
+Mounting
+--------
 
 Unlike v1, cgroup v2 has only single hierarchy.  The cgroup v2
-hierarchy can be mounted with the following mount command.
+hierarchy can be mounted with the following mount command::
 
   # mount -t cgroup2 none $MOUNT_POINT
 
@@ -160,10 +167,11 @@ cgroup v2 currently supports the following mount options.
 	Delegation section for details.
 
 
-2-2. Organizing Processes
+Organizing Processes
+--------------------
 
 Initially, only the root cgroup exists to which all processes belong.
-A child cgroup can be created by creating a sub-directory.
+A child cgroup can be created by creating a sub-directory::
 
   # mkdir $CGROUP_NAME
 
@@ -190,28 +198,29 @@ moved to another cgroup.
 A cgroup which doesn't have any children or live processes can be
 destroyed by removing the directory.  Note that a cgroup which doesn't
 have any children and is associated only with zombie processes is
-considered empty and can be removed.
+considered empty and can be removed::
 
   # rmdir $CGROUP_NAME
 
 "/proc/$PID/cgroup" lists a process's cgroup membership.  If legacy
 cgroup is in use in the system, this file may contain multiple lines,
 one for each hierarchy.  The entry for cgroup v2 is always in the
-format "0::$PATH".
+format "0::$PATH"::
 
   # cat /proc/842/cgroup
   ...
   0::/test-cgroup/test-cgroup-nested
 
 If the process becomes a zombie and the cgroup it was associated with
-is removed subsequently, " (deleted)" is appended to the path.
+is removed subsequently, " (deleted)" is appended to the path::
 
   # cat /proc/842/cgroup
   ...
   0::/test-cgroup/test-cgroup-nested (deleted)
 
 
-2-3. [Un]populated Notification
+[Un]populated Notification
+--------------------------
 
 Each non-root cgroup has a "cgroup.events" file which contains
 "populated" field indicating whether the cgroup's sub-hierarchy has
@@ -222,7 +231,7 @@ example, to start a clean-up operation after all processes of a given
 sub-hierarchy have exited.  The populated state updates and
 notifications are recursive.  Consider the following sub-hierarchy
 where the numbers in the parentheses represent the numbers of processes
-in each cgroup.
+in each cgroup::
 
   A(4) - B(0) - C(1)
               \ D(0)
@@ -233,18 +242,20 @@ file modified events will be generated on the "cgroup.events" files of
 both cgroups.
 
 
-2-4. Controlling Controllers
+Controlling Controllers
+-----------------------
 
-2-4-1. Enabling and Disabling
+Enabling and Disabling
+~~~~~~~~~~~~~~~~~~~~~~
 
 Each cgroup has a "cgroup.controllers" file which lists all
-controllers available for the cgroup to enable.
+controllers available for the cgroup to enable::
 
   # cat cgroup.controllers
   cpu io memory
 
 No controller is enabled by default.  Controllers can be enabled and
-disabled by writing to the "cgroup.subtree_control" file.
+disabled by writing to the "cgroup.subtree_control" file::
 
   # echo "+cpu +memory -io" > cgroup.subtree_control
 
@@ -256,7 +267,7 @@ are specified, the last one is effective.
 Enabling a controller in a cgroup indicates that the distribution of
 the target resource across its immediate children will be controlled.
 Consider the following sub-hierarchy.  The enabled controllers are
-listed in parentheses.
+listed in parentheses::
 
   A(cpu,memory) - B(memory) - C()
                             \ D()
@@ -276,7 +287,8 @@ controller interface files - anything which doesn't start with
 "cgroup." are owned by the parent rather than the cgroup itself.
 
 
-2-4-2. Top-down Constraint
+Top-down Constraint
+~~~~~~~~~~~~~~~~~~~
 
 Resources are distributed top-down and a cgroup can further distribute
 a resource only if the resource has been distributed to it from the
@@ -287,7 +299,8 @@ the parent has the controller enabled and a controller can't be
 disabled if one or more children have it enabled.
 
 
-2-4-3. No Internal Process Constraint
+No Internal Process Constraint
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Non-root cgroups can only distribute resources to their children when
 they don't have any processes of their own.  In other words, only
@@ -314,9 +327,11 @@ children before enabling controllers in its "cgroup.subtree_control"
 file.
 
 
-2-5. Delegation
+Delegation
+----------
 
-2-5-1. Model of Delegation
+Model of Delegation
+~~~~~~~~~~~~~~~~~~~
 
 A cgroup can be delegated in two ways.  First, to a less privileged
 user by granting write access of the directory and its "cgroup.procs"
@@ -345,7 +360,8 @@ cgroups in or nesting depth of a delegated sub-hierarchy; however,
 this may be limited explicitly in the future.
 
 
-2-5-2. Delegation Containment
+Delegation Containment
+~~~~~~~~~~~~~~~~~~~~~~
 
 A delegated sub-hierarchy is contained in the sense that processes
 can't be moved into or out of the sub-hierarchy by the delegatee.
@@ -366,7 +382,7 @@ in from or push out to outside the sub-hierarchy.
 
 For an example, let's assume cgroups C0 and C1 have been delegated to
 user U0 who created C00, C01 under C0 and C10 under C1 as follows and
-all processes under C0 and C1 belong to U0.
+all processes under C0 and C1 belong to U0::
 
   ~~~~~~~~~~~~~ - C0 - C00
   ~ cgroup    ~      \ C01
@@ -386,9 +402,11 @@ namespace of the process which is attempting the migration.  If either
 is not reachable, the migration is rejected with -ENOENT.
 
 
-2-6. Guidelines
+Guidelines
+----------
 
-2-6-1. Organize Once and Control
+Organize Once and Control
+~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Migrating a process across cgroups is a relatively expensive operation
 and stateful resources such as memory are not moved together with the
@@ -404,7 +422,8 @@ distribution can be made by changing controller configuration through
 the interface files.
 
 
-2-6-2. Avoid Name Collisions
+Avoid Name Collisions
+~~~~~~~~~~~~~~~~~~~~~
 
 Interface files for a cgroup and its children cgroups occupy the same
 directory and it is possible to create children cgroups which collide
@@ -422,14 +441,16 @@ cgroup doesn't do anything to prevent name collisions and it's the
 user's responsibility to avoid them.
 
 
-3. Resource Distribution Models
+Resource Distribution Models
+============================
 
 cgroup controllers implement several resource distribution schemes
 depending on the resource type and expected use cases.  This section
 describes major schemes in use along with their expected behaviors.
 
 
-3-1. Weights
+Weights
+-------
 
 A parent's resource is distributed by adding up the weights of all
 active children and giving each the fraction matching the ratio of its
@@ -450,7 +471,8 @@ process migrations.
 and is an example of this type.
 
 
-3-2. Limits
+Limits
+------
 
 A child can only consume upto the configured amount of the resource.
 Limits can be over-committed - the sum of the limits of children can
@@ -466,7 +488,8 @@ process migrations.
 on an IO device and is an example of this type.
 
 
-3-3. Protections
+Protections
+-----------
 
 A cgroup is protected to be allocated upto the configured amount of
 the resource if the usages of all its ancestors are under their
@@ -486,7 +509,8 @@ process migrations.
 example of this type.
 
 
-3-4. Allocations
+Allocations
+-----------
 
 A cgroup is exclusively allocated a certain amount of a finite
 resource.  Allocations can't be over-committed - the sum of the
@@ -505,12 +529,14 @@ may be rejected.
 type.
 
 
-4. Interface Files
+Interface Files
+===============
 
-4-1. Format
+Format
+------
 
 All interface files should be in one of the following formats whenever
-possible.
+possible::
 
   New-line separated values
   (when only one value can be written at once)
@@ -545,7 +571,8 @@ can be written at a time.  For nested keyed files, the sub key pairs
 may be specified in any order and not all pairs have to be specified.
 
 
-4-2. Conventions
+Conventions
+-----------
 
 - Settings for a single feature should be contained in a single file.
 
@@ -581,25 +608,25 @@ may be specified in any order and not all pairs have to be specified.
   with "default" as the value must not appear when read.
 
   For example, a setting which is keyed by major:minor device numbers
-  with integer values may look like the following.
+  with integer values may look like the following::
 
     # cat cgroup-example-interface-file
     default 150
     8:0 300
 
-  The default value can be updated by
+  The default value can be updated by::
 
     # echo 125 > cgroup-example-interface-file
 
-  or
+  or::
 
     # echo "default 125" > cgroup-example-interface-file
 
-  An override can be set by
+  An override can be set by::
 
     # echo "8:16 170" > cgroup-example-interface-file
 
-  and cleared by
+  and cleared by::
 
     # echo "8:0 default" > cgroup-example-interface-file
     # cat cgroup-example-interface-file
@@ -612,12 +639,12 @@ may be specified in any order and not all pairs have to be specified.
   generated on the file.
 
 
-4-3. Core Interface Files
+Core Interface Files
+--------------------
 
 All cgroup core files are prefixed with "cgroup."
 
   cgroup.procs
-
 	A read-write new-line separated values file which exists on
 	all cgroups.
 
@@ -643,7 +670,6 @@ All cgroup core files are prefixed with "cgroup."
 	should be granted along with the containing directory.
 
   cgroup.controllers
-
 	A read-only space separated values file which exists on all
 	cgroups.
 
@@ -651,7 +677,6 @@ All cgroup core files are prefixed with "cgroup."
 	the cgroup.  The controllers are not ordered.
 
   cgroup.subtree_control
-
 	A read-write space separated values file which exists on all
 	cgroups.  Starts out empty.
 
@@ -667,23 +692,25 @@ All cgroup core files are prefixed with "cgroup."
 	operations are specified, either all succeed or all fail.
 
   cgroup.events
-
 	A read-only flat-keyed file which exists on non-root cgroups.
 	The following entries are defined.  Unless specified
 	otherwise, a value change in this file generates a file
 	modified event.
 
 	  populated
-
 		1 if the cgroup or its descendants contains any live
 		processes; otherwise, 0.
 
 
-5. Controllers
+Controllers
+===========
 
-5-1. CPU
+CPU
+---
 
-[NOTE: The interface for the cpu controller hasn't been merged yet]
+.. note::
+
+	The interface for the cpu controller hasn't been merged yet
 
 The "cpu" controllers regulates distribution of CPU cycles.  This
 controller implements weight and absolute bandwidth limit models for
@@ -691,36 +718,34 @@ normal scheduling policy and absolute bandwidth allocation model for
 realtime scheduling policy.
 
 
-5-1-1. CPU Interface Files
+CPU Interface Files
+~~~~~~~~~~~~~~~~~~~
 
 All time durations are in microseconds.
 
   cpu.stat
-
 	A read-only flat-keyed file which exists on non-root cgroups.
 
-	It reports the following six stats.
+	It reports the following six stats:
 
-	  usage_usec
-	  user_usec
-	  system_usec
-	  nr_periods
-	  nr_throttled
-	  throttled_usec
+	- usage_usec
+	- user_usec
+	- system_usec
+	- nr_periods
+	- nr_throttled
+	- throttled_usec
 
   cpu.weight
-
 	A read-write single value file which exists on non-root
 	cgroups.  The default is "100".
 
 	The weight in the range [1, 10000].
 
   cpu.max
-
 	A read-write two value file which exists on non-root cgroups.
 	The default is "max 100000".
 
-	The maximum bandwidth limit.  It's in the following format.
+	The maximum bandwidth limit.  It's in the following format::
 
 	  $MAX $PERIOD
 
@@ -729,9 +754,10 @@ All time durations are in microseconds.
 	one number is written, $MAX is updated.
 
   cpu.rt.max
+	.. note::
 
-  [NOTE: The semantics of this file is still under discussion and the
-   interface hasn't been merged yet]
+	   The semantics of this file is still under discussion and the
+	   interface hasn't been merged yet
 
 	A read-write two value file which exists on all cgroups.
 	The default is "0 100000".
@@ -739,7 +765,7 @@ All time durations are in microseconds.
 	The maximum realtime runtime allocation.  Over-committing
 	configurations are disallowed and process migrations are
 	rejected if not enough bandwidth is available.  It's in the
-	following format.
+	following format::
 
 	  $MAX $PERIOD
 
@@ -748,7 +774,8 @@ All time durations are in microseconds.
 	updated.
 
 
-5-2. Memory
+Memory
+------
 
 The "memory" controller regulates distribution of memory.  Memory is
 stateful and implements both limit and protection models.  Due to the
@@ -770,14 +797,14 @@ following types of memory usages are tracked.
 The above list may expand in the future for better coverage.
 
 
-5-2-1. Memory Interface Files
+Memory Interface Files
+~~~~~~~~~~~~~~~~~~~~~~
 
 All memory amounts are in bytes.  If a value which is not aligned to
 PAGE_SIZE is written, the value may be rounded up to the closest
 PAGE_SIZE multiple when read back.
 
   memory.current
-
 	A read-only single value file which exists on non-root
 	cgroups.
 
@@ -785,7 +812,6 @@ PAGE_SIZE multiple when read back.
 	and its descendants.
 
   memory.low
-
 	A read-write single value file which exists on non-root
 	cgroups.  The default is "0".
 
@@ -798,7 +824,6 @@ PAGE_SIZE multiple when read back.
 	protection is discouraged.
 
   memory.high
-
 	A read-write single value file which exists on non-root
 	cgroups.  The default is "max".
 
@@ -811,7 +836,6 @@ PAGE_SIZE multiple when read back.
 	under extreme conditions the limit may be breached.
 
   memory.max
-
 	A read-write single value file which exists on non-root
 	cgroups.  The default is "max".
 
@@ -826,21 +850,18 @@ PAGE_SIZE multiple when read back.
 	utility is limited to providing the final safety net.
 
   memory.events
-
 	A read-only flat-keyed file which exists on non-root cgroups.
 	The following entries are defined.  Unless specified
 	otherwise, a value change in this file generates a file
 	modified event.
 
 	  low
-
 		The number of times the cgroup is reclaimed due to
 		high memory pressure even though its usage is under
 		the low boundary.  This usually indicates that the low
 		boundary is over-committed.
 
 	  high
-
 		The number of times processes of the cgroup are
 		throttled and routed to perform direct memory reclaim
 		because the high memory boundary was exceeded.  For a
@@ -849,13 +870,11 @@ PAGE_SIZE multiple when read back.
 		occurrences are expected.
 
 	  max
-
 		The number of times the cgroup's memory usage was
 		about to go over the max boundary.  If direct reclaim
 		fails to bring it down, the cgroup goes to OOM state.
 
 	  oom
-
 		The number of time the cgroup's memory usage was
 		reached the limit and allocation was about to fail.
 
@@ -864,16 +883,14 @@ PAGE_SIZE multiple when read back.
 
 		Failed allocation in its turn could be returned into
 		userspace as -ENOMEM or siletly ignored in cases like
-		disk readahead.	 For now OOM in memory cgroup kills
+		disk readahead.  For now OOM in memory cgroup kills
 		tasks iff shortage has happened inside page fault.
 
 	  oom_kill
-
 		The number of processes belonging to this cgroup
 		killed by any kind of OOM killer.
 
   memory.stat
-
 	A read-only flat-keyed file which exists on non-root cgroups.
 
 	This breaks down the cgroup's memory footprint into different
@@ -887,73 +904,55 @@ PAGE_SIZE multiple when read back.
 	fixed position; use the keys to look up specific values!
 
 	  anon
-
 		Amount of memory used in anonymous mappings such as
 		brk(), sbrk(), and mmap(MAP_ANONYMOUS)
 
 	  file
-
 		Amount of memory used to cache filesystem data,
 		including tmpfs and shared memory.
 
 	  kernel_stack
-
 		Amount of memory allocated to kernel stacks.
 
 	  slab
-
 		Amount of memory used for storing in-kernel data
 		structures.
 
 	  sock
-
 		Amount of memory used in network transmission buffers
 
 	  shmem
-
 		Amount of cached filesystem data that is swap-backed,
 		such as tmpfs, shm segments, shared anonymous mmap()s
 
 	  file_mapped
-
 		Amount of cached filesystem data mapped with mmap()
 
 	  file_dirty
-
 		Amount of cached filesystem data that was modified but
 		not yet written back to disk
 
 	  file_writeback
-
 		Amount of cached filesystem data that was modified and
 		is currently being written back to disk
 
-	  inactive_anon
-	  active_anon
-	  inactive_file
-	  active_file
-	  unevictable
-
+	  inactive_anon, active_anon, inactive_file, active_file, unevictable
 		Amount of memory, swap-backed and filesystem-backed,
 		on the internal memory management lists used by the
 		page reclaim algorithm
 
 	  slab_reclaimable
-
 		Part of "slab" that might be reclaimed, such as
 		dentries and inodes.
 
 	  slab_unreclaimable
-
 		Part of "slab" that cannot be reclaimed on memory
 		pressure.
 
 	  pgfault
-
 		Total number of page faults incurred
 
 	  pgmajfault
-
 		Number of major page faults incurred
 
 	  workingset_refault
@@ -997,7 +996,6 @@ PAGE_SIZE multiple when read back.
 		Amount of reclaimed lazyfree pages
 
   memory.swap.current
-
 	A read-only single value file which exists on non-root
 	cgroups.
 
@@ -1005,7 +1003,6 @@ PAGE_SIZE multiple when read back.
 	and its descendants.
 
   memory.swap.max
-
 	A read-write single value file which exists on non-root
 	cgroups.  The default is "max".
 
@@ -1013,7 +1010,8 @@ PAGE_SIZE multiple when read back.
 	limit, anonymous meomry of the cgroup will not be swapped out.
 
 
-5-2-2. Usage Guidelines
+Usage Guidelines
+~~~~~~~~~~~~~~~~
 
 "memory.high" is the main mechanism to control memory usage.
 Over-committing on high limit (sum of high limits > available memory)
@@ -1036,7 +1034,8 @@ memory; unfortunately, memory pressure monitoring mechanism isn't
 implemented yet.
 
 
-5-2-3. Memory Ownership
+Memory Ownership
+~~~~~~~~~~~~~~~~
 
 A memory area is charged to the cgroup which instantiated it and stays
 charged to the cgroup until the area is released.  Migrating a process
@@ -1054,7 +1053,8 @@ POSIX_FADV_DONTNEED to relinquish the ownership of memory areas
 belonging to the affected files to ensure correct memory ownership.
 
 
-5-3. IO
+IO
+--
 
 The "io" controller regulates the distribution of IO resources.  This
 controller implements both weight based and absolute bandwidth or IOPS
@@ -1063,28 +1063,29 @@ only if cfq-iosched is in use and neither scheme is available for
 blk-mq devices.
 
 
-5-3-1. IO Interface Files
+IO Interface Files
+~~~~~~~~~~~~~~~~~~
 
   io.stat
-
 	A read-only nested-keyed file which exists on non-root
 	cgroups.
 
 	Lines are keyed by $MAJ:$MIN device numbers and not ordered.
 	The following nested keys are defined.
 
+	  ======	===================
 	  rbytes	Bytes read
 	  wbytes	Bytes written
 	  rios		Number of read IOs
 	  wios		Number of write IOs
+	  ======	===================
 
-	An example read output follows.
+	An example read output follows:
 
 	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353
 	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252
 
   io.weight
-
 	A read-write flat-keyed file which exists on non-root cgroups.
 	The default is "default 100".
 
@@ -1098,14 +1099,13 @@ blk-mq devices.
 	$WEIGHT" or simply "$WEIGHT".  Overrides can be set by writing
 	"$MAJ:$MIN $WEIGHT" and unset by writing "$MAJ:$MIN default".
 
-	An example read output follows.
+	An example read output follows::
 
 	  default 100
 	  8:16 200
 	  8:0 50
 
   io.max
-
 	A read-write nested-keyed file which exists on non-root
 	cgroups.
 
@@ -1113,10 +1113,12 @@ blk-mq devices.
 	device numbers and not ordered.  The following nested keys are
 	defined.
 
+	  =====		==================================
 	  rbps		Max read bytes per second
 	  wbps		Max write bytes per second
 	  riops		Max read IO operations per second
 	  wiops		Max write IO operations per second
+	  =====		==================================
 
 	When writing, any number of nested key-value pairs can be
 	specified in any order.  "max" can be specified as the value
@@ -1126,24 +1128,25 @@ blk-mq devices.
 	BPS and IOPS are measured in each IO direction and IOs are
 	delayed if limit is reached.  Temporary bursts are allowed.
 
-	Setting read limit at 2M BPS and write at 120 IOPS for 8:16.
+	Setting read limit at 2M BPS and write at 120 IOPS for 8:16::
 
 	  echo "8:16 rbps=2097152 wiops=120" > io.max
 
-	Reading returns the following.
+	Reading returns the following::
 
 	  8:16 rbps=2097152 wbps=max riops=max wiops=120
 
-	Write IOPS limit can be removed by writing the following.
+	Write IOPS limit can be removed by writing the following::
 
 	  echo "8:16 wiops=max" > io.max
 
-	Reading now returns the following.
+	Reading now returns the following::
 
 	  8:16 rbps=2097152 wbps=max riops=max wiops=max
 
 
-5-3-2. Writeback
+Writeback
+~~~~~~~~~
 
 Page cache is dirtied through buffered writes and shared mmaps and
 written asynchronously to the backing filesystem by the writeback
@@ -1191,22 +1194,19 @@ patterns.
 The sysctl knobs which affect writeback behavior are applied to cgroup
 writeback as follows.
 
-  vm.dirty_background_ratio
-  vm.dirty_ratio
-
+  vm.dirty_background_ratio, vm.dirty_ratio
 	These ratios apply the same to cgroup writeback with the
 	amount of available memory capped by limits imposed by the
 	memory controller and system-wide clean memory.
 
-  vm.dirty_background_bytes
-  vm.dirty_bytes
-
+  vm.dirty_background_bytes, vm.dirty_bytes
 	For cgroup writeback, this is calculated into ratio against
 	total available memory and applied the same way as
 	vm.dirty[_background]_ratio.
 
 
-5-4. PID
+PID
+---
 
 The process number controller is used to allow a cgroup to stop any
 new tasks from being fork()'d or clone()'d after a specified limit is
@@ -1221,17 +1221,16 @@ Note that PIDs used in this controller refer to TIDs, process IDs as
 used by the kernel.
 
 
-5-4-1. PID Interface Files
+PID Interface Files
+~~~~~~~~~~~~~~~~~~~
 
   pids.max
-
 	A read-write single value file which exists on non-root
 	cgroups.  The default is "max".
 
 	Hard limit of number of processes.
 
   pids.current
-
 	A read-only single value file which exists on all cgroups.
 
 	The number of processes currently in the cgroup and its
@@ -1246,12 +1245,14 @@ through fork() or clone(). These will return -EAGAIN if the creation
 of a new process would cause a cgroup policy to be violated.
 
 
-5-5. RDMA
+RDMA
+----
 
 The "rdma" controller regulates the distribution and accounting of
 of RDMA resources.
 
-5-5-1. RDMA Interface Files
+RDMA Interface Files
+~~~~~~~~~~~~~~~~~~~~
 
   rdma.max
 	A readwrite nested-keyed file that exists for all the cgroups
@@ -1264,10 +1265,12 @@ of RDMA resources.
 
 	The following nested keys are defined.
 
+	  ==========	=============================
 	  hca_handle	Maximum number of HCA Handles
 	  hca_object 	Maximum number of HCA Objects
+	  ==========	=============================
 
-	An example for mlx4 and ocrdma device follows.
+	An example for mlx4 and ocrdma device follows::
 
 	  mlx4_0 hca_handle=2 hca_object=2000
 	  ocrdma1 hca_handle=3 hca_object=max
@@ -1276,15 +1279,17 @@ of RDMA resources.
 	A read-only file that describes current resource usage.
 	It exists for all the cgroup except root.
 
-	An example for mlx4 and ocrdma device follows.
+	An example for mlx4 and ocrdma device follows::
 
 	  mlx4_0 hca_handle=1 hca_object=20
 	  ocrdma1 hca_handle=1 hca_object=23
 
 
-5-6. Misc
+Misc
+----
 
-5-6-1. perf_event
+perf_event
+~~~~~~~~~~
 
 perf_event controller, if not mounted on a legacy hierarchy, is
 automatically enabled on the v2 hierarchy so that perf events can
@@ -1292,9 +1297,11 @@ always be filtered by cgroup v2 path.  The controller can still be
 moved to a legacy hierarchy after v2 hierarchy is populated.
 
 
-6. Namespace
+Namespace
+=========
 
-6-1. Basics
+Basics
+------
 
 cgroup namespace provides a mechanism to virtualize the view of the
 "/proc/$PID/cgroup" file and cgroup mounts.  The CLONE_NEWCGROUP clone
@@ -1308,7 +1315,7 @@ Without cgroup namespace, the "/proc/$PID/cgroup" file shows the
 complete path of the cgroup of a process.  In a container setup where
 a set of cgroups and namespaces are intended to isolate processes the
 "/proc/$PID/cgroup" file may leak potential system level information
-to the isolated processes.  For Example:
+to the isolated processes.  For Example::
 
   # cat /proc/self/cgroup
   0::/batchjobs/container_id1
@@ -1316,14 +1323,14 @@ to the isolated processes.  For Example:
 The path '/batchjobs/container_id1' can be considered as system-data
 and undesirable to expose to the isolated processes.  cgroup namespace
 can be used to restrict visibility of this path.  For example, before
-creating a cgroup namespace, one would see:
+creating a cgroup namespace, one would see::
 
   # ls -l /proc/self/ns/cgroup
   lrwxrwxrwx 1 root root 0 2014-07-15 10:37 /proc/self/ns/cgroup -> cgroup:[4026531835]
   # cat /proc/self/cgroup
   0::/batchjobs/container_id1
 
-After unsharing a new namespace, the view changes.
+After unsharing a new namespace, the view changes::
 
   # ls -l /proc/self/ns/cgroup
   lrwxrwxrwx 1 root root 0 2014-07-15 10:35 /proc/self/ns/cgroup -> cgroup:[4026532183]
@@ -1341,7 +1348,8 @@ namespace is destroyed.  The cgroupns root and the actual cgroups
 remain.
 
 
-6-2. The Root and Views
+The Root and Views
+------------------
 
 The 'cgroupns root' for a cgroup namespace is the cgroup in which the
 process calling unshare(2) is running.  For example, if a process in
@@ -1350,7 +1358,7 @@ process calling unshare(2) is running.  For example, if a process in
 init_cgroup_ns, this is the real root ('/') cgroup.
 
 The cgroupns root cgroup does not change even if the namespace creator
-process later moves to a different cgroup.
+process later moves to a different cgroup::
 
   # ~/unshare -c # unshare cgroupns in some cgroup
   # cat /proc/self/cgroup
@@ -1364,7 +1372,7 @@ Each process gets its namespace-specific view of "/proc/$PID/cgroup"
 
 Processes running inside the cgroup namespace will be able to see
 cgroup paths (in /proc/self/cgroup) only inside their root cgroup.
-From within an unshared cgroupns:
+From within an unshared cgroupns::
 
   # sleep 100000 &
   [1] 7353
@@ -1373,7 +1381,7 @@ From within an unshared cgroupns:
   0::/sub_cgrp_1
 
 From the initial cgroup namespace, the real cgroup path will be
-visible:
+visible::
 
   $ cat /proc/7353/cgroup
   0::/batchjobs/container_id1/sub_cgrp_1
@@ -1381,7 +1389,7 @@ visible:
 From a sibling cgroup namespace (that is, a namespace rooted at a
 different cgroup), the cgroup path relative to its own cgroup
 namespace root will be shown.  For instance, if PID 7353's cgroup
-namespace root is at '/batchjobs/container_id2', then it will see
+namespace root is at '/batchjobs/container_id2', then it will see::
 
   # cat /proc/7353/cgroup
   0::/../container_id2/sub_cgrp_1
@@ -1390,13 +1398,14 @@ Note that the relative path always starts with '/' to indicate that
 its relative to the cgroup namespace root of the caller.
 
 
-6-3. Migration and setns(2)
+Migration and setns(2)
+----------------------
 
 Processes inside a cgroup namespace can move into and out of the
 namespace root if they have proper access to external cgroups.  For
 example, from inside a namespace with cgroupns root at
 /batchjobs/container_id1, and assuming that the global hierarchy is
-still accessible inside cgroupns:
+still accessible inside cgroupns::
 
   # cat /proc/7353/cgroup
   0::/sub_cgrp_1
@@ -1418,10 +1427,11 @@ namespace.  It is expected that the someone moves the attaching
 process under the target cgroup namespace root.
 
 
-6-4. Interaction with Other Namespaces
+Interaction with Other Namespaces
+---------------------------------
 
 Namespace specific cgroup hierarchy can be mounted by a process
-running inside a non-init cgroup namespace.
+running inside a non-init cgroup namespace::
 
   # mount -t cgroup2 none $MOUNT_POINT
 
@@ -1434,27 +1444,27 @@ the view of cgroup hierarchy by namespace-private cgroupfs mount
 provides a properly isolated cgroup view inside the container.
 
 
-P. Information on Kernel Programming
+Information on Kernel Programming
+=================================
 
 This section contains kernel programming information in the areas
 where interacting with cgroup is necessary.  cgroup core and
 controllers are not covered.
 
 
-P-1. Filesystem Support for Writeback
+Filesystem Support for Writeback
+--------------------------------
 
 A filesystem can support cgroup writeback by updating
 address_space_operations->writepage[s]() to annotate bio's using the
 following two functions.
 
   wbc_init_bio(@wbc, @bio)
-
 	Should be called for each bio carrying writeback data and
 	associates the bio with the inode's owner cgroup.  Can be
 	called anytime between bio allocation and submission.
 
   wbc_account_io(@wbc, @page, @bytes)
-
 	Should be called for each data segment being written out.
 	While this function doesn't care exactly when it's called
 	during the writeback session, it's the easiest and most
@@ -1475,7 +1485,8 @@ cases by skipping wbc_init_bio() or using bio_associate_blkcg()
 directly.
 
 
-D. Deprecated v1 Core Features
+Deprecated v1 Core Features
+===========================
 
 - Multiple hierarchies including named ones are not supported.
 
@@ -1489,9 +1500,11 @@ D. Deprecated v1 Core Features
   at the root instead.
 
 
-R. Issues with v1 and Rationales for v2
+Issues with v1 and Rationales for v2
+====================================
 
-R-1. Multiple Hierarchies
+Multiple Hierarchies
+--------------------
 
 cgroup v1 allowed an arbitrary number of hierarchies and each
 hierarchy could host any number of controllers.  While this seemed to
@@ -1543,7 +1556,8 @@ how memory is distributed beyond a certain level while still wanting
 to control how CPU cycles are distributed.
 
 
-R-2. Thread Granularity
+Thread Granularity
+------------------
 
 cgroup v1 allowed threads of a process to belong to different cgroups.
 This didn't make sense for some controllers and those controllers
@@ -1586,7 +1600,8 @@ misbehaving and poorly abstracted interfaces and kernel exposing and
 locked into constructs inadvertently.
 
 
-R-3. Competition Between Inner Nodes and Threads
+Competition Between Inner Nodes and Threads
+-------------------------------------------
 
 cgroup v1 allowed threads to be in any cgroups which created an
 interesting problem where threads belonging to a parent cgroup and its
@@ -1605,7 +1620,7 @@ simply weren't available for threads.
 
 The io controller implicitly created a hidden leaf node for each
 cgroup to host the threads.  The hidden leaf had its own copies of all
-the knobs with "leaf_" prefixed.  While this allowed equivalent
+the knobs with ``leaf_`` prefixed.  While this allowed equivalent
 control over internal threads, it was with serious drawbacks.  It
 always added an extra layer of nesting which wouldn't be necessary
 otherwise, made the interface messy and significantly complicated the
@@ -1626,7 +1641,8 @@ This clearly is a problem which needs to be addressed from cgroup core
 in a uniform way.
 
 
-R-4. Other Interface Issues
+Other Interface Issues
+----------------------
 
 cgroup v1 grew without oversight and developed a large number of
 idiosyncrasies and inconsistencies.  One issue on the cgroup core side
@@ -1654,9 +1670,11 @@ cgroup v2 establishes common conventions where appropriate and updates
 controllers so that they expose minimal and consistent interfaces.
 
 
-R-5. Controller Issues and Remedies
+Controller Issues and Remedies
+------------------------------
 
-R-5-1. Memory
+Memory
+~~~~~~
 
 The original lower boundary, the soft limit, is defined as a limit
 that is per default unset.  As a result, the set of cgroups that
-- 
GitLab


From 4297739f2b5d4693d9b9f9e3dffeecf2ae9f8081 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 12 Jul 2017 10:06:20 -0300
Subject: [PATCH 1626/1958] tee.txt: standardize document format

Each text file under Documentation follows a different format. Some
doesn't even have titles!

Change its representation to follow the adopted standard,
using ReST markups for it to be parseable by Sphinx:

- adjust identation of titles;
- mark ascii artwork as a literal block;
- adjust references.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/tee.txt | 53 +++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/Documentation/tee.txt b/Documentation/tee.txt
index 7185993575964..56ea85ffebf24 100644
--- a/Documentation/tee.txt
+++ b/Documentation/tee.txt
@@ -1,4 +1,7 @@
+=============
 TEE subsystem
+=============
+
 This document describes the TEE subsystem in Linux.
 
 A TEE (Trusted Execution Environment) is a trusted OS running in some
@@ -80,27 +83,27 @@ The GlobalPlatform TEE Client API [5] is implemented on top of the generic
 TEE API.
 
 Picture of the relationship between the different components in the
-OP-TEE architecture.
-
-    User space                  Kernel                   Secure world
-    ~~~~~~~~~~                  ~~~~~~                   ~~~~~~~~~~~~
- +--------+                                             +-------------+
- | Client |                                             | Trusted     |
- +--------+                                             | Application |
-    /\                                                  +-------------+
-    || +----------+                                           /\
-    || |tee-      |                                           ||
-    || |supplicant|                                           \/
-    || +----------+                                     +-------------+
-    \/      /\                                          | TEE Internal|
- +-------+  ||                                          | API         |
- + TEE   |  ||            +--------+--------+           +-------------+
- | Client|  ||            | TEE    | OP-TEE |           | OP-TEE      |
- | API   |  \/            | subsys | driver |           | Trusted OS  |
- +-------+----------------+----+-------+----+-----------+-------------+
- |      Generic TEE API        |       |     OP-TEE MSG               |
- |      IOCTL (TEE_IOC_*)      |       |     SMCCC (OPTEE_SMC_CALL_*) |
- +-----------------------------+       +------------------------------+
+OP-TEE architecture::
+
+      User space                  Kernel                   Secure world
+      ~~~~~~~~~~                  ~~~~~~                   ~~~~~~~~~~~~
+   +--------+                                             +-------------+
+   | Client |                                             | Trusted     |
+   +--------+                                             | Application |
+      /\                                                  +-------------+
+      || +----------+                                           /\
+      || |tee-      |                                           ||
+      || |supplicant|                                           \/
+      || +----------+                                     +-------------+
+      \/      /\                                          | TEE Internal|
+   +-------+  ||                                          | API         |
+   + TEE   |  ||            +--------+--------+           +-------------+
+   | Client|  ||            | TEE    | OP-TEE |           | OP-TEE      |
+   | API   |  \/            | subsys | driver |           | Trusted OS  |
+   +-------+----------------+----+-------+----+-----------+-------------+
+   |      Generic TEE API        |       |     OP-TEE MSG               |
+   |      IOCTL (TEE_IOC_*)      |       |     SMCCC (OPTEE_SMC_CALL_*) |
+   +-----------------------------+       +------------------------------+
 
 RPC (Remote Procedure Call) are requests from secure world to kernel driver
 or tee-supplicant. An RPC is identified by a special range of SMCCC return
@@ -109,10 +112,16 @@ kernel are handled by the kernel driver. Other RPC messages will be forwarded to
 tee-supplicant without further involvement of the driver, except switching
 shared memory buffer representation.
 
-References:
+References
+==========
+
 [1] https://github.com/OP-TEE/optee_os
+
 [2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+
 [3] drivers/tee/optee/optee_smc.h
+
 [4] drivers/tee/optee/optee_msg.h
+
 [5] http://www.globalplatform.org/specificationsdevice.asp look for
     "TEE Client API Specification v1.0" and click download.
-- 
GitLab


From 43e5f7e1fa66531777c49791014c3124ea9208d8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 12 Jul 2017 10:03:09 -0300
Subject: [PATCH 1627/1958] docs: kprobes.txt: Fix whitespacing

The notes at the end of this file start with a blank space,
instead of a blank line, violating ReST format.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/kprobes.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index bb5ff6d04bac2..2335715bf471a 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -819,7 +819,8 @@ a knob to globally and forcibly turn jump optimization (see section
 is allowed (ON). If you echo "0" to this file or set
 "debug.kprobes_optimization" to 0 via sysctl, all optimized probes will be
 unoptimized, and any new probes registered after that will not be optimized.
- Note that this knob *changes* the optimized state. This means that optimized
+
+Note that this knob *changes* the optimized state. This means that optimized
 probes (marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be
 removed). If the knob is turned on, they will be optimized again.
 
-- 
GitLab


From 1ed134e6526b1b513a14fba938f6d96aa1c7f3dd Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 22 Jun 2017 22:25:26 +0200
Subject: [PATCH 1628/1958] drm/vc4: Fix VBLANK handling in crtc->enable() path

When we are enabling a CRTC, drm_crtc_vblank_get() is called before
drm_crtc_vblank_on(), which is not supposed to happen (hence the
WARN_ON() in the code). To solve the problem, we delay the 'update
display list' operation after the CRTC is actually enabled.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/1498163126-26678-1-git-send-email-boris.brezillon@free-electrons.com
Fixes: 34c8ea400ff6 ("drm/vc4: Mimic drm_atomic_helper_commit() behavior")
---
 drivers/gpu/drm/vc4/vc4_crtc.c | 66 ++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 403bbd5f99a9f..a12cc7ea99b60 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -520,6 +520,34 @@ static void vc4_crtc_disable(struct drm_crtc *crtc)
 		     SCALER_DISPSTATX_EMPTY);
 }
 
+static void vc4_crtc_update_dlist(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+
+	if (crtc->state->event) {
+		unsigned long flags;
+
+		crtc->state->event->pipe = drm_crtc_index(crtc);
+
+		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+		spin_lock_irqsave(&dev->event_lock, flags);
+		vc4_crtc->event = crtc->state->event;
+		crtc->state->event = NULL;
+
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	} else {
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+	}
+}
+
 static void vc4_crtc_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -530,6 +558,12 @@ static void vc4_crtc_enable(struct drm_crtc *crtc)
 
 	require_hvs_enabled(dev);
 
+	/* Enable vblank irq handling before crtc is started otherwise
+	 * drm_crtc_get_vblank() fails in vc4_crtc_update_dlist().
+	 */
+	drm_crtc_vblank_on(crtc);
+	vc4_crtc_update_dlist(crtc);
+
 	/* Turn on the scaler, which will wait for vstart to start
 	 * compositing.
 	 */
@@ -541,9 +575,6 @@ static void vc4_crtc_enable(struct drm_crtc *crtc)
 	/* Turn on the pixel valve, which will emit the vstart signal. */
 	CRTC_WRITE(PV_V_CONTROL,
 		   CRTC_READ(PV_V_CONTROL) | PV_VCONTROL_VIDEN);
-
-	/* Enable vblank irq handling after crtc is started. */
-	drm_crtc_vblank_on(crtc);
 }
 
 static bool vc4_crtc_mode_fixup(struct drm_crtc *crtc,
@@ -598,7 +629,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 {
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
 	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
 	struct drm_plane *plane;
 	bool debug_dump_regs = false;
@@ -620,25 +650,15 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 
 	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
 
-	if (crtc->state->event) {
-		unsigned long flags;
-
-		crtc->state->event->pipe = drm_crtc_index(crtc);
-
-		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
-
-		spin_lock_irqsave(&dev->event_lock, flags);
-		vc4_crtc->event = crtc->state->event;
-		crtc->state->event = NULL;
-
-		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-			  vc4_state->mm.start);
-
-		spin_unlock_irqrestore(&dev->event_lock, flags);
-	} else {
-		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-			  vc4_state->mm.start);
-	}
+	/* Only update DISPLIST if the CRTC was already running and is not
+	 * being disabled.
+	 * vc4_crtc_enable() takes care of updating the dlist just after
+	 * re-enabling VBLANK interrupts and before enabling the engine.
+	 * If the CRTC is being disabled, there's no point in updating this
+	 * information.
+	 */
+	if (crtc->state->active && old_state->active)
+		vc4_crtc_update_dlist(crtc);
 
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
-- 
GitLab


From e996bfd4287eb7df2354764d47677af08f5867fa Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 17 May 2017 00:20:26 +0200
Subject: [PATCH 1629/1958] ubifs: Unexport ubifs_inode_slab

This SLAB is only being used in super.c, there is no need to expose
it into the global namespace.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/super.c | 2 +-
 fs/ubifs/ubifs.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index cf4cc99b75b55..ce684f367f9c1 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -45,7 +45,7 @@
 #define UBIFS_KMALLOC_OK (128*1024)
 
 /* Slab cache for UBIFS inodes */
-struct kmem_cache *ubifs_inode_slab;
+static struct kmem_cache *ubifs_inode_slab;
 
 /* UBIFS TNC shrinker description */
 static struct shrinker ubifs_shrinker_info = {
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 298b4d89eee98..aa530e82e9a8a 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1451,7 +1451,6 @@ struct ubifs_info {
 extern struct list_head ubifs_infos;
 extern spinlock_t ubifs_infos_lock;
 extern atomic_long_t ubifs_clean_zn_cnt;
-extern struct kmem_cache *ubifs_inode_slab;
 extern const struct super_operations ubifs_super_operations;
 extern const struct address_space_operations ubifs_file_address_operations;
 extern const struct file_operations ubifs_file_operations;
-- 
GitLab


From 272eda8298dc82eb411ece82bbb2c62911087b24 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 17 May 2017 00:20:27 +0200
Subject: [PATCH 1630/1958] ubifs: Correctly evict xattr inodes

UBIFS handles extended attributes just like files, as consequence of
that, they also have inodes.
Therefore UBIFS does all the inode machinery also for xattrs. Since new
inodes have i_nlink of 1, a file or xattr inode will be evicted
if i_nlink goes down to 0 after an unlink. UBIFS assumes this model also
for xattrs, which is not correct.
One can create a file "foo" with xattr "user.test". By reading
"user.test" an inode will be created, and by deleting "user.test" it
will get evicted later. The assumption breaks if the file "foo", which
hosts the xattrs, will be removed. VFS nor UBIFS does not remove each
xattr via ubifs_xattr_remove(), it just removes the host inode from
the TNC and all underlying xattr nodes too and the inode will remain
in the cache and wastes memory.

To solve this problem, remove xattr inodes from the VFS inode cache in
ubifs_xattr_remove() to make sure that they get evicted.

Fixes: 1e51764a3c2ac05a ("UBIFS: add new flash file system")
Cc: <stable@vger.kernel.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/tnc.c   |  2 ++
 fs/ubifs/ubifs.h |  1 +
 fs/ubifs/xattr.c | 22 ++++++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 709aa098dd46e..96374a39ffbae 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2802,6 +2802,8 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
 		dbg_tnc("xent '%s', ino %lu", xent->name,
 			(unsigned long)xattr_inum);
 
+		ubifs_evict_xattr_inode(c, xattr_inum);
+
 		fname_name(&nm) = xent->name;
 		fname_len(&nm) = le16_to_cpu(xent->nlen);
 		err = ubifs_tnc_remove_nm(c, &key1, &nm);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index aa530e82e9a8a..998fb6eea5ac0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1756,6 +1756,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
 		    size_t size, int flags);
 ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
 			size_t size);
+void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
 
 #ifdef CONFIG_UBIFS_FS_SECURITY
 extern int ubifs_init_security(struct inode *dentry, struct inode *inode,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 6c9e62c2ef559..98f11257d66c0 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -513,6 +513,28 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
 	return err;
 }
 
+/**
+ * ubifs_evict_xattr_inode - Evict an xattr inode.
+ * @c: UBIFS file-system description object
+ * @xattr_inum: xattr inode number
+ *
+ * When an inode that hosts xattrs is being removed we have to make sure
+ * that cached inodes of the xattrs also get removed from the inode cache
+ * otherwise we'd waste memory. This function looks up an inode from the
+ * inode cache and clears the link counter such that iput() will evict
+ * the inode.
+ */
+void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum)
+{
+	struct inode *inode;
+
+	inode = ilookup(c->vfs_sb, xattr_inum);
+	if (inode) {
+		clear_nlink(inode);
+		iput(inode);
+	}
+}
+
 static int ubifs_xattr_remove(struct inode *host, const char *name)
 {
 	struct inode *inode;
-- 
GitLab


From 4d35ca4f774c29ec9ed676fd5112bfc83ef9853e Mon Sep 17 00:00:00 2001
From: Hyunchul Lee <cheol.lee@lge.com>
Date: Wed, 17 May 2017 08:57:18 +0900
Subject: [PATCH 1631/1958] ubifs: Fix inode data budget in ubifs_mknod

Assign inode data budget to budget request correctly.

Signed-off-by: Hyunchul Lee <cheol.lee@lge.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 566079d9b402c..4e4bf8f6320fc 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1061,7 +1061,6 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
 	int sz_change;
 	int err, devlen = 0;
 	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
-					.new_ino_d = ALIGN(devlen, 8),
 					.dirtied_ino = 1 };
 	struct fscrypt_name nm;
 
@@ -1079,6 +1078,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
 		devlen = ubifs_encode_dev(dev, rdev);
 	}
 
+	req.new_ino_d = ALIGN(devlen, 8);
 	err = ubifs_budget_space(c, &req);
 	if (err) {
 		kfree(dev);
-- 
GitLab


From bb50c63244687706e11c9b636a5ad3090cd783ac Mon Sep 17 00:00:00 2001
From: Hyunchul Lee <cheol.lee@lge.com>
Date: Wed, 17 May 2017 08:58:02 +0900
Subject: [PATCH 1632/1958] ubifs: Fix memory leak in RENAME_WHITEOUT error
 path in do_rename

in RENAME_WHITEOUT error path, fscrypt_name should be freed.

Signed-off-by: Hyunchul Lee <cheol.lee@lge.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/dir.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 4e4bf8f6320fc..996e4215fd289 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1396,17 +1396,14 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 		dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
 		if (!dev) {
-			ubifs_release_budget(c, &req);
-			ubifs_release_budget(c, &ino_req);
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto out_release;
 		}
 
 		err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout);
 		if (err) {
-			ubifs_release_budget(c, &req);
-			ubifs_release_budget(c, &ino_req);
 			kfree(dev);
-			return err;
+			goto out_release;
 		}
 
 		whiteout->i_state |= I_LINKABLE;
@@ -1494,12 +1491,10 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 		err = ubifs_budget_space(c, &wht_req);
 		if (err) {
-			ubifs_release_budget(c, &req);
-			ubifs_release_budget(c, &ino_req);
 			kfree(whiteout_ui->data);
 			whiteout_ui->data_len = 0;
 			iput(whiteout);
-			return err;
+			goto out_release;
 		}
 
 		inc_nlink(whiteout);
@@ -1554,6 +1549,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 		iput(whiteout);
 	}
 	unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+out_release:
 	ubifs_release_budget(c, &ino_req);
 	ubifs_release_budget(c, &req);
 	fscrypt_free_filename(&old_nm);
-- 
GitLab


From f34e87f58dabc31eb69f61cf4a79e951d4176743 Mon Sep 17 00:00:00 2001
From: David Gstir <david@sigma-star.at>
Date: Wed, 17 May 2017 13:36:16 +0200
Subject: [PATCH 1633/1958] ubifs: Don't encrypt special files on creation

When a new inode is created, we check if the containing folder has a encryption
policy set and inherit that. This should however only be done for regular
files, links and subdirectories. Not for sockes fifos etc.

Fixes: d475a507457b ("ubifs: Add skeleton for fscrypto")
Cc: stable@vger.kernel.org
Signed-off-by: David Gstir <david@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/dir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 996e4215fd289..90a3f01c55d1d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -143,6 +143,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
 	case S_IFBLK:
 	case S_IFCHR:
 		inode->i_op  = &ubifs_file_inode_operations;
+		encrypted = false;
 		break;
 	default:
 		BUG();
-- 
GitLab


From 59a74990f85bb372c9117e40c6cd27693b6df670 Mon Sep 17 00:00:00 2001
From: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Date: Wed, 17 May 2017 10:36:45 +0200
Subject: [PATCH 1634/1958] ubifs: Fix data node size for truncating
 uncompressed nodes

Currently, the function truncate_data_node only updates the
destination data node size if compression is used. For
uncompressed nodes, the old length is incorrectly retained.

This patch makes sure that the length is correctly set when
compression is disabled.

Fixes: 7799953b34d1 ("ubifs: Implement encrypt/decrypt for all IO")
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/journal.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 294519b988740..f3b620cbdda4c 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1298,7 +1298,9 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in
 			goto out;
 	}
 
-	if (compr_type != UBIFS_COMPR_NONE) {
+	if (compr_type == UBIFS_COMPR_NONE) {
+		out_len = *new_len;
+	} else {
 		err = ubifs_decompress(c, &dn->data, dlen, buf, &out_len, compr_type);
 		if (err)
 			goto out;
-- 
GitLab


From 781f675e2d7ec120e8c0803f88d7bf00fe3f0703 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 17 May 2017 10:36:46 +0200
Subject: [PATCH 1635/1958] ubifs: Fix unlink code wrt. double hash lookups

When removing an encrypted file with a long name and without having
the key we have to be able to locate and remove the directory entry
via a double hash. This corner case was simply forgotten.

Fixes: 528e3d178f25 ("ubifs: Add full hash lookup support")
Reported-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/journal.c |  10 +++-
 fs/ubifs/tnc.c     | 129 +++++++++++++++++++++++++++++++++++++--------
 fs/ubifs/ubifs.h   |   2 +
 3 files changed, 117 insertions(+), 24 deletions(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f3b620cbdda4c..7aef413ea2a94 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -585,7 +585,10 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 
 	if (!xent) {
 		dent->ch.node_type = UBIFS_DENT_NODE;
-		dent_key_init(c, &dent_key, dir->i_ino, nm);
+		if (nm->hash)
+			dent_key_init_hash(c, &dent_key, dir->i_ino, nm->hash);
+		else
+			dent_key_init(c, &dent_key, dir->i_ino, nm);
 	} else {
 		dent->ch.node_type = UBIFS_XENT_NODE;
 		xent_key_init(c, &dent_key, dir->i_ino, nm);
@@ -629,7 +632,10 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 	kfree(dent);
 
 	if (deletion) {
-		err = ubifs_tnc_remove_nm(c, &dent_key, nm);
+		if (nm->hash)
+			err = ubifs_tnc_remove_dh(c, &dent_key, nm->minor_hash);
+		else
+			err = ubifs_tnc_remove_nm(c, &dent_key, nm);
 		if (err)
 			goto out_ro;
 		err = ubifs_add_dirt(c, lnum, dlen);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 96374a39ffbae..79d1f18db4365 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1880,48 +1880,65 @@ int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
 	return do_lookup_nm(c, key, node, nm);
 }
 
-static int do_lookup_dh(struct ubifs_info *c, const union ubifs_key *key,
-			struct ubifs_dent_node *dent, uint32_t cookie)
+static int search_dh_cookie(struct ubifs_info *c, const union ubifs_key *key,
+			    struct ubifs_dent_node *dent, uint32_t cookie,
+			    struct ubifs_znode **zn, int *n)
 {
-	int n, err, type = key_type(c, key);
-	struct ubifs_znode *znode;
+	int err;
+	struct ubifs_znode *znode = *zn;
 	struct ubifs_zbranch *zbr;
-	union ubifs_key *dkey, start_key;
-
-	ubifs_assert(is_hash_key(c, key));
-
-	lowest_dent_key(c, &start_key, key_inum(c, key));
-
-	mutex_lock(&c->tnc_mutex);
-	err = ubifs_lookup_level0(c, &start_key, &znode, &n);
-	if (unlikely(err < 0))
-		goto out_unlock;
+	union ubifs_key *dkey;
 
 	for (;;) {
 		if (!err) {
-			err = tnc_next(c, &znode, &n);
+			err = tnc_next(c, &znode, n);
 			if (err)
-				goto out_unlock;
+				goto out;
 		}
 
-		zbr = &znode->zbranch[n];
+		zbr = &znode->zbranch[*n];
 		dkey = &zbr->key;
 
 		if (key_inum(c, dkey) != key_inum(c, key) ||
-		    key_type(c, dkey) != type) {
+		    key_type(c, dkey) != key_type(c, key)) {
 			err = -ENOENT;
-			goto out_unlock;
+			goto out;
 		}
 
 		err = tnc_read_hashed_node(c, zbr, dent);
 		if (err)
-			goto out_unlock;
+			goto out;
 
 		if (key_hash(c, key) == key_hash(c, dkey) &&
-		    le32_to_cpu(dent->cookie) == cookie)
-			goto out_unlock;
+		    le32_to_cpu(dent->cookie) == cookie) {
+			*zn = znode;
+			goto out;
+		}
 	}
 
+out:
+
+	return err;
+}
+
+static int do_lookup_dh(struct ubifs_info *c, const union ubifs_key *key,
+			struct ubifs_dent_node *dent, uint32_t cookie)
+{
+	int n, err;
+	struct ubifs_znode *znode;
+	union ubifs_key start_key;
+
+	ubifs_assert(is_hash_key(c, key));
+
+	lowest_dent_key(c, &start_key, key_inum(c, key));
+
+	mutex_lock(&c->tnc_mutex);
+	err = ubifs_lookup_level0(c, &start_key, &znode, &n);
+	if (unlikely(err < 0))
+		goto out_unlock;
+
+	err = search_dh_cookie(c, key, dent, cookie, &znode, &n);
+
 out_unlock:
 	mutex_unlock(&c->tnc_mutex);
 	return err;
@@ -2662,6 +2679,74 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
 	return err;
 }
 
+/**
+ * ubifs_tnc_remove_dh - remove an index entry for a "double hashed" node.
+ * @c: UBIFS file-system description object
+ * @key: key of node
+ * @cookie: node cookie for collision resolution
+ *
+ * Returns %0 on success or negative error code on failure.
+ */
+int ubifs_tnc_remove_dh(struct ubifs_info *c, const union ubifs_key *key,
+			uint32_t cookie)
+{
+	int n, err;
+	struct ubifs_znode *znode;
+	struct ubifs_dent_node *dent;
+	struct ubifs_zbranch *zbr;
+
+	if (!c->double_hash)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&c->tnc_mutex);
+	err = lookup_level0_dirty(c, key, &znode, &n);
+	if (err <= 0)
+		goto out_unlock;
+
+	zbr = &znode->zbranch[n];
+	dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
+	if (!dent) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	err = tnc_read_hashed_node(c, zbr, dent);
+	if (err)
+		goto out_free;
+
+	/* If the cookie does not match, we're facing a hash collision. */
+	if (le32_to_cpu(dent->cookie) != cookie) {
+		union ubifs_key start_key;
+
+		lowest_dent_key(c, &start_key, key_inum(c, key));
+
+		err = ubifs_lookup_level0(c, &start_key, &znode, &n);
+		if (unlikely(err < 0))
+			goto out_free;
+
+		err = search_dh_cookie(c, key, dent, cookie, &znode, &n);
+		if (err)
+			goto out_free;
+	}
+
+	if (znode->cnext || !ubifs_zn_dirty(znode)) {
+		znode = dirty_cow_bottom_up(c, znode);
+		if (IS_ERR(znode)) {
+			err = PTR_ERR(znode);
+			goto out_free;
+		}
+	}
+	err = tnc_delete(c, znode, n);
+
+out_free:
+	kfree(dent);
+out_unlock:
+	if (!err)
+		err = dbg_check_tnc(c, 0);
+	mutex_unlock(&c->tnc_mutex);
+	return err;
+}
+
 /**
  * key_in_range - determine if a key falls within a range of keys.
  * @c: UBIFS file-system description object
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 998fb6eea5ac0..d933edade14a9 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1589,6 +1589,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
 int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key);
 int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
 			const struct fscrypt_name *nm);
+int ubifs_tnc_remove_dh(struct ubifs_info *c, const union ubifs_key *key,
+			uint32_t cookie);
 int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
 			   union ubifs_key *to_key);
 int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum);
-- 
GitLab


From 8b2900c01716077a42dbb44c691849ab694fa062 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 17 May 2017 10:36:47 +0200
Subject: [PATCH 1636/1958] ubifs: Add assert to dent_key_init()

...to make sure that we don't use it for double hashed lookups
instead of dent_key_init_hash().

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/key.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 7547be512db26..b1f7c0caa3acb 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -162,6 +162,7 @@ static inline void dent_key_init(const struct ubifs_info *c,
 	uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm));
 
 	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+	ubifs_assert(!nm->hash && !nm->minor_hash);
 	key->u32[0] = inum;
 	key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
 }
-- 
GitLab


From 35ee314c84cd4b3e3689292f82c0af2af246c343 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 17 May 2017 10:36:48 +0200
Subject: [PATCH 1637/1958] ubifs: Massage debug prints wrt. fscrypt

If file names are encrypted we can no longer print them.
That's why we have to change these prints or remove them completely.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/journal.c | 10 ----------
 fs/ubifs/tnc.c     |  9 ++++-----
 2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 7aef413ea2a94..419c79ff377e1 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -549,8 +549,6 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 	struct ubifs_ino_node *ino;
 	union ubifs_key dent_key, ino_key;
 
-	//dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu",
-	//	inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino);
 	ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
 
 	dlen = UBIFS_DENT_NODE_SZ + fname_len(nm) + 1;
@@ -956,9 +954,6 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	int twoparents = (fst_dir != snd_dir);
 	void *p;
 
-	//dbg_jnl("dent '%pd' in dir ino %lu between dent '%pd' in dir ino %lu",
-	//	fst_dentry, fst_dir->i_ino, snd_dentry, snd_dir->i_ino);
-
 	ubifs_assert(ubifs_inode(fst_dir)->data_len == 0);
 	ubifs_assert(ubifs_inode(snd_dir)->data_len == 0);
 	ubifs_assert(mutex_is_locked(&ubifs_inode(fst_dir)->ui_mutex));
@@ -1100,8 +1095,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	int move = (old_dir != new_dir);
 	struct ubifs_inode *uninitialized_var(new_ui);
 
-	//dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu",
-	//	old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino);
 	ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
 	ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
 	ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
@@ -1493,9 +1486,6 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 	int sync = IS_DIRSYNC(host);
 	struct ubifs_inode *host_ui = ubifs_inode(host);
 
-	//dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d",
-	//	host->i_ino, inode->i_ino, nm->name,
-	//	ubifs_inode(inode)->data_len);
 	ubifs_assert(inode->i_nlink == 0);
 	ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
 
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 79d1f18db4365..0a213dcba2a1f 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1812,7 +1812,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
 	int found, n, err;
 	struct ubifs_znode *znode;
 
-	//dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name);
+	dbg_tnck(key, "key ");
 	mutex_lock(&c->tnc_mutex);
 	found = ubifs_lookup_level0(c, key, &znode, &n);
 	if (!found) {
@@ -2410,8 +2410,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
 	struct ubifs_znode *znode;
 
 	mutex_lock(&c->tnc_mutex);
-	//dbg_tnck(key, "LEB %d:%d, name '%.*s', key ",
-	//	 lnum, offs, nm->len, nm->name);
+	dbg_tnck(key, "LEB %d:%d, key ", lnum, offs);
 	found = lookup_level0_dirty(c, key, &znode, &n);
 	if (found < 0) {
 		err = found;
@@ -2645,7 +2644,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
 	struct ubifs_znode *znode;
 
 	mutex_lock(&c->tnc_mutex);
-	//dbg_tnck(key, "%.*s, key ", nm->len, nm->name);
+	dbg_tnck(key, "key ");
 	err = lookup_level0_dirty(c, key, &znode, &n);
 	if (err < 0)
 		goto out_unlock;
@@ -2950,7 +2949,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
 	struct ubifs_zbranch *zbr;
 	union ubifs_key *dkey;
 
-	//dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)");
+	dbg_tnck(key, "key ");
 	ubifs_assert(is_hash_key(c, key));
 
 	mutex_lock(&c->tnc_mutex);
-- 
GitLab


From d2eb85226fcd981767d1829b09ae1070cf695b98 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 17 May 2017 10:36:49 +0200
Subject: [PATCH 1638/1958] ubifs: Remove dead code from ubifs_get_link()

We check the length already, no need to check later
again for an empty string.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/file.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index c1d352842ee2e..c58efc1470f3f 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1697,12 +1697,6 @@ static const char *ubifs_get_link(struct dentry *dentry,
 
 	pstr.name[pstr.len] = '\0';
 
-	// XXX this probably won't happen anymore...
-	if (pstr.name[0] == '\0') {
-		fscrypt_fname_free_buffer(&pstr);
-		return ERR_PTR(-ENOENT);
-	}
-
 	set_delayed_call(done, kfree_link, pstr.name);
 	return pstr.name;
 }
-- 
GitLab


From a02a6eba9999221b27c691330e93bb54e4b45bb0 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sun, 21 May 2017 00:16:26 +0200
Subject: [PATCH 1639/1958] ubifs: Wire-up statx() support

statx() can report what flags a file has, expose flags that UBIFS
supports. Especially STATX_ATTR_COMPRESSED and STATX_ATTR_ENCRYPTED
can be interesting for userspace.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/dir.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 90a3f01c55d1d..417fe0b29f23e 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1644,6 +1644,21 @@ int ubifs_getattr(const struct path *path, struct kstat *stat,
 	struct ubifs_inode *ui = ubifs_inode(inode);
 
 	mutex_lock(&ui->ui_mutex);
+
+	if (ui->flags & UBIFS_APPEND_FL)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (ui->flags & UBIFS_COMPR_FL)
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+	if (ui->flags & UBIFS_CRYPT_FL)
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+	if (ui->flags & UBIFS_IMMUTABLE_FL)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+
+	stat->attributes_mask |= (STATX_ATTR_APPEND |
+				STATX_ATTR_COMPRESSED |
+				STATX_ATTR_ENCRYPTED |
+				STATX_ATTR_IMMUTABLE);
+
 	generic_fillattr(inode, stat);
 	stat->blksize = UBIFS_BLOCK_SIZE;
 	stat->size = ui->ui_size;
-- 
GitLab


From 319c10427498620d220931189dcdba7d4244feba Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Wed, 31 May 2017 11:40:27 +0200
Subject: [PATCH 1640/1958] ubifs: allow userspace to map mounts to volumes

There currently appears to be no way for userspace to find out the
underlying volume number for a mounted ubifs file system, since ubifs
uses anonymous block devices.  The volume name is present in
/proc/mounts but UBI volumes can be renamed after the volume has been
mounted.

To remedy this, show the UBI number and UBI volume number as part of the
options visible under /proc/mounts.

Also, accept and ignore the ubi= vol= options if they are used mounting
(patch from Richard Weinberger).

 # mount -t ubifs ubi:baz x
 # mount
 ubi:baz on /root/x type ubifs (rw,relatime,ubi=0,vol=2)
 # ubirename /dev/ubi0 baz bazz
 # mount
 ubi:baz on /root/x type ubifs (rw,relatime,ubi=0,vol=2)
 # ubinfo -d 0 -n 2
 Volume ID:   2 (on ubi0)
 Type:        dynamic
 Alignment:   1
 Size:        67 LEBs (1063424 bytes, 1.0 MiB)
 State:       OK
 Name:        bazz
 Character device major/minor: 254:3

Signed-off-by: Rabin Vincent <rabinv@axis.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/super.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ce684f367f9c1..845c5d7af9091 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -446,6 +446,8 @@ static int ubifs_show_options(struct seq_file *s, struct dentry *root)
 			   ubifs_compr_name(c->mount_opts.compr_type));
 	}
 
+	seq_printf(s, ",ubi=%d,vol=%d", c->vi.ubi_num, c->vi.vol_id);
+
 	return 0;
 }
 
@@ -931,6 +933,7 @@ enum {
 	Opt_chk_data_crc,
 	Opt_no_chk_data_crc,
 	Opt_override_compr,
+	Opt_ignore,
 	Opt_err,
 };
 
@@ -942,6 +945,8 @@ static const match_table_t tokens = {
 	{Opt_chk_data_crc, "chk_data_crc"},
 	{Opt_no_chk_data_crc, "no_chk_data_crc"},
 	{Opt_override_compr, "compr=%s"},
+	{Opt_ignore, "ubi=%s"},
+	{Opt_ignore, "vol=%s"},
 	{Opt_err, NULL},
 };
 
@@ -1042,6 +1047,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 			c->default_compr = c->mount_opts.compr_type;
 			break;
 		}
+		case Opt_ignore:
+			break;
 		default:
 		{
 			unsigned long flag;
-- 
GitLab


From df71b09145b66e4cf6f7a1ec69d181bf2ccb0efd Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 7 Jun 2017 23:33:35 +0200
Subject: [PATCH 1641/1958] ubifs: Fail commit if TNC is obviously inconsistent

A reference to LEB 0 or with length 0 in the TNC
is never correct and could be caused by a memory corruption.
Don't write such a bad index node to the MTD.
Instead fail the commit which will turn UBIFS into read-only mode.

This is less painful than having the bad reference on the MTD
from where UBFIS has no chance to recover.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/tnc_commit.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 51157da3f76ed..aa31f60220ef4 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -57,6 +57,8 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
 			ubifs_dump_znode(c, znode);
 			if (zbr->znode)
 				ubifs_dump_znode(c, zbr->znode);
+
+			return -EINVAL;
 		}
 	}
 	ubifs_prepare_node(c, idx, len, 0);
@@ -859,6 +861,8 @@ static int write_index(struct ubifs_info *c)
 				ubifs_dump_znode(c, znode);
 				if (zbr->znode)
 					ubifs_dump_znode(c, zbr->znode);
+
+				return -EINVAL;
 			}
 		}
 		len = ubifs_idx_node_sz(c, znode->child_cnt);
-- 
GitLab


From 07d41c3cf254a58b7ab69beb1f08a85ffce02626 Mon Sep 17 00:00:00 2001
From: "karam.lee" <karam.lee@lge.com>
Date: Mon, 12 Jun 2017 10:46:31 +0900
Subject: [PATCH 1642/1958] ubifs: Fix oops when remounting with no_bulk_read.

When remounting with the no_bulk_read option,
there is a problem accessing the "bulk_read buffer(bu.buf)"
which has already been freed.

If the bulk_read option is enabled,
ubifs_tnc_bulk_read uses the pre-allocated bu.buf.

While bu.buf is being used by ubifs_tnc_bulk_read,
remounting with no_bulk_read frees bu.buf.

So I added code to check the use of "bu.buf" to avoid this situation.

------
I tested as follows(kernel v3.18) :

Use the script to repeat "no_bulk_read <-> bulk_read"
	remount.sh
	#!/bin/sh
	while true do;
		mount -o remount,no_bulk_read ${MOUNT_POINT};
		sleep 1;
		mount -o remount,bulk_read ${MOUNT_POINT};
		sleep 1;
	done

Perform read operation
	cat ${MOUNT_POINT}/* > /dev/null

The problem is reproduced immediately.

[  234.256845][kernel.0]Internal error: Oops: 17 [#1] PREEMPT ARM
[  234.258557][kernel.0]CPU: 0 PID: 2752 Comm: cat Tainted: G        W  O   3.18.31+ #51
[  234.259531][kernel.0]task: cbff8580 ti: cbd66000 task.ti: cbd66000
[  234.260306][kernel.0]PC is at validate_data_node+0x10/0x264
[  234.260994][kernel.0]LR is at ubifs_tnc_bulk_read+0x388/0x3ec
[  234.261712][kernel.0]pc : [<c01d98fc>]    lr : [<c01dc300>]    psr: 80000013
[  234.261712][kernel.0]sp : cbd67ba0  ip : 00000001  fp : 00000000
[  234.263337][kernel.0]r10: cd3e0260  r9 : c0df2008  r8 : 00000000
[  234.264087][kernel.0]r7 : cd3e0000  r6 : 00000000  r5 : cd3e0278  r4 : cd3e0000
[  234.264999][kernel.0]r3 : 00000003  r2 : cd3e0280  r1 : 00000000  r0 : cd3e0000
[  234.265910][kernel.0]Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
[  234.266896][kernel.0]Control: 10c53c7d  Table: 8c40c059  DAC: 00000015
[  234.267711][kernel.0]Process cat (pid: 2752, stack limit = 0xcbd66400)
[  234.268525][kernel.0]Stack: (0xcbd67ba0 to 0xcbd68000)
[  234.269169][kernel.0]7ba0: cd7c3940 c03d8650 0001bfe0 00002ab2 00000000 cbd67c5c cbd67c58 0001bfe0
[  234.270287][kernel.0]7bc0: cd3e0000 00002ab2 0001bfe0 00000014 cbd66000 cd3e0260 00000000 c01d6660
[  234.271403][kernel.0]7be0: 00002ab2 00000000 c82a5800 ffffffff cd3e0298 cd3e0278 00000000 cd3e0000
[  234.272520][kernel.0]7c00: 00000000 00000000 cd3e0260 c01dc300 00002ab2 00000000 60000013 d663affa
[  234.273639][kernel.0]7c20: cd3e01f0 cd3e01f0 60000013 c09397ec 00000000 cd3e0278 00002ab2 00000000
[  234.274755][kernel.0]7c40: cd3e0000 c01dbf48 00000014 00000003 00000160 00000015 00000004 d663affa
[  234.275874][kernel.0]7c60: ccdaa978 cd3e0278 cd3e0000 cf32a5f4 ccdaa820 00000044 cbd66000 cd3e0260
[  234.276992][kernel.0]7c80: 00000003 c01cec84 ccdaa8dc cbd67cc4 cbd67ec0 00000010 ccdaa978 00000000
[  234.278108][kernel.0]7ca0: 0000015e ccdaa8dc 00000000 00000000 cf32a5d0 00000000 0000015f ccdaa8dc
[  234.279228][kernel.0]7cc0: 00000000 c8488300 0009e5a4 0000000e cbd66000 0000015e cf32a5f4 c0113c04
[  234.280346][kernel.0]7ce0: 0000009f 0000003c c00098c4 ffffffff 00001000 00000000 000000ad 00000010
[  234.281463][kernel.0]7d00: 00000038 cd68f580 00000150 c8488360 00000000 cbd67d30 cbd67d70 0000000e
[  234.282579][kernel.0]7d20: 00000010 00000000 c0951874 c0112a9c cf379b60 cf379b84 cf379890 cf3798b4
[  234.283699][kernel.0]7d40: cf379578 cf37959c cf379380 cf3793a4 cf3790b0 cf3790d4 cf378fd8 cf378ffc
[  234.284814][kernel.0]7d60: cf378f48 cf378f6c cf32a5f4 cf32a5d0 00000000 00001000 00000018 00000000
[  234.285932][kernel.0]7d80: 00001000 c0050da4 00000000 00001000 cec04c00 00000000 00001000 c0e11328
[  234.287049][kernel.0]7da0: 00000000 00001000 cbd66000 00000000 00001000 c0012a60 00000000 00001000
[  234.288166][kernel.0]7dc0: cbd67dd4 00000000 00001000 80000013 00000000 00001000 cd68f580 00000000
[  234.289285][kernel.0]7de0: 00001000 c915d600 00000000 00001000 cbd67e48 00000000 00001000 00000018
[  234.290402][kernel.0]7e00: 00000000 00001000 00000000 00000000 00001000 c915d768 c915d768 c0113550
[  234.291522][kernel.0]7e20: cd68f580 cbd67e48 cd68f580 cb6713c0 00010000 000ac5a4 00000000 001fc5a4
[  234.292637][kernel.0]7e40: 00000000 c8488300 cbd67ec0 00eb0000 cd68f580 c0113ee4 00000000 cbd67ec0
[  234.293754][kernel.0]7e60: cd68f580 c8488300 cbd67ec0 00eb0000 cd68f580 00150000 c8488300 00eb0000
[  234.294874][kernel.0]7e80: 00010000 c0112fd0 00000000 cbd67ec0 cd68f580 00150000 00000000 cd68f580
[  234.295991][kernel.0]7ea0: cbd67ef0 c011308c 00000000 00000002 cd768850 00010000 00000000 c01133fc
[  234.297110][kernel.0]7ec0: 00150000 00000000 cbd67f50 00000000 00000000 cb6713c0 01000000 cbd67f48
[  234.298226][kernel.0]7ee0: cbd67f50 c8488300 00000000 c0113204 00010000 01000000 00000000 cb6713c0
[  234.299342][kernel.0]7f00: 00150000 00000000 cbd67f50 00000000 00000000 00000000 00000000 00000000
[  234.300462][kernel.0]7f20: cbd67f50 01000000 01000000 cb6713c0 c8488300 c00ebba8 01000000 00000000
[  234.301577][kernel.0]7f40: c8488300 cb6713c0 00000000 00000000 00000000 00000000 ccdaa820 00000000
[  234.302697][kernel.0]7f60: 00000000 01000000 00000003 00000001 cbd66000 00000000 00000001 c00ec678
[  234.303813][kernel.0]7f80: 00000000 00000200 00000000 01000000 01000000 00000000 00000000 000000ef
[  234.304933][kernel.0]7fa0: c000e904 c000e780 01000000 00000000 00000001 00000003 00000000 01000000
[  234.306049][kernel.0]7fc0: 01000000 00000000 00000000 000000ef 00000001 00000003 01000000 00000001
[  234.307165][kernel.0]7fe0: 00000000 beafb78c 0000ad08 00128d1c 60000010 00000001 00000000 00000000
[  234.308292][kernel.0][<c01d98fc>] (validate_data_node) from [<c01dc300>] (ubifs_tnc_bulk_read+0x388/0x3ec)
[  234.309493][kernel.0][<c01dc300>] (ubifs_tnc_bulk_read) from [<c01cec84>] (ubifs_readpage+0x1dc/0x46c)
[  234.310656][kernel.0][<c01cec84>] (ubifs_readpage) from [<c0113c04>] (__generic_file_splice_read+0x29c/0x4cc)
[  234.311890][kernel.0][<c0113c04>] (__generic_file_splice_read) from [<c0113ee4>] (generic_file_splice_read+0xb0/0xf4)
[  234.313214][kernel.0][<c0113ee4>] (generic_file_splice_read) from [<c0112fd0>] (do_splice_to+0x68/0x7c)
[  234.314386][kernel.0][<c0112fd0>] (do_splice_to) from [<c011308c>] (splice_direct_to_actor+0xa8/0x190)
[  234.315544][kernel.0][<c011308c>] (splice_direct_to_actor) from [<c0113204>] (do_splice_direct+0x90/0xb8)
[  234.316741][kernel.0][<c0113204>] (do_splice_direct) from [<c00ebba8>] (do_sendfile+0x17c/0x2b8)
[  234.317838][kernel.0][<c00ebba8>] (do_sendfile) from [<c00ec678>] (SyS_sendfile64+0xc4/0xcc)
[  234.318890][kernel.0][<c00ec678>] (SyS_sendfile64) from [<c000e780>] (ret_fast_syscall+0x0/0x38)
[  234.319983][kernel.0]Code: e92d47f0 e24dd050 e59f9228 e1a04000 (e5d18014)

Signed-off-by: karam.lee <karam.lee@lge.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 845c5d7af9091..bffadbb67e475 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1876,8 +1876,10 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
 		bu_init(c);
 	else {
 		dbg_gen("disable bulk-read");
+		mutex_lock(&c->bu_mutex);
 		kfree(c->bu.buf);
 		c->bu.buf = NULL;
+		mutex_unlock(&c->bu_mutex);
 	}
 
 	ubifs_assert(c->lst.taken_empty_lebs > 0);
-- 
GitLab


From 480a1a6a3ef6fb6be4cd2f37b34314fbf64867dd Mon Sep 17 00:00:00 2001
From: Hyunchul Lee <cheol.lee@lge.com>
Date: Wed, 14 Jun 2017 09:31:49 +0900
Subject: [PATCH 1643/1958] ubifs: Change gfp flags in page allocation for bulk
 read

In low memory situations, page allocations for bulk read
can kill applications for reclaiming memory, and print an
failure message when allocations are failed.
Because bulk read is just an optimization, we don't have
to do these and can stop page allocations.

Though this siutation happens rarely, add __GFP_NORETRY
to prevent from excessive memory reclaim and killing
applications, and __GFP_WARN to suppress this failure
message.

For this, Use readahead_gfp_mask for gfp flags when
allocating pages.

Signed-off-by: Hyunchul Lee <cheol.lee@lge.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index c58efc1470f3f..8cad0b19b404c 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -735,6 +735,7 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
 	int err, page_idx, page_cnt, ret = 0, n = 0;
 	int allocate = bu->buf ? 0 : 1;
 	loff_t isize;
+	gfp_t ra_gfp_mask = readahead_gfp_mask(mapping) & ~__GFP_FS;
 
 	err = ubifs_tnc_get_bu_keys(c, bu);
 	if (err)
@@ -796,8 +797,7 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
 
 		if (page_offset > end_index)
 			break;
-		page = find_or_create_page(mapping, page_offset,
-					   GFP_NOFS | __GFP_COLD);
+		page = find_or_create_page(mapping, page_offset, ra_gfp_mask);
 		if (!page)
 			break;
 		if (!PageUptodate(page))
-- 
GitLab


From 4acadda74ff8b949c448c0282765ae747e088c87 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Fri, 16 Jun 2017 16:21:44 +0200
Subject: [PATCH 1644/1958] ubifs: Don't leak kernel memory to the MTD

When UBIFS prepares data structures which will be written to the MTD it
ensues that their lengths are multiple of 8. Since it uses kmalloc() the
padded bytes are left uninitialized and we leak a few bytes of kernel
memory to the MTD.
To make sure that all bytes are initialized, let's switch to kzalloc().
Kzalloc() is fine in this case because the buffers are not huge and in
the IO path the performance bottleneck is anyway the MTD.

Cc: stable@vger.kernel.org
Fixes: 1e51764a3c2a ("UBIFS: add new flash file system")
Signed-off-by: Richard Weinberger <richard@nod.at>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/journal.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 419c79ff377e1..9f356432f35f9 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -572,7 +572,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 	/* Make sure to also account for extended attributes */
 	len += host_ui->data_len;
 
-	dent = kmalloc(len, GFP_NOFS);
+	dent = kzalloc(len, GFP_NOFS);
 	if (!dent)
 		return -ENOMEM;
 
@@ -968,7 +968,7 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	if (twoparents)
 		len += plen;
 
-	dent1 = kmalloc(len, GFP_NOFS);
+	dent1 = kzalloc(len, GFP_NOFS);
 	if (!dent1)
 		return -ENOMEM;
 
@@ -1116,7 +1116,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
 	if (move)
 		len += plen;
-	dent = kmalloc(len, GFP_NOFS);
+	dent = kzalloc(len, GFP_NOFS);
 	if (!dent)
 		return -ENOMEM;
 
@@ -1498,7 +1498,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 	hlen = host_ui->data_len + UBIFS_INO_NODE_SZ;
 	len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8);
 
-	xent = kmalloc(len, GFP_NOFS);
+	xent = kzalloc(len, GFP_NOFS);
 	if (!xent)
 		return -ENOMEM;
 
@@ -1605,7 +1605,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 	aligned_len1 = ALIGN(len1, 8);
 	aligned_len = aligned_len1 + ALIGN(len2, 8);
 
-	ino = kmalloc(aligned_len, GFP_NOFS);
+	ino = kzalloc(aligned_len, GFP_NOFS);
 	if (!ino)
 		return -ENOMEM;
 
-- 
GitLab


From d8db5b1ca9d4c57e49893d0f78e6d5ce81450cc8 Mon Sep 17 00:00:00 2001
From: Xiaolei Li <xiaolei.li@mediatek.com>
Date: Fri, 23 Jun 2017 10:37:23 +0800
Subject: [PATCH 1645/1958] ubifs: Massage assert in ubifs_xattr_set() wrt.
 init_xattrs

The inode is not locked in init_xattrs when creating a new inode.

Without this patch, there will occurs assert when booting or creating
a new file, if the kernel config CONFIG_SECURITY_SMACK is enabled.

Log likes:

UBIFS assert failed in ubifs_xattr_set at 298 (pid 1156)
CPU: 1 PID: 1156 Comm: ldconfig Tainted: G S 4.12.0-rc1-207440-g1e70b02 #2
Hardware name: MediaTek MT2712 evaluation board (DT)
Call trace:
[<ffff000008088538>] dump_backtrace+0x0/0x238
[<ffff000008088834>] show_stack+0x14/0x20
[<ffff0000083d98d4>] dump_stack+0x9c/0xc0
[<ffff00000835d524>] ubifs_xattr_set+0x374/0x5e0
[<ffff00000835d7ec>] init_xattrs+0x5c/0xb8
[<ffff000008385788>] security_inode_init_security+0x110/0x190
[<ffff00000835e058>] ubifs_init_security+0x30/0x68
[<ffff00000833ada0>] ubifs_mkdir+0x100/0x200
[<ffff00000820669c>] vfs_mkdir+0x11c/0x1b8
[<ffff00000820b73c>] SyS_mkdirat+0x74/0xd0
[<ffff000008082f8c>] __sys_trace_return+0x0/0x4

Signed-off-by: Xiaolei Li <xiaolei.li@mediatek.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/crypto.c |  7 ++++++-
 fs/ubifs/ubifs.h  |  2 +-
 fs/ubifs/xattr.c  | 17 ++++++++---------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 382ed428cfd21..114ba455bac3d 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -9,8 +9,13 @@ static int ubifs_crypt_get_context(struct inode *inode, void *ctx, size_t len)
 static int ubifs_crypt_set_context(struct inode *inode, const void *ctx,
 				   size_t len, void *fs_data)
 {
+	/*
+	 * Creating an encryption context is done unlocked since we
+	 * operate on a new inode which is not visible to other users
+	 * at this point. So, no need to check whether inode is locked.
+	 */
 	return ubifs_xattr_set(inode, UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT,
-			       ctx, len, 0);
+			       ctx, len, 0, false);
 }
 
 static bool ubifs_crypt_empty_dir(struct inode *inode)
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d933edade14a9..cd43651f17314 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1755,7 +1755,7 @@ int ubifs_check_dir_empty(struct inode *dir);
 extern const struct xattr_handler *ubifs_xattr_handlers[];
 ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
-		    size_t size, int flags);
+		    size_t size, int flags, bool check_lock);
 ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
 			size_t size);
 void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 98f11257d66c0..c13eae819cbc8 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -280,7 +280,7 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
 }
 
 int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
-		    size_t size, int flags)
+		    size_t size, int flags, bool check_lock)
 {
 	struct inode *inode;
 	struct ubifs_info *c = host->i_sb->s_fs_info;
@@ -289,12 +289,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
 	union ubifs_key key;
 	int err;
 
-	/*
-	 * Creating an encryption context is done unlocked since we
-	 * operate on a new inode which is not visible to other users
-	 * at this point.
-	 */
-	if (strcmp(name, UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT) != 0)
+	if (check_lock)
 		ubifs_assert(inode_is_locked(host));
 
 	if (size > UBIFS_MAX_INO_DATA)
@@ -598,8 +593,12 @@ static int init_xattrs(struct inode *inode, const struct xattr *xattr_array,
 		}
 		strcpy(name, XATTR_SECURITY_PREFIX);
 		strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
+		/*
+		 * creating a new inode without holding the inode rwsem,
+		 * no need to check whether inode is locked.
+		 */
 		err = ubifs_xattr_set(inode, name, xattr->value,
-				      xattr->value_len, 0);
+				      xattr->value_len, 0, false);
 		kfree(name);
 		if (err < 0)
 			break;
@@ -646,7 +645,7 @@ static int xattr_set(const struct xattr_handler *handler,
 	name = xattr_full_name(handler, name);
 
 	if (value)
-		return ubifs_xattr_set(inode, name, value, size, flags);
+		return ubifs_xattr_set(inode, name, value, size, flags, true);
 	else
 		return ubifs_xattr_remove(inode, name);
 }
-- 
GitLab


From a6664433d383eeb71cbdeb9aea2c66eeea76e742 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 26 Jun 2017 13:49:04 +0200
Subject: [PATCH 1646/1958] ubifs: Set double hash cookie also for
 RENAME_EXCHANGE

We developed RENAME_EXCHANGE and UBIFS_FLG_DOUBLE_HASH more or less in
parallel and this case was forgotten. :-(

Cc: stable@vger.kernel.org
Fixes: d63d61c16972 ("ubifs: Implement UBIFS_FLG_DOUBLE_HASH")
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/journal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 9f356432f35f9..04c4ec6483e52 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -985,6 +985,7 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	dent1->nlen = cpu_to_le16(fname_len(snd_nm));
 	memcpy(dent1->name, fname_name(snd_nm), fname_len(snd_nm));
 	dent1->name[fname_len(snd_nm)] = '\0';
+	set_dent_cookie(c, dent1);
 	zero_dent_node_unused(dent1);
 	ubifs_prep_grp_node(c, dent1, dlen1, 0);
 
@@ -997,6 +998,7 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	dent2->nlen = cpu_to_le16(fname_len(fst_nm));
 	memcpy(dent2->name, fname_name(fst_nm), fname_len(fst_nm));
 	dent2->name[fname_len(fst_nm)] = '\0';
+	set_dent_cookie(c, dent2);
 	zero_dent_node_unused(dent2);
 	ubifs_prep_grp_node(c, dent2, dlen2, 0);
 
-- 
GitLab


From 37511fb5c91db93d8bd6e3f52f86e5a7ff7cfcdf Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 14 Jul 2017 14:49:38 -0700
Subject: [PATCH 1647/1958] mm: fix overflow check in expand_upwards()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

JÃ¶rn Engel noticed that the expand_upwards() function might not return
-ENOMEM in case the requested address is (unsigned long)-PAGE_SIZE and
if the architecture didn't defined TASK_SIZE as multiple of PAGE_SIZE.

Affected architectures are arm, frv, m68k, blackfin, h8300 and xtensa
which all define TASK_SIZE as 0xffffffff, but since none of those have
an upwards-growing stack we currently have no actual issue.

Nevertheless let's fix this just in case any of the architectures with
an upward-growing stack (currently parisc, metag and partly ia64) define
TASK_SIZE similar.

Link: http://lkml.kernel.org/r/20170702192452.GA11868@p100.box
Fixes: bd726c90b6b8 ("Allow stack to grow up to address space limit")
Signed-off-by: Helge Deller <deller@gmx.de>
Reported-by: Jörn Engel <joern@purestorage.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 7fa6759322d1f..f19efcf754187 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2231,7 +2231,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 
 	/* Guard against exceeding limits of the address space. */
 	address &= PAGE_MASK;
-	if (address >= TASK_SIZE)
+	if (address >= (TASK_SIZE & PAGE_MASK))
 		return -ENOMEM;
 	address += PAGE_SIZE;
 
-- 
GitLab


From ffba19ccae8d98beb0a17345a0b1ee9e415b23b8 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 Jul 2017 14:49:41 -0700
Subject: [PATCH 1648/1958] lib/atomic64_test.c: add a test that
 atomic64_inc_not_zero() returns an int

atomic64_inc_not_zero() returns a "truth value" which in C is
traditionally an int.  That means callers are likely to expect the
result will fit in an int.

If an implementation returns a "true" value which does not fit in an
int, then there's a possibility that callers will truncate it when they
store it in an int.

In fact this happened in practice, see commit 966d2b04e070
("percpu-refcount: fix reference leak during percpu-atomic transition").

So add a test that the result fits in an int, even when the input
doesn't.  This catches the case where an implementation just passes the
non-zero input value out as the result.

Link: http://lkml.kernel.org/r/1499775133-1231-1-git-send-email-mpe@ellerman.id.au
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Douglas Miller <dougmill@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/atomic64_test.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index fd70c0e0e6731..62ab629f51ca2 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -153,8 +153,10 @@ static __init void test_atomic64(void)
 	long long v0 = 0xaaa31337c001d00dLL;
 	long long v1 = 0xdeadbeefdeafcafeLL;
 	long long v2 = 0xfaceabadf00df001LL;
+	long long v3 = 0x8000000000000000LL;
 	long long onestwos = 0x1111111122222222LL;
 	long long one = 1LL;
+	int r_int;
 
 	atomic64_t v = ATOMIC64_INIT(v0);
 	long long r = v0;
@@ -240,6 +242,11 @@ static __init void test_atomic64(void)
 	BUG_ON(!atomic64_inc_not_zero(&v));
 	r += one;
 	BUG_ON(v.counter != r);
+
+	/* Confirm the return value fits in an int, even if the value doesn't */
+	INIT(v3);
+	r_int = atomic64_inc_not_zero(&v);
+	BUG_ON(!r_int);
 }
 
 static __init int test_atomics_init(void)
-- 
GitLab


From 5624a8b00cb206390d465e0cb5a1e5eedd23a15c Mon Sep 17 00:00:00 2001
From: Luis de Bethencourt <luisbg@kernel.org>
Date: Fri, 14 Jul 2017 14:49:44 -0700
Subject: [PATCH 1649/1958] MAINTAINERS: move the befs tree to kernel.org

Update the location of the befs git tree and my email address.

Link: http://lkml.kernel.org/r/20170709110012.2991-1-luisbg@kernel.org
Signed-off-by: Luis de Bethencourt <luisbg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7d9bd4a041aff..30dde8cd401ed 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2516,10 +2516,10 @@ S:	Supported
 F:	drivers/media/platform/sti/delta
 
 BEFS FILE SYSTEM
-M:	Luis de Bethencourt <luisbg@osg.samsung.com>
+M:	Luis de Bethencourt <luisbg@kernel.org>
 M:	Salah Triki <salah.triki@gmail.com>
 S:	Maintained
-T:	git git://github.com/luisbg/linux-befs.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/luisbg/linux-befs.git
 F:	Documentation/filesystems/befs.txt
 F:	fs/befs/
 
-- 
GitLab


From 5f92a7b0fcd627fbd06ceb1cee3bbe5d08d13356 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 14 Jul 2017 14:49:46 -0700
Subject: [PATCH 1650/1958] kernel/watchdog.c: use better pr_fmt prefix

After commit 73ce0511c436 ("kernel/watchdog.c: move hardlockup
detector to separate file"), 'NMI watchdog' is inappropriate in
kernel/watchdog.c, using 'watchdog' only.

Link: http://lkml.kernel.org/r/1499928642-48983-1-git-send-email-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Babu Moger <babu.moger@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index cabe3e9fb620f..06d3389bca0df 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -9,7 +9,7 @@
  * to those contributors as well.
  */
 
-#define pr_fmt(fmt) "NMI watchdog: " fmt
+#define pr_fmt(fmt) "watchdog: " fmt
 
 #include <linux/mm.h>
 #include <linux/cpu.h>
-- 
GitLab


From ecaad81ca0dfaa5f6ab7a5a9bc16a10816e2bd27 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 14 Jul 2017 14:49:49 -0700
Subject: [PATCH 1651/1958] fault-inject: automatically detect the number base
 for fail-nth write interface

Automatically detect the number base to use when writing to fail-nth
file instead of always parsing as a decimal number.

Link: http://lkml.kernel.org/r/1491490561-10485-2-git-send-email-akinobu.mita@gmail.com
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 88b773f318cd0..5d93512beea11 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1368,7 +1368,7 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 	put_task_struct(task);
 	if (task != current)
 		return -EPERM;
-	err = kstrtoint_from_user(buf, count, 10, &n);
+	err = kstrtoint_from_user(buf, count, 0, &n);
 	if (err)
 		return err;
 	if (n < 0 || n == INT_MAX)
-- 
GitLab


From 9049f2f6e7bdfb5de0c63c2635bf3cdb70c4efb5 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 14 Jul 2017 14:49:52 -0700
Subject: [PATCH 1652/1958] fault-inject: parse as natural 1-based value for
 fail-nth write interface

The value written to fail-nth file is parsed as 0-based.  Parsing as
one-based is more natural to understand and it enables to cancel the
previous setup by simply writing '0'.

This change also converts task->fail_nth from signed to unsigned int.

Link: http://lkml.kernel.org/r/1491490561-10485-3-git-send-email-akinobu.mita@gmail.com
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/fault-injection/fault-injection.txt | 7 +++----
 fs/proc/base.c                                    | 9 ++++-----
 include/linux/sched.h                             | 2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 192d8cbcc5f99..a32190508751c 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -138,8 +138,8 @@ o proc entries
 
 - /proc/self/task/<current-tid>/fail-nth:
 
-	Write to this file of integer N makes N-th call in the current task fail
-	(N is 0-based). Read from this file returns a single char 'Y' or 'N'
+	Write to this file of integer N makes N-th call in the task fail.
+	Read from this file returns a single char 'Y' or 'N'
 	that says if the fault setup with a previous write to this file was
 	injected or not, and disables the fault if it wasn't yet injected.
 	Note that this file enables all types of faults (slab, futex, etc).
@@ -320,7 +320,7 @@ int main()
 	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
 	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
 	fail_nth = open(buf, O_RDWR);
-	for (i = 0;; i++) {
+	for (i = 1;; i++) {
 		sprintf(buf, "%d", i);
 		write(fail_nth, buf, strlen(buf));
 		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
@@ -339,7 +339,6 @@ int main()
 
 An example output:
 
-0-th fault Y: res=-1/23
 1-th fault Y: res=-1/23
 2-th fault Y: res=-1/23
 3-th fault Y: res=-1/12
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5d93512beea11..c1fdaecb8d23c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1360,7 +1360,8 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 				   size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	int err, n;
+	int err;
+	unsigned int n;
 
 	task = get_proc_task(file_inode(file));
 	if (!task)
@@ -1368,12 +1369,10 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 	put_task_struct(task);
 	if (task != current)
 		return -EPERM;
-	err = kstrtoint_from_user(buf, count, 0, &n);
+	err = kstrtouint_from_user(buf, count, 0, &n);
 	if (err)
 		return err;
-	if (n < 0 || n == INT_MAX)
-		return -EINVAL;
-	current->fail_nth = n + 1;
+	current->fail_nth = n;
 	return count;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3822d749fc9ee..2ba9ec93423f9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,7 +974,7 @@ struct task_struct {
 
 #ifdef CONFIG_FAULT_INJECTION
 	int				make_it_fail;
-	int fail_nth;
+	unsigned int			fail_nth;
 #endif
 	/*
 	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
-- 
GitLab


From bfc740938d151001cb1158580796f8f3be3bf0c1 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 14 Jul 2017 14:49:54 -0700
Subject: [PATCH 1653/1958] fault-inject: make fail-nth read/write interface
 symmetric

The read interface for fail-nth looks a bit odd.  Read from this file
returns "NYYYY..." or "YYYYY..." (this makes me surprise when cat this
file).  Because there is no EOF condition.  The first character
indicates current->fail_nth is zero or not, and then current->fail_nth
is reset to zero.

Just returning task->fail_nth value is more natural to understand.

Link: http://lkml.kernel.org/r/1491490561-10485-4-git-send-email-akinobu.mita@gmail.com
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/fault-injection/fault-injection.txt | 13 +++++++------
 fs/proc/base.c                                    | 14 ++++++--------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index a32190508751c..370ddcbc2b441 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -139,9 +139,9 @@ o proc entries
 - /proc/self/task/<current-tid>/fail-nth:
 
 	Write to this file of integer N makes N-th call in the task fail.
-	Read from this file returns a single char 'Y' or 'N'
-	that says if the fault setup with a previous write to this file was
-	injected or not, and disables the fault if it wasn't yet injected.
+	Read from this file returns a integer value. A value of '0' indicates
+	that the fault setup with a previous write to this file was injected.
+	A positive integer N indicates that the fault wasn't yet injected.
 	Note that this file enables all types of faults (slab, futex, etc).
 	This setting takes precedence over all other generic debugfs settings
 	like probability, interval, times, etc. But per-capability settings
@@ -325,13 +325,14 @@ int main()
 		write(fail_nth, buf, strlen(buf));
 		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
 		err = errno;
-		read(fail_nth, buf, 1);
+		pread(fail_nth, buf, sizeof(buf), 0);
 		if (res == 0) {
 			close(fds[0]);
 			close(fds[1]);
 		}
-		printf("%d-th fault %c: res=%d/%d\n", i, buf[0], res, err);
-		if (buf[0] != 'Y')
+		printf("%d-th fault %c: res=%d/%d\n", i, atoi(buf) ? 'N' : 'Y',
+			res, err);
+		if (atoi(buf))
 			break;
 	}
 	return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c1fdaecb8d23c..7d795d28dd029 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1380,7 +1380,8 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
 				  size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	int err;
+	char numbuf[PROC_NUMBUF];
+	ssize_t len;
 
 	task = get_proc_task(file_inode(file));
 	if (!task)
@@ -1388,13 +1389,10 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
 	put_task_struct(task);
 	if (task != current)
 		return -EPERM;
-	if (count < 1)
-		return -EINVAL;
-	err = put_user((char)(current->fail_nth ? 'N' : 'Y'), buf);
-	if (err)
-		return err;
-	current->fail_nth = 0;
-	return 1;
+	len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
+	len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
+
+	return len;
 }
 
 static const struct file_operations proc_fail_nth_operations = {
-- 
GitLab


From 1203c8e6fb0aa1e9c39d2323607a74c3adc34fd8 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 14 Jul 2017 14:49:57 -0700
Subject: [PATCH 1654/1958] fault-inject: simplify access check for fail-nth

The fail-nth file is created with 0666 and the access is permitted if
and only if the task is current.

This file is owned by the currnet user.  So we can create it with 0644
and allow the owner to write it.  This enables to watch the status of
task->fail_nth from another processes.

[akinobu.mita@gmail.com: don't convert unsigned type value as signed int]
  Link: http://lkml.kernel.org/r/1492444483-9239-1-git-send-email-akinobu.mita@gmail.com
[akinobu.mita@gmail.com: avoid unwanted data race to task->fail_nth]
  Link: http://lkml.kernel.org/r/1499962492-8931-1-git-send-email-akinobu.mita@gmail.com
Link: http://lkml.kernel.org/r/1491490561-10485-5-git-send-email-akinobu.mita@gmail.com
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c     | 25 ++++++++++---------------
 lib/fault-inject.c |  7 +++++--
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7d795d28dd029..872a3f28bfe41 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1363,16 +1363,16 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 	int err;
 	unsigned int n;
 
+	err = kstrtouint_from_user(buf, count, 0, &n);
+	if (err)
+		return err;
+
 	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
+	WRITE_ONCE(task->fail_nth, n);
 	put_task_struct(task);
-	if (task != current)
-		return -EPERM;
-	err = kstrtouint_from_user(buf, count, 0, &n);
-	if (err)
-		return err;
-	current->fail_nth = n;
+
 	return count;
 }
 
@@ -1386,11 +1386,10 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
 	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
-	put_task_struct(task);
-	if (task != current)
-		return -EPERM;
-	len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
+	len = snprintf(numbuf, sizeof(numbuf), "%u\n",
+			READ_ONCE(task->fail_nth));
 	len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
+	put_task_struct(task);
 
 	return len;
 }
@@ -3355,11 +3354,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #endif
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
-	/*
-	 * Operations on the file check that the task is current,
-	 * so we create it with 0666 to support testing under unprivileged user.
-	 */
-	REG("fail-nth", 0666, proc_fail_nth_operations),
+	REG("fail-nth", 0644, proc_fail_nth_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	ONE("io",	S_IRUSR, proc_tid_io_accounting),
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 09ac73c177fd5..7d315fdb9f13d 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -107,9 +107,12 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
 
 bool should_fail(struct fault_attr *attr, ssize_t size)
 {
-	if (in_task() && current->fail_nth) {
-		if (--current->fail_nth == 0)
+	if (in_task()) {
+		unsigned int fail_nth = READ_ONCE(current->fail_nth);
+
+		if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1))
 			goto fail;
+
 		return false;
 	}
 
-- 
GitLab


From 168c42bc56d8d47f67f2a5206506cd4ba3c18475 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 14 Jul 2017 14:50:00 -0700
Subject: [PATCH 1655/1958] fault-inject: add /proc/<pid>/fail-nth

fail-nth interface is only created in /proc/self/task/<current-tid>/.
This change also adds it in /proc/<pid>/.

This makes shell based tool a bit simpler.

	$ bash -c "builtin echo 100 > /proc/self/fail-nth && exec ls /"

Link: http://lkml.kernel.org/r/1491490561-10485-6-git-send-email-akinobu.mita@gmail.com
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/fault-injection/fault-injection.txt | 3 ++-
 fs/proc/base.c                                    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 370ddcbc2b441..918972babcd8f 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -136,7 +136,8 @@ use the boot option:
 
 o proc entries
 
-- /proc/self/task/<current-tid>/fail-nth:
+- /proc/<pid>/fail-nth:
+- /proc/self/task/<tid>/fail-nth:
 
 	Write to this file of integer N makes N-th call in the task fail.
 	Read from this file returns a integer value. A value of '0' indicates
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 872a3f28bfe41..719c2e943ea10 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2962,6 +2962,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #endif
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
+	REG("fail-nth", 0644, proc_fail_nth_operations),
 #endif
 #ifdef CONFIG_ELF_CORE
 	REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
-- 
GitLab


From 20cf0c54e641071d90c4043021217165f1499056 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 14 Jul 2017 14:50:03 -0700
Subject: [PATCH 1656/1958] xtensa: use generic fb.h

The arch uses a verbatim copy of the asm-generic version and does not
add any own implementations to the header, so use asm-generic/fb.h
instead of duplicating code.

Link: http://lkml.kernel.org/r/20170517083545.2115-1-tklauser@distanz.ch
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/xtensa/include/asm/Kbuild |  1 +
 arch/xtensa/include/asm/fb.h   | 12 ------------
 2 files changed, 1 insertion(+), 12 deletions(-)
 delete mode 100644 arch/xtensa/include/asm/fb.h

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index c04efde775a59..2d716ebc5a5e9 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += dma-contiguous.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
+generic-y += fb.h
 generic-y += hardirq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
diff --git a/arch/xtensa/include/asm/fb.h b/arch/xtensa/include/asm/fb.h
deleted file mode 100644
index c7df380309920..0000000000000
--- a/arch/xtensa/include/asm/fb.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
-- 
GitLab


From 062b87406d0d73b3894b562dc3067d9ea760bd3e Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 14 Jul 2017 14:50:05 -0700
Subject: [PATCH 1657/1958] MAINTAINERS: give kmod some maintainer love

As suggested by Jessica, I've been actively working on kmod, so might as
well reflect its maintained status.

Changes are expected to go through akpm's tree.

Link: http://lkml.kernel.org/r/20170628223155.26472-2-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Suggested-by: Jessica Yu <jeyu@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michal Marek <mmarek@suse.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 30dde8cd401ed..c917d41614278 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7554,6 +7554,13 @@ F:	include/linux/kmemleak.h
 F:	mm/kmemleak.c
 F:	mm/kmemleak-test.c
 
+KMOD MODULE USERMODE HELPER
+M:	"Luis R. Rodriguez" <mcgrof@kernel.org>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	kernel/kmod.c
+F:	include/linux/kmod.h
+
 KPROBES
 M:	Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
-- 
GitLab


From d9c6a72d6fa29d3a7999dda726577e5d1fccafa5 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 14 Jul 2017 14:50:08 -0700
Subject: [PATCH 1658/1958] kmod: add test driver to stress test the module
 loader

This adds a new stress test driver for kmod: the kernel module loader.
The new stress test driver, test_kmod, is only enabled as a module right
now.  It should be possible to load this as built-in and load tests
early (refer to the force_init_test module parameter), however since a
lot of test can get a system out of memory fast we leave this disabled
for now.

Using a system with 1024 MiB of RAM can *easily* get your kernel OOM
fast with this test driver.

The test_kmod driver exposes API knobs for us to fine tune simple
request_module() and get_fs_type() calls.  Since these API calls only
allow each one parameter a test driver for these is rather simple.
Other factors that can help out test driver though are the number of
calls we issue and knowing current limitations of each.  This exposes
configuration as much as possible through userspace to be able to build
tests directly from userspace.

Since it allows multiple misc devices its will eventually (once we add a
knob to let us create new devices at will) also be possible to perform
more tests in parallel, provided you have enough memory.

We only enable tests we know work as of right now.

Demo screenshots:

 # tools/testing/selftests/kmod/kmod.sh
kmod_test_0001_driver: OK! - loading kmod test
kmod_test_0001_driver: OK! - Return value: 256 (MODULE_NOT_FOUND), expected MODULE_NOT_FOUND
kmod_test_0001_fs: OK! - loading kmod test
kmod_test_0001_fs: OK! - Return value: -22 (-EINVAL), expected -EINVAL
kmod_test_0002_driver: OK! - loading kmod test
kmod_test_0002_driver: OK! - Return value: 256 (MODULE_NOT_FOUND), expected MODULE_NOT_FOUND
kmod_test_0002_fs: OK! - loading kmod test
kmod_test_0002_fs: OK! - Return value: -22 (-EINVAL), expected -EINVAL
kmod_test_0003: OK! - loading kmod test
kmod_test_0003: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0004: OK! - loading kmod test
kmod_test_0004: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0005: OK! - loading kmod test
kmod_test_0005: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0006: OK! - loading kmod test
kmod_test_0006: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0005: OK! - loading kmod test
kmod_test_0005: OK! - Return value: 0 (SUCCESS), expected SUCCESS
kmod_test_0006: OK! - loading kmod test
kmod_test_0006: OK! - Return value: 0 (SUCCESS), expected SUCCESS
XXX: add test restult for 0007
Test completed

You can also request for specific tests:

 # tools/testing/selftests/kmod/kmod.sh -t 0001
kmod_test_0001_driver: OK! - loading kmod test
kmod_test_0001_driver: OK! - Return value: 256 (MODULE_NOT_FOUND), expected MODULE_NOT_FOUND
kmod_test_0001_fs: OK! - loading kmod test
kmod_test_0001_fs: OK! - Return value: -22 (-EINVAL), expected -EINVAL
Test completed

Lastly, the current available number of tests:

 # tools/testing/selftests/kmod/kmod.sh --help
Usage: tools/testing/selftests/kmod/kmod.sh [ -t <4-number-digit> ]
Valid tests: 0001-0009

0001 - Simple test - 1 thread  for empty string
0002 - Simple test - 1 thread  for modules/filesystems that do not exist
0003 - Simple test - 1 thread  for get_fs_type() only
0004 - Simple test - 2 threads for get_fs_type() only
0005 - multithreaded tests with default setup - request_module() only
0006 - multithreaded tests with default setup - get_fs_type() only
0007 - multithreaded tests with default setup test request_module() and get_fs_type()
0008 - multithreaded - push kmod_concurrent over max_modprobes for request_module()
0009 - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()

The following test cases currently fail, as such they are not currently
enabled by default:

 # tools/testing/selftests/kmod/kmod.sh -t 0008
 # tools/testing/selftests/kmod/kmod.sh -t 0009

To be sure to run them as intended please unload both of the modules:

  o test_module
  o xfs

And ensure they are not loaded on your system prior to testing them.  If
you use these paritions for your rootfs you can change the default test
driver used for get_fs_type() by exporting it into your environment.  For
example of other test defaults you can override refer to kmod.sh
allow_user_defaults().

Behind the scenes this is how we fine tune at a test case prior to
hitting a trigger to run it:

cat /sys/devices/virtual/misc/test_kmod0/config
echo -n "2" > /sys/devices/virtual/misc/test_kmod0/config_test_case
echo -n "ext4" > /sys/devices/virtual/misc/test_kmod0/config_test_fs
echo -n "80" > /sys/devices/virtual/misc/test_kmod0/config_num_threads
cat /sys/devices/virtual/misc/test_kmod0/config
echo -n "1" > /sys/devices/virtual/misc/test_kmod0/config_num_threads

Finally to trigger:

echo -n "1" > /sys/devices/virtual/misc/test_kmod0/trigger_config

The kmod.sh script uses the above constructs to build different test cases.

A bit of interpretation of the current failures follows, first two
premises:

a) When request_module() is used userspace figures out an optimized
   version of module order for us.  Once it finds the modules it needs, as
   per depmod symbol dep map, it will finit_module() the respective
   modules which are needed for the original request_module() request.

b) We have an optimization in place whereby if a kernel uses
   request_module() on a module already loaded we never bother userspace
   as the module already is loaded.  This is all handled by kernel/kmod.c.

A few things to consider to help identify root causes of issues:

0) kmod 19 has a broken heuristic for modules being assumed to be
   built-in to your kernel and will return 0 even though request_module()
   failed.  Upgrade to a newer version of kmod.

1) A get_fs_type() call for "xfs" will request_module() for "fs-xfs",
   not for "xfs".  The optimization in kernel described in b) fails to
   catch if we have a lot of consecutive get_fs_type() calls.  The reason
   is the optimization in place does not look for aliases.  This means two
   consecutive get_fs_type() calls will bump kmod_concurrent, whereas
   request_module() will not.

This one explanation why test case 0009 fails at least once for
get_fs_type().

2) If a module fails to load --- for whatever reason (kmod_concurrent
   limit reached, file not yet present due to rootfs switch, out of
   memory) we have a period of time during which module request for the
   same name either with request_module() or get_fs_type() will *also*
   fail to load even if the file for the module is ready.

This explains why *multiple* NULLs are possible on test 0009.

3) finit_module() consumes quite a bit of memory.

4) Filesystems typically also have more dependent modules than other
   modules, its important to note though that even though a get_fs_type()
   call does not incur additional kmod_concurrent bumps, since userspace
   loads dependencies it finds it needs via finit_module_fd(), it *will*
   take much more memory to load a module with a lot of dependencies.

Because of 3) and 4) we will easily run into out of memory failures with
certain tests.  For instance test 0006 fails on qemu with 1024 MiB of RAM.
It panics a box after reaping all userspace processes and still not
having enough memory to reap.

[arnd@arndb.de: add dependencies for test module]
  Link: http://lkml.kernel.org/r/20170630154834.3689272-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/20170628223155.26472-3-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michal Marek <mmarek@suse.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS                           |    2 +
 lib/Kconfig.debug                     |   27 +
 lib/Makefile                          |    1 +
 lib/test_kmod.c                       | 1246 +++++++++++++++++++++++++
 tools/testing/selftests/kmod/Makefile |   11 +
 tools/testing/selftests/kmod/config   |    7 +
 tools/testing/selftests/kmod/kmod.sh  |  635 +++++++++++++
 7 files changed, 1929 insertions(+)
 create mode 100644 lib/test_kmod.c
 create mode 100644 tools/testing/selftests/kmod/Makefile
 create mode 100644 tools/testing/selftests/kmod/config
 create mode 100644 tools/testing/selftests/kmod/kmod.sh

diff --git a/MAINTAINERS b/MAINTAINERS
index c917d41614278..0876084a97da7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7560,6 +7560,8 @@ L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	kernel/kmod.c
 F:	include/linux/kmod.h
+F:	lib/test_kmod.c
+F:	tools/testing/selftests/kmod/
 
 KPROBES
 M:	Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0d01c6d4e03a..789c6e9e5e011 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1847,6 +1847,33 @@ config BUG_ON_DATA_CORRUPTION
 
 	  If unsure, say N.
 
+config TEST_KMOD
+	tristate "kmod stress tester"
+	default n
+	depends on m
+	depends on BLOCK && (64BIT || LBDAF)	  # for XFS, BTRFS
+	depends on NETDEVICES && NET_CORE && INET # for TUN
+	select TEST_LKM
+	select XFS_FS
+	select TUN
+	select BTRFS_FS
+	help
+	  Test the kernel's module loading mechanism: kmod. kmod implements
+	  support to load modules using the Linux kernel's usermode helper.
+	  This test provides a series of tests against kmod.
+
+	  Although technically you can either build test_kmod as a module or
+	  into the kernel we disallow building it into the kernel since
+	  it stress tests request_module() and this will very likely cause
+	  some issues by taking over precious threads available from other
+	  module load requests, ultimately this could be fatal.
+
+	  To run tests run:
+
+	  tools/testing/selftests/kmod/kmod.sh --help
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index 85e91e51a9fed..40c18372b301e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
+obj-$(CONFIG_TEST_KMOD) += test_kmod.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
new file mode 100644
index 0000000000000..6c1d678bcf8b0
--- /dev/null
+++ b/lib/test_kmod.c
@@ -0,0 +1,1246 @@
+/*
+ * kmod stress test driver
+ *
+ * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or at your option any
+ * later version; or, when distributed separately from the Linux kernel or
+ * when incorporated into other software packages, subject to the following
+ * license:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of copyleft-next (version 0.3.1 or later) as published
+ * at http://copyleft-next.org/.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+/*
+ * This driver provides an interface to trigger and test the kernel's
+ * module loader through a series of configurations and a few triggers.
+ * To test this driver use the following script as root:
+ *
+ * tools/testing/selftests/kmod/kmod.sh --help
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/printk.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#define TEST_START_NUM_THREADS	50
+#define TEST_START_DRIVER	"test_module"
+#define TEST_START_TEST_FS	"xfs"
+#define TEST_START_TEST_CASE	TEST_KMOD_DRIVER
+
+
+static bool force_init_test = false;
+module_param(force_init_test, bool_enable_only, 0644);
+MODULE_PARM_DESC(force_init_test,
+		 "Force kicking a test immediately after driver loads");
+
+/*
+ * For device allocation / registration
+ */
+static DEFINE_MUTEX(reg_dev_mutex);
+static LIST_HEAD(reg_test_devs);
+
+/*
+ * num_test_devs actually represents the *next* ID of the next
+ * device we will allow to create.
+ */
+static int num_test_devs;
+
+/**
+ * enum kmod_test_case - linker table test case
+ *
+ * If you add a  test case, please be sure to review if you need to se
+ * @need_mod_put for your tests case.
+ *
+ * @TEST_KMOD_DRIVER: stress tests request_module()
+ * @TEST_KMOD_FS_TYPE: stress tests get_fs_type()
+ */
+enum kmod_test_case {
+	__TEST_KMOD_INVALID = 0,
+
+	TEST_KMOD_DRIVER,
+	TEST_KMOD_FS_TYPE,
+
+	__TEST_KMOD_MAX,
+};
+
+struct test_config {
+	char *test_driver;
+	char *test_fs;
+	unsigned int num_threads;
+	enum kmod_test_case test_case;
+	int test_result;
+};
+
+struct kmod_test_device;
+
+/**
+ * kmod_test_device_info - thread info
+ *
+ * @ret_sync: return value if request_module() is used, sync request for
+ * 	@TEST_KMOD_DRIVER
+ * @fs_sync: return value of get_fs_type() for @TEST_KMOD_FS_TYPE
+ * @thread_idx: thread ID
+ * @test_dev: test device test is being performed under
+ * @need_mod_put: Some tests (get_fs_type() is one) requires putting the module
+ *	(module_put(fs_sync->owner)) when done, otherwise you will not be able
+ *	to unload the respective modules and re-test. We use this to keep
+ *	accounting of when we need this and to help out in case we need to
+ *	error out and deal with module_put() on error.
+ */
+struct kmod_test_device_info {
+	int ret_sync;
+	struct file_system_type *fs_sync;
+	struct task_struct *task_sync;
+	unsigned int thread_idx;
+	struct kmod_test_device *test_dev;
+	bool need_mod_put;
+};
+
+/**
+ * kmod_test_device - test device to help test kmod
+ *
+ * @dev_idx: unique ID for test device
+ * @config: configuration for the test
+ * @misc_dev: we use a misc device under the hood
+ * @dev: pointer to misc_dev's own struct device
+ * @config_mutex: protects configuration of test
+ * @trigger_mutex: the test trigger can only be fired once at a time
+ * @thread_lock: protects @done count, and the @info per each thread
+ * @done: number of threads which have completed or failed
+ * @test_is_oom: when we run out of memory, use this to halt moving forward
+ * @kthreads_done: completion used to signal when all work is done
+ * @list: needed to be part of the reg_test_devs
+ * @info: array of info for each thread
+ */
+struct kmod_test_device {
+	int dev_idx;
+	struct test_config config;
+	struct miscdevice misc_dev;
+	struct device *dev;
+	struct mutex config_mutex;
+	struct mutex trigger_mutex;
+	struct mutex thread_mutex;
+
+	unsigned int done;
+
+	bool test_is_oom;
+	struct completion kthreads_done;
+	struct list_head list;
+
+	struct kmod_test_device_info *info;
+};
+
+static const char *test_case_str(enum kmod_test_case test_case)
+{
+	switch (test_case) {
+	case TEST_KMOD_DRIVER:
+		return "TEST_KMOD_DRIVER";
+	case TEST_KMOD_FS_TYPE:
+		return "TEST_KMOD_FS_TYPE";
+	default:
+		return "invalid";
+	}
+}
+
+static struct miscdevice *dev_to_misc_dev(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static struct kmod_test_device *misc_dev_to_test_dev(struct miscdevice *misc_dev)
+{
+	return container_of(misc_dev, struct kmod_test_device, misc_dev);
+}
+
+static struct kmod_test_device *dev_to_test_dev(struct device *dev)
+{
+	struct miscdevice *misc_dev;
+
+	misc_dev = dev_to_misc_dev(dev);
+
+	return misc_dev_to_test_dev(misc_dev);
+}
+
+/* Must run with thread_mutex held */
+static void kmod_test_done_check(struct kmod_test_device *test_dev,
+				 unsigned int idx)
+{
+	struct test_config *config = &test_dev->config;
+
+	test_dev->done++;
+	dev_dbg(test_dev->dev, "Done thread count: %u\n", test_dev->done);
+
+	if (test_dev->done == config->num_threads) {
+		dev_info(test_dev->dev, "Done: %u threads have all run now\n",
+			 test_dev->done);
+		dev_info(test_dev->dev, "Last thread to run: %u\n", idx);
+		complete(&test_dev->kthreads_done);
+	}
+}
+
+static void test_kmod_put_module(struct kmod_test_device_info *info)
+{
+	struct kmod_test_device *test_dev = info->test_dev;
+	struct test_config *config = &test_dev->config;
+
+	if (!info->need_mod_put)
+		return;
+
+	switch (config->test_case) {
+	case TEST_KMOD_DRIVER:
+		break;
+	case TEST_KMOD_FS_TYPE:
+		if (info && info->fs_sync && info->fs_sync->owner)
+			module_put(info->fs_sync->owner);
+		break;
+	default:
+		BUG();
+	}
+
+	info->need_mod_put = true;
+}
+
+static int run_request(void *data)
+{
+	struct kmod_test_device_info *info = data;
+	struct kmod_test_device *test_dev = info->test_dev;
+	struct test_config *config = &test_dev->config;
+
+	switch (config->test_case) {
+	case TEST_KMOD_DRIVER:
+		info->ret_sync = request_module("%s", config->test_driver);
+		break;
+	case TEST_KMOD_FS_TYPE:
+		info->fs_sync = get_fs_type(config->test_fs);
+		info->need_mod_put = true;
+		break;
+	default:
+		/* __trigger_config_run() already checked for test sanity */
+		BUG();
+		return -EINVAL;
+	}
+
+	dev_dbg(test_dev->dev, "Ran thread %u\n", info->thread_idx);
+
+	test_kmod_put_module(info);
+
+	mutex_lock(&test_dev->thread_mutex);
+	info->task_sync = NULL;
+	kmod_test_done_check(test_dev, info->thread_idx);
+	mutex_unlock(&test_dev->thread_mutex);
+
+	return 0;
+}
+
+static int tally_work_test(struct kmod_test_device_info *info)
+{
+	struct kmod_test_device *test_dev = info->test_dev;
+	struct test_config *config = &test_dev->config;
+	int err_ret = 0;
+
+	switch (config->test_case) {
+	case TEST_KMOD_DRIVER:
+		/*
+		 * Only capture errors, if one is found that's
+		 * enough, for now.
+		 */
+		if (info->ret_sync != 0)
+			err_ret = info->ret_sync;
+		dev_info(test_dev->dev,
+			 "Sync thread %d return status: %d\n",
+			 info->thread_idx, info->ret_sync);
+		break;
+	case TEST_KMOD_FS_TYPE:
+		/* For now we make this simple */
+		if (!info->fs_sync)
+			err_ret = -EINVAL;
+		dev_info(test_dev->dev, "Sync thread %u fs: %s\n",
+			 info->thread_idx, info->fs_sync ? config->test_fs :
+			 "NULL");
+		break;
+	default:
+		BUG();
+	}
+
+	return err_ret;
+}
+
+/*
+ * XXX: add result option to display if all errors did not match.
+ * For now we just keep any error code if one was found.
+ *
+ * If this ran it means *all* tasks were created fine and we
+ * are now just collecting results.
+ *
+ * Only propagate errors, do not override with a subsequent sucess case.
+ */
+static void tally_up_work(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+	struct kmod_test_device_info *info;
+	unsigned int idx;
+	int err_ret = 0;
+	int ret = 0;
+
+	mutex_lock(&test_dev->thread_mutex);
+
+	dev_info(test_dev->dev, "Results:\n");
+
+	for (idx=0; idx < config->num_threads; idx++) {
+		info = &test_dev->info[idx];
+		ret = tally_work_test(info);
+		if (ret)
+			err_ret = ret;
+	}
+
+	/*
+	 * Note: request_module() returns 256 for a module not found even
+	 * though modprobe itself returns 1.
+	 */
+	config->test_result = err_ret;
+
+	mutex_unlock(&test_dev->thread_mutex);
+}
+
+static int try_one_request(struct kmod_test_device *test_dev, unsigned int idx)
+{
+	struct kmod_test_device_info *info = &test_dev->info[idx];
+	int fail_ret = -ENOMEM;
+
+	mutex_lock(&test_dev->thread_mutex);
+
+	info->thread_idx = idx;
+	info->test_dev = test_dev;
+	info->task_sync = kthread_run(run_request, info, "%s-%u",
+				      KBUILD_MODNAME, idx);
+
+	if (!info->task_sync || IS_ERR(info->task_sync)) {
+		test_dev->test_is_oom = true;
+		dev_err(test_dev->dev, "Setting up thread %u failed\n", idx);
+		info->task_sync = NULL;
+		goto err_out;
+	} else
+		dev_dbg(test_dev->dev, "Kicked off thread %u\n", idx);
+
+	mutex_unlock(&test_dev->thread_mutex);
+
+	return 0;
+
+err_out:
+	info->ret_sync = fail_ret;
+	mutex_unlock(&test_dev->thread_mutex);
+
+	return fail_ret;
+}
+
+static void test_dev_kmod_stop_tests(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+	struct kmod_test_device_info *info;
+	unsigned int i;
+
+	dev_info(test_dev->dev, "Ending request_module() tests\n");
+
+	mutex_lock(&test_dev->thread_mutex);
+
+	for (i=0; i < config->num_threads; i++) {
+		info = &test_dev->info[i];
+		if (info->task_sync && !IS_ERR(info->task_sync)) {
+			dev_info(test_dev->dev,
+				 "Stopping still-running thread %i\n", i);
+			kthread_stop(info->task_sync);
+		}
+
+		/*
+		 * info->task_sync is well protected, it can only be
+		 * NULL or a pointer to a struct. If its NULL we either
+		 * never ran, or we did and we completed the work. Completed
+		 * tasks *always* put the module for us. This is a sanity
+		 * check -- just in case.
+		 */
+		if (info->task_sync && info->need_mod_put)
+			test_kmod_put_module(info);
+	}
+
+	mutex_unlock(&test_dev->thread_mutex);
+}
+
+/*
+ * Only wait *iff* we did not run into any errors during all of our thread
+ * set up. If run into any issues we stop threads and just bail out with
+ * an error to the trigger. This also means we don't need any tally work
+ * for any threads which fail.
+ */
+static int try_requests(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+	unsigned int idx;
+	int ret;
+	bool any_error = false;
+
+	for (idx=0; idx < config->num_threads; idx++) {
+		if (test_dev->test_is_oom) {
+			any_error = true;
+			break;
+		}
+
+		ret = try_one_request(test_dev, idx);
+		if (ret) {
+			any_error = true;
+			break;
+		}
+	}
+
+	if (!any_error) {
+		test_dev->test_is_oom = false;
+		dev_info(test_dev->dev,
+			 "No errors were found while initializing threads\n");
+		wait_for_completion(&test_dev->kthreads_done);
+		tally_up_work(test_dev);
+	} else {
+		test_dev->test_is_oom = true;
+		dev_info(test_dev->dev,
+			 "At least one thread failed to start, stop all work\n");
+		test_dev_kmod_stop_tests(test_dev);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int run_test_driver(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+
+	dev_info(test_dev->dev, "Test case: %s (%u)\n",
+		 test_case_str(config->test_case),
+		 config->test_case);
+	dev_info(test_dev->dev, "Test driver to load: %s\n",
+		 config->test_driver);
+	dev_info(test_dev->dev, "Number of threads to run: %u\n",
+		 config->num_threads);
+	dev_info(test_dev->dev, "Thread IDs will range from 0 - %u\n",
+		 config->num_threads - 1);
+
+	return try_requests(test_dev);
+}
+
+static int run_test_fs_type(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+
+	dev_info(test_dev->dev, "Test case: %s (%u)\n",
+		 test_case_str(config->test_case),
+		 config->test_case);
+	dev_info(test_dev->dev, "Test filesystem to load: %s\n",
+		 config->test_fs);
+	dev_info(test_dev->dev, "Number of threads to run: %u\n",
+		 config->num_threads);
+	dev_info(test_dev->dev, "Thread IDs will range from 0 - %u\n",
+		 config->num_threads - 1);
+
+	return try_requests(test_dev);
+}
+
+static ssize_t config_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+	int len = 0;
+
+	mutex_lock(&test_dev->config_mutex);
+
+	len += snprintf(buf, PAGE_SIZE,
+			"Custom trigger configuration for: %s\n",
+			dev_name(dev));
+
+	len += snprintf(buf+len, PAGE_SIZE - len,
+			"Number of threads:\t%u\n",
+			config->num_threads);
+
+	len += snprintf(buf+len, PAGE_SIZE - len,
+			"Test_case:\t%s (%u)\n",
+			test_case_str(config->test_case),
+			config->test_case);
+
+	if (config->test_driver)
+		len += snprintf(buf+len, PAGE_SIZE - len,
+				"driver:\t%s\n",
+				config->test_driver);
+	else
+		len += snprintf(buf+len, PAGE_SIZE - len,
+				"driver:\tEMTPY\n");
+
+	if (config->test_fs)
+		len += snprintf(buf+len, PAGE_SIZE - len,
+				"fs:\t%s\n",
+				config->test_fs);
+	else
+		len += snprintf(buf+len, PAGE_SIZE - len,
+				"fs:\tEMTPY\n");
+
+	mutex_unlock(&test_dev->config_mutex);
+
+	return len;
+}
+static DEVICE_ATTR_RO(config);
+
+/*
+ * This ensures we don't allow kicking threads through if our configuration
+ * is faulty.
+ */
+static int __trigger_config_run(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+
+	test_dev->done = 0;
+
+	switch (config->test_case) {
+	case TEST_KMOD_DRIVER:
+		return run_test_driver(test_dev);
+	case TEST_KMOD_FS_TYPE:
+		return run_test_fs_type(test_dev);
+	default:
+		dev_warn(test_dev->dev,
+			 "Invalid test case requested: %u\n",
+			 config->test_case);
+		return -EINVAL;
+	}
+}
+
+static int trigger_config_run(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+	int ret;
+
+	mutex_lock(&test_dev->trigger_mutex);
+	mutex_lock(&test_dev->config_mutex);
+
+	ret = __trigger_config_run(test_dev);
+	if (ret < 0)
+		goto out;
+	dev_info(test_dev->dev, "General test result: %d\n",
+		 config->test_result);
+
+	/*
+	 * We must return 0 after a trigger even unless something went
+	 * wrong with the setup of the test. If the test setup went fine
+	 * then userspace must just check the result of config->test_result.
+	 * One issue with relying on the return from a call in the kernel
+	 * is if the kernel returns a possitive value using this trigger
+	 * will not return the value to userspace, it would be lost.
+	 *
+	 * By not relying on capturing the return value of tests we are using
+	 * through the trigger it also us to run tests with set -e and only
+	 * fail when something went wrong with the driver upon trigger
+	 * requests.
+	 */
+	ret = 0;
+
+out:
+	mutex_unlock(&test_dev->config_mutex);
+	mutex_unlock(&test_dev->trigger_mutex);
+
+	return ret;
+}
+
+static ssize_t
+trigger_config_store(struct device *dev,
+		     struct device_attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	int ret;
+
+	if (test_dev->test_is_oom)
+		return -ENOMEM;
+
+	/* For all intents and purposes we don't care what userspace
+	 * sent this trigger, we care only that we were triggered.
+	 * We treat the return value only for caputuring issues with
+	 * the test setup. At this point all the test variables should
+	 * have been allocated so typically this should never fail.
+	 */
+	ret = trigger_config_run(test_dev);
+	if (unlikely(ret < 0))
+		goto out;
+
+	/*
+	 * Note: any return > 0 will be treated as success
+	 * and the error value will not be available to userspace.
+	 * Do not rely on trying to send to userspace a test value
+	 * return value as possitive return errors will be lost.
+	 */
+	if (WARN_ON(ret > 0))
+		return -EINVAL;
+
+	ret = count;
+out:
+	return ret;
+}
+static DEVICE_ATTR_WO(trigger_config);
+
+/*
+ * XXX: move to kstrncpy() once merged.
+ *
+ * Users should use kfree_const() when freeing these.
+ */
+static int __kstrncpy(char **dst, const char *name, size_t count, gfp_t gfp)
+{
+	*dst = kstrndup(name, count, gfp);
+	if (!*dst)
+		return -ENOSPC;
+	return count;
+}
+
+static int config_copy_test_driver_name(struct test_config *config,
+				    const char *name,
+				    size_t count)
+{
+	return __kstrncpy(&config->test_driver, name, count, GFP_KERNEL);
+}
+
+
+static int config_copy_test_fs(struct test_config *config, const char *name,
+			       size_t count)
+{
+	return __kstrncpy(&config->test_fs, name, count, GFP_KERNEL);
+}
+
+static void __kmod_config_free(struct test_config *config)
+{
+	if (!config)
+		return;
+
+	kfree_const(config->test_driver);
+	config->test_driver = NULL;
+
+	kfree_const(config->test_fs);
+	config->test_driver = NULL;
+}
+
+static void kmod_config_free(struct kmod_test_device *test_dev)
+{
+	struct test_config *config;
+
+	if (!test_dev)
+		return;
+
+	config = &test_dev->config;
+
+	mutex_lock(&test_dev->config_mutex);
+	__kmod_config_free(config);
+	mutex_unlock(&test_dev->config_mutex);
+}
+
+static ssize_t config_test_driver_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+	int copied;
+
+	mutex_lock(&test_dev->config_mutex);
+
+	kfree_const(config->test_driver);
+	config->test_driver = NULL;
+
+	copied = config_copy_test_driver_name(config, buf, count);
+	mutex_unlock(&test_dev->config_mutex);
+
+	return copied;
+}
+
+/*
+ * As per sysfs_kf_seq_show() the buf is max PAGE_SIZE.
+ */
+static ssize_t config_test_show_str(struct mutex *config_mutex,
+				    char *dst,
+				    char *src)
+{
+	int len;
+
+	mutex_lock(config_mutex);
+	len = snprintf(dst, PAGE_SIZE, "%s\n", src);
+	mutex_unlock(config_mutex);
+
+	return len;
+}
+
+static ssize_t config_test_driver_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return config_test_show_str(&test_dev->config_mutex, buf,
+				    config->test_driver);
+}
+static DEVICE_ATTR(config_test_driver, 0644, config_test_driver_show,
+		   config_test_driver_store);
+
+static ssize_t config_test_fs_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+	int copied;
+
+	mutex_lock(&test_dev->config_mutex);
+
+	kfree_const(config->test_fs);
+	config->test_fs = NULL;
+
+	copied = config_copy_test_fs(config, buf, count);
+	mutex_unlock(&test_dev->config_mutex);
+
+	return copied;
+}
+
+static ssize_t config_test_fs_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return config_test_show_str(&test_dev->config_mutex, buf,
+				    config->test_fs);
+}
+static DEVICE_ATTR(config_test_fs, 0644, config_test_fs_show,
+		   config_test_fs_store);
+
+static int trigger_config_run_type(struct kmod_test_device *test_dev,
+				   enum kmod_test_case test_case,
+				   const char *test_str)
+{
+	int copied = 0;
+	struct test_config *config = &test_dev->config;
+
+	mutex_lock(&test_dev->config_mutex);
+
+	switch (test_case) {
+	case TEST_KMOD_DRIVER:
+		kfree_const(config->test_driver);
+		config->test_driver = NULL;
+		copied = config_copy_test_driver_name(config, test_str,
+						      strlen(test_str));
+		break;
+	case TEST_KMOD_FS_TYPE:
+		break;
+		kfree_const(config->test_fs);
+		config->test_driver = NULL;
+		copied = config_copy_test_fs(config, test_str,
+					     strlen(test_str));
+	default:
+		mutex_unlock(&test_dev->config_mutex);
+		return -EINVAL;
+	}
+
+	config->test_case = test_case;
+
+	mutex_unlock(&test_dev->config_mutex);
+
+	if (copied <= 0 || copied != strlen(test_str)) {
+		test_dev->test_is_oom = true;
+		return -ENOMEM;
+	}
+
+	test_dev->test_is_oom = false;
+
+	return trigger_config_run(test_dev);
+}
+
+static void free_test_dev_info(struct kmod_test_device *test_dev)
+{
+	vfree(test_dev->info);
+	test_dev->info = NULL;
+}
+
+static int kmod_config_sync_info(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+
+	free_test_dev_info(test_dev);
+	test_dev->info = vzalloc(config->num_threads *
+				 sizeof(struct kmod_test_device_info));
+	if (!test_dev->info) {
+		dev_err(test_dev->dev, "Cannot alloc test_dev info\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Old kernels may not have this, if you want to port this code to
+ * test it on older kernels.
+ */
+#ifdef get_kmod_umh_limit
+static unsigned int kmod_init_test_thread_limit(void)
+{
+	return get_kmod_umh_limit();
+}
+#else
+static unsigned int kmod_init_test_thread_limit(void)
+{
+	return TEST_START_NUM_THREADS;
+}
+#endif
+
+static int __kmod_config_init(struct kmod_test_device *test_dev)
+{
+	struct test_config *config = &test_dev->config;
+	int ret = -ENOMEM, copied;
+
+	__kmod_config_free(config);
+
+	copied = config_copy_test_driver_name(config, TEST_START_DRIVER,
+					      strlen(TEST_START_DRIVER));
+	if (copied != strlen(TEST_START_DRIVER))
+		goto err_out;
+
+	copied = config_copy_test_fs(config, TEST_START_TEST_FS,
+				     strlen(TEST_START_TEST_FS));
+	if (copied != strlen(TEST_START_TEST_FS))
+		goto err_out;
+
+	config->num_threads = kmod_init_test_thread_limit();
+	config->test_result = 0;
+	config->test_case = TEST_START_TEST_CASE;
+
+	ret = kmod_config_sync_info(test_dev);
+	if (ret)
+		goto err_out;
+
+	test_dev->test_is_oom = false;
+
+	return 0;
+
+err_out:
+	test_dev->test_is_oom = true;
+	WARN_ON(test_dev->test_is_oom);
+
+	__kmod_config_free(config);
+
+	return ret;
+}
+
+static ssize_t reset_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	int ret;
+
+	mutex_lock(&test_dev->trigger_mutex);
+	mutex_lock(&test_dev->config_mutex);
+
+	ret = __kmod_config_init(test_dev);
+	if (ret < 0) {
+		ret = -ENOMEM;
+		dev_err(dev, "could not alloc settings for config trigger: %d\n",
+		       ret);
+		goto out;
+	}
+
+	dev_info(dev, "reset\n");
+	ret = count;
+
+out:
+	mutex_unlock(&test_dev->config_mutex);
+	mutex_unlock(&test_dev->trigger_mutex);
+
+	return ret;
+}
+static DEVICE_ATTR_WO(reset);
+
+static int test_dev_config_update_uint_sync(struct kmod_test_device *test_dev,
+					    const char *buf, size_t size,
+					    unsigned int *config,
+					    int (*test_sync)(struct kmod_test_device *test_dev))
+{
+	int ret;
+	long new;
+	unsigned int old_val;
+
+	ret = kstrtol(buf, 10, &new);
+	if (ret)
+		return ret;
+
+	if (new > UINT_MAX)
+		return -EINVAL;
+
+	mutex_lock(&test_dev->config_mutex);
+
+	old_val = *config;
+	*(unsigned int *)config = new;
+
+	ret = test_sync(test_dev);
+	if (ret) {
+		*(unsigned int *)config = old_val;
+
+		ret = test_sync(test_dev);
+		WARN_ON(ret);
+
+		mutex_unlock(&test_dev->config_mutex);
+		return -EINVAL;
+	}
+
+	mutex_unlock(&test_dev->config_mutex);
+	/* Always return full write size even if we didn't consume all */
+	return size;
+}
+
+static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev,
+					     const char *buf, size_t size,
+					     unsigned int *config,
+					     unsigned int min,
+					     unsigned int max)
+{
+	int ret;
+	long new;
+
+	ret = kstrtol(buf, 10, &new);
+	if (ret)
+		return ret;
+
+	if (new < min || new >  max || new > UINT_MAX)
+		return -EINVAL;
+
+	mutex_lock(&test_dev->config_mutex);
+	*config = new;
+	mutex_unlock(&test_dev->config_mutex);
+
+	/* Always return full write size even if we didn't consume all */
+	return size;
+}
+
+static int test_dev_config_update_int(struct kmod_test_device *test_dev,
+				      const char *buf, size_t size,
+				      int *config)
+{
+	int ret;
+	long new;
+
+	ret = kstrtol(buf, 10, &new);
+	if (ret)
+		return ret;
+
+	if (new > INT_MAX || new < INT_MIN)
+		return -EINVAL;
+
+	mutex_lock(&test_dev->config_mutex);
+	*config = new;
+	mutex_unlock(&test_dev->config_mutex);
+	/* Always return full write size even if we didn't consume all */
+	return size;
+}
+
+static ssize_t test_dev_config_show_int(struct kmod_test_device *test_dev,
+					char *buf,
+					int config)
+{
+	int val;
+
+	mutex_lock(&test_dev->config_mutex);
+	val = config;
+	mutex_unlock(&test_dev->config_mutex);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t test_dev_config_show_uint(struct kmod_test_device *test_dev,
+					 char *buf,
+					 unsigned int config)
+{
+	unsigned int val;
+
+	mutex_lock(&test_dev->config_mutex);
+	val = config;
+	mutex_unlock(&test_dev->config_mutex);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", val);
+}
+
+static ssize_t test_result_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return test_dev_config_update_int(test_dev, buf, count,
+					  &config->test_result);
+}
+
+static ssize_t config_num_threads_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return test_dev_config_update_uint_sync(test_dev, buf, count,
+						&config->num_threads,
+						kmod_config_sync_info);
+}
+
+static ssize_t config_num_threads_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return test_dev_config_show_int(test_dev, buf, config->num_threads);
+}
+static DEVICE_ATTR(config_num_threads, 0644, config_num_threads_show,
+		   config_num_threads_store);
+
+static ssize_t config_test_case_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return test_dev_config_update_uint_range(test_dev, buf, count,
+						 &config->test_case,
+						 __TEST_KMOD_INVALID + 1,
+						 __TEST_KMOD_MAX - 1);
+}
+
+static ssize_t config_test_case_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return test_dev_config_show_uint(test_dev, buf, config->test_case);
+}
+static DEVICE_ATTR(config_test_case, 0644, config_test_case_show,
+		   config_test_case_store);
+
+static ssize_t test_result_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct kmod_test_device *test_dev = dev_to_test_dev(dev);
+	struct test_config *config = &test_dev->config;
+
+	return test_dev_config_show_int(test_dev, buf, config->test_result);
+}
+static DEVICE_ATTR(test_result, 0644, test_result_show, test_result_store);
+
+#define TEST_KMOD_DEV_ATTR(name)		&dev_attr_##name.attr
+
+static struct attribute *test_dev_attrs[] = {
+	TEST_KMOD_DEV_ATTR(trigger_config),
+	TEST_KMOD_DEV_ATTR(config),
+	TEST_KMOD_DEV_ATTR(reset),
+
+	TEST_KMOD_DEV_ATTR(config_test_driver),
+	TEST_KMOD_DEV_ATTR(config_test_fs),
+	TEST_KMOD_DEV_ATTR(config_num_threads),
+	TEST_KMOD_DEV_ATTR(config_test_case),
+	TEST_KMOD_DEV_ATTR(test_result),
+
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(test_dev);
+
+static int kmod_config_init(struct kmod_test_device *test_dev)
+{
+	int ret;
+
+	mutex_lock(&test_dev->config_mutex);
+	ret = __kmod_config_init(test_dev);
+	mutex_unlock(&test_dev->config_mutex);
+
+	return ret;
+}
+
+static struct kmod_test_device *alloc_test_dev_kmod(int idx)
+{
+	int ret;
+	struct kmod_test_device *test_dev;
+	struct miscdevice *misc_dev;
+
+	test_dev = vzalloc(sizeof(struct kmod_test_device));
+	if (!test_dev) {
+		pr_err("Cannot alloc test_dev\n");
+		goto err_out;
+	}
+
+	mutex_init(&test_dev->config_mutex);
+	mutex_init(&test_dev->trigger_mutex);
+	mutex_init(&test_dev->thread_mutex);
+
+	init_completion(&test_dev->kthreads_done);
+
+	ret = kmod_config_init(test_dev);
+	if (ret < 0) {
+		pr_err("Cannot alloc kmod_config_init()\n");
+		goto err_out_free;
+	}
+
+	test_dev->dev_idx = idx;
+	misc_dev = &test_dev->misc_dev;
+
+	misc_dev->minor = MISC_DYNAMIC_MINOR;
+	misc_dev->name = kasprintf(GFP_KERNEL, "test_kmod%d", idx);
+	if (!misc_dev->name) {
+		pr_err("Cannot alloc misc_dev->name\n");
+		goto err_out_free_config;
+	}
+	misc_dev->groups = test_dev_groups;
+
+	return test_dev;
+
+err_out_free_config:
+	free_test_dev_info(test_dev);
+	kmod_config_free(test_dev);
+err_out_free:
+	vfree(test_dev);
+	test_dev = NULL;
+err_out:
+	return NULL;
+}
+
+static void free_test_dev_kmod(struct kmod_test_device *test_dev)
+{
+	if (test_dev) {
+		kfree_const(test_dev->misc_dev.name);
+		test_dev->misc_dev.name = NULL;
+		free_test_dev_info(test_dev);
+		kmod_config_free(test_dev);
+		vfree(test_dev);
+		test_dev = NULL;
+	}
+}
+
+static struct kmod_test_device *register_test_dev_kmod(void)
+{
+	struct kmod_test_device *test_dev = NULL;
+	int ret;
+
+	mutex_unlock(&reg_dev_mutex);
+
+	/* int should suffice for number of devices, test for wrap */
+	if (unlikely(num_test_devs + 1) < 0) {
+		pr_err("reached limit of number of test devices\n");
+		goto out;
+	}
+
+	test_dev = alloc_test_dev_kmod(num_test_devs);
+	if (!test_dev)
+		goto out;
+
+	ret = misc_register(&test_dev->misc_dev);
+	if (ret) {
+		pr_err("could not register misc device: %d\n", ret);
+		free_test_dev_kmod(test_dev);
+		goto out;
+	}
+
+	test_dev->dev = test_dev->misc_dev.this_device;
+	list_add_tail(&test_dev->list, &reg_test_devs);
+	dev_info(test_dev->dev, "interface ready\n");
+
+	num_test_devs++;
+
+out:
+	mutex_unlock(&reg_dev_mutex);
+
+	return test_dev;
+
+}
+
+static int __init test_kmod_init(void)
+{
+	struct kmod_test_device *test_dev;
+	int ret;
+
+	test_dev = register_test_dev_kmod();
+	if (!test_dev) {
+		pr_err("Cannot add first test kmod device\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * With some work we might be able to gracefully enable
+	 * testing with this driver built-in, for now this seems
+	 * rather risky. For those willing to try have at it,
+	 * and enable the below. Good luck! If that works, try
+	 * lowering the init level for more fun.
+	 */
+	if (force_init_test) {
+		ret = trigger_config_run_type(test_dev,
+					      TEST_KMOD_DRIVER, "tun");
+		if (WARN_ON(ret))
+			return ret;
+		ret = trigger_config_run_type(test_dev,
+					      TEST_KMOD_FS_TYPE, "btrfs");
+		if (WARN_ON(ret))
+			return ret;
+	}
+
+	return 0;
+}
+late_initcall(test_kmod_init);
+
+static
+void unregister_test_dev_kmod(struct kmod_test_device *test_dev)
+{
+	mutex_lock(&test_dev->trigger_mutex);
+	mutex_lock(&test_dev->config_mutex);
+
+	test_dev_kmod_stop_tests(test_dev);
+
+	dev_info(test_dev->dev, "removing interface\n");
+	misc_deregister(&test_dev->misc_dev);
+	kfree(&test_dev->misc_dev.name);
+
+	mutex_unlock(&test_dev->config_mutex);
+	mutex_unlock(&test_dev->trigger_mutex);
+
+	free_test_dev_kmod(test_dev);
+}
+
+static void __exit test_kmod_exit(void)
+{
+	struct kmod_test_device *test_dev, *tmp;
+
+	mutex_lock(&reg_dev_mutex);
+	list_for_each_entry_safe(test_dev, tmp, &reg_test_devs, list) {
+		list_del(&test_dev->list);
+		unregister_test_dev_kmod(test_dev);
+	}
+	mutex_unlock(&reg_dev_mutex);
+}
+module_exit(test_kmod_exit);
+
+MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/kmod/Makefile b/tools/testing/selftests/kmod/Makefile
new file mode 100644
index 0000000000000..fa2ccc5fb3de2
--- /dev/null
+++ b/tools/testing/selftests/kmod/Makefile
@@ -0,0 +1,11 @@
+# Makefile for kmod loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := kmod.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
new file mode 100644
index 0000000000000..259f4fd6b5e24
--- /dev/null
+++ b/tools/testing/selftests/kmod/config
@@ -0,0 +1,7 @@
+CONFIG_TEST_KMOD=m
+CONFIG_TEST_LKM=m
+CONFIG_XFS_FS=m
+
+# For the module parameter force_init_test is used
+CONFIG_TUN=m
+CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
new file mode 100644
index 0000000000000..10196a62ed09c
--- /dev/null
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -0,0 +1,635 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This is a stress test script for kmod, the kernel module loader. It uses
+# test_kmod which exposes a series of knobs for the API for us so we can
+# tweak each test in userspace rather than in kernelspace.
+#
+# The way kmod works is it uses the kernel's usermode helper API to eventually
+# call /sbin/modprobe. It has a limit of the number of concurrent calls
+# possible. The kernel interface to load modules is request_module(), however
+# mount uses get_fs_type(). Both behave slightly differently, but the
+# differences are important enough to test each call separately. For this
+# reason test_kmod starts by providing tests for both calls.
+#
+# The test driver test_kmod assumes a series of defaults which you can
+# override by exporting to your environment prior running this script.
+# For instance this script assumes you do not have xfs loaded upon boot.
+# If this is false, export DEFAULT_KMOD_FS="ext4" prior to running this
+# script if the filesyste module you don't have loaded upon bootup
+# is ext4 instead. Refer to allow_user_defaults() for a list of user
+# override variables possible.
+#
+# You'll want at least 4 GiB of RAM to expect to run these tests
+# without running out of memory on them. For other requirements refer
+# to test_reqs()
+
+set -e
+
+TEST_NAME="kmod"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:3:1"
+ALL_TESTS="$ALL_TESTS 0002:3:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:10:1"
+ALL_TESTS="$ALL_TESTS 0006:10:1"
+ALL_TESTS="$ALL_TESTS 0007:5:1"
+
+# Disabled tests:
+#
+# 0008 x 150 -  multithreaded - push kmod_concurrent over max_modprobes for request_module()"
+# Current best-effort failure interpretation:
+# Enough module requests get loaded in place fast enough to reach over the
+# max_modprobes limit and trigger a failure -- before we're even able to
+# start processing pending requests.
+ALL_TESTS="$ALL_TESTS 0008:150:0"
+
+# 0009 x 150 - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
+# Current best-effort failure interpretation:
+#
+# get_fs_type() requests modules using aliases as such the optimization in
+# place today to look for already loaded modules will not take effect and
+# we end up requesting a new module to load, this bumps the kmod_concurrent,
+# and in certain circumstances can lead to pushing the kmod_concurrent over
+# the max_modprobe limit.
+#
+# This test fails much easier than test 0008 since the alias optimizations
+# are not in place.
+ALL_TESTS="$ALL_TESTS 0009:150:0"
+
+test_modprobe()
+{
+       if [ ! -d $DIR ]; then
+               echo "$0: $DIR not present" >&2
+               echo "You must have the following enabled in your kernel:" >&2
+               cat $TEST_DIR/config >&2
+               exit 1
+       fi
+}
+
+function allow_user_defaults()
+{
+	if [ -z $DEFAULT_KMOD_DRIVER ]; then
+		DEFAULT_KMOD_DRIVER="test_module"
+	fi
+
+	if [ -z $DEFAULT_KMOD_FS ]; then
+		DEFAULT_KMOD_FS="xfs"
+	fi
+
+	if [ -z $PROC_DIR ]; then
+		PROC_DIR="/proc/sys/kernel/"
+	fi
+
+	if [ -z $MODPROBE_LIMIT ]; then
+		MODPROBE_LIMIT=50
+	fi
+
+	if [ -z $DIR ]; then
+		DIR="/sys/devices/virtual/misc/${TEST_DRIVER}0/"
+	fi
+
+	if [ -z $DEFAULT_NUM_TESTS ]; then
+		DEFAULT_NUM_TESTS=150
+	fi
+
+	MODPROBE_LIMIT_FILE="${PROC_DIR}/kmod-limit"
+}
+
+test_reqs()
+{
+	if ! which modprobe 2> /dev/null > /dev/null; then
+		echo "$0: You need modprobe installed" >&2
+		exit 1
+	fi
+
+	if ! which kmod 2> /dev/null > /dev/null; then
+		echo "$0: You need kmod installed" >&2
+		exit 1
+	fi
+
+	# kmod 19 has a bad bug where it returns 0 when modprobe
+	# gets called *even* if the module was not loaded due to
+	# some bad heuristics. For details see:
+	#
+	# A work around is possible in-kernel but its rather
+	# complex.
+	KMOD_VERSION=$(kmod --version | awk '{print $3}')
+	if [[ $KMOD_VERSION  -le 19 ]]; then
+		echo "$0: You need at least kmod 20" >&2
+		echo "kmod <= 19 is buggy, for details see:" >&2
+		echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
+		exit 1
+	fi
+
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo $msg must be run as root >&2
+		exit 0
+	fi
+}
+
+function load_req_mod()
+{
+	trap "test_modprobe" EXIT
+
+	if [ ! -d $DIR ]; then
+		# Alanis: "Oh isn't it ironic?"
+		modprobe $TEST_DRIVER
+	fi
+}
+
+test_finish()
+{
+	echo "Test completed"
+}
+
+errno_name_to_val()
+{
+	case "$1" in
+	# kmod calls modprobe and upon of a module not found
+	# modprobe returns just 1... However in the kernel we
+	# *sometimes* see 256...
+	MODULE_NOT_FOUND)
+		echo 256;;
+	SUCCESS)
+		echo 0;;
+	-EPERM)
+		echo -1;;
+	-ENOENT)
+		echo -2;;
+	-EINVAL)
+		echo -22;;
+	-ERR_ANY)
+		echo -123456;;
+	*)
+		echo invalid;;
+	esac
+}
+
+errno_val_to_name()
+	case "$1" in
+	256)
+		echo MODULE_NOT_FOUND;;
+	0)
+		echo SUCCESS;;
+	-1)
+		echo -EPERM;;
+	-2)
+		echo -ENOENT;;
+	-22)
+		echo -EINVAL;;
+	-123456)
+		echo -ERR_ANY;;
+	*)
+		echo invalid;;
+	esac
+
+config_set_test_case_driver()
+{
+	if ! echo -n 1 >$DIR/config_test_case; then
+		echo "$0: Unable to set to test case to driver" >&2
+		exit 1
+	fi
+}
+
+config_set_test_case_fs()
+{
+	if ! echo -n 2 >$DIR/config_test_case; then
+		echo "$0: Unable to set to test case to fs" >&2
+		exit 1
+	fi
+}
+
+config_num_threads()
+{
+	if ! echo -n $1 >$DIR/config_num_threads; then
+		echo "$0: Unable to set to number of threads" >&2
+		exit 1
+	fi
+}
+
+config_get_modprobe_limit()
+{
+	if [[ -f ${MODPROBE_LIMIT_FILE} ]] ; then
+		MODPROBE_LIMIT=$(cat $MODPROBE_LIMIT_FILE)
+	fi
+	echo $MODPROBE_LIMIT
+}
+
+config_num_thread_limit_extra()
+{
+	MODPROBE_LIMIT=$(config_get_modprobe_limit)
+	let EXTRA_LIMIT=$MODPROBE_LIMIT+$1
+	config_num_threads $EXTRA_LIMIT
+}
+
+# For special characters use printf directly,
+# refer to kmod_test_0001
+config_set_driver()
+{
+	if ! echo -n $1 >$DIR/config_test_driver; then
+		echo "$0: Unable to set driver" >&2
+		exit 1
+	fi
+}
+
+config_set_fs()
+{
+	if ! echo -n $1 >$DIR/config_test_fs; then
+		echo "$0: Unable to set driver" >&2
+		exit 1
+	fi
+}
+
+config_get_driver()
+{
+	cat $DIR/config_test_driver
+}
+
+config_get_test_result()
+{
+	cat $DIR/test_result
+}
+
+config_reset()
+{
+	if ! echo -n "1" >"$DIR"/reset; then
+		echo "$0: reset shuld have worked" >&2
+		exit 1
+	fi
+}
+
+config_show_config()
+{
+	echo "----------------------------------------------------"
+	cat "$DIR"/config
+	echo "----------------------------------------------------"
+}
+
+config_trigger()
+{
+	if ! echo -n "1" >"$DIR"/trigger_config 2>/dev/null; then
+		echo "$1: FAIL - loading should have worked"
+		config_show_config
+		exit 1
+	fi
+	echo "$1: OK! - loading kmod test"
+}
+
+config_trigger_want_fail()
+{
+	if echo "1" > $DIR/trigger_config 2>/dev/null; then
+		echo "$1: FAIL - test case was expected to fail"
+		config_show_config
+		exit 1
+	fi
+	echo "$1: OK! - kmod test case failed as expected"
+}
+
+config_expect_result()
+{
+	RC=$(config_get_test_result)
+	RC_NAME=$(errno_val_to_name $RC)
+
+	ERRNO_NAME=$2
+	ERRNO=$(errno_name_to_val $ERRNO_NAME)
+
+	if [[ $ERRNO_NAME = "-ERR_ANY" ]]; then
+		if [[ $RC -ge 0 ]]; then
+			echo "$1: FAIL, test expects $ERRNO_NAME - got $RC_NAME ($RC)" >&2
+			config_show_config
+			exit 1
+		fi
+	elif [[ $RC != $ERRNO ]]; then
+		echo "$1: FAIL, test expects $ERRNO_NAME ($ERRNO) - got $RC_NAME ($RC)" >&2
+		config_show_config
+		exit 1
+	fi
+	echo "$1: OK! - Return value: $RC ($RC_NAME), expected $ERRNO_NAME"
+}
+
+kmod_defaults_driver()
+{
+	config_reset
+	modprobe -r $DEFAULT_KMOD_DRIVER
+	config_set_driver $DEFAULT_KMOD_DRIVER
+}
+
+kmod_defaults_fs()
+{
+	config_reset
+	modprobe -r $DEFAULT_KMOD_FS
+	config_set_fs $DEFAULT_KMOD_FS
+	config_set_test_case_fs
+}
+
+kmod_test_0001_driver()
+{
+	NAME='\000'
+
+	kmod_defaults_driver
+	config_num_threads 1
+	printf '\000' >"$DIR"/config_test_driver
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0001_fs()
+{
+	NAME='\000'
+
+	kmod_defaults_fs
+	config_num_threads 1
+	printf '\000' >"$DIR"/config_test_fs
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0001()
+{
+	kmod_test_0001_driver
+	kmod_test_0001_fs
+}
+
+kmod_test_0002_driver()
+{
+	NAME="nope-$DEFAULT_KMOD_DRIVER"
+
+	kmod_defaults_driver
+	config_set_driver $NAME
+	config_num_threads 1
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0002_fs()
+{
+	NAME="nope-$DEFAULT_KMOD_FS"
+
+	kmod_defaults_fs
+	config_set_fs $NAME
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0002()
+{
+	kmod_test_0002_driver
+	kmod_test_0002_fs
+}
+
+kmod_test_0003()
+{
+	kmod_defaults_fs
+	config_num_threads 1
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0004()
+{
+	kmod_defaults_fs
+	config_num_threads 2
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0005()
+{
+	kmod_defaults_driver
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0006()
+{
+	kmod_defaults_fs
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0007()
+{
+	kmod_test_0005
+	kmod_test_0006
+}
+
+kmod_test_0008()
+{
+	kmod_defaults_driver
+	MODPROBE_LIMIT=$(config_get_modprobe_limit)
+	let EXTRA=$MODPROBE_LIMIT/6
+	config_num_thread_limit_extra $EXTRA
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0009()
+{
+	kmod_defaults_fs
+	MODPROBE_LIMIT=$(config_get_modprobe_limit)
+	let EXTRA=$MODPROBE_LIMIT/4
+	config_num_thread_limit_extra $EXTRA
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+list_tests()
+{
+	echo "Test ID list:"
+	echo
+	echo "TEST_ID x NUM_TEST"
+	echo "TEST_ID:   Test ID"
+	echo "NUM_TESTS: Number of recommended times to run the test"
+	echo
+	echo "0001 x $(get_test_count 0001) - Simple test - 1 thread  for empty string"
+	echo "0002 x $(get_test_count 0002) - Simple test - 1 thread  for modules/filesystems that do not exist"
+	echo "0003 x $(get_test_count 0003) - Simple test - 1 thread  for get_fs_type() only"
+	echo "0004 x $(get_test_count 0004) - Simple test - 2 threads for get_fs_type() only"
+	echo "0005 x $(get_test_count 0005) - multithreaded tests with default setup - request_module() only"
+	echo "0006 x $(get_test_count 0006) - multithreaded tests with default setup - get_fs_type() only"
+	echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()"
+	echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()"
+	echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
+}
+
+usage()
+{
+	NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+	let NUM_TESTS=$NUM_TESTS+1
+	MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+	echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+	echo "		 [ -s <4-number-digit> ] | [ -c <4-number-digit> <test- count>"
+	echo "           [ all ] [ -h | --help ] [ -l ]"
+	echo ""
+	echo "Valid tests: 0001-$MAX_TEST"
+	echo ""
+	echo "    all     Runs all tests (default)"
+	echo "    -t      Run test ID the number amount of times is recommended"
+	echo "    -w      Watch test ID run until it runs into an error"
+	echo "    -c      Run test ID once"
+	echo "    -s      Run test ID x test-count number of times"
+	echo "    -l      List all test ID list"
+	echo " -h|--help  Help"
+	echo
+	echo "If an error every occurs execution will immediately terminate."
+	echo "If you are adding a new test try using -w <test-ID> first to"
+	echo "make sure the test passes a series of tests."
+	echo
+	echo Example uses:
+	echo
+	echo "${TEST_NAME}.sh		-- executes all tests"
+	echo "${TEST_NAME}.sh -t 0008	-- Executes test ID 0008 number of times is recomended"
+	echo "${TEST_NAME}.sh -w 0008	-- Watch test ID 0008 run until an error occurs"
+	echo "${TEST_NAME}.sh -s 0008	-- Run test ID 0008 once"
+	echo "${TEST_NAME}.sh -c 0008 3	-- Run test ID 0008 three times"
+	echo
+	list_tests
+	exit 1
+}
+
+function test_num()
+{
+	re='^[0-9]+$'
+	if ! [[ $1 =~ $re ]]; then
+		usage
+	fi
+}
+
+function get_test_count()
+{
+	test_num $1
+	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	LAST_TWO=${TEST_DATA#*:*}
+	echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+	test_num $1
+	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+	for i in $ALL_TESTS ; do
+		TEST_ID=${i%:*:*}
+		ENABLED=$(get_test_enabled $TEST_ID)
+		TEST_COUNT=$(get_test_count $TEST_ID)
+		if [[ $ENABLED -eq "1" ]]; then
+			test_case $TEST_ID $TEST_COUNT
+		fi
+	done
+}
+
+function watch_log()
+{
+	if [ $# -ne 3 ]; then
+		clear
+	fi
+	date
+	echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+	i=0
+	while [ 1 ]; do
+
+		if [ $# -eq 1 ]; then
+			test_num $1
+			watch_log $i ${TEST_NAME}_test_$1
+			${TEST_NAME}_test_$1
+		else
+			watch_log $i all
+			run_all_tests
+		fi
+		let i=$i+1
+	done
+}
+
+function test_case()
+{
+	NUM_TESTS=$DEFAULT_NUM_TESTS
+	if [ $# -eq 2 ]; then
+		NUM_TESTS=$2
+	fi
+
+	i=0
+	while [ $i -lt $NUM_TESTS ]; do
+		test_num $1
+		watch_log $i ${TEST_NAME}_test_$1 noclear
+		RUN_TEST=${TEST_NAME}_test_$1
+		$RUN_TEST
+		let i=$i+1
+	done
+}
+
+function parse_args()
+{
+	if [ $# -eq 0 ]; then
+		run_all_tests
+	else
+		if [[ "$1" = "all" ]]; then
+			run_all_tests
+		elif [[ "$1" = "-w" ]]; then
+			shift
+			watch_case $@
+		elif [[ "$1" = "-t" ]]; then
+			shift
+			test_num $1
+			test_case $1 $(get_test_count $1)
+		elif [[ "$1" = "-c" ]]; then
+			shift
+			test_num $1
+			test_num $2
+			test_case $1 $2
+		elif [[ "$1" = "-s" ]]; then
+			shift
+			test_case $1 1
+		elif [[ "$1" = "-l" ]]; then
+			list_tests
+		elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+			usage
+		else
+			usage
+		fi
+	fi
+}
+
+test_reqs
+allow_user_defaults
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
-- 
GitLab


From 6d7964a722afc8e4f880b947f174009063028c99 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 14 Jul 2017 14:50:11 -0700
Subject: [PATCH 1659/1958] kmod: throttle kmod thread limit

If we reach the limit of modprobe_limit threads running the next
request_module() call will fail.  The original reason for adding a kill
was to do away with possible issues with in old circumstances which would
create a recursive series of request_module() calls.

We can do better than just be super aggressive and reject calls once we've
reached the limit by simply making pending callers wait until the
threshold has been reduced, and then throttling them in, one by one.

This throttling enables requests over the kmod concurrent limit to be
processed once a pending request completes.  Only the first item queued up
to wait is woken up.  The assumption here is once a task is woken it will
have no other option to also kick the queue to check if there are more
pending tasks -- regardless of whether or not it was successful.

By throttling and processing only max kmod concurrent tasks we ensure we
avoid unexpected fatal request_module() calls, and we keep memory
consumption on module loading to a minimum.

With x86_64 qemu, with 4 cores, 4 GiB of RAM it takes the following run
time to run both tests:

time ./kmod.sh -t 0008
real    0m16.366s
user    0m0.883s
sys     0m8.916s

time ./kmod.sh -t 0009
real    0m50.803s
user    0m0.791s
sys     0m9.852s

Link: http://lkml.kernel.org/r/20170628223155.26472-4-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michal Marek <mmarek@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kmod.c                        | 16 +++++++---------
 tools/testing/selftests/kmod/kmod.sh | 24 ++----------------------
 2 files changed, 9 insertions(+), 31 deletions(-)

diff --git a/kernel/kmod.c b/kernel/kmod.c
index ff68198fe83bc..6d016c5d97c83 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -68,6 +68,7 @@ static DECLARE_RWSEM(umhelper_sem);
  */
 #define MAX_KMOD_CONCURRENT 50
 static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
+static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
 
 /*
 	modprobe_path is set via /proc/sys.
@@ -140,7 +141,6 @@ int __request_module(bool wait, const char *fmt, ...)
 	va_list args;
 	char module_name[MODULE_NAME_LEN];
 	int ret;
-	static int kmod_loop_msg;
 
 	/*
 	 * We don't allow synchronous module loading from async.  Module
@@ -164,14 +164,11 @@ int __request_module(bool wait, const char *fmt, ...)
 		return ret;
 
 	if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
-		/* We may be blaming an innocent here, but unlikely */
-		if (kmod_loop_msg < 5) {
-			printk(KERN_ERR
-			       "request_module: runaway loop modprobe %s\n",
-			       module_name);
-			kmod_loop_msg++;
-		}
-		return -ENOMEM;
+		pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
+				    atomic_read(&kmod_concurrent_max),
+				    MAX_KMOD_CONCURRENT, module_name);
+		wait_event_interruptible(kmod_wq,
+					 atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
 	}
 
 	trace_module_request(module_name, wait, _RET_IP_);
@@ -179,6 +176,7 @@ int __request_module(bool wait, const char *fmt, ...)
 	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
 
 	atomic_inc(&kmod_concurrent_max);
+	wake_up(&kmod_wq);
 
 	return ret;
 }
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 10196a62ed09c..8cecae9a8bca1 100644
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -59,28 +59,8 @@ ALL_TESTS="$ALL_TESTS 0004:1:1"
 ALL_TESTS="$ALL_TESTS 0005:10:1"
 ALL_TESTS="$ALL_TESTS 0006:10:1"
 ALL_TESTS="$ALL_TESTS 0007:5:1"
-
-# Disabled tests:
-#
-# 0008 x 150 -  multithreaded - push kmod_concurrent over max_modprobes for request_module()"
-# Current best-effort failure interpretation:
-# Enough module requests get loaded in place fast enough to reach over the
-# max_modprobes limit and trigger a failure -- before we're even able to
-# start processing pending requests.
-ALL_TESTS="$ALL_TESTS 0008:150:0"
-
-# 0009 x 150 - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
-# Current best-effort failure interpretation:
-#
-# get_fs_type() requests modules using aliases as such the optimization in
-# place today to look for already loaded modules will not take effect and
-# we end up requesting a new module to load, this bumps the kmod_concurrent,
-# and in certain circumstances can lead to pushing the kmod_concurrent over
-# the max_modprobe limit.
-#
-# This test fails much easier than test 0008 since the alias optimizations
-# are not in place.
-ALL_TESTS="$ALL_TESTS 0009:150:0"
+ALL_TESTS="$ALL_TESTS 0008:150:1"
+ALL_TESTS="$ALL_TESTS 0009:150:1"
 
 test_modprobe()
 {
-- 
GitLab


From 077d2ba519b2e8bf1abd80cbade699b1de42cafe Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Fri, 14 Jul 2017 17:28:12 -0400
Subject: [PATCH 1660/1958] replace incorrect strscpy use in FORTIFY_SOURCE

Using strscpy was wrong because FORTIFY_SOURCE is passing the maximum
possible size of the outermost object, but strscpy defines the count
parameter as the exact buffer size, so this could copy past the end of
the source.  This would still be wrong with the planned usage of
__builtin_object_size(p, 1) for intra-object overflow checks since it's
the maximum possible size of the specified object with no guarantee of
it being that large.

Reuse of the fortified functions like this currently makes the runtime
error reporting less precise but that can be improved later on.

Noticed by Dave Jones and KASAN.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/linux/string.h b/include/linux/string.h
index 96f5a5fd03774..049866760e8bb 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -202,17 +202,6 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob
 void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
 
 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
-__FORTIFY_INLINE char *strcpy(char *p, const char *q)
-{
-	size_t p_size = __builtin_object_size(p, 0);
-	size_t q_size = __builtin_object_size(q, 0);
-	if (p_size == (size_t)-1 && q_size == (size_t)-1)
-		return __builtin_strcpy(p, q);
-	if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0)
-		fortify_panic(__func__);
-	return p;
-}
-
 __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
 {
 	size_t p_size = __builtin_object_size(p, 0);
@@ -391,6 +380,18 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
 		fortify_panic(__func__);
 	return __real_kmemdup(p, size, gfp);
 }
+
+/* defined after fortified strlen and memcpy to reuse them */
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strcpy(p, q);
+	memcpy(p, q, strlen(q) + 1);
+	return p;
+}
+
 #endif
 
 #endif /* _LINUX_STRING_H_ */
-- 
GitLab


From eecabf567422eda02bd179f2707d8fe24f52d888 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 8 Jun 2017 04:16:59 -0400
Subject: [PATCH 1661/1958] random: suppress spammy warnings about unseeded
 randomness

Unfortunately, on some models of some architectures getting a fully
seeded CRNG is extremely difficult, and so this can result in dmesg
getting spammed for a surprisingly long time.  This is really bad from
a security perspective, and so architecture maintainers really need to
do what they can to get the CRNG seeded sooner after the system is
booted.  However, users can't do anything actionble to address this,
and spamming the kernel messages log will only just annoy people.

For developers who want to work on improving this situation,
CONFIG_WARN_UNSEEDED_RANDOM has been renamed to
CONFIG_WARN_ALL_UNSEEDED_RANDOM.  By default the kernel will always
print the first use of unseeded randomness.  This way, hopefully the
security obsessed will be happy that there is _some_ indication when
the kernel boots there may be a potential issue with that architecture
or subarchitecture.  To see all uses of unseeded randomness,
developers can enable CONFIG_WARN_ALL_UNSEEDED_RANDOM.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 56 ++++++++++++++++++++++++++++++-------------
 lib/Kconfig.debug     | 24 ++++++++++++++-----
 2 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index fa5bbd5a7ca0f..799d37981d997 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -436,6 +436,7 @@ static void _extract_crng(struct crng_state *crng,
 static void _crng_backtrack_protect(struct crng_state *crng,
 				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used);
 static void process_random_ready_list(void);
+static void _get_random_bytes(void *buf, int nbytes);
 
 /**********************************************************************
  *
@@ -776,7 +777,7 @@ static void crng_initialize(struct crng_state *crng)
 		_extract_entropy(&input_pool, &crng->state[4],
 				 sizeof(__u32) * 12, 0);
 	else
-		get_random_bytes(&crng->state[4], sizeof(__u32) * 12);
+		_get_random_bytes(&crng->state[4], sizeof(__u32) * 12);
 	for (i = 4; i < 16; i++) {
 		if (!arch_get_random_seed_long(&rv) &&
 		    !arch_get_random_long(&rv))
@@ -1466,6 +1467,30 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
 	return ret;
 }
 
+#define warn_unseeded_randomness(previous) \
+	_warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous))
+
+static void _warn_unseeded_randomness(const char *func_name, void *caller,
+				      void **previous)
+{
+#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+	const bool print_once = false;
+#else
+	static bool print_once __read_mostly;
+#endif
+
+	if (print_once ||
+	    crng_ready() ||
+	    (previous && (caller == READ_ONCE(*previous))))
+		return;
+	WRITE_ONCE(*previous, caller);
+#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+	print_once = true;
+#endif
+	pr_notice("random: %s called from %pF with crng_init=%d\n",
+		  func_name, caller, crng_init);
+}
+
 /*
  * This function is the exported kernel interface.  It returns some
  * number of good random numbers, suitable for key generation, seeding
@@ -1476,15 +1501,10 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
  * wait_for_random_bytes() should be called and return 0 at least once
  * at any point prior.
  */
-void get_random_bytes(void *buf, int nbytes)
+static void _get_random_bytes(void *buf, int nbytes)
 {
 	__u8 tmp[CHACHA20_BLOCK_SIZE];
 
-#ifdef CONFIG_WARN_UNSEEDED_RANDOM
-	if (!crng_ready())
-		printk(KERN_NOTICE "random: %pF get_random_bytes called "
-		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
-#endif
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
 	while (nbytes >= CHACHA20_BLOCK_SIZE) {
@@ -1501,6 +1521,14 @@ void get_random_bytes(void *buf, int nbytes)
 		crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE);
 	memzero_explicit(tmp, sizeof(tmp));
 }
+
+void get_random_bytes(void *buf, int nbytes)
+{
+	static void *previous;
+
+	warn_unseeded_randomness(&previous);
+	_get_random_bytes(buf, nbytes);
+}
 EXPORT_SYMBOL(get_random_bytes);
 
 /*
@@ -2064,6 +2092,7 @@ u64 get_random_u64(void)
 	bool use_lock = READ_ONCE(crng_init) < 2;
 	unsigned long flags = 0;
 	struct batched_entropy *batch;
+	static void *previous;
 
 #if BITS_PER_LONG == 64
 	if (arch_get_random_long((unsigned long *)&ret))
@@ -2074,11 +2103,7 @@ u64 get_random_u64(void)
 	    return ret;
 #endif
 
-#ifdef CONFIG_WARN_UNSEEDED_RANDOM
-	if (!crng_ready())
-		printk(KERN_NOTICE "random: %pF get_random_u64 called "
-		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
-#endif
+	warn_unseeded_randomness(&previous);
 
 	batch = &get_cpu_var(batched_entropy_u64);
 	if (use_lock)
@@ -2102,15 +2127,12 @@ u32 get_random_u32(void)
 	bool use_lock = READ_ONCE(crng_init) < 2;
 	unsigned long flags = 0;
 	struct batched_entropy *batch;
+	static void *previous;
 
 	if (arch_get_random_int(&ret))
 		return ret;
 
-#ifdef CONFIG_WARN_UNSEEDED_RANDOM
-	if (!crng_ready())
-		printk(KERN_NOTICE "random: %pF get_random_u32 called "
-		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
-#endif
+	warn_unseeded_randomness(&previous);
 
 	batch = &get_cpu_var(batched_entropy_u32);
 	if (use_lock)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c4159605bfbfc..9d0a244074b9c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1209,10 +1209,9 @@ config STACKTRACE
 	  It is also used by various kernel debugging features that require
 	  stack trace generation.
 
-config WARN_UNSEEDED_RANDOM
-	bool "Warn when kernel uses unseeded randomness"
-	default y
-	depends on DEBUG_KERNEL
+config WARN_ALL_UNSEEDED_RANDOM
+	bool "Warn for all uses of unseeded randomness"
+	default n
 	help
 	  Some parts of the kernel contain bugs relating to their use of
 	  cryptographically secure random numbers before it's actually possible
@@ -1222,8 +1221,21 @@ config WARN_UNSEEDED_RANDOM
 	  are going wrong, so that they might contact developers about fixing
 	  it.
 
-	  Say Y here, unless you simply do not care about using unseeded
-	  randomness and do not want a potential warning message in your logs.
+	  Unfortunately, on some models of some architectures getting
+	  a fully seeded CRNG is extremely difficult, and so this can
+	  result in dmesg getting spammed for a surprisingly long
+	  time.  This is really bad from a security perspective, and
+	  so architecture maintainers really need to do what they can
+	  to get the CRNG seeded sooner after the system is booted.
+	  However, since users can not do anything actionble to
+	  address this, by default the kernel will issue only a single
+	  warning for the first use of unseeded randomness.
+
+	  Say Y here if you want to receive warnings for all uses of
+	  unseeded randomness.  This will be of use primarily for
+	  those developers interersted in improving the security of
+	  Linux kernels running on their architecture (or
+	  subarchitecture).
 
 config DEBUG_KOBJECT
 	bool "kobject debugging"
-- 
GitLab


From 72e5c740f6335e27253b8ff64d23d00337091535 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 30 Jun 2017 16:37:13 +0200
Subject: [PATCH 1662/1958] random: reorder READ_ONCE() in get_random_uXX

Avoid the READ_ONCE in commit 4a072c71f49b ("random: silence compiler
warnings and fix race") if we can leave the function after
arch_get_random_XXX().

Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 799d37981d997..05d255e1c112f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2089,7 +2089,7 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 u64 get_random_u64(void)
 {
 	u64 ret;
-	bool use_lock = READ_ONCE(crng_init) < 2;
+	bool use_lock;
 	unsigned long flags = 0;
 	struct batched_entropy *batch;
 	static void *previous;
@@ -2105,6 +2105,7 @@ u64 get_random_u64(void)
 
 	warn_unseeded_randomness(&previous);
 
+	use_lock = READ_ONCE(crng_init) < 2;
 	batch = &get_cpu_var(batched_entropy_u64);
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
@@ -2124,7 +2125,7 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
 u32 get_random_u32(void)
 {
 	u32 ret;
-	bool use_lock = READ_ONCE(crng_init) < 2;
+	bool use_lock;
 	unsigned long flags = 0;
 	struct batched_entropy *batch;
 	static void *previous;
@@ -2134,6 +2135,7 @@ u32 get_random_u32(void)
 
 	warn_unseeded_randomness(&previous);
 
+	use_lock = READ_ONCE(crng_init) < 2;
 	batch = &get_cpu_var(batched_entropy_u32);
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
-- 
GitLab


From 83a5c5af26ef8005caebd50ce62383a02c5bae82 Mon Sep 17 00:00:00 2001
From: Abhishek Shah <abhishek.shah@broadcom.com>
Date: Fri, 14 Jul 2017 00:34:07 +0530
Subject: [PATCH 1663/1958] net: ethernet: bgmac: Remove unnecessary 'return'
 from platform_bgmac_idm_write

Return type for idm register write callback should be void as 'writel'
API is used for write operation. However, there no need to have 'return'
in this function.

Signed-off-by: Abhishek Shah <abhishek.shah@broadcom.com>
Reviewed-by: Oza Oza <oza.oza@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bgmac-platform.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 73aca97a96bc7..1ca75dea1a778 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -50,7 +50,7 @@ static u32 platform_bgmac_idm_read(struct bgmac *bgmac, u16 offset)
 
 static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
 {
-	return writel(value, bgmac->plat.idm_base + offset);
+	writel(value, bgmac->plat.idm_base + offset);
 }
 
 static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
-- 
GitLab


From a163bdb02beb7df8b2768ce7c74a2b17803c96f9 Mon Sep 17 00:00:00 2001
From: Abhishek Shah <abhishek.shah@broadcom.com>
Date: Fri, 14 Jul 2017 00:34:08 +0530
Subject: [PATCH 1664/1958] net: ethernet: bgmac: Make IDM register space
 optional

IDM operations are usually one time ops and should be done in
firmware itself. Driver is not supposed to touch IDM registers.

However, for some SoCs', driver is performing IDM read/writes.
So this patch masks IDM operations in case firmware is taking
care of IDM operations.

Signed-off-by: Abhishek Shah <abhishek.shah@broadcom.com>
Reviewed-by: Oza Oza <oza.oza@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/bgmac-platform.c    | 19 +++--
 drivers/net/ethernet/broadcom/bgmac.c         | 70 +++++++++++--------
 drivers/net/ethernet/broadcom/bgmac.h         |  1 +
 3 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 1ca75dea1a778..d937083db9a4a 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -55,6 +55,9 @@ static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value)
 
 static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
 {
+	if (!bgmac->plat.idm_base)
+		return true;
+
 	if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & BGMAC_CLK_EN) != BGMAC_CLK_EN)
 		return false;
 	if (bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET)
@@ -66,6 +69,9 @@ static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
 {
 	u32 val;
 
+	if (!bgmac->plat.idm_base)
+		return;
+
 	/* The Reset Control register only contains a single bit to show if the
 	 * controller is currently in reset.  Do a sanity check here, just in
 	 * case the bootloader happened to leave the device in reset.
@@ -180,6 +186,7 @@ static int bgmac_probe(struct platform_device *pdev)
 	bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4;
 	bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP;
 	bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP;
+	bgmac->feature_flags |= BGMAC_FEAT_IDM_MASK;
 
 	bgmac->dev = &pdev->dev;
 	bgmac->dma_dev = &pdev->dev;
@@ -207,15 +214,13 @@ static int bgmac_probe(struct platform_device *pdev)
 		return PTR_ERR(bgmac->plat.base);
 
 	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base");
-	if (!regs) {
-		dev_err(&pdev->dev, "Unable to obtain idm resource\n");
-		return -EINVAL;
+	if (regs) {
+		bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
+		if (IS_ERR(bgmac->plat.idm_base))
+			return PTR_ERR(bgmac->plat.idm_base);
+		bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK;
 	}
 
-	bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(bgmac->plat.idm_base))
-		return PTR_ERR(bgmac->plat.idm_base);
-
 	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base");
 	if (regs) {
 		bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev,
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index ba4d2e145bb9b..48d672b204a48 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -622,9 +622,11 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
 	BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base));
 	BUILD_BUG_ON(BGMAC_MAX_RX_RINGS > ARRAY_SIZE(ring_base));
 
-	if (!(bgmac_idm_read(bgmac, BCMA_IOST) & BCMA_IOST_DMA64)) {
-		dev_err(bgmac->dev, "Core does not report 64-bit DMA\n");
-		return -ENOTSUPP;
+	if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) {
+		if (!(bgmac_idm_read(bgmac, BCMA_IOST) & BCMA_IOST_DMA64)) {
+			dev_err(bgmac->dev, "Core does not report 64-bit DMA\n");
+			return -ENOTSUPP;
+		}
 	}
 
 	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
@@ -855,9 +857,11 @@ static void bgmac_mac_speed(struct bgmac *bgmac)
 static void bgmac_miiconfig(struct bgmac *bgmac)
 {
 	if (bgmac->feature_flags & BGMAC_FEAT_FORCE_SPEED_2500) {
-		bgmac_idm_write(bgmac, BCMA_IOCTL,
-				bgmac_idm_read(bgmac, BCMA_IOCTL) | 0x40 |
-				BGMAC_BCMA_IOCTL_SW_CLKEN);
+		if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) {
+			bgmac_idm_write(bgmac, BCMA_IOCTL,
+					bgmac_idm_read(bgmac, BCMA_IOCTL) |
+					0x40 | BGMAC_BCMA_IOCTL_SW_CLKEN);
+		}
 		bgmac->mac_speed = SPEED_2500;
 		bgmac->mac_duplex = DUPLEX_FULL;
 		bgmac_mac_speed(bgmac);
@@ -874,11 +878,36 @@ static void bgmac_miiconfig(struct bgmac *bgmac)
 	}
 }
 
+static void bgmac_chip_reset_idm_config(struct bgmac *bgmac)
+{
+	u32 iost;
+
+	iost = bgmac_idm_read(bgmac, BCMA_IOST);
+	if (bgmac->feature_flags & BGMAC_FEAT_IOST_ATTACHED)
+		iost &= ~BGMAC_BCMA_IOST_ATTACHED;
+
+	/* 3GMAC: for BCM4707 & BCM47094, only do core reset at bgmac_probe() */
+	if (!(bgmac->feature_flags & BGMAC_FEAT_NO_RESET)) {
+		u32 flags = 0;
+
+		if (iost & BGMAC_BCMA_IOST_ATTACHED) {
+			flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
+			if (!bgmac->has_robosw)
+				flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+		}
+		bgmac_clk_enable(bgmac, flags);
+	}
+
+	if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
+		bgmac_idm_write(bgmac, BCMA_IOCTL,
+				bgmac_idm_read(bgmac, BCMA_IOCTL) &
+				~BGMAC_BCMA_IOCTL_SW_RESET);
+}
+
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipreset */
 static void bgmac_chip_reset(struct bgmac *bgmac)
 {
 	u32 cmdcfg_sr;
-	u32 iost;
 	int i;
 
 	if (bgmac_clk_enabled(bgmac)) {
@@ -899,20 +928,8 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
 		/* TODO: Clear software multicast filter list */
 	}
 
-	iost = bgmac_idm_read(bgmac, BCMA_IOST);
-	if (bgmac->feature_flags & BGMAC_FEAT_IOST_ATTACHED)
-		iost &= ~BGMAC_BCMA_IOST_ATTACHED;
-
-	/* 3GMAC: for BCM4707 & BCM47094, only do core reset at bgmac_probe() */
-	if (!(bgmac->feature_flags & BGMAC_FEAT_NO_RESET)) {
-		u32 flags = 0;
-		if (iost & BGMAC_BCMA_IOST_ATTACHED) {
-			flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
-			if (!bgmac->has_robosw)
-				flags |= BGMAC_BCMA_IOCTL_SW_RESET;
-		}
-		bgmac_clk_enable(bgmac, flags);
-	}
+	if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK))
+		bgmac_chip_reset_idm_config(bgmac);
 
 	/* Request Misc PLL for corerev > 2 */
 	if (bgmac->feature_flags & BGMAC_FEAT_MISC_PLL_REQ) {
@@ -970,11 +987,6 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
 				      BGMAC_CHIPCTL_7_IF_TYPE_RGMII);
 	}
 
-	if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
-		bgmac_idm_write(bgmac, BCMA_IOCTL,
-				bgmac_idm_read(bgmac, BCMA_IOCTL) &
-				~BGMAC_BCMA_IOCTL_SW_RESET);
-
 	/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_reset
 	 * Specs don't say about using BGMAC_CMDCFG_SR, but in this routine
 	 * BGMAC_CMDCFG is read _after_ putting chip in a reset. So it has to
@@ -1497,8 +1509,10 @@ int bgmac_enet_probe(struct bgmac *bgmac)
 	bgmac_clk_enable(bgmac, 0);
 
 	/* This seems to be fixing IRQ by assigning OOB #6 to the core */
-	if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6)
-		bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86);
+	if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) {
+		if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6)
+			bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86);
+	}
 
 	bgmac_chip_reset(bgmac);
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index c1818766c501f..443d57b102641 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -425,6 +425,7 @@
 #define BGMAC_FEAT_CC4_IF_SW_TYPE	BIT(17)
 #define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII	BIT(18)
 #define BGMAC_FEAT_CC7_IF_TYPE_RGMII	BIT(19)
+#define BGMAC_FEAT_IDM_MASK		BIT(20)
 
 struct bgmac_slot_info {
 	union {
-- 
GitLab


From 10d79f7d3c8358f13dc23619838b0d65bcc6e2af Mon Sep 17 00:00:00 2001
From: Abhishek Shah <abhishek.shah@broadcom.com>
Date: Fri, 14 Jul 2017 00:34:09 +0530
Subject: [PATCH 1665/1958] Documentation: devicetree: net: optional idm regs
 for bgmac

Specifying IDM register space in DT is not mendatory for SoCs
where firmware takes care of IDM operations. This patch updates
BGMAC driver's DT binding documentation indicating the same.

Signed-off-by: Abhishek Shah <abhishek.shah@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Reviewed-by: Oza Oza <oza.oza@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/brcm,amac.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/brcm,amac.txt b/Documentation/devicetree/bindings/net/brcm,amac.txt
index 2fefa1a44afd4..ad16c1f481f77 100644
--- a/Documentation/devicetree/bindings/net/brcm,amac.txt
+++ b/Documentation/devicetree/bindings/net/brcm,amac.txt
@@ -11,6 +11,7 @@ Required properties:
  - reg-names:	Names of the registers.
 		"amac_base":	Address and length of the GMAC registers
 		"idm_base":	Address and length of the GMAC IDM registers
+				(required for NSP and Northstar2)
 		"nicpm_base":	Address and length of the NIC Port Manager
 				registers (required for Northstar2)
  - interrupts:	Interrupt number
-- 
GitLab


From dfcc16c9d5b9c9c38fe18a91da63ab5af05d96ca Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 10 Jul 2017 14:00:32 +0200
Subject: [PATCH 1666/1958] ioc3-eth: store pointer to net_device for priviate
 area

Computing the alignment manually for going from priv to pub is probably
not such a good idea, and in general the assumption that going from priv
to pub is possible trivially could change, so rather than relying on
that, we change things to just store a pointer to pub. This was sugested
by DaveM in [1].

[1] http://www.spinics.net/lists/netdev/msg443992.html

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sgi/ioc3-eth.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index b607936e1b3ec..9c0488e0f08ec 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -90,17 +90,13 @@ struct ioc3_private {
 	spinlock_t ioc3_lock;
 	struct mii_if_info mii;
 
+	struct net_device *dev;
 	struct pci_dev *pdev;
 
 	/* Members used by autonegotiation  */
 	struct timer_list ioc3_timer;
 };
 
-static inline struct net_device *priv_netdev(struct ioc3_private *dev)
-{
-	return (void *)dev - ((sizeof(struct net_device) + 31) & ~31);
-}
-
 static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static void ioc3_set_multicast_list(struct net_device *dev);
 static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev);
@@ -427,7 +423,7 @@ static void ioc3_get_eaddr_nic(struct ioc3_private *ip)
 		nic[i] = nic_read_byte(ioc3);
 
 	for (i = 2; i < 8; i++)
-		priv_netdev(ip)->dev_addr[i - 2] = nic[i];
+		ip->dev->dev_addr[i - 2] = nic[i];
 }
 
 /*
@@ -439,7 +435,7 @@ static void ioc3_get_eaddr(struct ioc3_private *ip)
 {
 	ioc3_get_eaddr_nic(ip);
 
-	printk("Ethernet address is %pM.\n", priv_netdev(ip)->dev_addr);
+	printk("Ethernet address is %pM.\n", ip->dev->dev_addr);
 }
 
 static void __ioc3_set_mac_address(struct net_device *dev)
@@ -790,13 +786,12 @@ static void ioc3_timer(unsigned long data)
  */
 static int ioc3_mii_init(struct ioc3_private *ip)
 {
-	struct net_device *dev = priv_netdev(ip);
 	int i, found = 0, res = 0;
 	int ioc3_phy_workaround = 1;
 	u16 word;
 
 	for (i = 0; i < 32; i++) {
-		word = ioc3_mdio_read(dev, i, MII_PHYSID1);
+		word = ioc3_mdio_read(ip->dev, i, MII_PHYSID1);
 
 		if (word != 0xffff && word != 0x0000) {
 			found = 1;
@@ -1276,6 +1271,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	ip = netdev_priv(dev);
+	ip->dev = dev;
 
 	dev->irq = pdev->irq;
 
-- 
GitLab


From e36fef66f4bbe8a51e8f8334a058a42c0f16e373 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Thu, 13 Jul 2017 23:01:27 -0400
Subject: [PATCH 1667/1958] mlx4_en: remove unnecessary returned value check

The function __mlx4_zone_remove_one_entry always returns zero. So
it is not necessary to check it.

Cc: Joe Jin <joe.jin@oracle.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/alloc.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 249a4584401ad..710d6c61150ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -283,7 +283,7 @@ int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc,
 }
 
 /* Should be called under a lock */
-static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry)
+static void __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry)
 {
 	struct mlx4_zone_allocator *zone_alloc = entry->allocator;
 
@@ -315,8 +315,6 @@ static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry)
 		}
 		zone_alloc->mask = mask;
 	}
-
-	return 0;
 }
 
 void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc)
@@ -457,7 +455,7 @@ struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32
 int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid)
 {
 	struct mlx4_zone_entry *zone;
-	int res;
+	int res = 0;
 
 	spin_lock(&zones->lock);
 
@@ -468,7 +466,7 @@ int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid)
 		goto out;
 	}
 
-	res = __mlx4_zone_remove_one_entry(zone);
+	__mlx4_zone_remove_one_entry(zone);
 
 out:
 	spin_unlock(&zones->lock);
-- 
GitLab


From 254d900b801fc04aa524ff7bafe28fdd1dbf0ed6 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Fri, 14 Jul 2017 12:04:16 +0300
Subject: [PATCH 1668/1958] ipv4: ip_do_fragment: fix headroom tests

Some time ago David Woodhouse reported skb_under_panic
when we try to push ethernet header to fragmented ipv6 skbs.
It was fixed for ipv6 by Florian Westphal in
commit 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak")

However similar problem still exist in ipv4.

It does not trigger skb_under_panic due paranoid check
in ip_finish_output2, however according to Alexey Kuznetsov
current state is abnormal and ip_fragment should be fixed too.

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7eb252dceceea..50c74cd890bc7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -599,6 +599,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	hlen = iph->ihl * 4;
 	mtu = mtu - hlen;	/* Size of data space */
 	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
+	ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
 
 	/* When frag_list is given, use it. First, check its validity:
 	 * some transformers could create wrong frag_list or break existing
@@ -614,14 +615,15 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		if (first_len - hlen > mtu ||
 		    ((first_len - hlen) & 7) ||
 		    ip_is_fragment(iph) ||
-		    skb_cloned(skb))
+		    skb_cloned(skb) ||
+		    skb_headroom(skb) < ll_rs)
 			goto slow_path;
 
 		skb_walk_frags(skb, frag) {
 			/* Correct geometry. */
 			if (frag->len > mtu ||
 			    ((frag->len & 7) && frag->next) ||
-			    skb_headroom(frag) < hlen)
+			    skb_headroom(frag) < hlen + ll_rs)
 				goto slow_path_clean;
 
 			/* Partially cloned skb? */
@@ -711,8 +713,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
 
-	ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
-
 	/*
 	 *	Fragment the datagram.
 	 */
-- 
GitLab


From b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Fri, 14 Jul 2017 18:32:45 +0200
Subject: [PATCH 1669/1958] sctp: don't dereference ptr before leaving
 _sctp_walk_{params, errors}()

If the length field of the iterator (|pos.p| or |err|) is past the end
of the chunk, we shouldn't access it.

This bug has been detected by KMSAN. For the following pair of system
calls:

  socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3
  sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0),
         inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0,
         sin6_scope_id=0}, 28) = 1

the tool has reported a use of uninitialized memory:

  ==================================================================
  BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0
  CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
  01/01/2011
  Call Trace:
   <IRQ>
   __dump_stack lib/dump_stack.c:16
   dump_stack+0x172/0x1c0 lib/dump_stack.c:52
   kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
   __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
   __sctp_rcv_init_lookup net/sctp/input.c:1074
   __sctp_rcv_lookup_harder net/sctp/input.c:1233
   __sctp_rcv_lookup net/sctp/input.c:1255
   sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170
   sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984
   ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
   NF_HOOK ./include/linux/netfilter.h:257
   ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
   dst_input ./include/net/dst.h:492
   ip6_rcv_finish net/ipv6/ip6_input.c:69
   NF_HOOK ./include/linux/netfilter.h:257
   ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
   __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
   __netif_receive_skb net/core/dev.c:4246
   process_backlog+0x667/0xba0 net/core/dev.c:4866
   napi_poll net/core/dev.c:5268
   net_rx_action+0xc95/0x1590 net/core/dev.c:5333
   __do_softirq+0x485/0x942 kernel/softirq.c:284
   do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902
   </IRQ>
   do_softirq kernel/softirq.c:328
   __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181
   local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31
   rcu_read_unlock_bh ./include/linux/rcupdate.h:931
   ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124
   ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149
   NF_HOOK_COND ./include/linux/netfilter.h:246
   ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163
   dst_output ./include/net/dst.h:486
   NF_HOOK ./include/linux/netfilter.h:257
   ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261
   sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225
   sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632
   sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
   sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
   sctp_side_effects net/sctp/sm_sideeffect.c:1773
   sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
   sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
   sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
   inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
   sock_sendmsg_nosec net/socket.c:633
   sock_sendmsg net/socket.c:643
   SYSC_sendto+0x608/0x710 net/socket.c:1696
   SyS_sendto+0x8a/0xb0 net/socket.c:1664
   do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
   entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246
  RIP: 0033:0x401133
  RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
  RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133
  RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003
  RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c
  R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000
  R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000
  origin:
   save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
   kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
   kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211
   slab_alloc_node mm/slub.c:2743
   __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351
   __kmalloc_reserve net/core/skbuff.c:138
   __alloc_skb+0x26b/0x840 net/core/skbuff.c:231
   alloc_skb ./include/linux/skbuff.h:933
   sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570
   sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
   sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
   sctp_side_effects net/sctp/sm_sideeffect.c:1773
   sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
   sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
   sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
   inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
   sock_sendmsg_nosec net/socket.c:633
   sock_sendmsg net/socket.c:643
   SYSC_sendto+0x608/0x710 net/socket.c:1696
   SyS_sendto+0x8a/0xb0 net/socket.c:1664
   do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
   return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246
  ==================================================================

Signed-off-by: Alexander Potapenko <glider@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index a9519a06a23b2..980807d7506fe 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -469,6 +469,8 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
 
 #define _sctp_walk_params(pos, chunk, end, member)\
 for (pos.v = chunk->member;\
+     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
+      (void *)chunk + end) &&\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
      ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\
      pos.v += SCTP_PAD4(ntohs(pos.p->length)))
@@ -479,6 +481,8 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
 #define _sctp_walk_errors(err, chunk_hdr, end)\
 for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
 	    sizeof(struct sctp_chunkhdr));\
+     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
+      (void *)chunk_hdr + end) &&\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
      err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
-- 
GitLab


From 8b97ac5bda17cfaa257bcab6180af0f43a2e87e0 Mon Sep 17 00:00:00 2001
From: Greg Rose <gvrose8192@gmail.com>
Date: Fri, 14 Jul 2017 12:42:49 -0700
Subject: [PATCH 1670/1958] openvswitch: Fix for force/commit action failures

When there is an established connection in direction A->B, it is
possible to receive a packet on port B which then executes
ct(commit,force) without first performing ct() - ie, a lookup.
In this case, we would expect that this packet can delete the existing
entry so that we can commit a connection with direction B->A. However,
currently we only perform a check in skb_nfct_cached() for whether
OVS_CS_F_TRACKED is set and OVS_CS_F_INVALID is not set, ie that a
lookup previously occurred. In the above scenario, a lookup has not
occurred but we should still be able to statelessly look up the
existing entry and potentially delete the entry if it is in the
opposite direction.

This patch extends the check to also hint that if the action has the
force flag set, then we will lookup the existing entry so that the
force check at the end of skb_nfct_cached has the ability to delete
the connection.

Fixes: dd41d330b03 ("openvswitch: Add force commit.")
CC: Pravin Shelar <pshelar@nicira.com>
CC: dev@openvswitch.org
Signed-off-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 51 ++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 08679ebb30682..e3c4c6c3fef7f 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -629,6 +629,34 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 	return ct;
 }
 
+static
+struct nf_conn *ovs_ct_executed(struct net *net,
+				const struct sw_flow_key *key,
+				const struct ovs_conntrack_info *info,
+				struct sk_buff *skb,
+				bool *ct_executed)
+{
+	struct nf_conn *ct = NULL;
+
+	/* If no ct, check if we have evidence that an existing conntrack entry
+	 * might be found for this skb.  This happens when we lose a skb->_nfct
+	 * due to an upcall, or if the direction is being forced.  If the
+	 * connection was not confirmed, it is not cached and needs to be run
+	 * through conntrack again.
+	 */
+	*ct_executed = (key->ct_state & OVS_CS_F_TRACKED) &&
+		       !(key->ct_state & OVS_CS_F_INVALID) &&
+		       (key->ct_zone == info->zone.id);
+
+	if (*ct_executed || (!key->ct_state && info->force)) {
+		ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
+					  !!(key->ct_state &
+					  OVS_CS_F_NAT_MASK));
+	}
+
+	return ct;
+}
+
 /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
 static bool skb_nfct_cached(struct net *net,
 			    const struct sw_flow_key *key,
@@ -637,24 +665,17 @@ static bool skb_nfct_cached(struct net *net,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
+	bool ct_executed = true;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	/* If no ct, check if we have evidence that an existing conntrack entry
-	 * might be found for this skb.  This happens when we lose a skb->_nfct
-	 * due to an upcall.  If the connection was not confirmed, it is not
-	 * cached and needs to be run through conntrack again.
-	 */
-	if (!ct && key->ct_state & OVS_CS_F_TRACKED &&
-	    !(key->ct_state & OVS_CS_F_INVALID) &&
-	    key->ct_zone == info->zone.id) {
-		ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
-					  !!(key->ct_state
-					     & OVS_CS_F_NAT_MASK));
-		if (ct)
-			nf_ct_get(skb, &ctinfo);
-	}
 	if (!ct)
+		ct = ovs_ct_executed(net, key, info, skb, &ct_executed);
+
+	if (ct)
+		nf_ct_get(skb, &ctinfo);
+	else
 		return false;
+
 	if (!net_eq(net, read_pnet(&ct->ct_net)))
 		return false;
 	if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
@@ -679,7 +700,7 @@ static bool skb_nfct_cached(struct net *net,
 		return false;
 	}
 
-	return true;
+	return ct_executed;
 }
 
 #ifdef CONFIG_NF_NAT_NEEDED
-- 
GitLab


From 4aea287e90dd61a48268ff2994b56f9799441b62 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 14 Jul 2017 17:49:21 -0400
Subject: [PATCH 1671/1958] tcp_bbr: cut pacing rate only if filled pipe

In bbr_set_pacing_rate(), which decides whether to cut the pacing
rate, there was some code that considered exiting STARTUP to be
equivalent to the notion of filling the pipe (i.e.,
bbr_full_bw_reached()). Specifically, as the code was structured,
exiting STARTUP and going into PROBE_RTT could cause us to cut the
pacing rate down to something silly and low, based on whatever
bandwidth samples we've had so far, when it's possible that all of
them have been small app-limited bandwidth samples that are not
representative of the bandwidth available in the path. (The code was
correct at the time it was written, but the state machine changed
without this spot being adjusted correspondingly.)

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index dbcc9352a48f0..743e97511dc8e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -220,12 +220,11 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
  */
 static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
-	struct bbr *bbr = inet_csk_ca(sk);
 	u64 rate = bw;
 
 	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
 	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
-	if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
 		sk->sk_pacing_rate = rate;
 }
 
-- 
GitLab


From f19fd62dafaf1ed6cf615dba655b82fa9df59074 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 14 Jul 2017 17:49:22 -0400
Subject: [PATCH 1672/1958] tcp_bbr: introduce bbr_bw_to_pacing_rate() helper

Introduce a helper to convert a BBR bandwidth and gain factor to a
pacing rate in bytes per second. This is a pure refactor, but is
needed for two following fixes.

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 743e97511dc8e..29e23b851b975 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -211,6 +211,16 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
 	return rate >> BW_SCALE;
 }
 
+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+	u64 rate = bw;
+
+	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+	return rate;
+}
+
 /* Pace using current bw estimate and a gain factor. In order to help drive the
  * network toward lower queues while maintaining high utilization and low
  * latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -220,10 +230,8 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
  */
 static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
-	u64 rate = bw;
+	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
 
-	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
-	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
 	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
 		sk->sk_pacing_rate = rate;
 }
-- 
GitLab


From 79135b89b8af304456bd67916b80116ddf03d7b6 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 14 Jul 2017 17:49:23 -0400
Subject: [PATCH 1673/1958] tcp_bbr: introduce bbr_init_pacing_rate_from_rtt()
 helper

Introduce a helper to initialize the BBR pacing rate unconditionally,
based on the current cwnd and RTT estimate. This is a pure refactor,
but is needed for two following fixes.

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 29e23b851b975..3276140c25067 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -221,6 +221,23 @@ static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
 	return rate;
 }
 
+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u64 bw;
+	u32 rtt_us;
+
+	if (tp->srtt_us) {		/* any RTT sample yet? */
+		rtt_us = max(tp->srtt_us >> 3, 1U);
+	} else {			 /* no RTT sample yet */
+		rtt_us = USEC_PER_MSEC;	 /* use nominal default RTT */
+	}
+	bw = (u64)tp->snd_cwnd * BW_UNIT;
+	do_div(bw, rtt_us);
+	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+}
+
 /* Pace using current bw estimate and a gain factor. In order to help drive the
  * network toward lower queues while maintaining high utilization and low
  * latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -805,7 +822,6 @@ static void bbr_init(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u64 bw;
 
 	bbr->prior_cwnd = 0;
 	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
@@ -821,11 +837,8 @@ static void bbr_init(struct sock *sk)
 
 	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
 
-	/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
-	bw = (u64)tp->snd_cwnd * BW_UNIT;
-	do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
 	sk->sk_pacing_rate = 0;		/* force an update of sk_pacing_rate */
-	bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+	bbr_init_pacing_rate_from_rtt(sk);
 
 	bbr->restore_cwnd = 0;
 	bbr->round_start = 0;
-- 
GitLab


From 1d3648eb5d1fe9ed3d095ed8fa19ad11ca4c8bc0 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 14 Jul 2017 17:49:24 -0400
Subject: [PATCH 1674/1958] tcp_bbr: remove sk_pacing_rate=0 transient during
 init

Fix a corner case noticed by Eric Dumazet, where BBR's setting
sk->sk_pacing_rate to 0 during initialization could theoretically
cause packets in the sending host to hang if there were packets "in
flight" in the pacing infrastructure at the time the BBR congestion
control state is initialized. This could occur if the pacing
infrastructure happened to race with bbr_init() in a way such that the
pacer read the 0 rather than the immediately following non-zero pacing
rate.

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 3276140c25067..42e0017f2ebcc 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -837,7 +837,6 @@ static void bbr_init(struct sock *sk)
 
 	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
 
-	sk->sk_pacing_rate = 0;		/* force an update of sk_pacing_rate */
 	bbr_init_pacing_rate_from_rtt(sk);
 
 	bbr->restore_cwnd = 0;
-- 
GitLab


From 32984565574da7ed3afa10647bb4020d7a9e6c93 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 14 Jul 2017 17:49:25 -0400
Subject: [PATCH 1675/1958] tcp_bbr: init pacing rate on first RTT sample

Fixes the following behavior: for connections that had no RTT sample
at the time of initializing congestion control, BBR was initializing
the pacing rate to a high nominal rate (based an a guess of RTT=1ms,
in case this is LAN traffic). Then BBR never adjusted the pacing rate
downward upon obtaining an actual RTT sample, if the connection never
filled the pipe (e.g. all sends were small app-limited writes()).

This fix adjusts the pacing rate upon obtaining the first RTT sample.

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 42e0017f2ebcc..69ee877574d08 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -112,7 +112,8 @@ struct bbr {
 		cwnd_gain:10,	/* current gain for setting cwnd */
 		full_bw_cnt:3,	/* number of rounds without large bw gains */
 		cycle_idx:3,	/* current index in pacing_gain cycle array */
-		unused_b:6;
+		has_seen_rtt:1, /* have we seen an RTT sample yet? */
+		unused_b:5;
 	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
 	u32	full_bw;	/* recent bw, to estimate if pipe is full */
 };
@@ -225,11 +226,13 @@ static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
 static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
 	u64 bw;
 	u32 rtt_us;
 
 	if (tp->srtt_us) {		/* any RTT sample yet? */
 		rtt_us = max(tp->srtt_us >> 3, 1U);
+		bbr->has_seen_rtt = 1;
 	} else {			 /* no RTT sample yet */
 		rtt_us = USEC_PER_MSEC;	 /* use nominal default RTT */
 	}
@@ -247,8 +250,12 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
  */
 static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
 	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
 
+	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+		bbr_init_pacing_rate_from_rtt(sk);
 	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
 		sk->sk_pacing_rate = rate;
 }
@@ -837,6 +844,7 @@ static void bbr_init(struct sock *sk)
 
 	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
 
+	bbr->has_seen_rtt = 0;
 	bbr_init_pacing_rate_from_rtt(sk);
 
 	bbr->restore_cwnd = 0;
-- 
GitLab


From 5771a8c08880cdca3bfb4a3fc6d309d6bba20877 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 15 Jul 2017 15:22:10 -0700
Subject: [PATCH 1676/1958] Linux v4.13-rc1

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 44f5bfd9fa55a..b4fb9a1d1594e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 12
+PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
GitLab


From 1009ccdc64ee2c8451f76b548589f6b989d13412 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 11 Jul 2017 20:53:55 -0700
Subject: [PATCH 1677/1958] hwmon: (applesmc) Avoid buffer overruns

gcc 7.1 complains that the driver uses sprintf() and thus does not validate
the length of output buffers.

drivers/hwmon/applesmc.c: In function 'applesmc_show_fan_position':
drivers/hwmon/applesmc.c:82:21: warning:
	'%d' directive writing between 1 and 5 bytes into a region of size 4

Fix the problem by using scnprintf() instead of sprintf() throughout the
driver. Also explicitly limit the number of supported fans to avoid actual
buffer overruns and thus invalid keys.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/applesmc.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 0af7fd311979d..76c34f4fde132 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -566,6 +566,8 @@ static int applesmc_init_smcreg_try(void)
 	if (ret)
 		return ret;
 	s->fan_count = tmp[0];
+	if (s->fan_count > 10)
+		s->fan_count = 10;
 
 	ret = applesmc_get_lower_bound(&s->temp_begin, "T");
 	if (ret)
@@ -811,7 +813,8 @@ static ssize_t applesmc_show_fan_speed(struct device *dev,
 	char newkey[5];
 	u8 buffer[2];
 
-	sprintf(newkey, fan_speed_fmt[to_option(attr)], to_index(attr));
+	scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
+		  to_index(attr));
 
 	ret = applesmc_read_key(newkey, buffer, 2);
 	speed = ((buffer[0] << 8 | buffer[1]) >> 2);
@@ -834,7 +837,8 @@ static ssize_t applesmc_store_fan_speed(struct device *dev,
 	if (kstrtoul(sysfsbuf, 10, &speed) < 0 || speed >= 0x4000)
 		return -EINVAL;		/* Bigger than a 14-bit value */
 
-	sprintf(newkey, fan_speed_fmt[to_option(attr)], to_index(attr));
+	scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)],
+		  to_index(attr));
 
 	buffer[0] = (speed >> 6) & 0xff;
 	buffer[1] = (speed << 2) & 0xff;
@@ -903,7 +907,7 @@ static ssize_t applesmc_show_fan_position(struct device *dev,
 	char newkey[5];
 	u8 buffer[17];
 
-	sprintf(newkey, FAN_ID_FMT, to_index(attr));
+	scnprintf(newkey, sizeof(newkey), FAN_ID_FMT, to_index(attr));
 
 	ret = applesmc_read_key(newkey, buffer, 16);
 	buffer[16] = 0;
@@ -1116,7 +1120,8 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num)
 		}
 		for (i = 0; i < num; i++) {
 			node = &grp->nodes[i];
-			sprintf(node->name, grp->format, i + 1);
+			scnprintf(node->name, sizeof(node->name), grp->format,
+				  i + 1);
 			node->sda.index = (grp->option << 16) | (i & 0xffff);
 			node->sda.dev_attr.show = grp->show;
 			node->sda.dev_attr.store = grp->store;
-- 
GitLab


From 5ffff2854a0b88fdd279fdb9cbf053d16c8fb2d6 Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Tue, 11 Jul 2017 17:30:33 +0100
Subject: [PATCH 1678/1958] f2fs: remove extra inode_unlock() in error path

This commit removes an extra inode_unlock() that is being done in function
f2fs_ioc_setflags error path.  While there, get rid of a useless 'out'
label as well.

Fixes: 0abd675e97e6 ("f2fs: support plain user/group quota")
Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a0e6d2c65a9ec..2706130c261b0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1538,7 +1538,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
 	/* Is it quota file? Do not allow user to mess with it */
 	if (IS_NOQUOTA(inode)) {
-		inode_unlock(inode);
 		ret = -EPERM;
 		goto unlock_out;
 	}
@@ -1549,9 +1548,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
 	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
 		if (!capable(CAP_LINUX_IMMUTABLE)) {
-			inode_unlock(inode);
 			ret = -EPERM;
-			goto out;
+			goto unlock_out;
 		}
 	}
 
@@ -1564,7 +1562,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 	f2fs_mark_inode_dirty_sync(inode, false);
 unlock_out:
 	inode_unlock(inode);
-out:
 	mnt_drop_write_file(filp);
 	return ret;
 }
-- 
GitLab


From c925dc162f770578ff4a65ec9b08270382dba9e6 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 11 Jul 2017 14:56:49 -0700
Subject: [PATCH 1679/1958] f2fs: Don't clear SGID when inheriting ACLs

This patch copies commit b7f8a09f80:
"btrfs: Don't clear SGID when inheriting ACLs" written by Jan.

Fixes: 073931017b49d9458aa351605b43a7e34598caef
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index a140c5e3dc54e..b4b8438c42ef9 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -211,7 +211,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
-		if (acl) {
+		if (acl && !ipage) {
 			error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
 			if (error)
 				return error;
-- 
GitLab


From cd7b03e9cc94f249ae3b54cf5a41d4b9fb297e0b Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:42 +0530
Subject: [PATCH 1680/1958] isdn: hisax: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  13686	   2064	   4416	  20166	   4ec6	drivers/isdn/hisax/config.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  15030	    720	   4416	  20166	   4ec6	drivers/isdn/hisax/config.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hisax/config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index c7d68675b0287..7108bdb8742e7 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -1909,7 +1909,7 @@ static void EChannel_proc_rcv(struct hisax_d_if *d_if)
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
 
-static struct pci_device_id hisax_pci_tbl[] __used = {
+static const struct pci_device_id hisax_pci_tbl[] __used = {
 #ifdef CONFIG_HISAX_FRITZPCI
 	{PCI_VDEVICE(AVM,      PCI_DEVICE_ID_AVM_A1)			},
 #endif
-- 
GitLab


From 3651003d4fd805c3a7761d1db3a7491d5547afb3 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:43 +0530
Subject: [PATCH 1681/1958] isdn: hisax: hfc4s8s_l1: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  10512	    536	      4	  11052	   2b2c	drivers/isdn/hisax/hfc4s8s_l1.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  10672	    376	      4	  11052	   2b2c	drivers/isdn/hisax/hfc4s8s_l1.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hisax/hfc4s8s_l1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c
index 90f051ce02590..9090cc1e1f299 100644
--- a/drivers/isdn/hisax/hfc4s8s_l1.c
+++ b/drivers/isdn/hisax/hfc4s8s_l1.c
@@ -86,7 +86,7 @@ typedef struct {
 	char *device_name;
 } hfc4s8s_param;
 
-static struct pci_device_id hfc4s8s_ids[] = {
+static const struct pci_device_id hfc4s8s_ids[] = {
 	{.vendor = PCI_VENDOR_ID_CCD,
 	 .device = PCI_DEVICE_ID_4S,
 	 .subvendor = 0x1397,
-- 
GitLab


From 6cfc3d86be2647686c8eebe41df69e5cd37dd8e6 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:44 +0530
Subject: [PATCH 1682/1958] isdn: hisax: hisax_fcpcipnp: constify
 pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   5989	    576	      0	   6565	   19a5 isdn/hisax/hisax_fcpcipnp.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   6085	    480	      0	   6565	   19a5 isdn/hisax/hisax_fcpcipnp.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hisax/hisax_fcpcipnp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.c b/drivers/isdn/hisax/hisax_fcpcipnp.c
index 5a9f39ed1d5d9..e4f7573ba9bf6 100644
--- a/drivers/isdn/hisax/hisax_fcpcipnp.c
+++ b/drivers/isdn/hisax/hisax_fcpcipnp.c
@@ -52,7 +52,7 @@ module_param(debug, int, 0);
 MODULE_AUTHOR("Kai Germaschewski <kai.germaschewski@gmx.de>/Karsten Keil <kkeil@suse.de>");
 MODULE_DESCRIPTION("AVM Fritz!PCI/PnP ISDN driver");
 
-static struct pci_device_id fcpci_ids[] = {
+static const struct pci_device_id fcpci_ids[] = {
 	{ .vendor      = PCI_VENDOR_ID_AVM,
 	  .device      = PCI_DEVICE_ID_AVM_A1,
 	  .subvendor   = PCI_ANY_ID,
-- 
GitLab


From cf46d3518934bc61d9a01eb25aaaa364f325f876 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:45 +0530
Subject: [PATCH 1683/1958] isdn: eicon: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   6224	    655	      8	   6887	   1ae7	isdn/hardware/eicon/divasmain.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   6608	    271	      8	   6887	   1ae7	isdn/hardware/eicon/divasmain.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/eicon/divasmain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index 8b7ad4f1ab016..b2023e08dcd28 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -110,7 +110,7 @@ typedef struct _diva_os_thread_dpc {
 /*
   This table should be sorted by PCI device ID
 */
-static struct pci_device_id divas_pci_tbl[] = {
+static const struct pci_device_id divas_pci_tbl[] = {
 	/* Diva Server BRI-2M PCI 0xE010 */
 	{ PCI_VDEVICE(EICON, PCI_DEVICE_ID_EICON_MAESTRA),
 	  CARDTYPE_MAESTRA_PCI },
-- 
GitLab


From 0d41668987f5c0df5ad5b5e1e3bf69aaf4b36c52 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:46 +0530
Subject: [PATCH 1684/1958] isdn: mISDN: netjet: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  10941	   1776	     16	  12733	   31bd isdn/hardware/mISDN/netjet.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  11005	   1712	     16	  12733	   31bd isdn/hardware/mISDN/netjet.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/netjet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index afde4edef9ae8..6a6d848bd18ee 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -1137,7 +1137,7 @@ static void nj_remove(struct pci_dev *pdev)
 /* We cannot select cards with PCI_SUB... IDs, since here are cards with
  * SUB IDs set to PCI_ANY_ID, so we need to match all and reject
  * known other cards which not work with this driver - see probe function */
-static struct pci_device_id nj_pci_ids[] = {
+static const struct pci_device_id nj_pci_ids[] = {
 	{ PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_300,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{ }
-- 
GitLab


From e3b79fcff622fb5537d40db30fc9a2632d6a8456 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:47 +0530
Subject: [PATCH 1685/1958] isdn: mISDN: hfcmulti: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  63450	   1536	   1492	  66478	  103ae	isdn/hardware/mISDN/hfcmulti.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  64698	    288	   1492	  66478	  103ae	isdn/hardware/mISDN/hfcmulti.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index aea0c9616ea51..3cf07b8ced1c0 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -5348,7 +5348,7 @@ static const struct hm_map hfcm_map[] = {
 
 #undef H
 #define H(x)	((unsigned long)&hfcm_map[x])
-static struct pci_device_id hfmultipci_ids[] = {
+static const struct pci_device_id hfmultipci_ids[] = {
 
 	/* Cards with HFC-4S Chip */
 	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD,
-- 
GitLab


From e8336ed0b35261603871a6c5e829f69051530505 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:48 +0530
Subject: [PATCH 1686/1958] isdn: mISDN: w6692: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  13959	   4080	     24	  18063	   468f isdn/hardware/mISDN/w6692.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  14087	   3952	     24	  18063	   468f isdn/hardware/mISDN/w6692.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/w6692.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c
index 3052c836b89f7..d80072fef4341 100644
--- a/drivers/isdn/hardware/mISDN/w6692.c
+++ b/drivers/isdn/hardware/mISDN/w6692.c
@@ -1398,7 +1398,7 @@ w6692_remove_pci(struct pci_dev *pdev)
 			pr_notice("%s: drvdata already removed\n", __func__);
 }
 
-static struct pci_device_id w6692_ids[] = {
+static const struct pci_device_id w6692_ids[] = {
 	{ PCI_VENDOR_ID_DYNALINK, PCI_DEVICE_ID_DYNALINK_IS64PH,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, (ulong)&w6692_map[0]},
 	{ PCI_VENDOR_ID_WINBOND2, PCI_DEVICE_ID_WINBOND2_6692,
-- 
GitLab


From 1d9c8fa013cdea5d864f5332d0e203312de3a93d Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:49 +0530
Subject: [PATCH 1687/1958] isdn: mISDN: avmfritz: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   9963	   1936	     16	  11915	   2e8b	isdn/hardware/mISDN/avmfritz.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  10091	   1808	     16	  11915	   2e8b	isdn/hardware/mISDN/avmfritz.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/avmfritz.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
index e3fa1cd64470c..dce6632daae1c 100644
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -1142,7 +1142,7 @@ fritz_remove_pci(struct pci_dev *pdev)
 			pr_info("%s: drvdata already removed\n", __func__);
 }
 
-static struct pci_device_id fcpci_ids[] = {
+static const struct pci_device_id fcpci_ids[] = {
 	{ PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_A1, PCI_ANY_ID, PCI_ANY_ID,
 	  0, 0, (unsigned long) "Fritz!Card PCI"},
 	{ PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_A1_V2, PCI_ANY_ID, PCI_ANY_ID,
-- 
GitLab


From ed038e7e68ca8fc92b5cb877cd44d930ef98c52a Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:50 +0530
Subject: [PATCH 1688/1958] isdn: mISDN: hfcpci: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  21656	   1024	     96	  22776	   58f8	isdn/hardware/mISDN/hfcpci.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  22424	    256	     96	  22776	   58f8	isdn/hardware/mISDN/hfcpci.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcpci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index 5dc246d71c167..d2e401a8090e3 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -2161,7 +2161,7 @@ static const struct _hfc_map hfc_map[] =
 	{},
 };
 
-static struct pci_device_id hfc_ids[] =
+static const struct pci_device_id hfc_ids[] =
 {
 	{ PCI_VDEVICE(CCD, PCI_DEVICE_ID_CCD_2BD0),
 	  (unsigned long) &hfc_map[0] },
-- 
GitLab


From 65f96417363148989bc5b7735b2fc4e824c9d2b9 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Sat, 15 Jul 2017 09:55:51 +0530
Subject: [PATCH 1689/1958] isdn: avm: c4: constify pci_device_id.

pci_device_id are not supposed to change at runtime. All functions
working with pci_device_id provided by <linux/pci.h> work with
const pci_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
  11803	    544	      1	  12348	   303c	isdn/hardware/avm/c4.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
  11931	    416	      1	  12348	   303c	isdn/hardware/avm/c4.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/avm/c4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 40c7e2cf423bf..034cabac699dc 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -42,7 +42,7 @@ static char *revision = "$Revision: 1.1.2.2 $";
 
 static bool suppress_pollack;
 
-static struct pci_device_id c4_pci_tbl[] = {
+static const struct pci_device_id c4_pci_tbl[] = {
 	{ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_C4, 0, 0, (unsigned long)4 },
 	{ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_C2, 0, 0, (unsigned long)2 },
 	{ }			/* Terminating entry */
-- 
GitLab


From ea6c3077678f969577e0f75aecda9478c3840912 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 15 Jul 2017 10:24:47 -0700
Subject: [PATCH 1690/1958] dt-bindings: net: Remove duplicate NSP Ethernet MAC
 binding document

Commit 07d4510f5251 ("dt-bindings: net: bgmac: add bindings documentation for
bgmac") added both brcm,amac-nsp.txt and brcm,bgmac-nsp.txt. The former is
actually the one that got updated and is in use by the bgmac driver while the
latter is duplicating the former and is not used nor updated.

Fixes: 07d4510f5251 ("dt-bindings: net: bgmac: add bindings documentation for bgmac")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/brcm,bgmac-nsp.txt           | 24 -------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt

diff --git a/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt b/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt
deleted file mode 100644
index 022946caa7e23..0000000000000
--- a/Documentation/devicetree/bindings/net/brcm,bgmac-nsp.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Broadcom GMAC Ethernet Controller Device Tree Bindings
--------------------------------------------------------------
-
-Required properties:
- - compatible:	"brcm,bgmac-nsp"
- - reg:		Address and length of the GMAC registers,
-		Address and length of the GMAC IDM registers
- - reg-names:	Names of the registers.  Must have both "gmac_base" and
-		"idm_base"
- - interrupts:	Interrupt number
-
-Optional properties:
-- mac-address:	See ethernet.txt file in the same directory
-
-Examples:
-
-gmac0: ethernet@18022000 {
-	compatible = "brcm,bgmac-nsp";
-	reg = <0x18022000 0x1000>,
-	      <0x18110000 0x1000>;
-	reg-names = "gmac_base", "idm_base";
-	interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
-	status = "disabled";
-};
-- 
GitLab


From 876dbadd53a7102e2a84afc84ea2bd3ee6dc5636 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Fri, 14 Jul 2017 16:12:09 -0700
Subject: [PATCH 1691/1958] net: bcmgenet: Fix unmapping of fragments in
 bcmgenet_xmit()

In case we fail to map a single fragment, we would be leaving the
transmit ring populated with stale entries.

This commit introduces the helper function bcmgenet_put_txcb()
which takes care of rewinding the per-ring write pointer back to
where we left.

It also consolidates the functionality of bcmgenet_xmit_single()
and bcmgenet_xmit_frag() into the bcmgenet_xmit() function to
make the unmapping of control blocks cleaner.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/genet/bcmgenet.c    | 191 ++++++++----------
 1 file changed, 85 insertions(+), 106 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index daca1c9d254be..20021525f7954 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1202,6 +1202,23 @@ static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv,
 	return tx_cb_ptr;
 }
 
+static struct enet_cb *bcmgenet_put_txcb(struct bcmgenet_priv *priv,
+					 struct bcmgenet_tx_ring *ring)
+{
+	struct enet_cb *tx_cb_ptr;
+
+	tx_cb_ptr = ring->cbs;
+	tx_cb_ptr += ring->write_ptr - ring->cb_ptr;
+
+	/* Rewinding local write pointer */
+	if (ring->write_ptr == ring->cb_ptr)
+		ring->write_ptr = ring->end_ptr;
+	else
+		ring->write_ptr--;
+
+	return tx_cb_ptr;
+}
+
 /* Simple helper to free a control block's resources */
 static void bcmgenet_free_cb(struct enet_cb *cb)
 {
@@ -1380,95 +1397,6 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev)
 	bcmgenet_tx_reclaim(dev, &priv->tx_rings[DESC_INDEX]);
 }
 
-/* Transmits a single SKB (either head of a fragment or a single SKB)
- * caller must hold priv->lock
- */
-static int bcmgenet_xmit_single(struct net_device *dev,
-				struct sk_buff *skb,
-				u16 dma_desc_flags,
-				struct bcmgenet_tx_ring *ring)
-{
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct device *kdev = &priv->pdev->dev;
-	struct enet_cb *tx_cb_ptr;
-	unsigned int skb_len;
-	dma_addr_t mapping;
-	u32 length_status;
-	int ret;
-
-	tx_cb_ptr = bcmgenet_get_txcb(priv, ring);
-
-	if (unlikely(!tx_cb_ptr))
-		BUG();
-
-	tx_cb_ptr->skb = skb;
-
-	skb_len = skb_headlen(skb);
-
-	mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE);
-	ret = dma_mapping_error(kdev, mapping);
-	if (ret) {
-		priv->mib.tx_dma_failed++;
-		netif_err(priv, tx_err, dev, "Tx DMA map failed\n");
-		dev_kfree_skb(skb);
-		return ret;
-	}
-
-	dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping);
-	dma_unmap_len_set(tx_cb_ptr, dma_len, skb_len);
-	length_status = (skb_len << DMA_BUFLENGTH_SHIFT) | dma_desc_flags |
-			(priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT) |
-			DMA_TX_APPEND_CRC;
-
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		length_status |= DMA_TX_DO_CSUM;
-
-	dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, length_status);
-
-	return 0;
-}
-
-/* Transmit a SKB fragment */
-static int bcmgenet_xmit_frag(struct net_device *dev,
-			      skb_frag_t *frag,
-			      u16 dma_desc_flags,
-			      struct bcmgenet_tx_ring *ring)
-{
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct device *kdev = &priv->pdev->dev;
-	struct enet_cb *tx_cb_ptr;
-	unsigned int frag_size;
-	dma_addr_t mapping;
-	int ret;
-
-	tx_cb_ptr = bcmgenet_get_txcb(priv, ring);
-
-	if (unlikely(!tx_cb_ptr))
-		BUG();
-
-	tx_cb_ptr->skb = NULL;
-
-	frag_size = skb_frag_size(frag);
-
-	mapping = skb_frag_dma_map(kdev, frag, 0, frag_size, DMA_TO_DEVICE);
-	ret = dma_mapping_error(kdev, mapping);
-	if (ret) {
-		priv->mib.tx_dma_failed++;
-		netif_err(priv, tx_err, dev, "%s: Tx DMA map failed\n",
-			  __func__);
-		return ret;
-	}
-
-	dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping);
-	dma_unmap_len_set(tx_cb_ptr, dma_len, frag_size);
-
-	dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping,
-		    (frag_size << DMA_BUFLENGTH_SHIFT) | dma_desc_flags |
-		    (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT));
-
-	return 0;
-}
-
 /* Reallocate the SKB to put enough headroom in front of it and insert
  * the transmit checksum offsets in the descriptors
  */
@@ -1535,11 +1463,16 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev,
 static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct device *kdev = &priv->pdev->dev;
 	struct bcmgenet_tx_ring *ring = NULL;
+	struct enet_cb *tx_cb_ptr;
 	struct netdev_queue *txq;
 	unsigned long flags = 0;
 	int nr_frags, index;
-	u16 dma_desc_flags;
+	dma_addr_t mapping;
+	unsigned int size;
+	skb_frag_t *frag;
+	u32 len_stat;
 	int ret;
 	int i;
 
@@ -1592,27 +1525,49 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	dma_desc_flags = DMA_SOP;
-	if (nr_frags == 0)
-		dma_desc_flags |= DMA_EOP;
+	for (i = 0; i <= nr_frags; i++) {
+		tx_cb_ptr = bcmgenet_get_txcb(priv, ring);
 
-	/* Transmit single SKB or head of fragment list */
-	ret = bcmgenet_xmit_single(dev, skb, dma_desc_flags, ring);
-	if (ret) {
-		ret = NETDEV_TX_OK;
-		goto out;
-	}
+		if (unlikely(!tx_cb_ptr))
+			BUG();
 
-	/* xmit fragment */
-	for (i = 0; i < nr_frags; i++) {
-		ret = bcmgenet_xmit_frag(dev,
-					 &skb_shinfo(skb)->frags[i],
-					 (i == nr_frags - 1) ? DMA_EOP : 0,
-					 ring);
+		if (!i) {
+			/* Transmit single SKB or head of fragment list */
+			tx_cb_ptr->skb = skb;
+			size = skb_headlen(skb);
+			mapping = dma_map_single(kdev, skb->data, size,
+						 DMA_TO_DEVICE);
+		} else {
+			/* xmit fragment */
+			tx_cb_ptr->skb = NULL;
+			frag = &skb_shinfo(skb)->frags[i - 1];
+			size = skb_frag_size(frag);
+			mapping = skb_frag_dma_map(kdev, frag, 0, size,
+						   DMA_TO_DEVICE);
+		}
+
+		ret = dma_mapping_error(kdev, mapping);
 		if (ret) {
+			priv->mib.tx_dma_failed++;
+			netif_err(priv, tx_err, dev, "Tx DMA map failed\n");
 			ret = NETDEV_TX_OK;
-			goto out;
+			goto out_unmap_frags;
 		}
+		dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping);
+		dma_unmap_len_set(tx_cb_ptr, dma_len, size);
+
+		len_stat = (size << DMA_BUFLENGTH_SHIFT) |
+			   (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT);
+
+		if (!i) {
+			len_stat |= DMA_TX_APPEND_CRC | DMA_SOP;
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
+				len_stat |= DMA_TX_DO_CSUM;
+		}
+		if (i == nr_frags)
+			len_stat |= DMA_EOP;
+
+		dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, len_stat);
 	}
 
 	skb_tx_timestamp(skb);
@@ -1635,6 +1590,30 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 	spin_unlock_irqrestore(&ring->lock, flags);
 
 	return ret;
+
+out_unmap_frags:
+	/* Back up for failed control block mapping */
+	bcmgenet_put_txcb(priv, ring);
+
+	/* Unmap successfully mapped control blocks */
+	while (i-- > 0) {
+		tx_cb_ptr = bcmgenet_put_txcb(priv, ring);
+		if (tx_cb_ptr->skb)
+			dma_unmap_single(kdev,
+					 dma_unmap_addr(tx_cb_ptr, dma_addr),
+					 dma_unmap_len(tx_cb_ptr, dma_len),
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(kdev,
+				       dma_unmap_addr(tx_cb_ptr, dma_addr),
+				       dma_unmap_len(tx_cb_ptr, dma_len),
+				       DMA_TO_DEVICE);
+		dma_unmap_addr_set(tx_cb_ptr, dma_addr, 0);
+		tx_cb_ptr->skb = NULL;
+	}
+
+	dev_kfree_skb(skb);
+	goto out;
 }
 
 static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv,
-- 
GitLab


From f48bed16a756f5bc0244acd581f61968f7d7c2a4 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Fri, 14 Jul 2017 16:12:10 -0700
Subject: [PATCH 1692/1958] net: bcmgenet: Free skb after last Tx frag

Since the skb is attached to the first control block of a fragmented
skb it is possible that the skb could be freed when reclaiming that
control block before all fragments of the skb have been consumed by
the hardware and unmapped.

This commit introduces first_cb and last_cb pointers to the skb
control block used by the driver to keep track of which transmit
control blocks within a transmit ring are the first and last ones
associated with the skb.

It then splits the bcmgenet_free_cb() function into transmit
(bcmgenet_free_tx_cb) and receive (bcmgenet_free_rx_cb) versions
that can handle the unmapping of dma mapped memory and cleaning up
the corresponding control block structure so that the skb is only
freed after the last associated transmit control block is reclaimed.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/genet/bcmgenet.c    | 142 ++++++++++--------
 .../net/ethernet/broadcom/genet/bcmgenet.h    |   2 +
 2 files changed, 84 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 20021525f7954..7b0b399aaedd4 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1219,14 +1219,6 @@ static struct enet_cb *bcmgenet_put_txcb(struct bcmgenet_priv *priv,
 	return tx_cb_ptr;
 }
 
-/* Simple helper to free a control block's resources */
-static void bcmgenet_free_cb(struct enet_cb *cb)
-{
-	dev_kfree_skb_any(cb->skb);
-	cb->skb = NULL;
-	dma_unmap_addr_set(cb, dma_addr, 0);
-}
-
 static inline void bcmgenet_rx_ring16_int_disable(struct bcmgenet_rx_ring *ring)
 {
 	bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_RXDMA_DONE,
@@ -1277,18 +1269,72 @@ static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_tx_ring *ring)
 				 INTRL2_CPU_MASK_SET);
 }
 
+/* Simple helper to free a transmit control block's resources
+ * Returns an skb when the last transmit control block associated with the
+ * skb is freed.  The skb should be freed by the caller if necessary.
+ */
+static struct sk_buff *bcmgenet_free_tx_cb(struct device *dev,
+					   struct enet_cb *cb)
+{
+	struct sk_buff *skb;
+
+	skb = cb->skb;
+
+	if (skb) {
+		cb->skb = NULL;
+		if (cb == GENET_CB(skb)->first_cb)
+			dma_unmap_single(dev, dma_unmap_addr(cb, dma_addr),
+					 dma_unmap_len(cb, dma_len),
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dev, dma_unmap_addr(cb, dma_addr),
+				       dma_unmap_len(cb, dma_len),
+				       DMA_TO_DEVICE);
+		dma_unmap_addr_set(cb, dma_addr, 0);
+
+		if (cb == GENET_CB(skb)->last_cb)
+			return skb;
+
+	} else if (dma_unmap_addr(cb, dma_addr)) {
+		dma_unmap_page(dev,
+			       dma_unmap_addr(cb, dma_addr),
+			       dma_unmap_len(cb, dma_len),
+			       DMA_TO_DEVICE);
+		dma_unmap_addr_set(cb, dma_addr, 0);
+	}
+
+	return 0;
+}
+
+/* Simple helper to free a receive control block's resources */
+static struct sk_buff *bcmgenet_free_rx_cb(struct device *dev,
+					   struct enet_cb *cb)
+{
+	struct sk_buff *skb;
+
+	skb = cb->skb;
+	cb->skb = NULL;
+
+	if (dma_unmap_addr(cb, dma_addr)) {
+		dma_unmap_single(dev, dma_unmap_addr(cb, dma_addr),
+				 dma_unmap_len(cb, dma_len), DMA_FROM_DEVICE);
+		dma_unmap_addr_set(cb, dma_addr, 0);
+	}
+
+	return skb;
+}
+
 /* Unlocked version of the reclaim routine */
 static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
 					  struct bcmgenet_tx_ring *ring)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct device *kdev = &priv->pdev->dev;
-	struct enet_cb *tx_cb_ptr;
-	unsigned int pkts_compl = 0;
+	unsigned int txbds_processed = 0;
 	unsigned int bytes_compl = 0;
-	unsigned int c_index;
+	unsigned int pkts_compl = 0;
 	unsigned int txbds_ready;
-	unsigned int txbds_processed = 0;
+	unsigned int c_index;
+	struct sk_buff *skb;
 
 	/* Clear status before servicing to reduce spurious interrupts */
 	if (ring->index == DESC_INDEX)
@@ -1309,21 +1355,12 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
 
 	/* Reclaim transmitted buffers */
 	while (txbds_processed < txbds_ready) {
-		tx_cb_ptr = &priv->tx_cbs[ring->clean_ptr];
-		if (tx_cb_ptr->skb) {
+		skb = bcmgenet_free_tx_cb(&priv->pdev->dev,
+					  &priv->tx_cbs[ring->clean_ptr]);
+		if (skb) {
 			pkts_compl++;
-			bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent;
-			dma_unmap_single(kdev,
-					 dma_unmap_addr(tx_cb_ptr, dma_addr),
-					 dma_unmap_len(tx_cb_ptr, dma_len),
-					 DMA_TO_DEVICE);
-			bcmgenet_free_cb(tx_cb_ptr);
-		} else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) {
-			dma_unmap_page(kdev,
-				       dma_unmap_addr(tx_cb_ptr, dma_addr),
-				       dma_unmap_len(tx_cb_ptr, dma_len),
-				       DMA_TO_DEVICE);
-			dma_unmap_addr_set(tx_cb_ptr, dma_addr, 0);
+			bytes_compl += GENET_CB(skb)->bytes_sent;
+			dev_kfree_skb_any(skb);
 		}
 
 		txbds_processed++;
@@ -1533,13 +1570,12 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		if (!i) {
 			/* Transmit single SKB or head of fragment list */
-			tx_cb_ptr->skb = skb;
+			GENET_CB(skb)->first_cb = tx_cb_ptr;
 			size = skb_headlen(skb);
 			mapping = dma_map_single(kdev, skb->data, size,
 						 DMA_TO_DEVICE);
 		} else {
 			/* xmit fragment */
-			tx_cb_ptr->skb = NULL;
 			frag = &skb_shinfo(skb)->frags[i - 1];
 			size = skb_frag_size(frag);
 			mapping = skb_frag_dma_map(kdev, frag, 0, size,
@@ -1556,6 +1592,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 		dma_unmap_addr_set(tx_cb_ptr, dma_addr, mapping);
 		dma_unmap_len_set(tx_cb_ptr, dma_len, size);
 
+		tx_cb_ptr->skb = skb;
+
 		len_stat = (size << DMA_BUFLENGTH_SHIFT) |
 			   (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT);
 
@@ -1570,6 +1608,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 		dmadesc_set(priv, tx_cb_ptr->bd_addr, mapping, len_stat);
 	}
 
+	GENET_CB(skb)->last_cb = tx_cb_ptr;
 	skb_tx_timestamp(skb);
 
 	/* Decrement total BD count and advance our write pointer */
@@ -1598,18 +1637,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Unmap successfully mapped control blocks */
 	while (i-- > 0) {
 		tx_cb_ptr = bcmgenet_put_txcb(priv, ring);
-		if (tx_cb_ptr->skb)
-			dma_unmap_single(kdev,
-					 dma_unmap_addr(tx_cb_ptr, dma_addr),
-					 dma_unmap_len(tx_cb_ptr, dma_len),
-					 DMA_TO_DEVICE);
-		else
-			dma_unmap_page(kdev,
-				       dma_unmap_addr(tx_cb_ptr, dma_addr),
-				       dma_unmap_len(tx_cb_ptr, dma_len),
-				       DMA_TO_DEVICE);
-		dma_unmap_addr_set(tx_cb_ptr, dma_addr, 0);
-		tx_cb_ptr->skb = NULL;
+		bcmgenet_free_tx_cb(kdev, tx_cb_ptr);
 	}
 
 	dev_kfree_skb(skb);
@@ -1645,14 +1673,12 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv,
 	}
 
 	/* Grab the current Rx skb from the ring and DMA-unmap it */
-	rx_skb = cb->skb;
-	if (likely(rx_skb))
-		dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr),
-				 priv->rx_buf_len, DMA_FROM_DEVICE);
+	rx_skb = bcmgenet_free_rx_cb(kdev, cb);
 
 	/* Put the new Rx skb on the ring */
 	cb->skb = skb;
 	dma_unmap_addr_set(cb, dma_addr, mapping);
+	dma_unmap_len_set(cb, dma_len, priv->rx_buf_len);
 	dmadesc_set_addr(priv, cb->bd_addr, mapping);
 
 	/* Return the current Rx skb to caller */
@@ -1859,22 +1885,16 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
 
 static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv)
 {
-	struct device *kdev = &priv->pdev->dev;
+	struct sk_buff *skb;
 	struct enet_cb *cb;
 	int i;
 
 	for (i = 0; i < priv->num_rx_bds; i++) {
 		cb = &priv->rx_cbs[i];
 
-		if (dma_unmap_addr(cb, dma_addr)) {
-			dma_unmap_single(kdev,
-					 dma_unmap_addr(cb, dma_addr),
-					 priv->rx_buf_len, DMA_FROM_DEVICE);
-			dma_unmap_addr_set(cb, dma_addr, 0);
-		}
-
-		if (cb->skb)
-			bcmgenet_free_cb(cb);
+		skb = bcmgenet_free_rx_cb(&priv->pdev->dev, cb);
+		if (skb)
+			dev_kfree_skb_any(skb);
 	}
 }
 
@@ -2458,8 +2478,10 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv)
 
 static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
 {
-	int i;
 	struct netdev_queue *txq;
+	struct sk_buff *skb;
+	struct enet_cb *cb;
+	int i;
 
 	bcmgenet_fini_rx_napi(priv);
 	bcmgenet_fini_tx_napi(priv);
@@ -2468,10 +2490,10 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
 	bcmgenet_dma_teardown(priv);
 
 	for (i = 0; i < priv->num_tx_bds; i++) {
-		if (priv->tx_cbs[i].skb != NULL) {
-			dev_kfree_skb(priv->tx_cbs[i].skb);
-			priv->tx_cbs[i].skb = NULL;
-		}
+		cb = priv->tx_cbs + i;
+		skb = bcmgenet_free_tx_cb(&priv->pdev->dev, cb);
+		if (skb)
+			dev_kfree_skb(skb);
 	}
 
 	for (i = 0; i < priv->hw_params->tx_queues; i++) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index efd07020b89fc..b9344de669f84 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -544,6 +544,8 @@ struct bcmgenet_hw_params {
 };
 
 struct bcmgenet_skb_cb {
+	struct enet_cb *first_cb;	/* First control block of SKB */
+	struct enet_cb *last_cb;	/* Last control block of SKB */
 	unsigned int bytes_sent;	/* bytes on the wire (no TSB) */
 };
 
-- 
GitLab


From 15d5193104a457d5151840247e3bce561c42e3e9 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Fri, 30 Jun 2017 12:02:18 +0100
Subject: [PATCH 1693/1958] staging: comedi: ni_mio_common: fix AO timer
 off-by-one regression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As reported by Éric Piel on the Comedi mailing list (see
<https://groups.google.com/forum/#!topic/comedi_list/ueZiR7vTLOU/discussion>),
the analog output asynchronous commands are running too fast with a
period 50 ns shorter than it should be.  This affects all boards with AO
command support that are supported by the "ni_pcimio", "ni_atmio", and
"ni_mio_cs" drivers.

This is a regression bug introduced by commit 080e6795cba3 ("staging:
comedi: ni_mio_common: Cleans up/clarifies ni_ao_cmd"), specifically,
this line in `ni_ao_cmd_set_update()`:

		/* following line: N-1 per STC */
		ni_stc_writel(dev, trigvar - 1, NISTC_AO_UI_LOADA_REG);

The `trigvar` variable value comes from a call to `ni_ns_to_timer()`
which converts a timer period in nanoseconds to a hardware divisor
value. The function already reduces the divisor by 1 as required by the
hardware, so the above line should not reduce it further by 1.  Fix it
by replacing `trigvar` by `trigvar - 1` in the above line, and remove
the misleading comment.

Reported-by: Éric Piel <piel@delmic.com>
Fixes: 080e6795cba3 ("staging: comedi: ni_mio_common: Cleans up/clarifies ni_ao_cmd")
Cc: Éric Piel <piel@delmic.com>
Cc: Spencer E. Olson <olsonse@umich.edu>
Cc: <stable@vger.kernel.org> # 4.7+
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/drivers/ni_mio_common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index b2e3828889813..2f7bfc1c59e51 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -3116,8 +3116,7 @@ static void ni_ao_cmd_set_update(struct comedi_device *dev,
 		/* following line: 2-1 per STC */
 		ni_stc_writel(dev, 1, NISTC_AO_UI_LOADA_REG);
 		ni_stc_writew(dev, NISTC_AO_CMD1_UI_LOAD, NISTC_AO_CMD1_REG);
-		/* following line: N-1 per STC */
-		ni_stc_writel(dev, trigvar - 1, NISTC_AO_UI_LOADA_REG);
+		ni_stc_writel(dev, trigvar, NISTC_AO_UI_LOADA_REG);
 	} else { /* TRIG_EXT */
 		/* FIXME:  assert scan_begin_arg != 0, ret failure otherwise */
 		devpriv->ao_cmd2  |= NISTC_AO_CMD2_BC_GATE_ENA;
-- 
GitLab


From 397fcd12e4890624228e25468a1ab5f6fc78a1e3 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 11 Jul 2017 22:55:04 +0300
Subject: [PATCH 1694/1958] staging: vchiq_arm: fix error codes in probe

If vchiq_debugfs_init() fails, then we accidentally return a valid
pointer casted to int on error.  This code is simpler if we get rid of
the "ptr_err" variable and just use "err" throughout.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../vc04_services/interface/vchiq_arm/vchiq_arm.c      | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 030bec855d86a..314ffac50bb83 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -3391,7 +3391,6 @@ static int vchiq_probe(struct platform_device *pdev)
 	struct device_node *fw_node;
 	struct rpi_firmware *fw;
 	int err;
-	void *ptr_err;
 
 	fw_node = of_parse_phandle(pdev->dev.of_node, "firmware", 0);
 	if (!fw_node) {
@@ -3427,14 +3426,14 @@ static int vchiq_probe(struct platform_device *pdev)
 
 	/* create sysfs entries */
 	vchiq_class = class_create(THIS_MODULE, DEVICE_NAME);
-	ptr_err = vchiq_class;
-	if (IS_ERR(ptr_err))
+	err = PTR_ERR(vchiq_class);
+	if (IS_ERR(vchiq_class))
 		goto failed_class_create;
 
 	vchiq_dev = device_create(vchiq_class, NULL,
 		vchiq_devid, NULL, "vchiq");
-	ptr_err = vchiq_dev;
-	if (IS_ERR(ptr_err))
+	err = PTR_ERR(vchiq_dev);
+	if (IS_ERR(vchiq_dev))
 		goto failed_device_create;
 
 	/* create debugfs entries */
@@ -3455,7 +3454,6 @@ static int vchiq_probe(struct platform_device *pdev)
 	class_destroy(vchiq_class);
 failed_class_create:
 	cdev_del(&vchiq_cdev);
-	err = PTR_ERR(ptr_err);
 failed_cdev_add:
 	unregister_chrdev_region(vchiq_devid, 1);
 failed_platform_init:
-- 
GitLab


From 011ce71609e9cd0751b495e24ff8c83fa8d7c7db Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 Jul 2017 10:51:33 +0300
Subject: [PATCH 1695/1958] staging: rtl8188eu: memory leak in
 rtw_free_cmd_obj()

We were fixing checkpatch.pl warnings and accidentally reversed this
condition.

Fixes: 5b29aaaa1e3c ("staging: rtl8188eu: removes comparison to null")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8188eu/core/rtw_cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8188eu/core/rtw_cmd.c b/drivers/staging/rtl8188eu/core/rtw_cmd.c
index 002d09159896e..a69007ef77bf0 100644
--- a/drivers/staging/rtl8188eu/core/rtw_cmd.c
+++ b/drivers/staging/rtl8188eu/core/rtw_cmd.c
@@ -132,7 +132,7 @@ void rtw_free_cmd_obj(struct cmd_obj *pcmd)
 		kfree(pcmd->parmbuf);
 	}
 
-	if (!pcmd->rsp) {
+	if (pcmd->rsp) {
 		if (pcmd->rspsz != 0) {
 			/* free rsp in cmd_obj */
 			kfree(pcmd->rsp);
-- 
GitLab


From 677e6a1ab4edcf4b5c48ab68acf9d7f01555a8fb Mon Sep 17 00:00:00 2001
From: Lynn Lei <lynnl.wit@gmail.com>
Date: Mon, 3 Jul 2017 20:05:10 +0800
Subject: [PATCH 1696/1958] staging: sm750fb: fixed a assignment typo

fixed a typo issue in get_mxclk_freq() function.

the original code using PLL_CTRL_M_SHIFT for shifting to set N flag.
which is not right, it should be PLL_CTRL_N_SHIFT.

both PLL_CTRL_M_SHIFT and PLL_CTRL_N_SHIFT
  defined in drivers/staging/sm750fb/ddk750_reg.h

Signed-off-by: Lynn Lei <lynnl.wit@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/sm750fb/ddk750_chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/sm750fb/ddk750_chip.c b/drivers/staging/sm750fb/ddk750_chip.c
index 944dd25924beb..4754f7a206846 100644
--- a/drivers/staging/sm750fb/ddk750_chip.c
+++ b/drivers/staging/sm750fb/ddk750_chip.c
@@ -40,7 +40,7 @@ static unsigned int get_mxclk_freq(void)
 
 	pll_reg = peek32(MXCLK_PLL_CTRL);
 	M = (pll_reg & PLL_CTRL_M_MASK) >> PLL_CTRL_M_SHIFT;
-	N = (pll_reg & PLL_CTRL_N_MASK) >> PLL_CTRL_M_SHIFT;
+	N = (pll_reg & PLL_CTRL_N_MASK) >> PLL_CTRL_N_SHIFT;
 	OD = (pll_reg & PLL_CTRL_OD_MASK) >> PLL_CTRL_OD_SHIFT;
 	POD = (pll_reg & PLL_CTRL_POD_MASK) >> PLL_CTRL_POD_SHIFT;
 
-- 
GitLab


From 5f8a16156aa1b2d0223eaee9dacdfb9bc096f610 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 11 Jul 2017 07:44:05 -0400
Subject: [PATCH 1697/1958] x86/cpu: Use indirect call to measure performance
 in init_amd_k6()

This old piece of code is supposed to measure the performance of indirect
calls to determine if the processor is buggy or not, however the compiler
optimizer turns it into a direct call.

Use the OPTIMIZER_HIDE_VAR() macro to thwart the optimization, so that a real
indirect call is generated.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1707110737530.8746@file01.intranet.prod.int.rdu2.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8f5fd46..3b9e220621f83 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -134,6 +134,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
 
 		n = K6_BUG_LOOP;
 		f_vide = vide;
+		OPTIMIZER_HIDE_VAR(f_vide);
 		d = rdtsc();
 		while (n--)
 			f_vide();
-- 
GitLab


From dfc1ed1caef09e6be147f9e8d72631e788ffefd9 Mon Sep 17 00:00:00 2001
From: Justin Ernst <justin.ernst@hpe.com>
Date: Thu, 13 Jul 2017 13:33:23 -0500
Subject: [PATCH 1698/1958] x86/platform/uv/BAU: Fix congested_response_us not
 taking effect

Bug fix for the BAU tunable congested_cycles not being set to the user
defined value. Instead of referencing a global variable when deciding
on BAU shutdown, a node will reference its own tunable set
value ( cong_response_us). This results in the user set
tunable value congested_response_us taking effect correctly.

Signed-off-by: Justin Ernst <justin.ernst@hpe.com>
Acked-by: Andrew Banman <abanman@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: mike.travis@hpe.com
Cc: sivanich@hpe.com
Link: http://lkml.kernel.org/r/1499970803-282432-1-git-send-email-justin.ernst@hpe.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/uv/tlb_uv.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index d4a61ddf9e622..fd8759111b65b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -40,7 +40,6 @@ static int timeout_base_ns[] = {
 static int timeout_us;
 static bool nobau = true;
 static int nobau_perm;
-static cycles_t congested_cycles;
 
 /* tunables: */
 static int max_concurr		= MAX_BAU_CONCURRENT;
@@ -829,10 +828,10 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
 		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
 			bcp->period_requests++;
 			bcp->period_time += elapsed;
-			if ((elapsed > congested_cycles) &&
+			if ((elapsed > usec_2_cycles(bcp->cong_response_us)) &&
 			    (bcp->period_requests > bcp->cong_reps) &&
 			    ((bcp->period_time / bcp->period_requests) >
-							congested_cycles)) {
+					usec_2_cycles(bcp->cong_response_us))) {
 				stat->s_congested++;
 				disable_for_period(bcp, stat);
 			}
@@ -2228,7 +2227,6 @@ static int __init uv_bau_init(void)
 	}
 
 	nuvhubs = uv_num_possible_blades();
-	congested_cycles = usec_2_cycles(congested_respns_us);
 
 	uv_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
-- 
GitLab


From a86054236d356b973b360c43c4d43fef6555db13 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 16 Jul 2017 13:57:27 +0200
Subject: [PATCH 1699/1958] binfmt_flat: Use %u to format u32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several variables had their types changed from unsigned long to u32, but
the printk()-style format to print them wasn't updated, leading to:

    fs/binfmt_flat.c: In function ‘load_flat_file’:
    fs/binfmt_flat.c:577: warning: format ‘%ld’ expects type ‘long int’, but argument 3 has type ‘u32’

Fixes: 468138d78510688f ("binfmt_flat: flat_{get,put}_addr_from_rp() should be able to fail")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_flat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 69ec23daa25e0..a1e6860b6f46a 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -574,7 +574,7 @@ static int load_flat_file(struct linux_binprm *bprm,
 				MAX_SHARED_LIBS * sizeof(unsigned long),
 				FLAT_DATA_ALIGN);
 
-		pr_debug("Allocated data+bss+stack (%ld bytes): %lx\n",
+		pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
 			 data_len + bss_len + stack_len, datapos);
 
 		fpos = ntohl(hdr->data_start);
-- 
GitLab


From 9f42ef4bc26b83bd33feb9a9fa55e25e5752a495 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 16 Jul 2017 13:57:28 +0200
Subject: [PATCH 1700/1958] blackfin, m68k: Fix flat_set_persistent() for
 unsigned long to u32 changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several variables had their types changed from unsigned long to u32, but
the arch-specific implementations of flat_set_persistent() weren't
updated, leading to compiler warnings on blackfin and m68k:

    fs/binfmt_flat.c: In function ‘load_flat_file’:
    fs/binfmt_flat.c:799: warning: passing argument 2 of ‘flat_set_persistent’ from incompatible pointer type

Fixes: 468138d78510688f ("binfmt_flat: flat_{get,put}_addr_from_rp() should be able to fail")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/blackfin/include/asm/flat.h | 3 +--
 arch/m68k/include/asm/flat.h     | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h
index 296d7f56fbfd0..f1d6ba7afbf24 100644
--- a/arch/blackfin/include/asm/flat.h
+++ b/arch/blackfin/include/asm/flat.h
@@ -44,8 +44,7 @@ flat_get_relocate_addr (unsigned long relval)
 	return relval & 0x03ffffff; /* Mask out top 6 bits */
 }
 
-static inline int flat_set_persistent(unsigned long relval,
-				      unsigned long *persistent)
+static inline int flat_set_persistent(u32 relval, u32 *persistent)
 {
 	int type = (relval >> 26) & 7;
 	if (type == 3) {
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index 48b62790fe70f..b2a41f5b3890a 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -30,8 +30,7 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel)
 }
 #define	flat_get_relocate_addr(rel)		(rel)
 
-static inline int flat_set_persistent(unsigned long relval,
-				      unsigned long *persistent)
+static inline int flat_set_persistent(u32 relval, u32 *persistent)
 {
 	return 0;
 }
-- 
GitLab


From 87b2c3fc63175bb32c96d4ec58152d4fdd5a4ae1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 16 Jul 2017 13:57:29 +0200
Subject: [PATCH 1701/1958] h8300: Add missing closing parenthesis in
 flat_get_addr_from_rp()

    In file included from include/linux/flat.h:13:0,
                     from fs/binfmt_flat.c:36:
    arch/h8300/include/asm/flat.h: In function 'flat_get_addr_from_rp':
    arch/h8300/include/asm/flat.h:28:3: error: expected ')' before 'val'
       val &= 0x00ffffff;
       ^
    arch/h8300/include/asm/flat.h:31:1: error: expected expression before '}' token
     }
     ^
    In file included from include/linux/flat.h:13:0,
                     from fs/binfmt_flat.c:36:
    arch/h8300/include/asm/flat.h:26:6: warning: unused variable 'val' [-Wunused-variable]
      u32 val = get_unaligned((__force u32 *)rp);
          ^
    In file included from include/linux/flat.h:13:0,
                     from fs/binfmt_flat.c:36:
    arch/h8300/include/asm/flat.h:31:1: warning: no return statement in function returning non-void [-Wreturn-type]
     }
     ^

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Fixes: 468138d78510688f ("binfmt_flat: flat_{get,put}_addr_from_rp() should be able to fail")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/h8300/include/asm/flat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
index 18d024251738d..7e0bd6fa15324 100644
--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -24,7 +24,7 @@ static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
 					u32 *addr, u32 *persistent)
 {
 	u32 val = get_unaligned((__force u32 *)rp);
-	if (!(flags & FLAT_FLAG_GOTPIC)
+	if (!(flags & FLAT_FLAG_GOTPIC))
 		val &= 0x00ffffff;
 	*addr = val;
 	return 0;
-- 
GitLab


From 541768b08a400d9d292cfd9c898401b8178856ac Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Thu, 29 Jun 2017 10:55:14 +0530
Subject: [PATCH 1702/1958] usb: dwc3: core: Call dwc3_core_get_phy() before
 initializing phys

commit f54edb539c116 ("usb: dwc3: core: initialize ULPI before trying to
get the PHY") moved call to dwc3_core_get_phy() from dwc3_probe() to
dwc3_core_init() after dwc3_core_soft_reset(). But
dwc3_core_soft_reset() calls phy_init(), therefore dwc3_core_get_phy()
needs to be called before dwc3_core_soft_reset().

Fix this by moving call to dwc3_core_get_phy() before
dwc3_core_soft_reset().

This fixes the following abort seen on DRA7xx platforms
[   24.769118] usb usb2: SerialNumber: xhci-hcd.1.auto
[   24.781144] hub 2-0:1.0: USB hub found
[   24.787836] hub 2-0:1.0: 1 port detected
[   24.809939] Unhandled fault: imprecise external abort (0x1406) at 0x00000000

Reported-by: Carlos Hernandez <ceh@ti.com>
Signed-off-by: Vignesh R <vigneshr@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 326b302fc440d..03474d3575abe 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -766,15 +766,15 @@ static int dwc3_core_init(struct dwc3 *dwc)
 			dwc->maximum_speed = USB_SPEED_HIGH;
 	}
 
-	ret = dwc3_core_soft_reset(dwc);
+	ret = dwc3_core_get_phy(dwc);
 	if (ret)
 		goto err0;
 
-	ret = dwc3_phy_setup(dwc);
+	ret = dwc3_core_soft_reset(dwc);
 	if (ret)
 		goto err0;
 
-	ret = dwc3_core_get_phy(dwc);
+	ret = dwc3_phy_setup(dwc);
 	if (ret)
 		goto err0;
 
-- 
GitLab


From ee249b4554947de3be77be4e9e6077b20c0fe055 Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Fri, 7 Jul 2017 11:52:52 +0530
Subject: [PATCH 1703/1958] usb: dwc3: omap: remove IRQ_NOAUTOEN used with
 shared irq

IRQ_NOAUTOEN cannot be used with shared IRQs, since commit 04c848d39879
("genirq: Warn when IRQ_NOAUTOEN is used with shared interrupts") and
kernel now throws a warn dump. But OMAP DWC3 driver uses this flag. As
per commit 12a7f17fac5b ("usb: dwc3: omap: fix race of pm runtime with
irq handler in probe") that introduced this flag, PM runtime can race
with IRQ handler when deferred probing happens due to extcon,
therefore IRQ_NOAUTOEN needs to be set so that irq is not enabled until
extcon is registered.

Remove setting of IRQ_NOAUTOEN and move the registration of
shared irq to a point after dwc3_omap_extcon_register() and
of_platform_populate(). This avoids possibility of probe deferring and
above said race condition.

Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vignesh R <vigneshr@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/dwc3-omap.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 98926504b55b5..f5aaa0cf38734 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -512,15 +512,6 @@ static int dwc3_omap_probe(struct platform_device *pdev)
 
 	/* check the DMA Status */
 	reg = dwc3_omap_readl(omap->base, USBOTGSS_SYSCONFIG);
-	irq_set_status_flags(omap->irq, IRQ_NOAUTOEN);
-	ret = devm_request_threaded_irq(dev, omap->irq, dwc3_omap_interrupt,
-					dwc3_omap_interrupt_thread, IRQF_SHARED,
-					"dwc3-omap", omap);
-	if (ret) {
-		dev_err(dev, "failed to request IRQ #%d --> %d\n",
-				omap->irq, ret);
-		goto err1;
-	}
 
 	ret = dwc3_omap_extcon_register(omap);
 	if (ret < 0)
@@ -532,8 +523,15 @@ static int dwc3_omap_probe(struct platform_device *pdev)
 		goto err1;
 	}
 
+	ret = devm_request_threaded_irq(dev, omap->irq, dwc3_omap_interrupt,
+					dwc3_omap_interrupt_thread, IRQF_SHARED,
+					"dwc3-omap", omap);
+	if (ret) {
+		dev_err(dev, "failed to request IRQ #%d --> %d\n",
+			omap->irq, ret);
+		goto err1;
+	}
 	dwc3_omap_enable_irqs(omap);
-	enable_irq(omap->irq);
 	return 0;
 
 err1:
-- 
GitLab


From bee91869436dfb0e444139a52228cef0ce758cd8 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 7 Jul 2017 12:22:00 +0800
Subject: [PATCH 1704/1958] usb: gadget: f_mass_storage: Fix the logic to
 iterate all common->luns

It is wrong to do --i in the for loop.

Fixes: dd02ea5a3305 ("usb: gadget: mass_storage: Use static array for luns")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_mass_storage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index e80b9c123a9dd..f95bddd6513f7 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -2490,7 +2490,7 @@ static int fsg_main_thread(void *common_)
 		int i;
 
 		down_write(&common->filesem);
-		for (i = 0; i < ARRAY_SIZE(common->luns); --i) {
+		for (i = 0; i < ARRAY_SIZE(common->luns); i++) {
 			struct fsg_lun *curlun = common->luns[i];
 			if (!curlun || !fsg_lun_is_open(curlun))
 				continue;
-- 
GitLab


From f84a31eb982950e3b999022b742424e080ff72e8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 6 Jul 2017 11:48:26 +0200
Subject: [PATCH 1705/1958] usb: gadget: udc: USB_RENESAS_USB3 should depend on
 HAS_DMA

If NO_DMA=y:

    ERROR: "usb_gadget_map_request" [drivers/usb/gadget/udc/renesas_usb3.ko] undefined!
    ERROR: "usb_gadget_unmap_request" [drivers/usb/gadget/udc/renesas_usb3.ko] undefined!
    ERROR: "bad_dma_ops" [drivers/usb/gadget/udc/renesas_usb3.ko] undefined!

Add a dependency on HAS_DMA to fix this.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 9ffb11ec9ed96..643226476a7c7 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -192,7 +192,7 @@ config USB_RENESAS_USBHS_UDC
 config USB_RENESAS_USB3
 	tristate 'Renesas USB3.0 Peripheral controller'
 	depends on ARCH_RENESAS || COMPILE_TEST
-	depends on EXTCON
+	depends on EXTCON && HAS_DMA
 	help
 	   Renesas USB3.0 Peripheral controller is a USB peripheral controller
 	   that supports super, high, and full speed USB 3.0 data transfers.
-- 
GitLab


From a9ef5c47d065e3c8a6eb0e5911e95809811e6c6b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 6 Jul 2017 11:48:27 +0200
Subject: [PATCH 1706/1958] usb: gadget: udc: USB_SNP_CORE should depend on
 HAS_DMA

If NO_DMA=y:

    ERROR: "usb_gadget_map_request" [drivers/usb/gadget/udc/snps_udc_core.ko] undefined!
    ERROR: "dma_pool_destroy" [drivers/usb/gadget/udc/snps_udc_core.ko] undefined!
    ERROR: "usb_gadget_unmap_request" [drivers/usb/gadget/udc/snps_udc_core.ko] undefined!
    ERROR: "dma_pool_free" [drivers/usb/gadget/udc/snps_udc_core.ko] undefined!
    ERROR: "dma_pool_alloc" [drivers/usb/gadget/udc/snps_udc_core.ko] undefined!
    ERROR: "dma_pool_create" [drivers/usb/gadget/udc/snps_udc_core.ko] undefined!

As USB_SNP_CORE is selected by USB_SNP_UDC_PLAT and USB_AMD5536UDC,
these should depend on HAS_DMA, too.  For USB_AMD5536UDC, this is
already fulfilled through the dependency on USB_PCI (PCI implies
HAS_DMA).

Add dependencies on HAS_DMA to fix this.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 643226476a7c7..7cd5c969fcbe9 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -257,6 +257,7 @@ config USB_MV_U3D
 
 config USB_SNP_CORE
 	depends on (USB_AMD5536UDC || USB_SNP_UDC_PLAT)
+	depends on HAS_DMA
 	tristate
 	help
 	  This enables core driver support for Synopsys USB 2.0 Device
@@ -271,7 +272,7 @@ config USB_SNP_CORE
 
 config USB_SNP_UDC_PLAT
 	tristate "Synopsys USB 2.0 Device controller"
-	depends on (USB_GADGET && OF)
+	depends on USB_GADGET && OF && HAS_DMA
 	select USB_GADGET_DUALSPEED
 	select USB_SNP_CORE
 	default ARCH_BCM_IPROC
-- 
GitLab


From 4a71fcb8ac5f94c07bf47a43b13258a52e4fe3ad Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Thu, 29 Jun 2017 00:53:31 -0700
Subject: [PATCH 1707/1958] usb: dwc3: gadget: only unmap requests from DMA if
 mapped

A recent optimization was made so that a request put on the
pending_list wouldn't get mapped for DMA until just before
preparing a TRB for it. However, this poses a problem in case
the request is dequeued or the endpoint is disabled before the
mapping is done as that would lead to dwc3_gadget_giveback()
unconditionally calling usb_gadget_unmap_request_for_dev() with
an invalid request->dma handle. Depending on the platform's DMA
implementation the unmap operation could result in a panic.

Since we know a successful mapping is a prerequisite for getting
a TRB, the unmap can be conditionally called only when req->trb
is non-NULL.

Fixes: cdb55b39fab8 ("usb: dwc3: gadget: lazily map requests for DMA")
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 9e41605a276ba..6b299c7b76561 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -191,14 +191,16 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req,
 
 	req->started = false;
 	list_del(&req->list);
-	req->trb = NULL;
 	req->remaining = 0;
 
 	if (req->request.status == -EINPROGRESS)
 		req->request.status = status;
 
-	usb_gadget_unmap_request_by_dev(dwc->sysdev,
-					&req->request, req->direction);
+	if (req->trb)
+		usb_gadget_unmap_request_by_dev(dwc->sysdev,
+						&req->request, req->direction);
+
+	req->trb = NULL;
 
 	trace_dwc3_gadget_giveback(req);
 
-- 
GitLab


From 6883cd7f68245e43e91e5ee583b7550abf14523f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 22 Jun 2017 09:32:49 +0200
Subject: [PATCH 1708/1958] reiserfs: Don't clear SGID when inheriting ACLs

When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit
set, DIR1 is expected to have SGID bit set (and owning group equal to
the owning group of 'DIR0'). However when 'DIR0' also has some default
ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on
'DIR1' to get cleared if user is not member of the owning group.

Fix the problem by moving posix_acl_update_mode() out of
__reiserfs_set_acl() into reiserfs_set_acl(). That way the function will
not be called when inheriting ACLs which is what we want as it prevents
SGID bit clearing and the mode has been properly set by
posix_acl_create() anyway.

Fixes: 073931017b49d9458aa351605b43a7e34598caef
CC: stable@vger.kernel.org
CC: reiserfs-devel@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/xattr_acl.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 3d2256a425ee7..d92a1dc6ee70f 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -37,7 +37,14 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	error = journal_begin(&th, inode->i_sb, jcreate_blocks);
 	reiserfs_write_unlock(inode->i_sb);
 	if (error == 0) {
+		if (type == ACL_TYPE_ACCESS && acl) {
+			error = posix_acl_update_mode(inode, &inode->i_mode,
+						      &acl);
+			if (error)
+				goto unlock;
+		}
 		error = __reiserfs_set_acl(&th, inode, type, acl);
+unlock:
 		reiserfs_write_lock(inode->i_sb);
 		error2 = journal_end(&th);
 		reiserfs_write_unlock(inode->i_sb);
@@ -241,11 +248,6 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = XATTR_NAME_POSIX_ACL_ACCESS;
-		if (acl) {
-			error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
-			if (error)
-				return error;
-		}
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
-- 
GitLab


From a992f2d38e4ce17b8c7d1f7f67b2de0eebdea069 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 21 Jun 2017 14:34:15 +0200
Subject: [PATCH 1709/1958] ext2: Don't clear SGID when inheriting ACLs

When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit
set, DIR1 is expected to have SGID bit set (and owning group equal to
the owning group of 'DIR0'). However when 'DIR0' also has some default
ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on
'DIR1' to get cleared if user is not member of the owning group.

Fix the problem by creating __ext2_set_acl() function that does not call
posix_acl_update_mode() and use it when inheriting ACLs. That prevents
SGID bit clearing and the mode has been properly set by
posix_acl_create() anyway.

Fixes: 073931017b49d9458aa351605b43a7e34598caef
CC: stable@vger.kernel.org
CC: linux-ext4@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/acl.c | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 79dafa71effdd..069c0dceda01e 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -175,11 +175,8 @@ ext2_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-/*
- * inode->i_mutex: down
- */
-int
-ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+static int
+__ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	int name_index;
 	void *value = NULL;
@@ -189,13 +186,6 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
-			if (acl) {
-				error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
-				if (error)
-					return error;
-				inode->i_ctime = current_time(inode);
-				mark_inode_dirty(inode);
-			}
 			break;
 
 		case ACL_TYPE_DEFAULT:
@@ -221,6 +211,24 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	return error;
 }
 
+/*
+ * inode->i_mutex: down
+ */
+int
+ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	int error;
+
+	if (type == ACL_TYPE_ACCESS && acl) {
+		error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+		if (error)
+			return error;
+		inode->i_ctime = current_time(inode);
+		mark_inode_dirty(inode);
+	}
+	return __ext2_set_acl(inode, acl, type);
+}
+
 /*
  * Initialize the ACLs of a new inode. Called from ext2_new_inode.
  *
@@ -238,12 +246,12 @@ ext2_init_acl(struct inode *inode, struct inode *dir)
 		return error;
 
 	if (default_acl) {
-		error = ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+		error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
 		posix_acl_release(default_acl);
 	}
 	if (acl) {
 		if (!error)
-			error = ext2_set_acl(inode, acl, ACL_TYPE_ACCESS);
+			error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS);
 		posix_acl_release(acl);
 	}
 	return error;
-- 
GitLab


From dd55d44f408419278c00887bfcb2261d0caae350 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 6 Jul 2017 16:06:01 +0200
Subject: [PATCH 1710/1958] staging: vboxvideo: Add vboxvideo to
 drivers/staging

This commit adds the vboxvideo drm/kms driver for the virtual graphics
card used in Virtual Box virtual machines to drivers/staging.

Why drivers/staging? This driver is already being patched into the kernel
by several distros, thus it is good to get this driver upstream soon, so
that work on the driver can be easily shared.

At the same time we want to take our time to get this driver properly
cleaned up (mainly converted to the new atomic modesetting APIs) before
submitting it as a normal driver under drivers/gpu/drm, putting this
driver in staging for now allows both.

Note this driver has already been significantly cleaned up, when I started
working on this the files under /usr/src/vboxguest/vboxvideo as installed
by Virtual Box 5.1.18 Guest Additions had a total linecount of 52681
lines. The version in this commit has 4874 lines.

Cc: vbox-dev@virtualbox.org
Cc: Michael Thayer <michael.thayer@oracle.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Michael Thayer <michael.thayer@oracle.com>
Acked-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/Kconfig                     |   2 +
 drivers/staging/Makefile                    |   1 +
 drivers/staging/vboxvideo/Kconfig           |  12 +
 drivers/staging/vboxvideo/Makefile          |   7 +
 drivers/staging/vboxvideo/TODO              |   9 +
 drivers/staging/vboxvideo/hgsmi_base.c      | 246 ++++++
 drivers/staging/vboxvideo/hgsmi_ch_setup.h  |  66 ++
 drivers/staging/vboxvideo/hgsmi_channels.h  |  53 ++
 drivers/staging/vboxvideo/hgsmi_defs.h      |  92 ++
 drivers/staging/vboxvideo/modesetting.c     | 142 ++++
 drivers/staging/vboxvideo/vbox_drv.c        | 286 +++++++
 drivers/staging/vboxvideo/vbox_drv.h        | 296 +++++++
 drivers/staging/vboxvideo/vbox_err.h        |  50 ++
 drivers/staging/vboxvideo/vbox_fb.c         | 412 +++++++++
 drivers/staging/vboxvideo/vbox_hgsmi.c      | 115 +++
 drivers/staging/vboxvideo/vbox_irq.c        | 197 +++++
 drivers/staging/vboxvideo/vbox_main.c       | 534 ++++++++++++
 drivers/staging/vboxvideo/vbox_mode.c       | 877 ++++++++++++++++++++
 drivers/staging/vboxvideo/vbox_prime.c      |  74 ++
 drivers/staging/vboxvideo/vbox_ttm.c        | 472 +++++++++++
 drivers/staging/vboxvideo/vboxvideo.h       | 491 +++++++++++
 drivers/staging/vboxvideo/vboxvideo_guest.h |  95 +++
 drivers/staging/vboxvideo/vboxvideo_vbe.h   |  84 ++
 drivers/staging/vboxvideo/vbva_base.c       | 233 ++++++
 24 files changed, 4846 insertions(+)
 create mode 100644 drivers/staging/vboxvideo/Kconfig
 create mode 100644 drivers/staging/vboxvideo/Makefile
 create mode 100644 drivers/staging/vboxvideo/TODO
 create mode 100644 drivers/staging/vboxvideo/hgsmi_base.c
 create mode 100644 drivers/staging/vboxvideo/hgsmi_ch_setup.h
 create mode 100644 drivers/staging/vboxvideo/hgsmi_channels.h
 create mode 100644 drivers/staging/vboxvideo/hgsmi_defs.h
 create mode 100644 drivers/staging/vboxvideo/modesetting.c
 create mode 100644 drivers/staging/vboxvideo/vbox_drv.c
 create mode 100644 drivers/staging/vboxvideo/vbox_drv.h
 create mode 100644 drivers/staging/vboxvideo/vbox_err.h
 create mode 100644 drivers/staging/vboxvideo/vbox_fb.c
 create mode 100644 drivers/staging/vboxvideo/vbox_hgsmi.c
 create mode 100644 drivers/staging/vboxvideo/vbox_irq.c
 create mode 100644 drivers/staging/vboxvideo/vbox_main.c
 create mode 100644 drivers/staging/vboxvideo/vbox_mode.c
 create mode 100644 drivers/staging/vboxvideo/vbox_prime.c
 create mode 100644 drivers/staging/vboxvideo/vbox_ttm.c
 create mode 100644 drivers/staging/vboxvideo/vboxvideo.h
 create mode 100644 drivers/staging/vboxvideo/vboxvideo_guest.h
 create mode 100644 drivers/staging/vboxvideo/vboxvideo_vbe.h
 create mode 100644 drivers/staging/vboxvideo/vbva_base.c

diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 268d4e6ef48a6..ef28a1cb64aec 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -110,4 +110,6 @@ source "drivers/staging/ccree/Kconfig"
 
 source "drivers/staging/typec/Kconfig"
 
+source "drivers/staging/vboxvideo/Kconfig"
+
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index b93e6f5f0f6ea..2918580bdb9e2 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -44,3 +44,4 @@ obj-$(CONFIG_KS7010)		+= ks7010/
 obj-$(CONFIG_GREYBUS)		+= greybus/
 obj-$(CONFIG_BCM2835_VCHIQ)	+= vc04_services/
 obj-$(CONFIG_CRYPTO_DEV_CCREE)	+= ccree/
+obj-$(CONFIG_DRM_VBOXVIDEO)	+= vboxvideo/
diff --git a/drivers/staging/vboxvideo/Kconfig b/drivers/staging/vboxvideo/Kconfig
new file mode 100644
index 0000000000000..a52746f9a670b
--- /dev/null
+++ b/drivers/staging/vboxvideo/Kconfig
@@ -0,0 +1,12 @@
+config DRM_VBOXVIDEO
+	tristate "Virtual Box Graphics Card"
+	depends on DRM && X86 && PCI
+	select DRM_KMS_HELPER
+	help
+	  This is a KMS driver for the virtual Graphics Card used in
+	  Virtual Box virtual machines.
+
+	  Although it is possible to builtin this module, it is advised
+	  to build this driver as a module, so that it can be updated
+	  independently of the kernel. Select M to built this driver as a
+	  module and add support for these devices via drm/kms interfaces.
diff --git a/drivers/staging/vboxvideo/Makefile b/drivers/staging/vboxvideo/Makefile
new file mode 100644
index 0000000000000..2d0b3bc7ad738
--- /dev/null
+++ b/drivers/staging/vboxvideo/Makefile
@@ -0,0 +1,7 @@
+ccflags-y := -Iinclude/drm
+
+vboxvideo-y :=  hgsmi_base.o modesetting.o vbva_base.o \
+		vbox_drv.o vbox_fb.o vbox_hgsmi.o vbox_irq.o vbox_main.o \
+		vbox_mode.o vbox_prime.o vbox_ttm.o
+
+obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo.o
diff --git a/drivers/staging/vboxvideo/TODO b/drivers/staging/vboxvideo/TODO
new file mode 100644
index 0000000000000..ce764309b079b
--- /dev/null
+++ b/drivers/staging/vboxvideo/TODO
@@ -0,0 +1,9 @@
+TODO:
+-Move the driver over to the atomic API
+-Stop using old load / unload drm_driver hooks
+-Get a full review from the drm-maintainers on dri-devel done on this driver
+-Extend this TODO with the results of that review
+
+Please send any patches to Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
+Hans de Goede <hdegoede@redhat.com> and
+Michael Thayer <michael.thayer@oracle.com>.
diff --git a/drivers/staging/vboxvideo/hgsmi_base.c b/drivers/staging/vboxvideo/hgsmi_base.c
new file mode 100644
index 0000000000000..15ff5f42e2cd8
--- /dev/null
+++ b/drivers/staging/vboxvideo/hgsmi_base.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "vbox_drv.h"
+#include "vbox_err.h"
+#include "vboxvideo_guest.h"
+#include "vboxvideo_vbe.h"
+#include "hgsmi_channels.h"
+#include "hgsmi_ch_setup.h"
+
+/**
+ * Inform the host of the location of the host flags in VRAM via an HGSMI cmd.
+ * @param    ctx          the context of the guest heap to use.
+ * @param    location     the offset chosen for the flags within guest VRAM.
+ * @returns 0 on success, -errno on failure
+ */
+int hgsmi_report_flags_location(struct gen_pool *ctx, u32 location)
+{
+	struct hgsmi_buffer_location *p;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_HGSMI,
+			       HGSMI_CC_HOST_FLAGS_LOCATION);
+	if (!p)
+		return -ENOMEM;
+
+	p->buf_location = location;
+	p->buf_len = sizeof(struct hgsmi_host_flags);
+
+	hgsmi_buffer_submit(ctx, p);
+	hgsmi_buffer_free(ctx, p);
+
+	return 0;
+}
+
+/**
+ * Notify the host of HGSMI-related guest capabilities via an HGSMI command.
+ * @param    ctx                 the context of the guest heap to use.
+ * @param    caps                the capabilities to report, see vbva_caps.
+ * @returns 0 on success, -errno on failure
+ */
+int hgsmi_send_caps_info(struct gen_pool *ctx, u32 caps)
+{
+	struct vbva_caps *p;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_VBVA, VBVA_INFO_CAPS);
+	if (!p)
+		return -ENOMEM;
+
+	p->rc = VERR_NOT_IMPLEMENTED;
+	p->caps = caps;
+
+	hgsmi_buffer_submit(ctx, p);
+
+	WARN_ON_ONCE(RT_FAILURE(p->rc));
+
+	hgsmi_buffer_free(ctx, p);
+
+	return 0;
+}
+
+int hgsmi_test_query_conf(struct gen_pool *ctx)
+{
+	u32 value = 0;
+	int ret;
+
+	ret = hgsmi_query_conf(ctx, U32_MAX, &value);
+	if (ret)
+		return ret;
+
+	return value == U32_MAX ? 0 : -EIO;
+}
+
+/**
+ * Query the host for an HGSMI configuration parameter via an HGSMI command.
+ * @param  ctx        the context containing the heap used
+ * @param  index      the index of the parameter to query,
+ *                    @see vbva_conf32::index
+ * @param  value_ret  where to store the value of the parameter on success
+ * @returns 0 on success, -errno on failure
+ */
+int hgsmi_query_conf(struct gen_pool *ctx, u32 index, u32 *value_ret)
+{
+	struct vbva_conf32 *p;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_VBVA,
+			       VBVA_QUERY_CONF32);
+	if (!p)
+		return -ENOMEM;
+
+	p->index = index;
+	p->value = U32_MAX;
+
+	hgsmi_buffer_submit(ctx, p);
+
+	*value_ret = p->value;
+
+	hgsmi_buffer_free(ctx, p);
+
+	return 0;
+}
+
+/**
+ * Pass the host a new mouse pointer shape via an HGSMI command.
+ *
+ * @param  ctx      the context containing the heap to be used
+ * @param  flags    cursor flags, @see VMMDevReqMousePointer::flags
+ * @param  hot_x    horizontal position of the hot spot
+ * @param  hot_y    vertical position of the hot spot
+ * @param  width    width in pixels of the cursor
+ * @param  height   height in pixels of the cursor
+ * @param  pixels   pixel data, @see VMMDevReqMousePointer for the format
+ * @param  len      size in bytes of the pixel data
+ * @returns 0 on success, -errno on failure
+ */
+int hgsmi_update_pointer_shape(struct gen_pool *ctx, u32 flags,
+			       u32 hot_x, u32 hot_y, u32 width, u32 height,
+			       u8 *pixels, u32 len)
+{
+	struct vbva_mouse_pointer_shape *p;
+	u32 pixel_len = 0;
+	int rc;
+
+	if (flags & VBOX_MOUSE_POINTER_SHAPE) {
+		/*
+		 * Size of the pointer data:
+		 * sizeof (AND mask) + sizeof (XOR_MASK)
+		 */
+		pixel_len = ((((width + 7) / 8) * height + 3) & ~3) +
+			 width * 4 * height;
+		if (pixel_len > len)
+			return -EINVAL;
+
+		/*
+		 * If shape is supplied, then always create the pointer visible.
+		 * See comments in 'vboxUpdatePointerShape'
+		 */
+		flags |= VBOX_MOUSE_POINTER_VISIBLE;
+	}
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len, HGSMI_CH_VBVA,
+			       VBVA_MOUSE_POINTER_SHAPE);
+	if (!p)
+		return -ENOMEM;
+
+	p->result = VINF_SUCCESS;
+	p->flags = flags;
+	p->hot_X = hot_x;
+	p->hot_y = hot_y;
+	p->width = width;
+	p->height = height;
+	if (pixel_len)
+		memcpy(p->data, pixels, pixel_len);
+
+	hgsmi_buffer_submit(ctx, p);
+
+	switch (p->result) {
+	case VINF_SUCCESS:
+		rc = 0;
+		break;
+	case VERR_NO_MEMORY:
+		rc = -ENOMEM;
+		break;
+	case VERR_NOT_SUPPORTED:
+		rc = -EBUSY;
+		break;
+	default:
+		rc = -EINVAL;
+	}
+
+	hgsmi_buffer_free(ctx, p);
+
+	return rc;
+}
+
+/**
+ * Report the guest cursor position.  The host may wish to use this information
+ * to re-position its own cursor (though this is currently unlikely).  The
+ * current host cursor position is returned.
+ * @param  ctx              The context containing the heap used.
+ * @param  report_position  Are we reporting a position?
+ * @param  x                Guest cursor X position.
+ * @param  y                Guest cursor Y position.
+ * @param  x_host           Host cursor X position is stored here.  Optional.
+ * @param  y_host           Host cursor Y position is stored here.  Optional.
+ * @returns 0 on success, -errno on failure
+ */
+int hgsmi_cursor_position(struct gen_pool *ctx, bool report_position,
+			  u32 x, u32 y, u32 *x_host, u32 *y_host)
+{
+	struct vbva_cursor_position *p;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_VBVA,
+			       VBVA_CURSOR_POSITION);
+	if (!p)
+		return -ENOMEM;
+
+	p->report_position = report_position;
+	p->x = x;
+	p->y = y;
+
+	hgsmi_buffer_submit(ctx, p);
+
+	*x_host = p->x;
+	*y_host = p->y;
+
+	hgsmi_buffer_free(ctx, p);
+
+	return 0;
+}
+
+/**
+ * @todo Mouse pointer position to be read from VMMDev memory, address of the
+ * memory region can be queried from VMMDev via an IOCTL. This VMMDev memory
+ * region will contain host information which is needed by the guest.
+ *
+ * Reading will not cause a switch to the host.
+ *
+ * Have to take into account:
+ *  * synchronization: host must write to the memory only from EMT,
+ *    large structures must be read under flag, which tells the host
+ *    that the guest is currently reading the memory (OWNER flag?).
+ *  * guest writes: may be allocate a page for the host info and make
+ *    the page readonly for the guest.
+ *  * the information should be available only for additions drivers.
+ *  * VMMDev additions driver will inform the host which version of the info
+ *    it expects, host must support all versions.
+ */
diff --git a/drivers/staging/vboxvideo/hgsmi_ch_setup.h b/drivers/staging/vboxvideo/hgsmi_ch_setup.h
new file mode 100644
index 0000000000000..8e6d9e11a69c4
--- /dev/null
+++ b/drivers/staging/vboxvideo/hgsmi_ch_setup.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __HGSMI_CH_SETUP_H__
+#define __HGSMI_CH_SETUP_H__
+
+/*
+ * Tell the host the location of hgsmi_host_flags structure, where the host
+ * can write information about pending buffers, etc, and which can be quickly
+ * polled by the guest without a need to port IO.
+ */
+#define HGSMI_CC_HOST_FLAGS_LOCATION 0
+
+struct hgsmi_buffer_location {
+	u32 buf_location;
+	u32 buf_len;
+} __packed;
+
+/* HGSMI setup and configuration data structures. */
+/* host->guest commands pending, should be accessed under FIFO lock only */
+#define HGSMIHOSTFLAGS_COMMANDS_PENDING    0x01u
+/* IRQ is fired, should be accessed under VGAState::lock only  */
+#define HGSMIHOSTFLAGS_IRQ                 0x02u
+/* vsync interrupt flag, should be accessed under VGAState::lock only */
+#define HGSMIHOSTFLAGS_VSYNC               0x10u
+/** monitor hotplug flag, should be accessed under VGAState::lock only */
+#define HGSMIHOSTFLAGS_HOTPLUG             0x20u
+/**
+ * Cursor capability state change flag, should be accessed under
+ * VGAState::lock only. @see vbva_conf32.
+ */
+#define HGSMIHOSTFLAGS_CURSOR_CAPABILITIES 0x40u
+
+struct hgsmi_host_flags {
+	/*
+	 * Host flags can be accessed and modified in multiple threads
+	 * concurrently, e.g. CrOpenGL HGCM and GUI threads when completing
+	 * HGSMI 3D and Video Accel respectively, EMT thread when dealing with
+	 * HGSMI command processing, etc.
+	 * Besides settings/cleaning flags atomically, some flags have their
+	 * own special sync restrictions, see comments for flags above.
+	 */
+	u32 host_flags;
+	u32 reserved[3];
+} __packed;
+
+#endif
diff --git a/drivers/staging/vboxvideo/hgsmi_channels.h b/drivers/staging/vboxvideo/hgsmi_channels.h
new file mode 100644
index 0000000000000..a2a34b2167b47
--- /dev/null
+++ b/drivers/staging/vboxvideo/hgsmi_channels.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __HGSMI_CHANNELS_H__
+#define __HGSMI_CHANNELS_H__
+
+/*
+ * Each channel has an 8 bit identifier. There are a number of predefined
+ * (hardcoded) channels.
+ *
+ * HGSMI_CH_HGSMI channel can be used to map a string channel identifier
+ * to a free 16 bit numerical value. values are allocated in range
+ * [HGSMI_CH_STRING_FIRST;HGSMI_CH_STRING_LAST].
+ */
+
+/* A reserved channel value */
+#define HGSMI_CH_RESERVED				0x00
+/* HGCMI: setup and configuration */
+#define HGSMI_CH_HGSMI					0x01
+/* Graphics: VBVA */
+#define HGSMI_CH_VBVA					0x02
+/* Graphics: Seamless with a single guest region */
+#define HGSMI_CH_SEAMLESS				0x03
+/* Graphics: Seamless with separate host windows */
+#define HGSMI_CH_SEAMLESS2				0x04
+/* Graphics: OpenGL HW acceleration */
+#define HGSMI_CH_OPENGL					0x05
+
+/* The first channel index to be used for string mappings (inclusive) */
+#define HGSMI_CH_STRING_FIRST				0x20
+/* The last channel index for string mappings (inclusive) */
+#define HGSMI_CH_STRING_LAST				0xff
+
+#endif
diff --git a/drivers/staging/vboxvideo/hgsmi_defs.h b/drivers/staging/vboxvideo/hgsmi_defs.h
new file mode 100644
index 0000000000000..5b21fb974d200
--- /dev/null
+++ b/drivers/staging/vboxvideo/hgsmi_defs.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __HGSMI_DEFS_H__
+#define __HGSMI_DEFS_H__
+
+/* Buffer sequence type mask. */
+#define HGSMI_BUFFER_HEADER_F_SEQ_MASK     0x03
+/* Single buffer, not a part of a sequence. */
+#define HGSMI_BUFFER_HEADER_F_SEQ_SINGLE   0x00
+/* The first buffer in a sequence. */
+#define HGSMI_BUFFER_HEADER_F_SEQ_START    0x01
+/* A middle buffer in a sequence. */
+#define HGSMI_BUFFER_HEADER_F_SEQ_CONTINUE 0x02
+/* The last buffer in a sequence. */
+#define HGSMI_BUFFER_HEADER_F_SEQ_END      0x03
+
+/* 16 bytes buffer header. */
+struct hgsmi_buffer_header {
+	u32 data_size;		/* Size of data that follows the header. */
+	u8 flags;		/* HGSMI_BUFFER_HEADER_F_* */
+	u8 channel;		/* The channel the data must be routed to. */
+	u16 channel_info;	/* Opaque to the HGSMI, used by the channel. */
+
+	union {
+		/* Opaque placeholder to make the union 8 bytes. */
+		u8 header_data[8];
+
+		/* HGSMI_BUFFER_HEADER_F_SEQ_SINGLE */
+		struct {
+			u32 reserved1;	/* A reserved field, initialize to 0. */
+			u32 reserved2;	/* A reserved field, initialize to 0. */
+		} buffer;
+
+		/* HGSMI_BUFFER_HEADER_F_SEQ_START */
+		struct {
+			/* Must be the same for all buffers in the sequence. */
+			u32 sequence_number;
+			/* The total size of the sequence. */
+			u32 sequence_size;
+		} sequence_start;
+
+		/*
+		 * HGSMI_BUFFER_HEADER_F_SEQ_CONTINUE and
+		 * HGSMI_BUFFER_HEADER_F_SEQ_END
+		 */
+		struct {
+			/* Must be the same for all buffers in the sequence. */
+			u32 sequence_number;
+			/* Data offset in the entire sequence. */
+			u32 sequence_offset;
+		} sequence_continue;
+	} u;
+} __packed;
+
+/* 8 bytes buffer tail. */
+struct hgsmi_buffer_tail {
+	/* Reserved, must be initialized to 0. */
+	u32 reserved;
+	/*
+	 * One-at-a-Time Hash: http://www.burtleburtle.net/bob/hash/doobs.html
+	 * Over the header, offset and for first 4 bytes of the tail.
+	 */
+	u32 checksum;
+} __packed;
+
+/*
+ * The size of the array of channels. Array indexes are u8.
+ * Note: the value must not be changed.
+ */
+#define HGSMI_NUMBER_OF_CHANNELS 0x100
+
+#endif
diff --git a/drivers/staging/vboxvideo/modesetting.c b/drivers/staging/vboxvideo/modesetting.c
new file mode 100644
index 0000000000000..7616b8aab23ae
--- /dev/null
+++ b/drivers/staging/vboxvideo/modesetting.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "vbox_drv.h"
+#include "vbox_err.h"
+#include "vboxvideo_guest.h"
+#include "vboxvideo_vbe.h"
+#include "hgsmi_channels.h"
+
+/**
+ * Set a video mode via an HGSMI request.  The views must have been
+ * initialised first using @a VBoxHGSMISendViewInfo and if the mode is being
+ * set on the first display then it must be set first using registers.
+ * @param  ctx           The context containing the heap to use
+ * @param  display       The screen number
+ * @param  origin_x      The horizontal displacement relative to the first scrn
+ * @param  origin_y      The vertical displacement relative to the first screen
+ * @param  start_offset  The offset of the visible area of the framebuffer
+ *                       relative to the framebuffer start
+ * @param  pitch         The offset in bytes between the starts of two adjecent
+ *                       scan lines in video RAM
+ * @param  width         The mode width
+ * @param  height        The mode height
+ * @param  bpp           The colour depth of the mode
+ * @param  flags         Flags
+ */
+void hgsmi_process_display_info(struct gen_pool *ctx, u32 display,
+				s32 origin_x, s32 origin_y, u32 start_offset,
+				u32 pitch, u32 width, u32 height,
+				u16 bpp, u16 flags)
+{
+	struct vbva_infoscreen *p;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_VBVA,
+			       VBVA_INFO_SCREEN);
+	if (!p)
+		return;
+
+	p->view_index = display;
+	p->origin_x = origin_x;
+	p->origin_y = origin_y;
+	p->start_offset = start_offset;
+	p->line_size = pitch;
+	p->width = width;
+	p->height = height;
+	p->bits_per_pixel = bpp;
+	p->flags = flags;
+
+	hgsmi_buffer_submit(ctx, p);
+	hgsmi_buffer_free(ctx, p);
+}
+
+/**
+ * Report the rectangle relative to which absolute pointer events should be
+ * expressed.  This information remains valid until the next VBVA resize event
+ * for any screen, at which time it is reset to the bounding rectangle of all
+ * virtual screens.
+ * @param  ctx       The context containing the heap to use.
+ * @param  origin_x  Upper left X co-ordinate relative to the first screen.
+ * @param  origin_y  Upper left Y co-ordinate relative to the first screen.
+ * @param  width     Rectangle width.
+ * @param  height    Rectangle height.
+ * @returns 0 on success, -errno on failure
+ */
+int hgsmi_update_input_mapping(struct gen_pool *ctx, s32 origin_x, s32 origin_y,
+			       u32 width, u32 height)
+{
+	struct vbva_report_input_mapping *p;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_VBVA,
+			       VBVA_REPORT_INPUT_MAPPING);
+	if (!p)
+		return -ENOMEM;
+
+	p->x = origin_x;
+	p->y = origin_y;
+	p->cx = width;
+	p->cy = height;
+
+	hgsmi_buffer_submit(ctx, p);
+	hgsmi_buffer_free(ctx, p);
+
+	return 0;
+}
+
+/**
+ * Get most recent video mode hints.
+ * @param  ctx      The context containing the heap to use.
+ * @param  screens  The number of screens to query hints for, starting at 0.
+ * @param  hints    Array of vbva_modehint structures for receiving the hints.
+ * @returns 0 on success, -errno on failure
+ */
+int hgsmi_get_mode_hints(struct gen_pool *ctx, unsigned int screens,
+			 struct vbva_modehint *hints)
+{
+	struct vbva_query_mode_hints *p;
+	size_t size;
+
+	if (WARN_ON(!hints))
+		return -EINVAL;
+
+	size = screens * sizeof(struct vbva_modehint);
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p) + size, HGSMI_CH_VBVA,
+			       VBVA_QUERY_MODE_HINTS);
+	if (!p)
+		return -ENOMEM;
+
+	p->hints_queried_count = screens;
+	p->hint_structure_guest_size = sizeof(struct vbva_modehint);
+	p->rc = VERR_NOT_SUPPORTED;
+
+	hgsmi_buffer_submit(ctx, p);
+
+	if (RT_FAILURE(p->rc)) {
+		hgsmi_buffer_free(ctx, p);
+		return -EIO;
+	}
+
+	memcpy(hints, ((u8 *)p) + sizeof(struct vbva_query_mode_hints), size);
+	hgsmi_buffer_free(ctx, p);
+
+	return 0;
+}
diff --git a/drivers/staging/vboxvideo/vbox_drv.c b/drivers/staging/vboxvideo/vbox_drv.c
new file mode 100644
index 0000000000000..92ae1560a16db
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_drv.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2013-2017 Oracle Corporation
+ * This file is based on ast_drv.c
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Michael Thayer <michael.thayer@oracle.com,
+ *          Hans de Goede <hdegoede@redhat.com>
+ */
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/vt_kern.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "vbox_drv.h"
+
+int vbox_modeset = -1;
+
+MODULE_PARM_DESC(modeset, "Disable/Enable modesetting");
+module_param_named(modeset, vbox_modeset, int, 0400);
+
+static struct drm_driver driver;
+
+static const struct pci_device_id pciidlist[] = {
+	{ 0x80ee, 0xbeef, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+	{ 0, 0, 0},
+};
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+static int vbox_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	return drm_get_pci_dev(pdev, ent, &driver);
+}
+
+static void vbox_pci_remove(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+
+	drm_put_dev(dev);
+}
+
+static int vbox_drm_freeze(struct drm_device *dev)
+{
+	struct vbox_private *vbox = dev->dev_private;
+
+	drm_kms_helper_poll_disable(dev);
+
+	pci_save_state(dev->pdev);
+
+	drm_fb_helper_set_suspend_unlocked(&vbox->fbdev->helper, true);
+
+	return 0;
+}
+
+static int vbox_drm_thaw(struct drm_device *dev)
+{
+	struct vbox_private *vbox = dev->dev_private;
+
+	drm_mode_config_reset(dev);
+	drm_helper_resume_force_mode(dev);
+	drm_fb_helper_set_suspend_unlocked(&vbox->fbdev->helper, false);
+
+	return 0;
+}
+
+static int vbox_drm_resume(struct drm_device *dev)
+{
+	int ret;
+
+	if (pci_enable_device(dev->pdev))
+		return -EIO;
+
+	ret = vbox_drm_thaw(dev);
+	if (ret)
+		return ret;
+
+	drm_kms_helper_poll_enable(dev);
+
+	return 0;
+}
+
+static int vbox_pm_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *ddev = pci_get_drvdata(pdev);
+	int error;
+
+	error = vbox_drm_freeze(ddev);
+	if (error)
+		return error;
+
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+
+	return 0;
+}
+
+static int vbox_pm_resume(struct device *dev)
+{
+	struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev));
+
+	return vbox_drm_resume(ddev);
+}
+
+static int vbox_pm_freeze(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *ddev = pci_get_drvdata(pdev);
+
+	if (!ddev || !ddev->dev_private)
+		return -ENODEV;
+
+	return vbox_drm_freeze(ddev);
+}
+
+static int vbox_pm_thaw(struct device *dev)
+{
+	struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev));
+
+	return vbox_drm_thaw(ddev);
+}
+
+static int vbox_pm_poweroff(struct device *dev)
+{
+	struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev));
+
+	return vbox_drm_freeze(ddev);
+}
+
+static const struct dev_pm_ops vbox_pm_ops = {
+	.suspend = vbox_pm_suspend,
+	.resume = vbox_pm_resume,
+	.freeze = vbox_pm_freeze,
+	.thaw = vbox_pm_thaw,
+	.poweroff = vbox_pm_poweroff,
+	.restore = vbox_pm_resume,
+};
+
+static struct pci_driver vbox_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = pciidlist,
+	.probe = vbox_pci_probe,
+	.remove = vbox_pci_remove,
+	.driver.pm = &vbox_pm_ops,
+};
+
+static const struct file_operations vbox_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = vbox_mmap,
+	.poll = drm_poll,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = drm_compat_ioctl,
+#endif
+	.read = drm_read,
+};
+
+static int vbox_master_set(struct drm_device *dev,
+			   struct drm_file *file_priv, bool from_open)
+{
+	struct vbox_private *vbox = dev->dev_private;
+
+	/*
+	 * We do not yet know whether the new owner can handle hotplug, so we
+	 * do not advertise dynamic modes on the first query and send a
+	 * tentative hotplug notification after that to see if they query again.
+	 */
+	vbox->initial_mode_queried = false;
+
+	mutex_lock(&vbox->hw_mutex);
+	/*
+	 * Disable VBVA when someone releases master in case the next person
+	 * tries tries to do VESA.
+	 */
+	/** @todo work out if anyone is likely to and whether it will work. */
+	/*
+	 * Update: we also disable it because if the new master does not do
+	 * dirty rectangle reporting (e.g. old versions of Plymouth) then at
+	 * least the first screen will still be updated. We enable it as soon
+	 * as we receive a dirty rectangle report.
+	 */
+	vbox_disable_accel(vbox);
+	mutex_unlock(&vbox->hw_mutex);
+
+	return 0;
+}
+
+static void vbox_master_drop(struct drm_device *dev, struct drm_file *file_priv)
+{
+	struct vbox_private *vbox = dev->dev_private;
+
+	/* See vbox_master_set() */
+	vbox->initial_mode_queried = false;
+
+	mutex_lock(&vbox->hw_mutex);
+	vbox_disable_accel(vbox);
+	mutex_unlock(&vbox->hw_mutex);
+}
+
+static struct drm_driver driver = {
+	.driver_features =
+	    DRIVER_MODESET | DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED |
+	    DRIVER_PRIME,
+	.dev_priv_size = 0,
+
+	.load = vbox_driver_load,
+	.unload = vbox_driver_unload,
+	.lastclose = vbox_driver_lastclose,
+	.master_set = vbox_master_set,
+	.master_drop = vbox_master_drop,
+	.set_busid = drm_pci_set_busid,
+
+	.fops = &vbox_fops,
+	.irq_handler = vbox_irq_handler,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+
+	.gem_free_object = vbox_gem_free_object,
+	.dumb_create = vbox_dumb_create,
+	.dumb_map_offset = vbox_dumb_mmap_offset,
+	.dumb_destroy = drm_gem_dumb_destroy,
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_pin = vbox_gem_prime_pin,
+	.gem_prime_unpin = vbox_gem_prime_unpin,
+	.gem_prime_get_sg_table = vbox_gem_prime_get_sg_table,
+	.gem_prime_import_sg_table = vbox_gem_prime_import_sg_table,
+	.gem_prime_vmap = vbox_gem_prime_vmap,
+	.gem_prime_vunmap = vbox_gem_prime_vunmap,
+	.gem_prime_mmap = vbox_gem_prime_mmap,
+};
+
+static int __init vbox_init(void)
+{
+#ifdef CONFIG_VGA_CONSOLE
+	if (vgacon_text_force() && vbox_modeset == -1)
+		return -EINVAL;
+#endif
+
+	if (vbox_modeset == 0)
+		return -EINVAL;
+
+	return drm_pci_init(&driver, &vbox_pci_driver);
+}
+
+static void __exit vbox_exit(void)
+{
+	drm_pci_exit(&driver, &vbox_pci_driver);
+}
+
+module_init(vbox_init);
+module_exit(vbox_exit);
+
+MODULE_AUTHOR("Oracle Corporation");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/staging/vboxvideo/vbox_drv.h b/drivers/staging/vboxvideo/vbox_drv.h
new file mode 100644
index 0000000000000..4b9302703b362
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_drv.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2013-2017 Oracle Corporation
+ * This file is based on ast_drv.h
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Michael Thayer <michael.thayer@oracle.com,
+ *          Hans de Goede <hdegoede@redhat.com>
+ */
+#ifndef __VBOX_DRV_H__
+#define __VBOX_DRV_H__
+
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/version.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem.h>
+
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_memory.h>
+#include <drm/ttm/ttm_module.h>
+
+#include "vboxvideo_guest.h"
+#include "vboxvideo_vbe.h"
+#include "hgsmi_ch_setup.h"
+
+#define DRIVER_NAME         "vboxvideo"
+#define DRIVER_DESC         "Oracle VM VirtualBox Graphics Card"
+#define DRIVER_DATE         "20130823"
+
+#define DRIVER_MAJOR        1
+#define DRIVER_MINOR        0
+#define DRIVER_PATCHLEVEL   0
+
+#define VBOX_MAX_CURSOR_WIDTH  64
+#define VBOX_MAX_CURSOR_HEIGHT 64
+#define CURSOR_PIXEL_COUNT (VBOX_MAX_CURSOR_WIDTH * VBOX_MAX_CURSOR_HEIGHT)
+#define CURSOR_DATA_SIZE (CURSOR_PIXEL_COUNT * 4 + CURSOR_PIXEL_COUNT / 8)
+
+#define VBOX_MAX_SCREENS  32
+
+#define GUEST_HEAP_OFFSET(vbox) ((vbox)->full_vram_size - \
+				 VBVA_ADAPTER_INFORMATION_SIZE)
+#define GUEST_HEAP_SIZE   VBVA_ADAPTER_INFORMATION_SIZE
+#define GUEST_HEAP_USABLE_SIZE (VBVA_ADAPTER_INFORMATION_SIZE - \
+				sizeof(struct hgsmi_host_flags))
+#define HOST_FLAGS_OFFSET GUEST_HEAP_USABLE_SIZE
+
+struct vbox_fbdev;
+
+struct vbox_private {
+	struct drm_device *dev;
+
+	u8 __iomem *guest_heap;
+	u8 __iomem *vbva_buffers;
+	struct gen_pool *guest_pool;
+	struct vbva_buf_ctx *vbva_info;
+	bool any_pitch;
+	u32 num_crtcs;
+	/** Amount of available VRAM, including space used for buffers. */
+	u32 full_vram_size;
+	/** Amount of available VRAM, not including space used for buffers. */
+	u32 available_vram_size;
+	/** Array of structures for receiving mode hints. */
+	struct vbva_modehint *last_mode_hints;
+
+	struct vbox_fbdev *fbdev;
+
+	int fb_mtrr;
+
+	struct {
+		struct drm_global_reference mem_global_ref;
+		struct ttm_bo_global_ref bo_global_ref;
+		struct ttm_bo_device bdev;
+	} ttm;
+
+	struct mutex hw_mutex; /* protects modeset and accel/vbva accesses */
+	/**
+	 * We decide whether or not user-space supports display hot-plug
+	 * depending on whether they react to a hot-plug event after the initial
+	 * mode query.
+	 */
+	bool initial_mode_queried;
+	struct work_struct hotplug_work;
+	u32 input_mapping_width;
+	u32 input_mapping_height;
+	/**
+	 * Is user-space using an X.Org-style layout of one large frame-buffer
+	 * encompassing all screen ones or is the fbdev console active?
+	 */
+	bool single_framebuffer;
+	u32 cursor_width;
+	u32 cursor_height;
+	u32 cursor_hot_x;
+	u32 cursor_hot_y;
+	size_t cursor_data_size;
+	u8 cursor_data[CURSOR_DATA_SIZE];
+};
+
+#undef CURSOR_PIXEL_COUNT
+#undef CURSOR_DATA_SIZE
+
+int vbox_driver_load(struct drm_device *dev, unsigned long flags);
+void vbox_driver_unload(struct drm_device *dev);
+void vbox_driver_lastclose(struct drm_device *dev);
+
+struct vbox_gem_object;
+
+struct vbox_connector {
+	struct drm_connector base;
+	char name[32];
+	struct vbox_crtc *vbox_crtc;
+	struct {
+		u16 width;
+		u16 height;
+		bool disconnected;
+	} mode_hint;
+};
+
+struct vbox_crtc {
+	struct drm_crtc base;
+	bool blanked;
+	bool disconnected;
+	unsigned int crtc_id;
+	u32 fb_offset;
+	bool cursor_enabled;
+	u16 x_hint;
+	u16 y_hint;
+};
+
+struct vbox_encoder {
+	struct drm_encoder base;
+};
+
+struct vbox_framebuffer {
+	struct drm_framebuffer base;
+	struct drm_gem_object *obj;
+};
+
+struct vbox_fbdev {
+	struct drm_fb_helper helper;
+	struct vbox_framebuffer afb;
+	int size;
+	struct ttm_bo_kmap_obj mapping;
+	int x1, y1, x2, y2;	/* dirty rect */
+	spinlock_t dirty_lock;
+};
+
+#define to_vbox_crtc(x) container_of(x, struct vbox_crtc, base)
+#define to_vbox_connector(x) container_of(x, struct vbox_connector, base)
+#define to_vbox_encoder(x) container_of(x, struct vbox_encoder, base)
+#define to_vbox_framebuffer(x) container_of(x, struct vbox_framebuffer, base)
+
+int vbox_mode_init(struct drm_device *dev);
+void vbox_mode_fini(struct drm_device *dev);
+
+#define DRM_MODE_FB_CMD drm_mode_fb_cmd2
+#define CRTC_FB(crtc) ((crtc)->primary->fb)
+
+void vbox_enable_accel(struct vbox_private *vbox);
+void vbox_disable_accel(struct vbox_private *vbox);
+void vbox_report_caps(struct vbox_private *vbox);
+
+void vbox_framebuffer_dirty_rectangles(struct drm_framebuffer *fb,
+				       struct drm_clip_rect *rects,
+				       unsigned int num_rects);
+
+int vbox_framebuffer_init(struct drm_device *dev,
+			  struct vbox_framebuffer *vbox_fb,
+			  const struct DRM_MODE_FB_CMD *mode_cmd,
+			  struct drm_gem_object *obj);
+
+int vbox_fbdev_init(struct drm_device *dev);
+void vbox_fbdev_fini(struct drm_device *dev);
+void vbox_fbdev_set_base(struct vbox_private *vbox, unsigned long gpu_addr);
+
+struct vbox_bo {
+	struct ttm_buffer_object bo;
+	struct ttm_placement placement;
+	struct ttm_bo_kmap_obj kmap;
+	struct drm_gem_object gem;
+	struct ttm_place placements[3];
+	int pin_count;
+};
+
+#define gem_to_vbox_bo(gobj) container_of((gobj), struct vbox_bo, gem)
+
+static inline struct vbox_bo *vbox_bo(struct ttm_buffer_object *bo)
+{
+	return container_of(bo, struct vbox_bo, bo);
+}
+
+#define to_vbox_obj(x) container_of(x, struct vbox_gem_object, base)
+
+int vbox_dumb_create(struct drm_file *file,
+		     struct drm_device *dev,
+		     struct drm_mode_create_dumb *args);
+
+void vbox_gem_free_object(struct drm_gem_object *obj);
+int vbox_dumb_mmap_offset(struct drm_file *file,
+			  struct drm_device *dev,
+			  u32 handle, u64 *offset);
+
+#define DRM_FILE_PAGE_OFFSET (0x10000000ULL >> PAGE_SHIFT)
+
+int vbox_mm_init(struct vbox_private *vbox);
+void vbox_mm_fini(struct vbox_private *vbox);
+
+int vbox_bo_create(struct drm_device *dev, int size, int align,
+		   u32 flags, struct vbox_bo **pvboxbo);
+
+int vbox_gem_create(struct drm_device *dev,
+		    u32 size, bool iskernel, struct drm_gem_object **obj);
+
+int vbox_bo_pin(struct vbox_bo *bo, u32 pl_flag, u64 *gpu_addr);
+int vbox_bo_unpin(struct vbox_bo *bo);
+
+static inline int vbox_bo_reserve(struct vbox_bo *bo, bool no_wait)
+{
+	int ret;
+
+	ret = ttm_bo_reserve(&bo->bo, true, no_wait, NULL);
+	if (ret) {
+		if (ret != -ERESTARTSYS && ret != -EBUSY)
+			DRM_ERROR("reserve failed %p\n", bo);
+		return ret;
+	}
+	return 0;
+}
+
+static inline void vbox_bo_unreserve(struct vbox_bo *bo)
+{
+	ttm_bo_unreserve(&bo->bo);
+}
+
+void vbox_ttm_placement(struct vbox_bo *bo, int domain);
+int vbox_bo_push_sysram(struct vbox_bo *bo);
+int vbox_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/* vbox_prime.c */
+int vbox_gem_prime_pin(struct drm_gem_object *obj);
+void vbox_gem_prime_unpin(struct drm_gem_object *obj);
+struct sg_table *vbox_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *vbox_gem_prime_import_sg_table(
+	struct drm_device *dev, struct dma_buf_attachment *attach,
+	struct sg_table *table);
+void *vbox_gem_prime_vmap(struct drm_gem_object *obj);
+void vbox_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int vbox_gem_prime_mmap(struct drm_gem_object *obj,
+			struct vm_area_struct *area);
+
+/* vbox_irq.c */
+int vbox_irq_init(struct vbox_private *vbox);
+void vbox_irq_fini(struct vbox_private *vbox);
+void vbox_report_hotplug(struct vbox_private *vbox);
+irqreturn_t vbox_irq_handler(int irq, void *arg);
+
+/* vbox_hgsmi.c */
+void *hgsmi_buffer_alloc(struct gen_pool *guest_pool, size_t size,
+			 u8 channel, u16 channel_info);
+void hgsmi_buffer_free(struct gen_pool *guest_pool, void *buf);
+int hgsmi_buffer_submit(struct gen_pool *guest_pool, void *buf);
+
+static inline void vbox_write_ioport(u16 index, u16 data)
+{
+	outw(index, VBE_DISPI_IOPORT_INDEX);
+	outw(data, VBE_DISPI_IOPORT_DATA);
+}
+
+#endif
diff --git a/drivers/staging/vboxvideo/vbox_err.h b/drivers/staging/vboxvideo/vbox_err.h
new file mode 100644
index 0000000000000..562db8630eb0e
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_err.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __VBOX_ERR_H__
+#define __VBOX_ERR_H__
+
+/**
+ * @name VirtualBox virtual-hardware error macros
+ * @{
+ */
+
+#define VINF_SUCCESS                        0
+#define VERR_INVALID_PARAMETER              (-2)
+#define VERR_INVALID_POINTER                (-6)
+#define VERR_NO_MEMORY                      (-8)
+#define VERR_NOT_IMPLEMENTED                (-12)
+#define VERR_INVALID_FUNCTION               (-36)
+#define VERR_NOT_SUPPORTED                  (-37)
+#define VERR_TOO_MUCH_DATA                  (-42)
+#define VERR_INVALID_STATE                  (-79)
+#define VERR_OUT_OF_RESOURCES               (-80)
+#define VERR_ALREADY_EXISTS                 (-105)
+#define VERR_INTERNAL_ERROR                 (-225)
+
+#define RT_SUCCESS_NP(rc)   ((int)(rc) >= VINF_SUCCESS)
+#define RT_SUCCESS(rc)      (likely(RT_SUCCESS_NP(rc)))
+#define RT_FAILURE(rc)      (unlikely(!RT_SUCCESS_NP(rc)))
+
+/** @}  */
+
+#endif
diff --git a/drivers/staging/vboxvideo/vbox_fb.c b/drivers/staging/vboxvideo/vbox_fb.c
new file mode 100644
index 0000000000000..35f6d9f8c2038
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_fb.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright (C) 2013-2017 Oracle Corporation
+ * This file is based on ast_fb.c
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Michael Thayer <michael.thayer@oracle.com,
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/sysrq.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "vbox_drv.h"
+#include "vboxvideo.h"
+
+#define VBOX_DIRTY_DELAY (HZ / 30)
+/**
+ * Tell the host about dirty rectangles to update.
+ */
+static void vbox_dirty_update(struct vbox_fbdev *fbdev,
+			      int x, int y, int width, int height)
+{
+	struct drm_gem_object *obj;
+	struct vbox_bo *bo;
+	int ret = -EBUSY;
+	bool store_for_later = false;
+	int x2, y2;
+	unsigned long flags;
+	struct drm_clip_rect rect;
+
+	obj = fbdev->afb.obj;
+	bo = gem_to_vbox_bo(obj);
+
+	/*
+	 * try and reserve the BO, if we fail with busy
+	 * then the BO is being moved and we should
+	 * store up the damage until later.
+	 */
+	if (drm_can_sleep())
+		ret = vbox_bo_reserve(bo, true);
+	if (ret) {
+		if (ret != -EBUSY)
+			return;
+
+		store_for_later = true;
+	}
+
+	x2 = x + width - 1;
+	y2 = y + height - 1;
+	spin_lock_irqsave(&fbdev->dirty_lock, flags);
+
+	if (fbdev->y1 < y)
+		y = fbdev->y1;
+	if (fbdev->y2 > y2)
+		y2 = fbdev->y2;
+	if (fbdev->x1 < x)
+		x = fbdev->x1;
+	if (fbdev->x2 > x2)
+		x2 = fbdev->x2;
+
+	if (store_for_later) {
+		fbdev->x1 = x;
+		fbdev->x2 = x2;
+		fbdev->y1 = y;
+		fbdev->y2 = y2;
+		spin_unlock_irqrestore(&fbdev->dirty_lock, flags);
+		return;
+	}
+
+	fbdev->x1 = INT_MAX;
+	fbdev->y1 = INT_MAX;
+	fbdev->x2 = 0;
+	fbdev->y2 = 0;
+
+	spin_unlock_irqrestore(&fbdev->dirty_lock, flags);
+
+	/*
+	 * Not sure why the original code subtracted 1 here, but I will keep
+	 * it that way to avoid unnecessary differences.
+	 */
+	rect.x1 = x;
+	rect.x2 = x2 + 1;
+	rect.y1 = y;
+	rect.y2 = y2 + 1;
+	vbox_framebuffer_dirty_rectangles(&fbdev->afb.base, &rect, 1);
+
+	vbox_bo_unreserve(bo);
+}
+
+#ifdef CONFIG_FB_DEFERRED_IO
+static void vbox_deferred_io(struct fb_info *info, struct list_head *pagelist)
+{
+	struct vbox_fbdev *fbdev = info->par;
+	unsigned long start, end, min, max;
+	struct page *page;
+	int y1, y2;
+
+	min = ULONG_MAX;
+	max = 0;
+	list_for_each_entry(page, pagelist, lru) {
+		start = page->index << PAGE_SHIFT;
+		end = start + PAGE_SIZE - 1;
+		min = min(min, start);
+		max = max(max, end);
+	}
+
+	if (min < max) {
+		y1 = min / info->fix.line_length;
+		y2 = (max / info->fix.line_length) + 1;
+		DRM_INFO("%s: Calling dirty update: 0, %d, %d, %d\n",
+			 __func__, y1, info->var.xres, y2 - y1 - 1);
+		vbox_dirty_update(fbdev, 0, y1, info->var.xres, y2 - y1 - 1);
+	}
+}
+
+static struct fb_deferred_io vbox_defio = {
+	.delay = VBOX_DIRTY_DELAY,
+	.deferred_io = vbox_deferred_io,
+};
+#endif
+
+static void vbox_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+	struct vbox_fbdev *fbdev = info->par;
+
+	sys_fillrect(info, rect);
+	vbox_dirty_update(fbdev, rect->dx, rect->dy, rect->width, rect->height);
+}
+
+static void vbox_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+{
+	struct vbox_fbdev *fbdev = info->par;
+
+	sys_copyarea(info, area);
+	vbox_dirty_update(fbdev, area->dx, area->dy, area->width, area->height);
+}
+
+static void vbox_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+	struct vbox_fbdev *fbdev = info->par;
+
+	sys_imageblit(info, image);
+	vbox_dirty_update(fbdev, image->dx, image->dy, image->width,
+			  image->height);
+}
+
+static struct fb_ops vboxfb_ops = {
+	.owner = THIS_MODULE,
+	.fb_check_var = drm_fb_helper_check_var,
+	.fb_set_par = drm_fb_helper_set_par,
+	.fb_fillrect = vbox_fillrect,
+	.fb_copyarea = vbox_copyarea,
+	.fb_imageblit = vbox_imageblit,
+	.fb_pan_display = drm_fb_helper_pan_display,
+	.fb_blank = drm_fb_helper_blank,
+	.fb_setcmap = drm_fb_helper_setcmap,
+	.fb_debug_enter = drm_fb_helper_debug_enter,
+	.fb_debug_leave = drm_fb_helper_debug_leave,
+};
+
+static int vboxfb_create_object(struct vbox_fbdev *fbdev,
+				struct DRM_MODE_FB_CMD *mode_cmd,
+				struct drm_gem_object **gobj_p)
+{
+	struct drm_device *dev = fbdev->helper.dev;
+	u32 size;
+	struct drm_gem_object *gobj;
+	u32 pitch = mode_cmd->pitches[0];
+	int ret;
+
+	size = pitch * mode_cmd->height;
+	ret = vbox_gem_create(dev, size, true, &gobj);
+	if (ret)
+		return ret;
+
+	*gobj_p = gobj;
+
+	return 0;
+}
+
+static int vboxfb_create(struct drm_fb_helper *helper,
+			 struct drm_fb_helper_surface_size *sizes)
+{
+	struct vbox_fbdev *fbdev =
+	    container_of(helper, struct vbox_fbdev, helper);
+	struct drm_device *dev = fbdev->helper.dev;
+	struct DRM_MODE_FB_CMD mode_cmd;
+	struct drm_framebuffer *fb;
+	struct fb_info *info;
+	struct device *device = &dev->pdev->dev;
+	struct drm_gem_object *gobj;
+	struct vbox_bo *bo;
+	int size, ret;
+	u32 pitch;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	pitch = mode_cmd.width * ((sizes->surface_bpp + 7) / 8);
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+	mode_cmd.pitches[0] = pitch;
+
+	size = pitch * mode_cmd.height;
+
+	ret = vboxfb_create_object(fbdev, &mode_cmd, &gobj);
+	if (ret) {
+		DRM_ERROR("failed to create fbcon backing object %d\n", ret);
+		return ret;
+	}
+
+	ret = vbox_framebuffer_init(dev, &fbdev->afb, &mode_cmd, gobj);
+	if (ret)
+		return ret;
+
+	bo = gem_to_vbox_bo(gobj);
+
+	ret = vbox_bo_reserve(bo, false);
+	if (ret)
+		return ret;
+
+	ret = vbox_bo_pin(bo, TTM_PL_FLAG_VRAM, NULL);
+	if (ret) {
+		vbox_bo_unreserve(bo);
+		return ret;
+	}
+
+	ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
+	vbox_bo_unreserve(bo);
+	if (ret) {
+		DRM_ERROR("failed to kmap fbcon\n");
+		return ret;
+	}
+
+	info = framebuffer_alloc(0, device);
+	if (!info)
+		return -ENOMEM;
+	info->par = fbdev;
+
+	fbdev->size = size;
+
+	fb = &fbdev->afb.base;
+	fbdev->helper.fb = fb;
+	fbdev->helper.fbdev = info;
+
+	strcpy(info->fix.id, "vboxdrmfb");
+
+	/*
+	 * The last flag forces a mode set on VT switches even if the kernel
+	 * does not think it is needed.
+	 */
+	info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT |
+		      FBINFO_MISC_ALWAYS_SETPAR;
+	info->fbops = &vboxfb_ops;
+
+	ret = fb_alloc_cmap(&info->cmap, 256, 0);
+	if (ret)
+		return -ENOMEM;
+
+	/*
+	 * This seems to be done for safety checking that the framebuffer
+	 * is not registered twice by different drivers.
+	 */
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures)
+		return -ENOMEM;
+	info->apertures->ranges[0].base = pci_resource_start(dev->pdev, 0);
+	info->apertures->ranges[0].size = pci_resource_len(dev->pdev, 0);
+
+	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth);
+	drm_fb_helper_fill_var(info, &fbdev->helper, sizes->fb_width,
+			       sizes->fb_height);
+
+	info->screen_base = bo->kmap.virtual;
+	info->screen_size = size;
+
+#ifdef CONFIG_FB_DEFERRED_IO
+	info->fbdefio = &vbox_defio;
+	fb_deferred_io_init(info);
+#endif
+
+	info->pixmap.flags = FB_PIXMAP_SYSTEM;
+
+	DRM_DEBUG_KMS("allocated %dx%d\n", fb->width, fb->height);
+
+	return 0;
+}
+
+static void vbox_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
+			      u16 blue, int regno)
+{
+}
+
+static void vbox_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
+			      u16 *blue, int regno)
+{
+	*red = regno;
+	*green = regno;
+	*blue = regno;
+}
+
+static struct drm_fb_helper_funcs vbox_fb_helper_funcs = {
+	.gamma_set = vbox_fb_gamma_set,
+	.gamma_get = vbox_fb_gamma_get,
+	.fb_probe = vboxfb_create,
+};
+
+void vbox_fbdev_fini(struct drm_device *dev)
+{
+	struct vbox_private *vbox = dev->dev_private;
+	struct vbox_fbdev *fbdev = vbox->fbdev;
+	struct vbox_framebuffer *afb = &fbdev->afb;
+
+	drm_fb_helper_unregister_fbi(&fbdev->helper);
+
+	if (afb->obj) {
+		struct vbox_bo *bo = gem_to_vbox_bo(afb->obj);
+
+		if (!vbox_bo_reserve(bo, false)) {
+			if (bo->kmap.virtual)
+				ttm_bo_kunmap(&bo->kmap);
+			/*
+			 * QXL does this, but is it really needed before
+			 * freeing?
+			 */
+			if (bo->pin_count)
+				vbox_bo_unpin(bo);
+			vbox_bo_unreserve(bo);
+		}
+		drm_gem_object_unreference_unlocked(afb->obj);
+		afb->obj = NULL;
+	}
+	drm_fb_helper_fini(&fbdev->helper);
+
+	drm_framebuffer_unregister_private(&afb->base);
+	drm_framebuffer_cleanup(&afb->base);
+}
+
+int vbox_fbdev_init(struct drm_device *dev)
+{
+	struct vbox_private *vbox = dev->dev_private;
+	struct vbox_fbdev *fbdev;
+	int ret;
+
+	fbdev = devm_kzalloc(dev->dev, sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev)
+		return -ENOMEM;
+
+	vbox->fbdev = fbdev;
+	spin_lock_init(&fbdev->dirty_lock);
+
+	drm_fb_helper_prepare(dev, &fbdev->helper, &vbox_fb_helper_funcs);
+	ret = drm_fb_helper_init(dev, &fbdev->helper, vbox->num_crtcs);
+	if (ret)
+		return ret;
+
+	ret = drm_fb_helper_single_add_all_connectors(&fbdev->helper);
+	if (ret)
+		goto err_fini;
+
+	/* disable all the possible outputs/crtcs before entering KMS mode */
+	drm_helper_disable_unused_functions(dev);
+
+	ret = drm_fb_helper_initial_config(&fbdev->helper, 32);
+	if (ret)
+		goto err_fini;
+
+	return 0;
+
+err_fini:
+	drm_fb_helper_fini(&fbdev->helper);
+	return ret;
+}
+
+void vbox_fbdev_set_base(struct vbox_private *vbox, unsigned long gpu_addr)
+{
+	struct fb_info *fbdev = vbox->fbdev->helper.fbdev;
+
+	fbdev->fix.smem_start = fbdev->apertures->ranges[0].base + gpu_addr;
+	fbdev->fix.smem_len = vbox->available_vram_size - gpu_addr;
+}
diff --git a/drivers/staging/vboxvideo/vbox_hgsmi.c b/drivers/staging/vboxvideo/vbox_hgsmi.c
new file mode 100644
index 0000000000000..822fd31121cbc
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_hgsmi.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Hans de Goede <hdegoede@redhat.com>
+ */
+
+#include "vbox_drv.h"
+#include "vboxvideo_vbe.h"
+#include "hgsmi_defs.h"
+
+/* One-at-a-Time Hash from http://www.burtleburtle.net/bob/hash/doobs.html */
+static u32 hgsmi_hash_process(u32 hash, const u8 *data, int size)
+{
+	while (size--) {
+		hash += *data++;
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
+
+	return hash;
+}
+
+static u32 hgsmi_hash_end(u32 hash)
+{
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+
+	return hash;
+}
+
+/* Not really a checksum but that is the naming used in all vbox code */
+static u32 hgsmi_checksum(u32 offset,
+			  const struct hgsmi_buffer_header *header,
+			  const struct hgsmi_buffer_tail *tail)
+{
+	u32 checksum;
+
+	checksum = hgsmi_hash_process(0, (u8 *)&offset, sizeof(offset));
+	checksum = hgsmi_hash_process(checksum, (u8 *)header, sizeof(*header));
+	/* 4 -> Do not checksum the checksum itself */
+	checksum = hgsmi_hash_process(checksum, (u8 *)tail, 4);
+
+	return hgsmi_hash_end(checksum);
+}
+
+void *hgsmi_buffer_alloc(struct gen_pool *guest_pool, size_t size,
+			 u8 channel, u16 channel_info)
+{
+	struct hgsmi_buffer_header *h;
+	struct hgsmi_buffer_tail *t;
+	size_t total_size;
+	dma_addr_t offset;
+
+	total_size = size + sizeof(*h) + sizeof(*t);
+	h = gen_pool_dma_alloc(guest_pool, total_size, &offset);
+	if (!h)
+		return NULL;
+
+	t = (struct hgsmi_buffer_tail *)((u8 *)h + sizeof(*h) + size);
+
+	h->flags = HGSMI_BUFFER_HEADER_F_SEQ_SINGLE;
+	h->data_size = size;
+	h->channel = channel;
+	h->channel_info = channel_info;
+	memset(&h->u.header_data, 0, sizeof(h->u.header_data));
+
+	t->reserved = 0;
+	t->checksum = hgsmi_checksum(offset, h, t);
+
+	return (u8 *)h + sizeof(*h);
+}
+
+void hgsmi_buffer_free(struct gen_pool *guest_pool, void *buf)
+{
+	struct hgsmi_buffer_header *h =
+		(struct hgsmi_buffer_header *)((u8 *)buf - sizeof(*h));
+	size_t total_size = h->data_size + sizeof(*h) +
+					     sizeof(struct hgsmi_buffer_tail);
+
+	gen_pool_free(guest_pool, (unsigned long)h, total_size);
+}
+
+int hgsmi_buffer_submit(struct gen_pool *guest_pool, void *buf)
+{
+	phys_addr_t offset;
+
+	offset = gen_pool_virt_to_phys(guest_pool, (unsigned long)buf -
+				       sizeof(struct hgsmi_buffer_header));
+	outl(offset, VGA_PORT_HGSMI_GUEST);
+	/* Make the compiler aware that the host has changed memory. */
+	mb();
+
+	return 0;
+}
diff --git a/drivers/staging/vboxvideo/vbox_irq.c b/drivers/staging/vboxvideo/vbox_irq.c
new file mode 100644
index 0000000000000..3ca8bec62ac41
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_irq.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2016-2017 Oracle Corporation
+ * This file is based on qxl_irq.c
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alon Levy
+ *          Michael Thayer <michael.thayer@oracle.com,
+ *          Hans de Goede <hdegoede@redhat.com>
+ */
+
+#include <drm/drm_crtc_helper.h>
+
+#include "vbox_drv.h"
+#include "vboxvideo.h"
+
+static void vbox_clear_irq(void)
+{
+	outl((u32)~0, VGA_PORT_HGSMI_HOST);
+}
+
+static u32 vbox_get_flags(struct vbox_private *vbox)
+{
+	return readl(vbox->guest_heap + HOST_FLAGS_OFFSET);
+}
+
+void vbox_report_hotplug(struct vbox_private *vbox)
+{
+	schedule_work(&vbox->hotplug_work);
+}
+
+irqreturn_t vbox_irq_handler(int irq, void *arg)
+{
+	struct drm_device *dev = (struct drm_device *)arg;
+	struct vbox_private *vbox = (struct vbox_private *)dev->dev_private;
+	u32 host_flags = vbox_get_flags(vbox);
+
+	if (!(host_flags & HGSMIHOSTFLAGS_IRQ))
+		return IRQ_NONE;
+
+	/*
+	 * Due to a bug in the initial host implementation of hot-plug irqs,
+	 * the hot-plug and cursor capability flags were never cleared.
+	 * Fortunately we can tell when they would have been set by checking
+	 * that the VSYNC flag is not set.
+	 */
+	if (host_flags &
+	    (HGSMIHOSTFLAGS_HOTPLUG | HGSMIHOSTFLAGS_CURSOR_CAPABILITIES) &&
+	    !(host_flags & HGSMIHOSTFLAGS_VSYNC))
+		vbox_report_hotplug(vbox);
+
+	vbox_clear_irq();
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * Check that the position hints provided by the host are suitable for GNOME
+ * shell (i.e. all screens disjoint and hints for all enabled screens) and if
+ * not replace them with default ones.  Providing valid hints improves the
+ * chances that we will get a known screen layout for pointer mapping.
+ */
+static void validate_or_set_position_hints(struct vbox_private *vbox)
+{
+	struct vbva_modehint *hintsi, *hintsj;
+	bool valid = true;
+	u16 currentx = 0;
+	int i, j;
+
+	for (i = 0; i < vbox->num_crtcs; ++i) {
+		for (j = 0; j < i; ++j) {
+			hintsi = &vbox->last_mode_hints[i];
+			hintsj = &vbox->last_mode_hints[j];
+
+			if (hintsi->enabled && hintsj->enabled) {
+				if (hintsi->dx >= 0xffff ||
+				    hintsi->dy >= 0xffff ||
+				    hintsj->dx >= 0xffff ||
+				    hintsj->dy >= 0xffff ||
+				    (hintsi->dx <
+					hintsj->dx + (hintsj->cx & 0x8fff) &&
+				     hintsi->dx + (hintsi->cx & 0x8fff) >
+					hintsj->dx) ||
+				    (hintsi->dy <
+					hintsj->dy + (hintsj->cy & 0x8fff) &&
+				     hintsi->dy + (hintsi->cy & 0x8fff) >
+					hintsj->dy))
+					valid = false;
+			}
+		}
+	}
+	if (!valid)
+		for (i = 0; i < vbox->num_crtcs; ++i) {
+			if (vbox->last_mode_hints[i].enabled) {
+				vbox->last_mode_hints[i].dx = currentx;
+				vbox->last_mode_hints[i].dy = 0;
+				currentx +=
+				    vbox->last_mode_hints[i].cx & 0x8fff;
+			}
+		}
+}
+
+/**
+ * Query the host for the most recent video mode hints.
+ */
+static void vbox_update_mode_hints(struct vbox_private *vbox)
+{
+	struct drm_device *dev = vbox->dev;
+	struct drm_connector *connector;
+	struct vbox_connector *vbox_conn;
+	struct vbva_modehint *hints;
+	u16 flags;
+	bool disconnected;
+	unsigned int crtc_id;
+	int ret;
+
+	ret = hgsmi_get_mode_hints(vbox->guest_pool, vbox->num_crtcs,
+				   vbox->last_mode_hints);
+	if (ret) {
+		DRM_ERROR("vboxvideo: hgsmi_get_mode_hints failed: %d\n", ret);
+		return;
+	}
+
+	validate_or_set_position_hints(vbox);
+	drm_modeset_lock_all(dev);
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		vbox_conn = to_vbox_connector(connector);
+
+		hints = &vbox->last_mode_hints[vbox_conn->vbox_crtc->crtc_id];
+		if (hints->magic != VBVAMODEHINT_MAGIC)
+			continue;
+
+		disconnected = !(hints->enabled);
+		crtc_id = vbox_conn->vbox_crtc->crtc_id;
+		vbox_conn->mode_hint.width = hints->cx & 0x8fff;
+		vbox_conn->mode_hint.height = hints->cy & 0x8fff;
+		vbox_conn->vbox_crtc->x_hint = hints->dx;
+		vbox_conn->vbox_crtc->y_hint = hints->dy;
+		vbox_conn->mode_hint.disconnected = disconnected;
+
+		if (vbox_conn->vbox_crtc->disconnected == disconnected)
+			continue;
+
+		if (disconnected)
+			flags = VBVA_SCREEN_F_ACTIVE | VBVA_SCREEN_F_DISABLED;
+		else
+			flags = VBVA_SCREEN_F_ACTIVE | VBVA_SCREEN_F_BLANK;
+
+		hgsmi_process_display_info(vbox->guest_pool, crtc_id, 0, 0, 0,
+					   hints->cx * 4, hints->cx,
+					   hints->cy, 0, flags);
+
+		vbox_conn->vbox_crtc->disconnected = disconnected;
+	}
+	drm_modeset_unlock_all(dev);
+}
+
+static void vbox_hotplug_worker(struct work_struct *work)
+{
+	struct vbox_private *vbox = container_of(work, struct vbox_private,
+						 hotplug_work);
+
+	vbox_update_mode_hints(vbox);
+	drm_kms_helper_hotplug_event(vbox->dev);
+}
+
+int vbox_irq_init(struct vbox_private *vbox)
+{
+	INIT_WORK(&vbox->hotplug_work, vbox_hotplug_worker);
+	vbox_update_mode_hints(vbox);
+
+	return drm_irq_install(vbox->dev, vbox->dev->pdev->irq);
+}
+
+void vbox_irq_fini(struct vbox_private *vbox)
+{
+	drm_irq_uninstall(vbox->dev);
+	flush_work(&vbox->hotplug_work);
+}
diff --git a/drivers/staging/vboxvideo/vbox_main.c b/drivers/staging/vboxvideo/vbox_main.c
new file mode 100644
index 0000000000000..d0c6ec75a3c71
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_main.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (C) 2013-2017 Oracle Corporation
+ * This file is based on ast_main.c
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>,
+ *          Michael Thayer <michael.thayer@oracle.com,
+ *          Hans de Goede <hdegoede@redhat.com>
+ */
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "vbox_drv.h"
+#include "vbox_err.h"
+#include "vboxvideo_guest.h"
+#include "vboxvideo_vbe.h"
+
+static void vbox_user_framebuffer_destroy(struct drm_framebuffer *fb)
+{
+	struct vbox_framebuffer *vbox_fb = to_vbox_framebuffer(fb);
+
+	if (vbox_fb->obj)
+		drm_gem_object_unreference_unlocked(vbox_fb->obj);
+
+	drm_framebuffer_cleanup(fb);
+	kfree(fb);
+}
+
+void vbox_enable_accel(struct vbox_private *vbox)
+{
+	unsigned int i;
+	struct vbva_buffer *vbva;
+
+	if (!vbox->vbva_info || !vbox->vbva_buffers) {
+		/* Should never happen... */
+		DRM_ERROR("vboxvideo: failed to set up VBVA.\n");
+		return;
+	}
+
+	for (i = 0; i < vbox->num_crtcs; ++i) {
+		if (vbox->vbva_info[i].vbva)
+			continue;
+
+		vbva = (void *)vbox->vbva_buffers + i * VBVA_MIN_BUFFER_SIZE;
+		if (!vbva_enable(&vbox->vbva_info[i],
+				 vbox->guest_pool, vbva, i)) {
+			/* very old host or driver error. */
+			DRM_ERROR("vboxvideo: vbva_enable failed\n");
+			return;
+		}
+	}
+}
+
+void vbox_disable_accel(struct vbox_private *vbox)
+{
+	unsigned int i;
+
+	for (i = 0; i < vbox->num_crtcs; ++i)
+		vbva_disable(&vbox->vbva_info[i], vbox->guest_pool, i);
+}
+
+void vbox_report_caps(struct vbox_private *vbox)
+{
+	u32 caps = VBVACAPS_DISABLE_CURSOR_INTEGRATION |
+		   VBVACAPS_IRQ | VBVACAPS_USE_VBVA_ONLY;
+
+	if (vbox->initial_mode_queried)
+		caps |= VBVACAPS_VIDEO_MODE_HINTS;
+
+	hgsmi_send_caps_info(vbox->guest_pool, caps);
+}
+
+/**
+ * Send information about dirty rectangles to VBVA.  If necessary we enable
+ * VBVA first, as this is normally disabled after a change of master in case
+ * the new master does not send dirty rectangle information (is this even
+ * allowed?)
+ */
+void vbox_framebuffer_dirty_rectangles(struct drm_framebuffer *fb,
+				       struct drm_clip_rect *rects,
+				       unsigned int num_rects)
+{
+	struct vbox_private *vbox = fb->dev->dev_private;
+	struct drm_crtc *crtc;
+	unsigned int i;
+
+	mutex_lock(&vbox->hw_mutex);
+	list_for_each_entry(crtc, &fb->dev->mode_config.crtc_list, head) {
+		if (CRTC_FB(crtc) != fb)
+			continue;
+
+		vbox_enable_accel(vbox);
+
+		for (i = 0; i < num_rects; ++i) {
+			struct vbva_cmd_hdr cmd_hdr;
+			unsigned int crtc_id = to_vbox_crtc(crtc)->crtc_id;
+
+			if ((rects[i].x1 > crtc->x + crtc->hwmode.hdisplay) ||
+			    (rects[i].y1 > crtc->y + crtc->hwmode.vdisplay) ||
+			    (rects[i].x2 < crtc->x) ||
+			    (rects[i].y2 < crtc->y))
+				continue;
+
+			cmd_hdr.x = (s16)rects[i].x1;
+			cmd_hdr.y = (s16)rects[i].y1;
+			cmd_hdr.w = (u16)rects[i].x2 - rects[i].x1;
+			cmd_hdr.h = (u16)rects[i].y2 - rects[i].y1;
+
+			if (!vbva_buffer_begin_update(&vbox->vbva_info[crtc_id],
+						      vbox->guest_pool))
+				continue;
+
+			vbva_write(&vbox->vbva_info[crtc_id], vbox->guest_pool,
+				   &cmd_hdr, sizeof(cmd_hdr));
+			vbva_buffer_end_update(&vbox->vbva_info[crtc_id]);
+		}
+	}
+	mutex_unlock(&vbox->hw_mutex);
+}
+
+static int vbox_user_framebuffer_dirty(struct drm_framebuffer *fb,
+				       struct drm_file *file_priv,
+				       unsigned int flags, unsigned int color,
+				       struct drm_clip_rect *rects,
+				       unsigned int num_rects)
+{
+	vbox_framebuffer_dirty_rectangles(fb, rects, num_rects);
+
+	return 0;
+}
+
+static const struct drm_framebuffer_funcs vbox_fb_funcs = {
+	.destroy = vbox_user_framebuffer_destroy,
+	.dirty = vbox_user_framebuffer_dirty,
+};
+
+int vbox_framebuffer_init(struct drm_device *dev,
+			  struct vbox_framebuffer *vbox_fb,
+			  const struct DRM_MODE_FB_CMD *mode_cmd,
+			  struct drm_gem_object *obj)
+{
+	int ret;
+
+	drm_helper_mode_fill_fb_struct(dev, &vbox_fb->base, mode_cmd);
+	vbox_fb->obj = obj;
+	ret = drm_framebuffer_init(dev, &vbox_fb->base, &vbox_fb_funcs);
+	if (ret) {
+		DRM_ERROR("framebuffer init failed %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct drm_framebuffer *vbox_user_framebuffer_create(
+		struct drm_device *dev,
+		struct drm_file *filp,
+		const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_gem_object *obj;
+	struct vbox_framebuffer *vbox_fb;
+	int ret = -ENOMEM;
+
+	obj = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
+	if (!obj)
+		return ERR_PTR(-ENOENT);
+
+	vbox_fb = kzalloc(sizeof(*vbox_fb), GFP_KERNEL);
+	if (!vbox_fb)
+		goto err_unref_obj;
+
+	ret = vbox_framebuffer_init(dev, vbox_fb, mode_cmd, obj);
+	if (ret)
+		goto err_free_vbox_fb;
+
+	return &vbox_fb->base;
+
+err_free_vbox_fb:
+	kfree(vbox_fb);
+err_unref_obj:
+	drm_gem_object_unreference_unlocked(obj);
+	return ERR_PTR(ret);
+}
+
+static const struct drm_mode_config_funcs vbox_mode_funcs = {
+	.fb_create = vbox_user_framebuffer_create,
+};
+
+static int vbox_accel_init(struct vbox_private *vbox)
+{
+	unsigned int i;
+
+	vbox->vbva_info = devm_kcalloc(vbox->dev->dev, vbox->num_crtcs,
+				       sizeof(*vbox->vbva_info), GFP_KERNEL);
+	if (!vbox->vbva_info)
+		return -ENOMEM;
+
+	/* Take a command buffer for each screen from the end of usable VRAM. */
+	vbox->available_vram_size -= vbox->num_crtcs * VBVA_MIN_BUFFER_SIZE;
+
+	vbox->vbva_buffers = pci_iomap_range(vbox->dev->pdev, 0,
+					     vbox->available_vram_size,
+					     vbox->num_crtcs *
+					     VBVA_MIN_BUFFER_SIZE);
+	if (!vbox->vbva_buffers)
+		return -ENOMEM;
+
+	for (i = 0; i < vbox->num_crtcs; ++i)
+		vbva_setup_buffer_context(&vbox->vbva_info[i],
+					  vbox->available_vram_size +
+					  i * VBVA_MIN_BUFFER_SIZE,
+					  VBVA_MIN_BUFFER_SIZE);
+
+	return 0;
+}
+
+static void vbox_accel_fini(struct vbox_private *vbox)
+{
+	vbox_disable_accel(vbox);
+	pci_iounmap(vbox->dev->pdev, vbox->vbva_buffers);
+}
+
+/** Do we support the 4.3 plus mode hint reporting interface? */
+static bool have_hgsmi_mode_hints(struct vbox_private *vbox)
+{
+	u32 have_hints, have_cursor;
+	int ret;
+
+	ret = hgsmi_query_conf(vbox->guest_pool,
+			       VBOX_VBVA_CONF32_MODE_HINT_REPORTING,
+			       &have_hints);
+	if (ret)
+		return false;
+
+	ret = hgsmi_query_conf(vbox->guest_pool,
+			       VBOX_VBVA_CONF32_GUEST_CURSOR_REPORTING,
+			       &have_cursor);
+	if (ret)
+		return false;
+
+	return have_hints == VINF_SUCCESS && have_cursor == VINF_SUCCESS;
+}
+
+static bool vbox_check_supported(u16 id)
+{
+	u16 dispi_id;
+
+	vbox_write_ioport(VBE_DISPI_INDEX_ID, id);
+	dispi_id = inw(VBE_DISPI_IOPORT_DATA);
+
+	return dispi_id == id;
+}
+
+/**
+ * Set up our heaps and data exchange buffers in VRAM before handing the rest
+ * to the memory manager.
+ */
+static int vbox_hw_init(struct vbox_private *vbox)
+{
+	int ret = -ENOMEM;
+
+	vbox->full_vram_size = inl(VBE_DISPI_IOPORT_DATA);
+	vbox->any_pitch = vbox_check_supported(VBE_DISPI_ID_ANYX);
+
+	DRM_INFO("VRAM %08x\n", vbox->full_vram_size);
+
+	/* Map guest-heap at end of vram */
+	vbox->guest_heap =
+	    pci_iomap_range(vbox->dev->pdev, 0, GUEST_HEAP_OFFSET(vbox),
+			    GUEST_HEAP_SIZE);
+	if (!vbox->guest_heap)
+		return -ENOMEM;
+
+	/* Create guest-heap mem-pool use 2^4 = 16 byte chunks */
+	vbox->guest_pool = gen_pool_create(4, -1);
+	if (!vbox->guest_pool)
+		goto err_unmap_guest_heap;
+
+	ret = gen_pool_add_virt(vbox->guest_pool,
+				(unsigned long)vbox->guest_heap,
+				GUEST_HEAP_OFFSET(vbox),
+				GUEST_HEAP_USABLE_SIZE, -1);
+	if (ret)
+		goto err_destroy_guest_pool;
+
+	ret = hgsmi_test_query_conf(vbox->guest_pool);
+	if (ret) {
+		DRM_ERROR("vboxvideo: hgsmi_test_query_conf failed\n");
+		goto err_destroy_guest_pool;
+	}
+
+	/* Reduce available VRAM size to reflect the guest heap. */
+	vbox->available_vram_size = GUEST_HEAP_OFFSET(vbox);
+	/* Linux drm represents monitors as a 32-bit array. */
+	hgsmi_query_conf(vbox->guest_pool, VBOX_VBVA_CONF32_MONITOR_COUNT,
+			 &vbox->num_crtcs);
+	vbox->num_crtcs = clamp_t(u32, vbox->num_crtcs, 1, VBOX_MAX_SCREENS);
+
+	if (!have_hgsmi_mode_hints(vbox)) {
+		ret = -ENOTSUPP;
+		goto err_destroy_guest_pool;
+	}
+
+	vbox->last_mode_hints = devm_kcalloc(vbox->dev->dev, vbox->num_crtcs,
+					     sizeof(struct vbva_modehint),
+					     GFP_KERNEL);
+	if (!vbox->last_mode_hints) {
+		ret = -ENOMEM;
+		goto err_destroy_guest_pool;
+	}
+
+	ret = vbox_accel_init(vbox);
+	if (ret)
+		goto err_destroy_guest_pool;
+
+	return 0;
+
+err_destroy_guest_pool:
+	gen_pool_destroy(vbox->guest_pool);
+err_unmap_guest_heap:
+	pci_iounmap(vbox->dev->pdev, vbox->guest_heap);
+	return ret;
+}
+
+static void vbox_hw_fini(struct vbox_private *vbox)
+{
+	vbox_accel_fini(vbox);
+	gen_pool_destroy(vbox->guest_pool);
+	pci_iounmap(vbox->dev->pdev, vbox->guest_heap);
+}
+
+int vbox_driver_load(struct drm_device *dev, unsigned long flags)
+{
+	struct vbox_private *vbox;
+	int ret = 0;
+
+	if (!vbox_check_supported(VBE_DISPI_ID_HGSMI))
+		return -ENODEV;
+
+	vbox = devm_kzalloc(dev->dev, sizeof(*vbox), GFP_KERNEL);
+	if (!vbox)
+		return -ENOMEM;
+
+	dev->dev_private = vbox;
+	vbox->dev = dev;
+
+	mutex_init(&vbox->hw_mutex);
+
+	ret = vbox_hw_init(vbox);
+	if (ret)
+		return ret;
+
+	ret = vbox_mm_init(vbox);
+	if (ret)
+		goto err_hw_fini;
+
+	drm_mode_config_init(dev);
+
+	dev->mode_config.funcs = (void *)&vbox_mode_funcs;
+	dev->mode_config.min_width = 64;
+	dev->mode_config.min_height = 64;
+	dev->mode_config.preferred_depth = 24;
+	dev->mode_config.max_width = VBE_DISPI_MAX_XRES;
+	dev->mode_config.max_height = VBE_DISPI_MAX_YRES;
+
+	ret = vbox_mode_init(dev);
+	if (ret)
+		goto err_drm_mode_cleanup;
+
+	ret = vbox_irq_init(vbox);
+	if (ret)
+		goto err_mode_fini;
+
+	ret = vbox_fbdev_init(dev);
+	if (ret)
+		goto err_irq_fini;
+
+	return 0;
+
+err_irq_fini:
+	vbox_irq_fini(vbox);
+err_mode_fini:
+	vbox_mode_fini(dev);
+err_drm_mode_cleanup:
+	drm_mode_config_cleanup(dev);
+	vbox_mm_fini(vbox);
+err_hw_fini:
+	vbox_hw_fini(vbox);
+	return ret;
+}
+
+void vbox_driver_unload(struct drm_device *dev)
+{
+	struct vbox_private *vbox = dev->dev_private;
+
+	vbox_fbdev_fini(dev);
+	vbox_irq_fini(vbox);
+	vbox_mode_fini(dev);
+	drm_mode_config_cleanup(dev);
+	vbox_mm_fini(vbox);
+	vbox_hw_fini(vbox);
+}
+
+/**
+ * @note this is described in the DRM framework documentation.  AST does not
+ * have it, but we get an oops on driver unload if it is not present.
+ */
+void vbox_driver_lastclose(struct drm_device *dev)
+{
+	struct vbox_private *vbox = dev->dev_private;
+
+	if (vbox->fbdev)
+		drm_fb_helper_restore_fbdev_mode_unlocked(&vbox->fbdev->helper);
+}
+
+int vbox_gem_create(struct drm_device *dev,
+		    u32 size, bool iskernel, struct drm_gem_object **obj)
+{
+	struct vbox_bo *vboxbo;
+	int ret;
+
+	*obj = NULL;
+
+	size = roundup(size, PAGE_SIZE);
+	if (size == 0)
+		return -EINVAL;
+
+	ret = vbox_bo_create(dev, size, 0, 0, &vboxbo);
+	if (ret) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("failed to allocate GEM object\n");
+		return ret;
+	}
+
+	*obj = &vboxbo->gem;
+
+	return 0;
+}
+
+int vbox_dumb_create(struct drm_file *file,
+		     struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	int ret;
+	struct drm_gem_object *gobj;
+	u32 handle;
+
+	args->pitch = args->width * ((args->bpp + 7) / 8);
+	args->size = args->pitch * args->height;
+
+	ret = vbox_gem_create(dev, args->size, false, &gobj);
+	if (ret)
+		return ret;
+
+	ret = drm_gem_handle_create(file, gobj, &handle);
+	drm_gem_object_unreference_unlocked(gobj);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+
+	return 0;
+}
+
+static void vbox_bo_unref(struct vbox_bo **bo)
+{
+	struct ttm_buffer_object *tbo;
+
+	if ((*bo) == NULL)
+		return;
+
+	tbo = &((*bo)->bo);
+	ttm_bo_unref(&tbo);
+	if (!tbo)
+		*bo = NULL;
+}
+
+void vbox_gem_free_object(struct drm_gem_object *obj)
+{
+	struct vbox_bo *vbox_bo = gem_to_vbox_bo(obj);
+
+	vbox_bo_unref(&vbox_bo);
+}
+
+static inline u64 vbox_bo_mmap_offset(struct vbox_bo *bo)
+{
+	return drm_vma_node_offset_addr(&bo->bo.vma_node);
+}
+
+int
+vbox_dumb_mmap_offset(struct drm_file *file,
+		      struct drm_device *dev,
+		      u32 handle, u64 *offset)
+{
+	struct drm_gem_object *obj;
+	int ret;
+	struct vbox_bo *bo;
+
+	mutex_lock(&dev->struct_mutex);
+	obj = drm_gem_object_lookup(file, handle);
+	if (!obj) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	bo = gem_to_vbox_bo(obj);
+	*offset = vbox_bo_mmap_offset(bo);
+
+	drm_gem_object_unreference(obj);
+	ret = 0;
+
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
diff --git a/drivers/staging/vboxvideo/vbox_mode.c b/drivers/staging/vboxvideo/vbox_mode.c
new file mode 100644
index 0000000000000..f2b85f3256fab
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_mode.c
@@ -0,0 +1,877 @@
+/*
+ * Copyright (C) 2013-2017 Oracle Corporation
+ * This file is based on ast_mode.c
+ * Copyright 2012 Red Hat Inc.
+ * Parts based on xf86-video-ast
+ * Copyright (c) 2005 ASPEED Technology Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Michael Thayer <michael.thayer@oracle.com,
+ *          Hans de Goede <hdegoede@redhat.com>
+ */
+#include <linux/export.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_plane_helper.h>
+
+#include "vbox_drv.h"
+#include "vboxvideo.h"
+#include "hgsmi_channels.h"
+
+static int vbox_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
+			    u32 handle, u32 width, u32 height,
+			    s32 hot_x, s32 hot_y);
+static int vbox_cursor_move(struct drm_crtc *crtc, int x, int y);
+
+/**
+ * Set a graphics mode.  Poke any required values into registers, do an HGSMI
+ * mode set and tell the host we support advanced graphics functions.
+ */
+static void vbox_do_modeset(struct drm_crtc *crtc,
+			    const struct drm_display_mode *mode)
+{
+	struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
+	struct vbox_private *vbox;
+	int width, height, bpp, pitch;
+	unsigned int crtc_id;
+	u16 flags;
+	s32 x_offset, y_offset;
+
+	vbox = crtc->dev->dev_private;
+	width = mode->hdisplay ? mode->hdisplay : 640;
+	height = mode->vdisplay ? mode->vdisplay : 480;
+	crtc_id = vbox_crtc->crtc_id;
+	bpp = crtc->enabled ? CRTC_FB(crtc)->format->cpp[0] * 8 : 32;
+	pitch = crtc->enabled ? CRTC_FB(crtc)->pitches[0] : width * bpp / 8;
+	x_offset = vbox->single_framebuffer ? crtc->x : vbox_crtc->x_hint;
+	y_offset = vbox->single_framebuffer ? crtc->y : vbox_crtc->y_hint;
+
+	/*
+	 * This is the old way of setting graphics modes.  It assumed one screen
+	 * and a frame-buffer at the start of video RAM.  On older versions of
+	 * VirtualBox, certain parts of the code still assume that the first
+	 * screen is programmed this way, so try to fake it.
+	 */
+	if (vbox_crtc->crtc_id == 0 && crtc->enabled &&
+	    vbox_crtc->fb_offset / pitch < 0xffff - crtc->y &&
+	    vbox_crtc->fb_offset % (bpp / 8) == 0) {
+		vbox_write_ioport(VBE_DISPI_INDEX_XRES, width);
+		vbox_write_ioport(VBE_DISPI_INDEX_YRES, height);
+		vbox_write_ioport(VBE_DISPI_INDEX_VIRT_WIDTH, pitch * 8 / bpp);
+		vbox_write_ioport(VBE_DISPI_INDEX_BPP,
+				  CRTC_FB(crtc)->format->cpp[0] * 8);
+		vbox_write_ioport(VBE_DISPI_INDEX_ENABLE, VBE_DISPI_ENABLED);
+		vbox_write_ioport(
+			VBE_DISPI_INDEX_X_OFFSET,
+			vbox_crtc->fb_offset % pitch / bpp * 8 + crtc->x);
+		vbox_write_ioport(VBE_DISPI_INDEX_Y_OFFSET,
+				  vbox_crtc->fb_offset / pitch + crtc->y);
+	}
+
+	flags = VBVA_SCREEN_F_ACTIVE;
+	flags |= (crtc->enabled && !vbox_crtc->blanked) ?
+		 0 : VBVA_SCREEN_F_BLANK;
+	flags |= vbox_crtc->disconnected ? VBVA_SCREEN_F_DISABLED : 0;
+	hgsmi_process_display_info(vbox->guest_pool, vbox_crtc->crtc_id,
+				   x_offset, y_offset,
+				   crtc->x * bpp / 8 + crtc->y * pitch,
+				   pitch, width, height,
+				   vbox_crtc->blanked ? 0 : bpp, flags);
+}
+
+static int vbox_set_view(struct drm_crtc *crtc)
+{
+	struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
+	struct vbox_private *vbox = crtc->dev->dev_private;
+	struct vbva_infoview *p;
+
+	/*
+	 * Tell the host about the view.  This design originally targeted the
+	 * Windows XP driver architecture and assumed that each screen would
+	 * have a dedicated frame buffer with the command buffer following it,
+	 * the whole being a "view".  The host works out which screen a command
+	 * buffer belongs to by checking whether it is in the first view, then
+	 * whether it is in the second and so on.  The first match wins.  We
+	 * cheat around this by making the first view be the managed memory
+	 * plus the first command buffer, the second the same plus the second
+	 * buffer and so on.
+	 */
+	p = hgsmi_buffer_alloc(vbox->guest_pool, sizeof(*p),
+			       HGSMI_CH_VBVA, VBVA_INFO_VIEW);
+	if (!p)
+		return -ENOMEM;
+
+	p->view_index = vbox_crtc->crtc_id;
+	p->view_offset = vbox_crtc->fb_offset;
+	p->view_size = vbox->available_vram_size - vbox_crtc->fb_offset +
+		       vbox_crtc->crtc_id * VBVA_MIN_BUFFER_SIZE;
+	p->max_screen_size = vbox->available_vram_size - vbox_crtc->fb_offset;
+
+	hgsmi_buffer_submit(vbox->guest_pool, p);
+	hgsmi_buffer_free(vbox->guest_pool, p);
+
+	return 0;
+}
+
+static void vbox_crtc_load_lut(struct drm_crtc *crtc)
+{
+}
+
+static void vbox_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
+	struct vbox_private *vbox = crtc->dev->dev_private;
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		vbox_crtc->blanked = false;
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		vbox_crtc->blanked = true;
+		break;
+	}
+
+	mutex_lock(&vbox->hw_mutex);
+	vbox_do_modeset(crtc, &crtc->hwmode);
+	mutex_unlock(&vbox->hw_mutex);
+}
+
+static bool vbox_crtc_mode_fixup(struct drm_crtc *crtc,
+				 const struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+/*
+ * Try to map the layout of virtual screens to the range of the input device.
+ * Return true if we need to re-set the crtc modes due to screen offset
+ * changes.
+ */
+static bool vbox_set_up_input_mapping(struct vbox_private *vbox)
+{
+	struct drm_crtc *crtci;
+	struct drm_connector *connectori;
+	struct drm_framebuffer *fb1 = NULL;
+	bool single_framebuffer = true;
+	bool old_single_framebuffer = vbox->single_framebuffer;
+	u16 width = 0, height = 0;
+
+	/*
+	 * Are we using an X.Org-style single large frame-buffer for all crtcs?
+	 * If so then screen layout can be deduced from the crtc offsets.
+	 * Same fall-back if this is the fbdev frame-buffer.
+	 */
+	list_for_each_entry(crtci, &vbox->dev->mode_config.crtc_list, head) {
+		if (!fb1) {
+			fb1 = CRTC_FB(crtci);
+			if (to_vbox_framebuffer(fb1) == &vbox->fbdev->afb)
+				break;
+		} else if (CRTC_FB(crtci) && fb1 != CRTC_FB(crtci)) {
+			single_framebuffer = false;
+		}
+	}
+	if (single_framebuffer) {
+		list_for_each_entry(crtci, &vbox->dev->mode_config.crtc_list,
+				    head) {
+			if (to_vbox_crtc(crtci)->crtc_id != 0)
+				continue;
+
+			vbox->single_framebuffer = true;
+			vbox->input_mapping_width = CRTC_FB(crtci)->width;
+			vbox->input_mapping_height = CRTC_FB(crtci)->height;
+			return old_single_framebuffer !=
+			       vbox->single_framebuffer;
+		}
+	}
+	/* Otherwise calculate the total span of all screens. */
+	list_for_each_entry(connectori, &vbox->dev->mode_config.connector_list,
+			    head) {
+		struct vbox_connector *vbox_connector =
+		    to_vbox_connector(connectori);
+		struct vbox_crtc *vbox_crtc = vbox_connector->vbox_crtc;
+
+		width = max_t(u16, width, vbox_crtc->x_hint +
+					  vbox_connector->mode_hint.width);
+		height = max_t(u16, height, vbox_crtc->y_hint +
+					    vbox_connector->mode_hint.height);
+	}
+
+	vbox->single_framebuffer = false;
+	vbox->input_mapping_width = width;
+	vbox->input_mapping_height = height;
+
+	return old_single_framebuffer != vbox->single_framebuffer;
+}
+
+static int vbox_crtc_do_set_base(struct drm_crtc *crtc,
+				 struct drm_framebuffer *old_fb, int x, int y)
+{
+	struct vbox_private *vbox = crtc->dev->dev_private;
+	struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
+	struct drm_gem_object *obj;
+	struct vbox_framebuffer *vbox_fb;
+	struct vbox_bo *bo;
+	int ret;
+	u64 gpu_addr;
+
+	/* Unpin the previous fb. */
+	if (old_fb) {
+		vbox_fb = to_vbox_framebuffer(old_fb);
+		obj = vbox_fb->obj;
+		bo = gem_to_vbox_bo(obj);
+		ret = vbox_bo_reserve(bo, false);
+		if (ret)
+			return ret;
+
+		vbox_bo_unpin(bo);
+		vbox_bo_unreserve(bo);
+	}
+
+	vbox_fb = to_vbox_framebuffer(CRTC_FB(crtc));
+	obj = vbox_fb->obj;
+	bo = gem_to_vbox_bo(obj);
+
+	ret = vbox_bo_reserve(bo, false);
+	if (ret)
+		return ret;
+
+	ret = vbox_bo_pin(bo, TTM_PL_FLAG_VRAM, &gpu_addr);
+	if (ret) {
+		vbox_bo_unreserve(bo);
+		return ret;
+	}
+
+	if (&vbox->fbdev->afb == vbox_fb)
+		vbox_fbdev_set_base(vbox, gpu_addr);
+	vbox_bo_unreserve(bo);
+
+	/* vbox_set_start_address_crt1(crtc, (u32)gpu_addr); */
+	vbox_crtc->fb_offset = gpu_addr;
+	if (vbox_set_up_input_mapping(vbox)) {
+		struct drm_crtc *crtci;
+
+		list_for_each_entry(crtci, &vbox->dev->mode_config.crtc_list,
+				    head) {
+			vbox_set_view(crtc);
+			vbox_do_modeset(crtci, &crtci->mode);
+		}
+	}
+
+	return 0;
+}
+
+static int vbox_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+				   struct drm_framebuffer *old_fb)
+{
+	return vbox_crtc_do_set_base(crtc, old_fb, x, y);
+}
+
+static int vbox_crtc_mode_set(struct drm_crtc *crtc,
+			      struct drm_display_mode *mode,
+			      struct drm_display_mode *adjusted_mode,
+			      int x, int y, struct drm_framebuffer *old_fb)
+{
+	struct vbox_private *vbox = crtc->dev->dev_private;
+	int ret;
+
+	vbox_crtc_mode_set_base(crtc, x, y, old_fb);
+
+	mutex_lock(&vbox->hw_mutex);
+	ret = vbox_set_view(crtc);
+	if (!ret)
+		vbox_do_modeset(crtc, mode);
+	hgsmi_update_input_mapping(vbox->guest_pool, 0, 0,
+				   vbox->input_mapping_width,
+				   vbox->input_mapping_height);
+	mutex_unlock(&vbox->hw_mutex);
+
+	return ret;
+}
+
+static void vbox_crtc_disable(struct drm_crtc *crtc)
+{
+}
+
+static void vbox_crtc_prepare(struct drm_crtc *crtc)
+{
+}
+
+static void vbox_crtc_commit(struct drm_crtc *crtc)
+{
+}
+
+static const struct drm_crtc_helper_funcs vbox_crtc_helper_funcs = {
+	.dpms = vbox_crtc_dpms,
+	.mode_fixup = vbox_crtc_mode_fixup,
+	.mode_set = vbox_crtc_mode_set,
+	/* .mode_set_base = vbox_crtc_mode_set_base, */
+	.disable = vbox_crtc_disable,
+	.load_lut = vbox_crtc_load_lut,
+	.prepare = vbox_crtc_prepare,
+	.commit = vbox_crtc_commit,
+};
+
+static void vbox_crtc_reset(struct drm_crtc *crtc)
+{
+}
+
+static void vbox_crtc_destroy(struct drm_crtc *crtc)
+{
+	drm_crtc_cleanup(crtc);
+	kfree(crtc);
+}
+
+static const struct drm_crtc_funcs vbox_crtc_funcs = {
+	.cursor_move = vbox_cursor_move,
+	.cursor_set2 = vbox_cursor_set2,
+	.reset = vbox_crtc_reset,
+	.set_config = drm_crtc_helper_set_config,
+	/* .gamma_set = vbox_crtc_gamma_set, */
+	.destroy = vbox_crtc_destroy,
+};
+
+static struct vbox_crtc *vbox_crtc_init(struct drm_device *dev, unsigned int i)
+{
+	struct vbox_crtc *vbox_crtc;
+
+	vbox_crtc = kzalloc(sizeof(*vbox_crtc), GFP_KERNEL);
+	if (!vbox_crtc)
+		return NULL;
+
+	vbox_crtc->crtc_id = i;
+
+	drm_crtc_init(dev, &vbox_crtc->base, &vbox_crtc_funcs);
+	drm_mode_crtc_set_gamma_size(&vbox_crtc->base, 256);
+	drm_crtc_helper_add(&vbox_crtc->base, &vbox_crtc_helper_funcs);
+
+	return vbox_crtc;
+}
+
+static void vbox_encoder_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static struct drm_encoder *vbox_best_single_encoder(struct drm_connector
+						    *connector)
+{
+	int enc_id = connector->encoder_ids[0];
+
+	/* pick the encoder ids */
+	if (enc_id)
+		return drm_encoder_find(connector->dev, enc_id);
+
+	return NULL;
+}
+
+static const struct drm_encoder_funcs vbox_enc_funcs = {
+	.destroy = vbox_encoder_destroy,
+};
+
+static void vbox_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+}
+
+static bool vbox_mode_fixup(struct drm_encoder *encoder,
+			    const struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void vbox_encoder_mode_set(struct drm_encoder *encoder,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode)
+{
+}
+
+static void vbox_encoder_prepare(struct drm_encoder *encoder)
+{
+}
+
+static void vbox_encoder_commit(struct drm_encoder *encoder)
+{
+}
+
+static const struct drm_encoder_helper_funcs vbox_enc_helper_funcs = {
+	.dpms = vbox_encoder_dpms,
+	.mode_fixup = vbox_mode_fixup,
+	.prepare = vbox_encoder_prepare,
+	.commit = vbox_encoder_commit,
+	.mode_set = vbox_encoder_mode_set,
+};
+
+static struct drm_encoder *vbox_encoder_init(struct drm_device *dev,
+					     unsigned int i)
+{
+	struct vbox_encoder *vbox_encoder;
+
+	vbox_encoder = kzalloc(sizeof(*vbox_encoder), GFP_KERNEL);
+	if (!vbox_encoder)
+		return NULL;
+
+	drm_encoder_init(dev, &vbox_encoder->base, &vbox_enc_funcs,
+			 DRM_MODE_ENCODER_DAC, NULL);
+	drm_encoder_helper_add(&vbox_encoder->base, &vbox_enc_helper_funcs);
+
+	vbox_encoder->base.possible_crtcs = 1 << i;
+	return &vbox_encoder->base;
+}
+
+/**
+ * Generate EDID data with a mode-unique serial number for the virtual
+ *  monitor to try to persuade Unity that different modes correspond to
+ *  different monitors and it should not try to force the same resolution on
+ *  them.
+ */
+static void vbox_set_edid(struct drm_connector *connector, int width,
+			  int height)
+{
+	enum { EDID_SIZE = 128 };
+	unsigned char edid[EDID_SIZE] = {
+		0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,	/* header */
+		0x58, 0x58,	/* manufacturer (VBX) */
+		0x00, 0x00,	/* product code */
+		0x00, 0x00, 0x00, 0x00,	/* serial number goes here */
+		0x01,		/* week of manufacture */
+		0x00,		/* year of manufacture */
+		0x01, 0x03,	/* EDID version */
+		0x80,		/* capabilities - digital */
+		0x00,		/* horiz. res in cm, zero for projectors */
+		0x00,		/* vert. res in cm */
+		0x78,		/* display gamma (120 == 2.2). */
+		0xEE,		/* features (standby, suspend, off, RGB, std */
+				/* colour space, preferred timing mode) */
+		0xEE, 0x91, 0xA3, 0x54, 0x4C, 0x99, 0x26, 0x0F, 0x50, 0x54,
+		/* chromaticity for standard colour space. */
+		0x00, 0x00, 0x00,	/* no default timings */
+		0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+		    0x01, 0x01,
+		0x01, 0x01, 0x01, 0x01,	/* no standard timings */
+		0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x06, 0x00, 0x02, 0x02,
+		    0x02, 0x02,
+		/* descriptor block 1 goes below */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		/* descriptor block 2, monitor ranges */
+		0x00, 0x00, 0x00, 0xFD, 0x00,
+		0x00, 0xC8, 0x00, 0xC8, 0x64, 0x00, 0x0A, 0x20, 0x20, 0x20,
+		    0x20, 0x20,
+		/* 0-200Hz vertical, 0-200KHz horizontal, 1000MHz pixel clock */
+		0x20,
+		/* descriptor block 3, monitor name */
+		0x00, 0x00, 0x00, 0xFC, 0x00,
+		'V', 'B', 'O', 'X', ' ', 'm', 'o', 'n', 'i', 't', 'o', 'r',
+		'\n',
+		/* descriptor block 4: dummy data */
+		0x00, 0x00, 0x00, 0x10, 0x00,
+		0x0A, 0x20, 0x20, 0x20, 0x20, 0x20,
+		0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+		0x20,
+		0x00,		/* number of extensions */
+		0x00		/* checksum goes here */
+	};
+	int clock = (width + 6) * (height + 6) * 60 / 10000;
+	unsigned int i, sum = 0;
+
+	edid[12] = width & 0xff;
+	edid[13] = width >> 8;
+	edid[14] = height & 0xff;
+	edid[15] = height >> 8;
+	edid[54] = clock & 0xff;
+	edid[55] = clock >> 8;
+	edid[56] = width & 0xff;
+	edid[58] = (width >> 4) & 0xf0;
+	edid[59] = height & 0xff;
+	edid[61] = (height >> 4) & 0xf0;
+	for (i = 0; i < EDID_SIZE - 1; ++i)
+		sum += edid[i];
+	edid[EDID_SIZE - 1] = (0x100 - (sum & 0xFF)) & 0xFF;
+	drm_mode_connector_update_edid_property(connector, (struct edid *)edid);
+}
+
+static int vbox_get_modes(struct drm_connector *connector)
+{
+	struct vbox_connector *vbox_connector = NULL;
+	struct drm_display_mode *mode = NULL;
+	struct vbox_private *vbox = NULL;
+	unsigned int num_modes = 0;
+	int preferred_width, preferred_height;
+
+	vbox_connector = to_vbox_connector(connector);
+	vbox = connector->dev->dev_private;
+	/*
+	 * Heuristic: we do not want to tell the host that we support dynamic
+	 * resizing unless we feel confident that the user space client using
+	 * the video driver can handle hot-plug events.  So the first time modes
+	 * are queried after a "master" switch we tell the host that we do not,
+	 * and immediately after we send the client a hot-plug notification as
+	 * a test to see if they will respond and query again.
+	 * That is also the reason why capabilities are reported to the host at
+	 * this place in the code rather than elsewhere.
+	 * We need to report the flags location before reporting the IRQ
+	 * capability.
+	 */
+	hgsmi_report_flags_location(vbox->guest_pool, GUEST_HEAP_OFFSET(vbox) +
+				    HOST_FLAGS_OFFSET);
+	if (vbox_connector->vbox_crtc->crtc_id == 0)
+		vbox_report_caps(vbox);
+	if (!vbox->initial_mode_queried) {
+		if (vbox_connector->vbox_crtc->crtc_id == 0) {
+			vbox->initial_mode_queried = true;
+			vbox_report_hotplug(vbox);
+		}
+		return drm_add_modes_noedid(connector, 800, 600);
+	}
+	num_modes = drm_add_modes_noedid(connector, 2560, 1600);
+	preferred_width = vbox_connector->mode_hint.width ?
+			  vbox_connector->mode_hint.width : 1024;
+	preferred_height = vbox_connector->mode_hint.height ?
+			   vbox_connector->mode_hint.height : 768;
+	mode = drm_cvt_mode(connector->dev, preferred_width, preferred_height,
+			    60, false, false, false);
+	if (mode) {
+		mode->type |= DRM_MODE_TYPE_PREFERRED;
+		drm_mode_probed_add(connector, mode);
+		++num_modes;
+	}
+	vbox_set_edid(connector, preferred_width, preferred_height);
+	drm_object_property_set_value(
+		&connector->base, vbox->dev->mode_config.suggested_x_property,
+		vbox_connector->vbox_crtc->x_hint);
+	drm_object_property_set_value(
+		&connector->base, vbox->dev->mode_config.suggested_y_property,
+		vbox_connector->vbox_crtc->y_hint);
+
+	return num_modes;
+}
+
+static int vbox_mode_valid(struct drm_connector *connector,
+			   struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static void vbox_connector_destroy(struct drm_connector *connector)
+{
+	struct vbox_connector *vbox_connector;
+
+	vbox_connector = to_vbox_connector(connector);
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
+
+static enum drm_connector_status
+vbox_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct vbox_connector *vbox_connector;
+
+	vbox_connector = to_vbox_connector(connector);
+
+	return vbox_connector->mode_hint.disconnected ?
+	    connector_status_disconnected : connector_status_connected;
+}
+
+static int vbox_fill_modes(struct drm_connector *connector, u32 max_x,
+			   u32 max_y)
+{
+	struct vbox_connector *vbox_connector;
+	struct drm_device *dev;
+	struct drm_display_mode *mode, *iterator;
+
+	vbox_connector = to_vbox_connector(connector);
+	dev = vbox_connector->base.dev;
+	list_for_each_entry_safe(mode, iterator, &connector->modes, head) {
+		list_del(&mode->head);
+		drm_mode_destroy(dev, mode);
+	}
+
+	return drm_helper_probe_single_connector_modes(connector, max_x, max_y);
+}
+
+static const struct drm_connector_helper_funcs vbox_connector_helper_funcs = {
+	.mode_valid = vbox_mode_valid,
+	.get_modes = vbox_get_modes,
+	.best_encoder = vbox_best_single_encoder,
+};
+
+static const struct drm_connector_funcs vbox_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = vbox_connector_detect,
+	.fill_modes = vbox_fill_modes,
+	.destroy = vbox_connector_destroy,
+};
+
+static int vbox_connector_init(struct drm_device *dev,
+			       struct vbox_crtc *vbox_crtc,
+			       struct drm_encoder *encoder)
+{
+	struct vbox_connector *vbox_connector;
+	struct drm_connector *connector;
+
+	vbox_connector = kzalloc(sizeof(*vbox_connector), GFP_KERNEL);
+	if (!vbox_connector)
+		return -ENOMEM;
+
+	connector = &vbox_connector->base;
+	vbox_connector->vbox_crtc = vbox_crtc;
+
+	drm_connector_init(dev, connector, &vbox_connector_funcs,
+			   DRM_MODE_CONNECTOR_VGA);
+	drm_connector_helper_add(connector, &vbox_connector_helper_funcs);
+
+	connector->interlace_allowed = 0;
+	connector->doublescan_allowed = 0;
+
+	drm_mode_create_suggested_offset_properties(dev);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_x_property, -1);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_y_property, -1);
+	drm_connector_register(connector);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+
+	return 0;
+}
+
+int vbox_mode_init(struct drm_device *dev)
+{
+	struct vbox_private *vbox = dev->dev_private;
+	struct drm_encoder *encoder;
+	struct vbox_crtc *vbox_crtc;
+	unsigned int i;
+	int ret;
+
+	/* vbox_cursor_init(dev); */
+	for (i = 0; i < vbox->num_crtcs; ++i) {
+		vbox_crtc = vbox_crtc_init(dev, i);
+		if (!vbox_crtc)
+			return -ENOMEM;
+		encoder = vbox_encoder_init(dev, i);
+		if (!encoder)
+			return -ENOMEM;
+		ret = vbox_connector_init(dev, vbox_crtc, encoder);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void vbox_mode_fini(struct drm_device *dev)
+{
+	/* vbox_cursor_fini(dev); */
+}
+
+/**
+ * Copy the ARGB image and generate the mask, which is needed in case the host
+ * does not support ARGB cursors.  The mask is a 1BPP bitmap with the bit set
+ * if the corresponding alpha value in the ARGB image is greater than 0xF0.
+ */
+static void copy_cursor_image(u8 *src, u8 *dst, u32 width, u32 height,
+			      size_t mask_size)
+{
+	size_t line_size = (width + 7) / 8;
+	u32 i, j;
+
+	memcpy(dst + mask_size, src, width * height * 4);
+	for (i = 0; i < height; ++i)
+		for (j = 0; j < width; ++j)
+			if (((u32 *)src)[i * width + j] > 0xf0000000)
+				dst[i * line_size + j / 8] |= (0x80 >> (j % 8));
+}
+
+static int vbox_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
+			    u32 handle, u32 width, u32 height,
+			    s32 hot_x, s32 hot_y)
+{
+	struct vbox_private *vbox = crtc->dev->dev_private;
+	struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
+	struct ttm_bo_kmap_obj uobj_map;
+	size_t data_size, mask_size;
+	struct drm_gem_object *obj;
+	u32 flags, caps = 0;
+	struct vbox_bo *bo;
+	bool src_isiomem;
+	u8 *dst = NULL;
+	u8 *src;
+	int ret;
+
+	/*
+	 * Re-set this regularly as in 5.0.20 and earlier the information was
+	 * lost on save and restore.
+	 */
+	hgsmi_update_input_mapping(vbox->guest_pool, 0, 0,
+				   vbox->input_mapping_width,
+				   vbox->input_mapping_height);
+	if (!handle) {
+		bool cursor_enabled = false;
+		struct drm_crtc *crtci;
+
+		/* Hide cursor. */
+		vbox_crtc->cursor_enabled = false;
+		list_for_each_entry(crtci, &vbox->dev->mode_config.crtc_list,
+				    head) {
+			if (to_vbox_crtc(crtci)->cursor_enabled)
+				cursor_enabled = true;
+		}
+
+		if (!cursor_enabled)
+			hgsmi_update_pointer_shape(vbox->guest_pool, 0, 0, 0,
+						   0, 0, NULL, 0);
+		return 0;
+	}
+
+	vbox_crtc->cursor_enabled = true;
+
+	if (width > VBOX_MAX_CURSOR_WIDTH || height > VBOX_MAX_CURSOR_HEIGHT ||
+	    width == 0 || height == 0)
+		return -EINVAL;
+
+	ret = hgsmi_query_conf(vbox->guest_pool,
+			       VBOX_VBVA_CONF32_CURSOR_CAPABILITIES, &caps);
+	if (ret)
+		return ret;
+
+	if (!(caps & VBOX_VBVA_CURSOR_CAPABILITY_HARDWARE)) {
+		/*
+		 * -EINVAL means cursor_set2() not supported, -EAGAIN means
+		 * retry at once.
+		 */
+		return -EBUSY;
+	}
+
+	obj = drm_gem_object_lookup(file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object %x for crtc\n", handle);
+		return -ENOENT;
+	}
+
+	bo = gem_to_vbox_bo(obj);
+	ret = vbox_bo_reserve(bo, false);
+	if (ret)
+		goto out_unref_obj;
+
+	/*
+	 * The mask must be calculated based on the alpha
+	 * channel, one bit per ARGB word, and must be 32-bit
+	 * padded.
+	 */
+	mask_size = ((width + 7) / 8 * height + 3) & ~3;
+	data_size = width * height * 4 + mask_size;
+	vbox->cursor_hot_x = min_t(u32, max(hot_x, 0), width);
+	vbox->cursor_hot_y = min_t(u32, max(hot_y, 0), height);
+	vbox->cursor_width = width;
+	vbox->cursor_height = height;
+	vbox->cursor_data_size = data_size;
+	dst = vbox->cursor_data;
+
+	ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &uobj_map);
+	if (ret) {
+		vbox->cursor_data_size = 0;
+		goto out_unreserve_bo;
+	}
+
+	src = ttm_kmap_obj_virtual(&uobj_map, &src_isiomem);
+	if (src_isiomem) {
+		DRM_ERROR("src cursor bo not in main memory\n");
+		ret = -EIO;
+		goto out_unmap_bo;
+	}
+
+	copy_cursor_image(src, dst, width, height, mask_size);
+
+	flags = VBOX_MOUSE_POINTER_VISIBLE | VBOX_MOUSE_POINTER_SHAPE |
+		VBOX_MOUSE_POINTER_ALPHA;
+	ret = hgsmi_update_pointer_shape(vbox->guest_pool, flags,
+					 vbox->cursor_hot_x, vbox->cursor_hot_y,
+					 width, height, dst, data_size);
+out_unmap_bo:
+	ttm_bo_kunmap(&uobj_map);
+out_unreserve_bo:
+	vbox_bo_unreserve(bo);
+out_unref_obj:
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static int vbox_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct vbox_private *vbox = crtc->dev->dev_private;
+	u32 flags = VBOX_MOUSE_POINTER_VISIBLE |
+	    VBOX_MOUSE_POINTER_SHAPE | VBOX_MOUSE_POINTER_ALPHA;
+	s32 crtc_x =
+	    vbox->single_framebuffer ? crtc->x : to_vbox_crtc(crtc)->x_hint;
+	s32 crtc_y =
+	    vbox->single_framebuffer ? crtc->y : to_vbox_crtc(crtc)->y_hint;
+	u32 host_x, host_y;
+	u32 hot_x = 0;
+	u32 hot_y = 0;
+	int ret;
+
+	/*
+	 * We compare these to unsigned later and don't
+	 * need to handle negative.
+	 */
+	if (x + crtc_x < 0 || y + crtc_y < 0 || vbox->cursor_data_size == 0)
+		return 0;
+
+	ret = hgsmi_cursor_position(vbox->guest_pool, true, x + crtc_x,
+				    y + crtc_y, &host_x, &host_y);
+
+	/*
+	 * The only reason we have vbox_cursor_move() is that some older clients
+	 * might use DRM_IOCTL_MODE_CURSOR instead of DRM_IOCTL_MODE_CURSOR2 and
+	 * use DRM_MODE_CURSOR_MOVE to set the hot-spot.
+	 *
+	 * However VirtualBox 5.0.20 and earlier has a bug causing it to return
+	 * 0,0 as host cursor location after a save and restore.
+	 *
+	 * To work around this we ignore a 0, 0 return, since missing the odd
+	 * time when it legitimately happens is not going to hurt much.
+	 */
+	if (ret || (host_x == 0 && host_y == 0))
+		return ret;
+
+	if (x + crtc_x < host_x)
+		hot_x = min(host_x - x - crtc_x, vbox->cursor_width);
+	if (y + crtc_y < host_y)
+		hot_y = min(host_y - y - crtc_y, vbox->cursor_height);
+
+	if (hot_x == vbox->cursor_hot_x && hot_y == vbox->cursor_hot_y)
+		return 0;
+
+	vbox->cursor_hot_x = hot_x;
+	vbox->cursor_hot_y = hot_y;
+
+	return hgsmi_update_pointer_shape(vbox->guest_pool, flags,
+			hot_x, hot_y, vbox->cursor_width, vbox->cursor_height,
+			vbox->cursor_data, vbox->cursor_data_size);
+}
diff --git a/drivers/staging/vboxvideo/vbox_prime.c b/drivers/staging/vboxvideo/vbox_prime.c
new file mode 100644
index 0000000000000..b7453e427a1de
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_prime.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2017 Oracle Corporation
+ * Copyright 2017 Canonical
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andreas Pokorny
+ */
+
+#include "vbox_drv.h"
+
+/*
+ * Based on qxl_prime.c:
+ * Empty Implementations as there should not be any other driver for a virtual
+ * device that might share buffers with vboxvideo
+ */
+
+int vbox_gem_prime_pin(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+	return -ENOSYS;
+}
+
+void vbox_gem_prime_unpin(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+}
+
+struct sg_table *vbox_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+	return ERR_PTR(-ENOSYS);
+}
+
+struct drm_gem_object *vbox_gem_prime_import_sg_table(
+	struct drm_device *dev, struct dma_buf_attachment *attach,
+	struct sg_table *table)
+{
+	WARN_ONCE(1, "not implemented");
+	return ERR_PTR(-ENOSYS);
+}
+
+void *vbox_gem_prime_vmap(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+	return ERR_PTR(-ENOSYS);
+}
+
+void vbox_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+	WARN_ONCE(1, "not implemented");
+}
+
+int vbox_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *area)
+{
+	WARN_ONCE(1, "not implemented");
+	return -ENOSYS;
+}
diff --git a/drivers/staging/vboxvideo/vbox_ttm.c b/drivers/staging/vboxvideo/vbox_ttm.c
new file mode 100644
index 0000000000000..34a905d40735f
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbox_ttm.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (C) 2013-2017 Oracle Corporation
+ * This file is based on ast_ttm.c
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Michael Thayer <michael.thayer@oracle.com>
+ */
+#include "vbox_drv.h"
+#include <ttm/ttm_page_alloc.h>
+
+static inline struct vbox_private *vbox_bdev(struct ttm_bo_device *bd)
+{
+	return container_of(bd, struct vbox_private, ttm.bdev);
+}
+
+static int vbox_ttm_mem_global_init(struct drm_global_reference *ref)
+{
+	return ttm_mem_global_init(ref->object);
+}
+
+static void vbox_ttm_mem_global_release(struct drm_global_reference *ref)
+{
+	ttm_mem_global_release(ref->object);
+}
+
+/**
+ * Adds the vbox memory manager object/structures to the global memory manager.
+ */
+static int vbox_ttm_global_init(struct vbox_private *vbox)
+{
+	struct drm_global_reference *global_ref;
+	int ret;
+
+	global_ref = &vbox->ttm.mem_global_ref;
+	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
+	global_ref->size = sizeof(struct ttm_mem_global);
+	global_ref->init = &vbox_ttm_mem_global_init;
+	global_ref->release = &vbox_ttm_mem_global_release;
+	ret = drm_global_item_ref(global_ref);
+	if (ret) {
+		DRM_ERROR("Failed setting up TTM memory subsystem.\n");
+		return ret;
+	}
+
+	vbox->ttm.bo_global_ref.mem_glob = vbox->ttm.mem_global_ref.object;
+	global_ref = &vbox->ttm.bo_global_ref.ref;
+	global_ref->global_type = DRM_GLOBAL_TTM_BO;
+	global_ref->size = sizeof(struct ttm_bo_global);
+	global_ref->init = &ttm_bo_global_init;
+	global_ref->release = &ttm_bo_global_release;
+
+	ret = drm_global_item_ref(global_ref);
+	if (ret) {
+		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
+		drm_global_item_unref(&vbox->ttm.mem_global_ref);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * Removes the vbox memory manager object from the global memory manager.
+ */
+static void vbox_ttm_global_release(struct vbox_private *vbox)
+{
+	drm_global_item_unref(&vbox->ttm.bo_global_ref.ref);
+	drm_global_item_unref(&vbox->ttm.mem_global_ref);
+}
+
+static void vbox_bo_ttm_destroy(struct ttm_buffer_object *tbo)
+{
+	struct vbox_bo *bo;
+
+	bo = container_of(tbo, struct vbox_bo, bo);
+
+	drm_gem_object_release(&bo->gem);
+	kfree(bo);
+}
+
+static bool vbox_ttm_bo_is_vbox_bo(struct ttm_buffer_object *bo)
+{
+	if (bo->destroy == &vbox_bo_ttm_destroy)
+		return true;
+
+	return false;
+}
+
+static int
+vbox_bo_init_mem_type(struct ttm_bo_device *bdev, u32 type,
+		      struct ttm_mem_type_manager *man)
+{
+	switch (type) {
+	case TTM_PL_SYSTEM:
+		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_MASK_CACHING;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	case TTM_PL_VRAM:
+		man->func = &ttm_bo_manager_func;
+		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
+		man->default_caching = TTM_PL_FLAG_WC;
+		break;
+	default:
+		DRM_ERROR("Unsupported memory type %u\n", (unsigned int)type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void
+vbox_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
+{
+	struct vbox_bo *vboxbo = vbox_bo(bo);
+
+	if (!vbox_ttm_bo_is_vbox_bo(bo))
+		return;
+
+	vbox_ttm_placement(vboxbo, TTM_PL_FLAG_SYSTEM);
+	*pl = vboxbo->placement;
+}
+
+static int vbox_bo_verify_access(struct ttm_buffer_object *bo,
+				 struct file *filp)
+{
+	return 0;
+}
+
+static int vbox_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
+				   struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	struct vbox_private *vbox = vbox_bdev(bdev);
+
+	mem->bus.addr = NULL;
+	mem->bus.offset = 0;
+	mem->bus.size = mem->num_pages << PAGE_SHIFT;
+	mem->bus.base = 0;
+	mem->bus.is_iomem = false;
+	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
+		return -EINVAL;
+	switch (mem->mem_type) {
+	case TTM_PL_SYSTEM:
+		/* system memory */
+		return 0;
+	case TTM_PL_VRAM:
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+		mem->bus.base = pci_resource_start(vbox->dev->pdev, 0);
+		mem->bus.is_iomem = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void vbox_ttm_io_mem_free(struct ttm_bo_device *bdev,
+				 struct ttm_mem_reg *mem)
+{
+}
+
+static int vbox_bo_move(struct ttm_buffer_object *bo,
+			bool evict, bool interruptible,
+			bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+{
+	return ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
+}
+
+static void vbox_ttm_backend_destroy(struct ttm_tt *tt)
+{
+	ttm_tt_fini(tt);
+	kfree(tt);
+}
+
+static struct ttm_backend_func vbox_tt_backend_func = {
+	.destroy = &vbox_ttm_backend_destroy,
+};
+
+static struct ttm_tt *vbox_ttm_tt_create(struct ttm_bo_device *bdev,
+					 unsigned long size,
+					 u32 page_flags,
+					 struct page *dummy_read_page)
+{
+	struct ttm_tt *tt;
+
+	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+	if (!tt)
+		return NULL;
+
+	tt->func = &vbox_tt_backend_func;
+	if (ttm_tt_init(tt, bdev, size, page_flags, dummy_read_page)) {
+		kfree(tt);
+		return NULL;
+	}
+
+	return tt;
+}
+
+static int vbox_ttm_tt_populate(struct ttm_tt *ttm)
+{
+	return ttm_pool_populate(ttm);
+}
+
+static void vbox_ttm_tt_unpopulate(struct ttm_tt *ttm)
+{
+	ttm_pool_unpopulate(ttm);
+}
+
+struct ttm_bo_driver vbox_bo_driver = {
+	.ttm_tt_create = vbox_ttm_tt_create,
+	.ttm_tt_populate = vbox_ttm_tt_populate,
+	.ttm_tt_unpopulate = vbox_ttm_tt_unpopulate,
+	.init_mem_type = vbox_bo_init_mem_type,
+	.eviction_valuable = ttm_bo_eviction_valuable,
+	.evict_flags = vbox_bo_evict_flags,
+	.move = vbox_bo_move,
+	.verify_access = vbox_bo_verify_access,
+	.io_mem_reserve = &vbox_ttm_io_mem_reserve,
+	.io_mem_free = &vbox_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
+};
+
+int vbox_mm_init(struct vbox_private *vbox)
+{
+	int ret;
+	struct drm_device *dev = vbox->dev;
+	struct ttm_bo_device *bdev = &vbox->ttm.bdev;
+
+	ret = vbox_ttm_global_init(vbox);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_device_init(&vbox->ttm.bdev,
+				 vbox->ttm.bo_global_ref.ref.object,
+				 &vbox_bo_driver,
+				 dev->anon_inode->i_mapping,
+				 DRM_FILE_PAGE_OFFSET, true);
+	if (ret) {
+		DRM_ERROR("Error initialising bo driver; %d\n", ret);
+		goto err_ttm_global_release;
+	}
+
+	ret = ttm_bo_init_mm(bdev, TTM_PL_VRAM,
+			     vbox->available_vram_size >> PAGE_SHIFT);
+	if (ret) {
+		DRM_ERROR("Failed ttm VRAM init: %d\n", ret);
+		goto err_device_release;
+	}
+
+#ifdef DRM_MTRR_WC
+	vbox->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 0),
+				     pci_resource_len(dev->pdev, 0),
+				     DRM_MTRR_WC);
+#else
+	vbox->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
+					 pci_resource_len(dev->pdev, 0));
+#endif
+	return 0;
+
+err_device_release:
+	ttm_bo_device_release(&vbox->ttm.bdev);
+err_ttm_global_release:
+	vbox_ttm_global_release(vbox);
+	return ret;
+}
+
+void vbox_mm_fini(struct vbox_private *vbox)
+{
+#ifdef DRM_MTRR_WC
+	drm_mtrr_del(vbox->fb_mtrr,
+		     pci_resource_start(vbox->dev->pdev, 0),
+		     pci_resource_len(vbox->dev->pdev, 0), DRM_MTRR_WC);
+#else
+	arch_phys_wc_del(vbox->fb_mtrr);
+#endif
+	ttm_bo_device_release(&vbox->ttm.bdev);
+	vbox_ttm_global_release(vbox);
+}
+
+void vbox_ttm_placement(struct vbox_bo *bo, int domain)
+{
+	unsigned int i;
+	u32 c = 0;
+
+	bo->placement.placement = bo->placements;
+	bo->placement.busy_placement = bo->placements;
+
+	if (domain & TTM_PL_FLAG_VRAM)
+		bo->placements[c++].flags =
+		    TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
+	if (domain & TTM_PL_FLAG_SYSTEM)
+		bo->placements[c++].flags =
+		    TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+	if (!c)
+		bo->placements[c++].flags =
+		    TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+
+	bo->placement.num_placement = c;
+	bo->placement.num_busy_placement = c;
+
+	for (i = 0; i < c; ++i) {
+		bo->placements[i].fpfn = 0;
+		bo->placements[i].lpfn = 0;
+	}
+}
+
+int vbox_bo_create(struct drm_device *dev, int size, int align,
+		   u32 flags, struct vbox_bo **pvboxbo)
+{
+	struct vbox_private *vbox = dev->dev_private;
+	struct vbox_bo *vboxbo;
+	size_t acc_size;
+	int ret;
+
+	vboxbo = kzalloc(sizeof(*vboxbo), GFP_KERNEL);
+	if (!vboxbo)
+		return -ENOMEM;
+
+	ret = drm_gem_object_init(dev, &vboxbo->gem, size);
+	if (ret)
+		goto err_free_vboxbo;
+
+	vboxbo->bo.bdev = &vbox->ttm.bdev;
+
+	vbox_ttm_placement(vboxbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
+
+	acc_size = ttm_bo_dma_acc_size(&vbox->ttm.bdev, size,
+				       sizeof(struct vbox_bo));
+
+	ret = ttm_bo_init(&vbox->ttm.bdev, &vboxbo->bo, size,
+			  ttm_bo_type_device, &vboxbo->placement,
+			  align >> PAGE_SHIFT, false, NULL, acc_size,
+			  NULL, NULL, vbox_bo_ttm_destroy);
+	if (ret)
+		goto err_free_vboxbo;
+
+	*pvboxbo = vboxbo;
+
+	return 0;
+
+err_free_vboxbo:
+	kfree(vboxbo);
+	return ret;
+}
+
+static inline u64 vbox_bo_gpu_offset(struct vbox_bo *bo)
+{
+	return bo->bo.offset;
+}
+
+int vbox_bo_pin(struct vbox_bo *bo, u32 pl_flag, u64 *gpu_addr)
+{
+	int i, ret;
+
+	if (bo->pin_count) {
+		bo->pin_count++;
+		if (gpu_addr)
+			*gpu_addr = vbox_bo_gpu_offset(bo);
+
+		return 0;
+	}
+
+	vbox_ttm_placement(bo, pl_flag);
+
+	for (i = 0; i < bo->placement.num_placement; i++)
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
+
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
+	if (ret)
+		return ret;
+
+	bo->pin_count = 1;
+
+	if (gpu_addr)
+		*gpu_addr = vbox_bo_gpu_offset(bo);
+
+	return 0;
+}
+
+int vbox_bo_unpin(struct vbox_bo *bo)
+{
+	int i, ret;
+
+	if (!bo->pin_count) {
+		DRM_ERROR("unpin bad %p\n", bo);
+		return 0;
+	}
+	bo->pin_count--;
+	if (bo->pin_count)
+		return 0;
+
+	for (i = 0; i < bo->placement.num_placement; i++)
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
+
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Move a vbox-owned buffer object to system memory if no one else has it
+ * pinned.  The caller must have pinned it previously, and this call will
+ * release the caller's pin.
+ */
+int vbox_bo_push_sysram(struct vbox_bo *bo)
+{
+	int i, ret;
+
+	if (!bo->pin_count) {
+		DRM_ERROR("unpin bad %p\n", bo);
+		return 0;
+	}
+	bo->pin_count--;
+	if (bo->pin_count)
+		return 0;
+
+	if (bo->kmap.virtual)
+		ttm_bo_kunmap(&bo->kmap);
+
+	vbox_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
+
+	for (i = 0; i < bo->placement.num_placement; i++)
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
+
+	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
+	if (ret) {
+		DRM_ERROR("pushing to VRAM failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+int vbox_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *file_priv;
+	struct vbox_private *vbox;
+
+	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
+		return -EINVAL;
+
+	file_priv = filp->private_data;
+	vbox = file_priv->minor->dev->dev_private;
+
+	return ttm_bo_mmap(filp, vma, &vbox->ttm.bdev);
+}
diff --git a/drivers/staging/vboxvideo/vboxvideo.h b/drivers/staging/vboxvideo/vboxvideo.h
new file mode 100644
index 0000000000000..d835d75d761c5
--- /dev/null
+++ b/drivers/staging/vboxvideo/vboxvideo.h
@@ -0,0 +1,491 @@
+/*
+ * Copyright (C) 2006-2016 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ */
+
+#ifndef __VBOXVIDEO_H__
+#define __VBOXVIDEO_H__
+
+/*
+ * This should be in sync with monitorCount <xsd:maxInclusive value="64"/> in
+ * src/VBox/Main/xml/VirtualBox-settings-common.xsd
+ */
+#define VBOX_VIDEO_MAX_SCREENS 64
+
+/*
+ * The last 4096 bytes of the guest VRAM contains the generic info for all
+ * DualView chunks: sizes and offsets of chunks. This is filled by miniport.
+ *
+ * Last 4096 bytes of each chunk contain chunk specific data: framebuffer info,
+ * etc. This is used exclusively by the corresponding instance of a display
+ * driver.
+ *
+ * The VRAM layout:
+ *   Last 4096 bytes - Adapter information area.
+ *   4096 bytes aligned miniport heap (value specified in the config rouded up).
+ *   Slack - what left after dividing the VRAM.
+ *   4096 bytes aligned framebuffers:
+ *     last 4096 bytes of each framebuffer is the display information area.
+ *
+ * The Virtual Graphics Adapter information in the guest VRAM is stored by the
+ * guest video driver using structures prepended by VBOXVIDEOINFOHDR.
+ *
+ * When the guest driver writes dword 0 to the VBE_DISPI_INDEX_VBOX_VIDEO
+ * the host starts to process the info. The first element at the start of
+ * the 4096 bytes region should be normally be a LINK that points to
+ * actual information chain. That way the guest driver can have some
+ * fixed layout of the information memory block and just rewrite
+ * the link to point to relevant memory chain.
+ *
+ * The processing stops at the END element.
+ *
+ * The host can access the memory only when the port IO is processed.
+ * All data that will be needed later must be copied from these 4096 bytes.
+ * But other VRAM can be used by host until the mode is disabled.
+ *
+ * The guest driver writes dword 0xffffffff to the VBE_DISPI_INDEX_VBOX_VIDEO
+ * to disable the mode.
+ *
+ * VBE_DISPI_INDEX_VBOX_VIDEO is used to read the configuration information
+ * from the host and issue commands to the host.
+ *
+ * The guest writes the VBE_DISPI_INDEX_VBOX_VIDEO index register, the the
+ * following operations with the VBE data register can be performed:
+ *
+ * Operation            Result
+ * write 16 bit value   NOP
+ * read 16 bit value    count of monitors
+ * write 32 bit value   set the vbox cmd value and the cmd processed by the host
+ * read 32 bit value    result of the last vbox command is returned
+ */
+
+/**
+ * VBVA command header.
+ *
+ * @todo Where does this fit in?
+ */
+struct vbva_cmd_hdr {
+   /** Coordinates of affected rectangle. */
+	s16 x;
+	s16 y;
+	u16 w;
+	u16 h;
+} __packed;
+
+/** @name VBVA ring defines.
+ *
+ * The VBVA ring buffer is suitable for transferring large (< 2GB) amount of
+ * data. For example big bitmaps which do not fit to the buffer.
+ *
+ * Guest starts writing to the buffer by initializing a record entry in the
+ * records queue. VBVA_F_RECORD_PARTIAL indicates that the record is being
+ * written. As data is written to the ring buffer, the guest increases
+ * free_offset.
+ *
+ * The host reads the records on flushes and processes all completed records.
+ * When host encounters situation when only a partial record presents and
+ * len_and_flags & ~VBVA_F_RECORD_PARTIAL >= VBVA_RING_BUFFER_SIZE -
+ * VBVA_RING_BUFFER_THRESHOLD, the host fetched all record data and updates
+ * data_offset. After that on each flush the host continues fetching the data
+ * until the record is completed.
+ *
+ */
+#define VBVA_RING_BUFFER_SIZE        (4194304 - 1024)
+#define VBVA_RING_BUFFER_THRESHOLD   (4096)
+
+#define VBVA_MAX_RECORDS (64)
+
+#define VBVA_F_MODE_ENABLED         0x00000001u
+#define VBVA_F_MODE_VRDP            0x00000002u
+#define VBVA_F_MODE_VRDP_RESET      0x00000004u
+#define VBVA_F_MODE_VRDP_ORDER_MASK 0x00000008u
+
+#define VBVA_F_STATE_PROCESSING     0x00010000u
+
+#define VBVA_F_RECORD_PARTIAL       0x80000000u
+
+/**
+ * VBVA record.
+ */
+struct vbva_record {
+	/** The length of the record. Changed by guest. */
+	u32 len_and_flags;
+} __packed;
+
+/*
+ * The minimum HGSMI heap size is PAGE_SIZE (4096 bytes) and is a restriction of
+ * the runtime heapsimple API. Use minimum 2 pages here, because the info area
+ * also may contain other data (for example hgsmi_host_flags structure).
+ */
+#define VBVA_ADAPTER_INFORMATION_SIZE 65536
+#define VBVA_MIN_BUFFER_SIZE          65536
+
+/* The value for port IO to let the adapter to interpret the adapter memory. */
+#define VBOX_VIDEO_DISABLE_ADAPTER_MEMORY        0xFFFFFFFF
+
+/* The value for port IO to let the adapter to interpret the adapter memory. */
+#define VBOX_VIDEO_INTERPRET_ADAPTER_MEMORY      0x00000000
+
+/* The value for port IO to let the adapter to interpret the display memory.
+ * The display number is encoded in low 16 bits.
+ */
+#define VBOX_VIDEO_INTERPRET_DISPLAY_MEMORY_BASE 0x00010000
+
+struct vbva_host_flags {
+	u32 host_events;
+	u32 supported_orders;
+} __packed;
+
+struct vbva_buffer {
+	struct vbva_host_flags host_flags;
+
+	/* The offset where the data start in the buffer. */
+	u32 data_offset;
+	/* The offset where next data must be placed in the buffer. */
+	u32 free_offset;
+
+	/* The queue of record descriptions. */
+	struct vbva_record records[VBVA_MAX_RECORDS];
+	u32 record_first_index;
+	u32 record_free_index;
+
+	/* Space to leave free when large partial records are transferred. */
+	u32 partial_write_tresh;
+
+	u32 data_len;
+	/* variable size for the rest of the vbva_buffer area in VRAM. */
+	u8 data[0];
+} __packed;
+
+#define VBVA_MAX_RECORD_SIZE (128 * 1024 * 1024)
+
+/* guest->host commands */
+#define VBVA_QUERY_CONF32			 1
+#define VBVA_SET_CONF32				 2
+#define VBVA_INFO_VIEW				 3
+#define VBVA_INFO_HEAP				 4
+#define VBVA_FLUSH				 5
+#define VBVA_INFO_SCREEN			 6
+#define VBVA_ENABLE				 7
+#define VBVA_MOUSE_POINTER_SHAPE		 8
+/* informs host about HGSMI caps. see vbva_caps below */
+#define VBVA_INFO_CAPS				12
+/* configures scanline, see VBVASCANLINECFG below */
+#define VBVA_SCANLINE_CFG			13
+/* requests scanline info, see VBVASCANLINEINFO below */
+#define VBVA_SCANLINE_INFO			14
+/* inform host about VBVA Command submission */
+#define VBVA_CMDVBVA_SUBMIT			16
+/* inform host about VBVA Command submission */
+#define VBVA_CMDVBVA_FLUSH			17
+/* G->H DMA command */
+#define VBVA_CMDVBVA_CTL			18
+/* Query most recent mode hints sent */
+#define VBVA_QUERY_MODE_HINTS			19
+/**
+ * Report the guest virtual desktop position and size for mapping host and
+ * guest pointer positions.
+ */
+#define VBVA_REPORT_INPUT_MAPPING		20
+/** Report the guest cursor position and query the host position. */
+#define VBVA_CURSOR_POSITION			21
+
+/* host->guest commands */
+#define VBVAHG_EVENT				1
+#define VBVAHG_DISPLAY_CUSTOM			2
+
+/* vbva_conf32::index */
+#define VBOX_VBVA_CONF32_MONITOR_COUNT		0
+#define VBOX_VBVA_CONF32_HOST_HEAP_SIZE		1
+/**
+ * Returns VINF_SUCCESS if the host can report mode hints via VBVA.
+ * Set value to VERR_NOT_SUPPORTED before calling.
+ */
+#define VBOX_VBVA_CONF32_MODE_HINT_REPORTING	2
+/**
+ * Returns VINF_SUCCESS if the host can report guest cursor enabled status via
+ * VBVA.  Set value to VERR_NOT_SUPPORTED before calling.
+ */
+#define VBOX_VBVA_CONF32_GUEST_CURSOR_REPORTING	3
+/**
+ * Returns the currently available host cursor capabilities.  Available if
+ * vbva_conf32::VBOX_VBVA_CONF32_GUEST_CURSOR_REPORTING returns success.
+ * @see VMMDevReqMouseStatus::mouseFeatures.
+ */
+#define VBOX_VBVA_CONF32_CURSOR_CAPABILITIES	4
+/** Returns the supported flags in vbva_infoscreen::flags. */
+#define VBOX_VBVA_CONF32_SCREEN_FLAGS		5
+/** Returns the max size of VBVA record. */
+#define VBOX_VBVA_CONF32_MAX_RECORD_SIZE	6
+
+struct vbva_conf32 {
+	u32 index;
+	u32 value;
+} __packed;
+
+/** Reserved for historical reasons. */
+#define VBOX_VBVA_CURSOR_CAPABILITY_RESERVED0   BIT(0)
+/**
+ * Guest cursor capability: can the host show a hardware cursor at the host
+ * pointer location?
+ */
+#define VBOX_VBVA_CURSOR_CAPABILITY_HARDWARE    BIT(1)
+/** Reserved for historical reasons. */
+#define VBOX_VBVA_CURSOR_CAPABILITY_RESERVED2   BIT(2)
+/** Reserved for historical reasons.  Must always be unset. */
+#define VBOX_VBVA_CURSOR_CAPABILITY_RESERVED3   BIT(3)
+/** Reserved for historical reasons. */
+#define VBOX_VBVA_CURSOR_CAPABILITY_RESERVED4   BIT(4)
+/** Reserved for historical reasons. */
+#define VBOX_VBVA_CURSOR_CAPABILITY_RESERVED5   BIT(5)
+
+struct vbva_infoview {
+	/* Index of the screen, assigned by the guest. */
+	u32 view_index;
+
+	/* The screen offset in VRAM, the framebuffer starts here. */
+	u32 view_offset;
+
+	/* The size of the VRAM memory that can be used for the view. */
+	u32 view_size;
+
+	/* The recommended maximum size of the VRAM memory for the screen. */
+	u32 max_screen_size;
+} __packed;
+
+struct vbva_flush {
+	u32 reserved;
+} __packed;
+
+/* vbva_infoscreen::flags */
+#define VBVA_SCREEN_F_NONE			0x0000
+#define VBVA_SCREEN_F_ACTIVE			0x0001
+/**
+ * The virtual monitor has been disabled by the guest and should be removed
+ * by the host and ignored for purposes of pointer position calculation.
+ */
+#define VBVA_SCREEN_F_DISABLED			0x0002
+/**
+ * The virtual monitor has been blanked by the guest and should be blacked
+ * out by the host using width, height, etc values from the vbva_infoscreen
+ * request.
+ */
+#define VBVA_SCREEN_F_BLANK			0x0004
+/**
+ * The virtual monitor has been blanked by the guest and should be blacked
+ * out by the host using the previous mode values for width. height, etc.
+ */
+#define VBVA_SCREEN_F_BLANK2			0x0008
+
+struct vbva_infoscreen {
+	/* Which view contains the screen. */
+	u32 view_index;
+
+	/* Physical X origin relative to the primary screen. */
+	s32 origin_x;
+
+	/* Physical Y origin relative to the primary screen. */
+	s32 origin_y;
+
+	/* Offset of visible framebuffer relative to the framebuffer start. */
+	u32 start_offset;
+
+	/* The scan line size in bytes. */
+	u32 line_size;
+
+	/* Width of the screen. */
+	u32 width;
+
+	/* Height of the screen. */
+	u32 height;
+
+	/* Color depth. */
+	u16 bits_per_pixel;
+
+	/* VBVA_SCREEN_F_* */
+	u16 flags;
+} __packed;
+
+/* vbva_enable::flags */
+#define VBVA_F_NONE				0x00000000
+#define VBVA_F_ENABLE				0x00000001
+#define VBVA_F_DISABLE				0x00000002
+/* extended VBVA to be used with WDDM */
+#define VBVA_F_EXTENDED				0x00000004
+/* vbva offset is absolute VRAM offset */
+#define VBVA_F_ABSOFFSET			0x00000008
+
+struct vbva_enable {
+	u32 flags;
+	u32 offset;
+	s32 result;
+} __packed;
+
+struct vbva_enable_ex {
+	struct vbva_enable base;
+	u32 screen_id;
+} __packed;
+
+struct vbva_mouse_pointer_shape {
+	/* The host result. */
+	s32 result;
+
+	/* VBOX_MOUSE_POINTER_* bit flags. */
+	u32 flags;
+
+	/* X coordinate of the hot spot. */
+	u32 hot_X;
+
+	/* Y coordinate of the hot spot. */
+	u32 hot_y;
+
+	/* Width of the pointer in pixels. */
+	u32 width;
+
+	/* Height of the pointer in scanlines. */
+	u32 height;
+
+	/* Pointer data.
+	 *
+	 ****
+	 * The data consists of 1 bpp AND mask followed by 32 bpp XOR (color)
+	 * mask.
+	 *
+	 * For pointers without alpha channel the XOR mask pixels are 32 bit
+	 * values: (lsb)BGR0(msb). For pointers with alpha channel the XOR mask
+	 * consists of (lsb)BGRA(msb) 32 bit values.
+	 *
+	 * Guest driver must create the AND mask for pointers with alpha chan.,
+	 * so if host does not support alpha, the pointer could be displayed as
+	 * a normal color pointer. The AND mask can be constructed from alpha
+	 * values. For example alpha value >= 0xf0 means bit 0 in the AND mask.
+	 *
+	 * The AND mask is 1 bpp bitmap with byte aligned scanlines. Size of AND
+	 * mask, therefore, is and_len = (width + 7) / 8 * height. The padding
+	 * bits at the end of any scanline are undefined.
+	 *
+	 * The XOR mask follows the AND mask on the next 4 bytes aligned offset:
+	 * u8 *xor = and + (and_len + 3) & ~3
+	 * Bytes in the gap between the AND and the XOR mask are undefined.
+	 * XOR mask scanlines have no gap between them and size of XOR mask is:
+	 * xor_len = width * 4 * height.
+	 ****
+	 *
+	 * Preallocate 4 bytes for accessing actual data as p->data.
+	 */
+	u8 data[4];
+} __packed;
+
+/**
+ * @name vbva_mouse_pointer_shape::flags
+ * @note The VBOX_MOUSE_POINTER_* flags are used in the guest video driver,
+ *       values must be <= 0x8000 and must not be changed. (try make more sense
+ *       of this, please).
+ * @{
+ */
+
+/** pointer is visible */
+#define VBOX_MOUSE_POINTER_VISIBLE		0x0001
+/** pointer has alpha channel */
+#define VBOX_MOUSE_POINTER_ALPHA		0x0002
+/** pointerData contains new pointer shape */
+#define VBOX_MOUSE_POINTER_SHAPE		0x0004
+
+/** @} */
+
+/*
+ * The guest driver can handle asynch guest cmd completion by reading the
+ * command offset from io port.
+ */
+#define VBVACAPS_COMPLETEGCMD_BY_IOREAD		0x00000001
+/* the guest driver can handle video adapter IRQs */
+#define VBVACAPS_IRQ				0x00000002
+/** The guest can read video mode hints sent via VBVA. */
+#define VBVACAPS_VIDEO_MODE_HINTS		0x00000004
+/** The guest can switch to a software cursor on demand. */
+#define VBVACAPS_DISABLE_CURSOR_INTEGRATION	0x00000008
+/** The guest does not depend on host handling the VBE registers. */
+#define VBVACAPS_USE_VBVA_ONLY			0x00000010
+
+struct vbva_caps {
+	s32 rc;
+	u32 caps;
+} __packed;
+
+/** Query the most recent mode hints received from the host. */
+struct vbva_query_mode_hints {
+	/** The maximum number of screens to return hints for. */
+	u16 hints_queried_count;
+	/** The size of the mode hint structures directly following this one. */
+	u16 hint_structure_guest_size;
+	/** Return code for the operation. Initialise to VERR_NOT_SUPPORTED. */
+	s32 rc;
+} __packed;
+
+/**
+ * Structure in which a mode hint is returned. The guest allocates an array
+ * of these immediately after the vbva_query_mode_hints structure.
+ * To accommodate future extensions, the vbva_query_mode_hints structure
+ * specifies the size of the vbva_modehint structures allocated by the guest,
+ * and the host only fills out structure elements which fit into that size. The
+ * host should fill any unused members (e.g. dx, dy) or structure space on the
+ * end with ~0. The whole structure can legally be set to ~0 to skip a screen.
+ */
+struct vbva_modehint {
+	u32 magic;
+	u32 cx;
+	u32 cy;
+	u32 bpp;		/* Which has never been used... */
+	u32 display;
+	u32 dx;			/**< X offset into the virtual frame-buffer. */
+	u32 dy;			/**< Y offset into the virtual frame-buffer. */
+	u32 enabled;		/* Not flags. Add new members for new flags. */
+} __packed;
+
+#define VBVAMODEHINT_MAGIC 0x0801add9u
+
+/**
+ * Report the rectangle relative to which absolute pointer events should be
+ * expressed. This information remains valid until the next VBVA resize event
+ * for any screen, at which time it is reset to the bounding rectangle of all
+ * virtual screens and must be re-set.
+ * @see VBVA_REPORT_INPUT_MAPPING.
+ */
+struct vbva_report_input_mapping {
+	s32 x;	/**< Upper left X co-ordinate relative to the first screen. */
+	s32 y;	/**< Upper left Y co-ordinate relative to the first screen. */
+	u32 cx;	/**< Rectangle width. */
+	u32 cy;	/**< Rectangle height. */
+} __packed;
+
+/**
+ * Report the guest cursor position and query the host one. The host may wish
+ * to use the guest information to re-position its own cursor (though this is
+ * currently unlikely).
+ * @see VBVA_CURSOR_POSITION
+ */
+struct vbva_cursor_position {
+	u32 report_position;	/**< Are we reporting a position? */
+	u32 x;			/**< Guest cursor X position */
+	u32 y;			/**< Guest cursor Y position */
+} __packed;
+
+#endif
diff --git a/drivers/staging/vboxvideo/vboxvideo_guest.h b/drivers/staging/vboxvideo/vboxvideo_guest.h
new file mode 100644
index 0000000000000..d09da841711ae
--- /dev/null
+++ b/drivers/staging/vboxvideo/vboxvideo_guest.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __VBOXVIDEO_GUEST_H__
+#define __VBOXVIDEO_GUEST_H__
+
+#include <linux/genalloc.h>
+#include "vboxvideo.h"
+
+/**
+ * Structure grouping the context needed for sending graphics acceleration
+ * information to the host via VBVA.  Each screen has its own VBVA buffer.
+ */
+struct vbva_buf_ctx {
+	/** Offset of the buffer in the VRAM section for the screen */
+	u32 buffer_offset;
+	/** Length of the buffer in bytes */
+	u32 buffer_length;
+	/** Set if we wrote to the buffer faster than the host could read it */
+	bool buffer_overflow;
+	/** VBVA record that we are currently preparing for the host, or NULL */
+	struct vbva_record *record;
+	/**
+	 * Pointer to the VBVA buffer mapped into the current address space.
+	 * Will be NULL if VBVA is not enabled.
+	 */
+	struct vbva_buffer *vbva;
+};
+
+/**
+ * @name Base HGSMI APIs
+ * @{
+ */
+int hgsmi_report_flags_location(struct gen_pool *ctx, u32 location);
+int hgsmi_send_caps_info(struct gen_pool *ctx, u32 caps);
+int hgsmi_test_query_conf(struct gen_pool *ctx);
+int hgsmi_query_conf(struct gen_pool *ctx, u32 index, u32 *value_ret);
+int hgsmi_update_pointer_shape(struct gen_pool *ctx, u32 flags,
+			       u32 hot_x, u32 hot_y, u32 width, u32 height,
+			       u8 *pixels, u32 len);
+int hgsmi_cursor_position(struct gen_pool *ctx, bool report_position,
+			  u32 x, u32 y, u32 *x_host, u32 *y_host);
+/** @}  */
+
+/**
+ * @name VBVA APIs
+ * @{
+ */
+bool vbva_enable(struct vbva_buf_ctx *vbva_ctx, struct gen_pool *ctx,
+		 struct vbva_buffer *vbva, s32 screen);
+void vbva_disable(struct vbva_buf_ctx *vbva_ctx, struct gen_pool *ctx,
+		  s32 screen);
+bool vbva_buffer_begin_update(struct vbva_buf_ctx *vbva_ctx,
+			      struct gen_pool *ctx);
+void vbva_buffer_end_update(struct vbva_buf_ctx *vbva_ctx);
+bool vbva_write(struct vbva_buf_ctx *vbva_ctx, struct gen_pool *ctx,
+		const void *p, u32 len);
+void vbva_setup_buffer_context(struct vbva_buf_ctx *vbva_ctx,
+			       u32 buffer_offset, u32 buffer_length);
+/** @}  */
+
+/**
+ * @name Modesetting APIs
+ * @{
+ */
+void hgsmi_process_display_info(struct gen_pool *ctx, u32 display,
+				s32 origin_x, s32 origin_y, u32 start_offset,
+				u32 pitch, u32 width, u32 height,
+				u16 bpp, u16 flags);
+int hgsmi_update_input_mapping(struct gen_pool *ctx, s32 origin_x, s32 origin_y,
+			       u32 width, u32 height);
+int hgsmi_get_mode_hints(struct gen_pool *ctx, unsigned int screens,
+			 struct vbva_modehint *hints);
+/** @}  */
+
+#endif
diff --git a/drivers/staging/vboxvideo/vboxvideo_vbe.h b/drivers/staging/vboxvideo/vboxvideo_vbe.h
new file mode 100644
index 0000000000000..f842f4d9c80a3
--- /dev/null
+++ b/drivers/staging/vboxvideo/vboxvideo_vbe.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __VBOXVIDEO_VBE_H__
+#define __VBOXVIDEO_VBE_H__
+
+/* GUEST <-> HOST Communication API */
+
+/**
+ * @todo FIXME: Either dynamicly ask host for this or put somewhere high in
+ *              physical memory like 0xE0000000.
+ */
+
+#define VBE_DISPI_BANK_ADDRESS          0xA0000
+#define VBE_DISPI_BANK_SIZE_KB          64
+
+#define VBE_DISPI_MAX_XRES              16384
+#define VBE_DISPI_MAX_YRES              16384
+#define VBE_DISPI_MAX_BPP               32
+
+#define VBE_DISPI_IOPORT_INDEX          0x01CE
+#define VBE_DISPI_IOPORT_DATA           0x01CF
+
+#define VBE_DISPI_IOPORT_DAC_WRITE_INDEX  0x03C8
+#define VBE_DISPI_IOPORT_DAC_DATA         0x03C9
+
+#define VBE_DISPI_INDEX_ID              0x0
+#define VBE_DISPI_INDEX_XRES            0x1
+#define VBE_DISPI_INDEX_YRES            0x2
+#define VBE_DISPI_INDEX_BPP             0x3
+#define VBE_DISPI_INDEX_ENABLE          0x4
+#define VBE_DISPI_INDEX_BANK            0x5
+#define VBE_DISPI_INDEX_VIRT_WIDTH      0x6
+#define VBE_DISPI_INDEX_VIRT_HEIGHT     0x7
+#define VBE_DISPI_INDEX_X_OFFSET        0x8
+#define VBE_DISPI_INDEX_Y_OFFSET        0x9
+#define VBE_DISPI_INDEX_VBOX_VIDEO      0xa
+#define VBE_DISPI_INDEX_FB_BASE_HI      0xb
+
+#define VBE_DISPI_ID0                   0xB0C0
+#define VBE_DISPI_ID1                   0xB0C1
+#define VBE_DISPI_ID2                   0xB0C2
+#define VBE_DISPI_ID3                   0xB0C3
+#define VBE_DISPI_ID4                   0xB0C4
+
+#define VBE_DISPI_ID_VBOX_VIDEO         0xBE00
+/* The VBOX interface id. Indicates support for VBVA shared memory interface. */
+#define VBE_DISPI_ID_HGSMI              0xBE01
+#define VBE_DISPI_ID_ANYX               0xBE02
+
+#define VBE_DISPI_DISABLED              0x00
+#define VBE_DISPI_ENABLED               0x01
+#define VBE_DISPI_GETCAPS               0x02
+#define VBE_DISPI_8BIT_DAC              0x20
+/**
+ * @note this definition is a BOCHS legacy, used only in the video BIOS
+ * code and ignored by the emulated hardware.
+ */
+#define VBE_DISPI_LFB_ENABLED           0x40
+#define VBE_DISPI_NOCLEARMEM            0x80
+
+#define VGA_PORT_HGSMI_HOST             0x3b0
+#define VGA_PORT_HGSMI_GUEST            0x3d0
+
+#endif
diff --git a/drivers/staging/vboxvideo/vbva_base.c b/drivers/staging/vboxvideo/vbva_base.c
new file mode 100644
index 0000000000000..c10c782f94e14
--- /dev/null
+++ b/drivers/staging/vboxvideo/vbva_base.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2006-2017 Oracle Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "vbox_drv.h"
+#include "vbox_err.h"
+#include "vboxvideo_guest.h"
+#include "hgsmi_channels.h"
+
+/*
+ * There is a hardware ring buffer in the graphics device video RAM, formerly
+ * in the VBox VMMDev PCI memory space.
+ * All graphics commands go there serialized by vbva_buffer_begin_update.
+ * and vbva_buffer_end_update.
+ *
+ * free_offset is writing position. data_offset is reading position.
+ * free_offset == data_offset means buffer is empty.
+ * There must be always gap between data_offset and free_offset when data
+ * are in the buffer.
+ * Guest only changes free_offset, host changes data_offset.
+ */
+
+static u32 vbva_buffer_available(const struct vbva_buffer *vbva)
+{
+	s32 diff = vbva->data_offset - vbva->free_offset;
+
+	return diff > 0 ? diff : vbva->data_len + diff;
+}
+
+static void vbva_buffer_place_data_at(struct vbva_buf_ctx *vbva_ctx,
+				      const void *p, u32 len, u32 offset)
+{
+	struct vbva_buffer *vbva = vbva_ctx->vbva;
+	u32 bytes_till_boundary = vbva->data_len - offset;
+	u8 *dst = &vbva->data[offset];
+	s32 diff = len - bytes_till_boundary;
+
+	if (diff <= 0) {
+		/* Chunk will not cross buffer boundary. */
+		memcpy(dst, p, len);
+	} else {
+		/* Chunk crosses buffer boundary. */
+		memcpy(dst, p, bytes_till_boundary);
+		memcpy(&vbva->data[0], (u8 *)p + bytes_till_boundary, diff);
+	}
+}
+
+static void vbva_buffer_flush(struct gen_pool *ctx)
+{
+	struct vbva_flush *p;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_VBVA, VBVA_FLUSH);
+	if (!p)
+		return;
+
+	p->reserved = 0;
+
+	hgsmi_buffer_submit(ctx, p);
+	hgsmi_buffer_free(ctx, p);
+}
+
+bool vbva_write(struct vbva_buf_ctx *vbva_ctx, struct gen_pool *ctx,
+		const void *p, u32 len)
+{
+	struct vbva_record *record;
+	struct vbva_buffer *vbva;
+	u32 available;
+
+	vbva = vbva_ctx->vbva;
+	record = vbva_ctx->record;
+
+	if (!vbva || vbva_ctx->buffer_overflow ||
+	    !record || !(record->len_and_flags & VBVA_F_RECORD_PARTIAL))
+		return false;
+
+	available = vbva_buffer_available(vbva);
+
+	while (len > 0) {
+		u32 chunk = len;
+
+		if (chunk >= available) {
+			vbva_buffer_flush(ctx);
+			available = vbva_buffer_available(vbva);
+		}
+
+		if (chunk >= available) {
+			if (WARN_ON(available <= vbva->partial_write_tresh)) {
+				vbva_ctx->buffer_overflow = true;
+				return false;
+			}
+			chunk = available - vbva->partial_write_tresh;
+		}
+
+		vbva_buffer_place_data_at(vbva_ctx, p, chunk,
+					  vbva->free_offset);
+
+		vbva->free_offset = (vbva->free_offset + chunk) %
+				    vbva->data_len;
+		record->len_and_flags += chunk;
+		available -= chunk;
+		len -= chunk;
+		p += chunk;
+	}
+
+	return true;
+}
+
+static bool vbva_inform_host(struct vbva_buf_ctx *vbva_ctx,
+			     struct gen_pool *ctx, s32 screen, bool enable)
+{
+	struct vbva_enable_ex *p;
+	bool ret;
+
+	p = hgsmi_buffer_alloc(ctx, sizeof(*p), HGSMI_CH_VBVA, VBVA_ENABLE);
+	if (!p)
+		return false;
+
+	p->base.flags = enable ? VBVA_F_ENABLE : VBVA_F_DISABLE;
+	p->base.offset = vbva_ctx->buffer_offset;
+	p->base.result = VERR_NOT_SUPPORTED;
+	if (screen >= 0) {
+		p->base.flags |= VBVA_F_EXTENDED | VBVA_F_ABSOFFSET;
+		p->screen_id = screen;
+	}
+
+	hgsmi_buffer_submit(ctx, p);
+
+	if (enable)
+		ret = RT_SUCCESS(p->base.result);
+	else
+		ret = true;
+
+	hgsmi_buffer_free(ctx, p);
+
+	return ret;
+}
+
+bool vbva_enable(struct vbva_buf_ctx *vbva_ctx, struct gen_pool *ctx,
+		 struct vbva_buffer *vbva, s32 screen)
+{
+	bool ret = false;
+
+	memset(vbva, 0, sizeof(*vbva));
+	vbva->partial_write_tresh = 256;
+	vbva->data_len = vbva_ctx->buffer_length - sizeof(struct vbva_buffer);
+	vbva_ctx->vbva = vbva;
+
+	ret = vbva_inform_host(vbva_ctx, ctx, screen, true);
+	if (!ret)
+		vbva_disable(vbva_ctx, ctx, screen);
+
+	return ret;
+}
+
+void vbva_disable(struct vbva_buf_ctx *vbva_ctx, struct gen_pool *ctx,
+		  s32 screen)
+{
+	vbva_ctx->buffer_overflow = false;
+	vbva_ctx->record = NULL;
+	vbva_ctx->vbva = NULL;
+
+	vbva_inform_host(vbva_ctx, ctx, screen, false);
+}
+
+bool vbva_buffer_begin_update(struct vbva_buf_ctx *vbva_ctx,
+			      struct gen_pool *ctx)
+{
+	struct vbva_record *record;
+	u32 next;
+
+	if (!vbva_ctx->vbva ||
+	    !(vbva_ctx->vbva->host_flags.host_events & VBVA_F_MODE_ENABLED))
+		return false;
+
+	WARN_ON(vbva_ctx->buffer_overflow || vbva_ctx->record);
+
+	next = (vbva_ctx->vbva->record_free_index + 1) % VBVA_MAX_RECORDS;
+
+	/* Flush if all slots in the records queue are used */
+	if (next == vbva_ctx->vbva->record_first_index)
+		vbva_buffer_flush(ctx);
+
+	/* If even after flush there is no place then fail the request */
+	if (next == vbva_ctx->vbva->record_first_index)
+		return false;
+
+	record = &vbva_ctx->vbva->records[vbva_ctx->vbva->record_free_index];
+	record->len_and_flags = VBVA_F_RECORD_PARTIAL;
+	vbva_ctx->vbva->record_free_index = next;
+	/* Remember which record we are using. */
+	vbva_ctx->record = record;
+
+	return true;
+}
+
+void vbva_buffer_end_update(struct vbva_buf_ctx *vbva_ctx)
+{
+	struct vbva_record *record = vbva_ctx->record;
+
+	WARN_ON(!vbva_ctx->vbva || !record ||
+		!(record->len_and_flags & VBVA_F_RECORD_PARTIAL));
+
+	/* Mark the record completed. */
+	record->len_and_flags &= ~VBVA_F_RECORD_PARTIAL;
+
+	vbva_ctx->buffer_overflow = false;
+	vbva_ctx->record = NULL;
+}
+
+void vbva_setup_buffer_context(struct vbva_buf_ctx *vbva_ctx,
+			       u32 buffer_offset, u32 buffer_length)
+{
+	vbva_ctx->buffer_offset = buffer_offset;
+	vbva_ctx->buffer_length = buffer_length;
+}
-- 
GitLab


From fe855789d605590e57f9cd968d85ecce46f5c3fd Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 12 Jul 2017 15:08:39 +0200
Subject: [PATCH 1711/1958] USB: cdc-acm: add device-id for quirky printer

Add device-id entry for DATECS FP-2000 fiscal printer needing the
NO_UNION_NORMAL quirk.

Reported-by: Anton Avramov <lukav@lukav.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 5357d83bbda2b..5e056064259c8 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1829,6 +1829,9 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x1576, 0x03b1), /* Maretron USB100 */
 	.driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
 	},
+	{ USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */
+	.driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
+	},
 
 	{ USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */
 	.driver_info = CLEAR_HALT_CONDITIONS,
-- 
GitLab


From 86be7f7b2d940ddc18143061e77989b017d93bf8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 30 Jun 2017 17:46:16 +0200
Subject: [PATCH 1712/1958] usb: typec: include linux/device.h in ucsi.h

The new driver causes a build failure in some configurations:

In file included from /git/arm-soc/drivers/usb/typec/ucsi/trace.h:9:0,
                 from /git/arm-soc/drivers/usb/typec/ucsi/trace.c:2:
drivers/usb/typec/ucsi/ucsi.h:331:39: error: 'struct device' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]

This includes the required header file.

Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h
index 6b0d2f0918c6d..8a88f45822e39 100644
--- a/drivers/usb/typec/ucsi/ucsi.h
+++ b/drivers/usb/typec/ucsi/ucsi.h
@@ -3,6 +3,7 @@
 #define __DRIVER_USB_TYPEC_UCSI_H
 
 #include <linux/bitops.h>
+#include <linux/device.h>
 #include <linux/types.h>
 
 /* -------------------------------------------------------------------------- */
-- 
GitLab


From 446230f52a5bef593554510302465eabab45a372 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 6 Jul 2017 16:06:32 +0100
Subject: [PATCH 1713/1958] usb: storage: return on error to avoid a null
 pointer dereference

When us->extra is null the driver is not initialized, however, a
later call to osd200_scsi_to_ata is made that dereferences
us->extra, causing a null pointer dereference.  The code
currently detects and reports that the driver is not initialized;
add a return to avoid the subsequent dereference issue in this
check.

Thanks to Alan Stern for pointing out that srb->result needs setting
to DID_ERROR << 16

Detected by CoverityScan, CID#100308 ("Dereference after null check")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: stable <stable@vger.kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/isd200.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index fba4005dd737b..6a7720e665956 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -1529,8 +1529,11 @@ static void isd200_ata_command(struct scsi_cmnd *srb, struct us_data *us)
 
 	/* Make sure driver was initialized */
 
-	if (us->extra == NULL)
+	if (us->extra == NULL) {
 		usb_stor_dbg(us, "ERROR Driver not initialized\n");
+		srb->result = DID_ERROR << 16;
+		return;
+	}
 
 	scsi_set_resid(srb, 0);
 	/* scsi_bufflen might change in protocol translation to ata */
-- 
GitLab


From 2b01bfaeb41e1563322448d9b392ac924cbf22ef Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 Jul 2017 11:12:38 +0300
Subject: [PATCH 1714/1958] serial: st-asc: Potential error pointer dereference

It looks like we intended to return an error code here, because we
dereference "ascport->pinctrl" on the next lines.

Fixes: 6929cb00a501 ("serial: st-asc: Read in all Pinctrl states")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/st-asc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index f5335be344f67..6b0ca65027d00 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -758,6 +758,7 @@ static int asc_init_port(struct asc_port *ascport,
 	if (IS_ERR(ascport->pinctrl)) {
 		ret = PTR_ERR(ascport->pinctrl);
 		dev_err(&pdev->dev, "Failed to get Pinctrl: %d\n", ret);
+		return ret;
 	}
 
 	ascport->states[DEFAULT] =
-- 
GitLab


From 4ab3c51e0540ba8464fe34d84cc35821bb77ae92 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 Jul 2017 11:34:23 +0300
Subject: [PATCH 1715/1958] serial: sh-sci: Uninitialized variables in sysfs
 files

The kstrtol() function returns -ERANGE as well as -EINVAL so these tests
are not enough.  It's not a super serious bug, but my static checker
correctly complains that the "r" variable might be used uninitialized.

Fixes: 5d23188a473d ("serial: sh-sci: make RX FIFO parameters tunable via sysfs")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index da5ddfc147788..e08b16b070c0f 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1085,10 +1085,12 @@ static ssize_t rx_trigger_store(struct device *dev,
 {
 	struct uart_port *port = dev_get_drvdata(dev);
 	struct sci_port *sci = to_sci_port(port);
+	int ret;
 	long r;
 
-	if (kstrtol(buf, 0, &r) == -EINVAL)
-		return -EINVAL;
+	ret = kstrtol(buf, 0, &r);
+	if (ret)
+		return ret;
 
 	sci->rx_trigger = scif_set_rtrg(port, r);
 	if (port->type == PORT_SCIFA || port->type == PORT_SCIFB)
@@ -1116,10 +1118,12 @@ static ssize_t rx_fifo_timeout_store(struct device *dev,
 {
 	struct uart_port *port = dev_get_drvdata(dev);
 	struct sci_port *sci = to_sci_port(port);
+	int ret;
 	long r;
 
-	if (kstrtol(buf, 0, &r) == -EINVAL)
-		return -EINVAL;
+	ret = kstrtol(buf, 0, &r);
+	if (ret)
+		return ret;
 	sci->rx_fifo_timeout = r;
 	scif_set_rtrg(port, 1);
 	if (r > 0)
-- 
GitLab


From f55ce7b024090a51382ccab2730b96e2f7b4e9cf Mon Sep 17 00:00:00 2001
From: Mateusz Jurczyk <mjurczyk@google.com>
Date: Wed, 7 Jun 2017 15:50:38 +0200
Subject: [PATCH 1716/1958] netfilter: nfnetlink: Improve input length
 sanitization in nfnetlink_rcv

Verify that the length of the socket buffer is sufficient to cover the
nlmsghdr structure before accessing the nlh->nlmsg_len field for further
input sanitization. If the client only supplies 1-3 bytes of data in
sk_buff, then nlh->nlmsg_len remains partially uninitialized and
contains leftover memory from the corresponding kernel allocation.
Operating on such data may result in indeterminate evaluation of the
nlmsg_len < NLMSG_HDRLEN expression.

The bug was discovered by a runtime instrumentation designed to detect
use of uninitialized memory in the kernel. The patch prevents this and
other similar tools (e.g. KMSAN) from flagging this behavior in the future.

Signed-off-by: Mateusz Jurczyk <mjurczyk@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 92b05e188fd1f..733d3e4a30d85 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -472,8 +472,7 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (msglen > skb->len)
 		msglen = skb->len;
 
-	if (nlh->nlmsg_len < NLMSG_HDRLEN ||
-	    skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+	if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
 		return;
 
 	err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy,
@@ -500,7 +499,8 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
-	if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+	if (skb->len < NLMSG_HDRLEN ||
+	    nlh->nlmsg_len < NLMSG_HDRLEN ||
 	    skb->len < nlh->nlmsg_len)
 		return;
 
-- 
GitLab


From a70b487b07cf4201bc6702e7f646fa593b23009f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 17 Jul 2017 21:19:01 +1000
Subject: [PATCH 1717/1958] powerpc/powernv: Fix boot on Power8 bare metal due
 to opal_configure_cores()

In commit 1c0eaf0f56d6 ("powerpc/powernv: Tell OPAL about our MMU mode
on POWER9"), we added additional flags to the OPAL call to configure
CPUs at boot.

These flags only work on Power9 firmwares, and worse can cause boot
failures on Power8 machines, so we check for CPU_FTR_ARCH_300 (aka POWER9)
before adding the extra flags.

Unfortunately we forgot that opal_configure_cores() is called before
the CPU feature checks are dynamically patched, meaning the check
always returns true.

We definitely need to do something to make the CPU feature checks less
prone to bugs like this, but for now the minimal fix is to use
early_cpu_has_feature().

Reported-and-tested-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Fixes: 1c0eaf0f56d6 ("powerpc/powernv: Tell OPAL about our MMU mode on POWER9")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 9b87abb178f02..cad6b57ce494b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -78,7 +78,7 @@ void opal_configure_cores(void)
 	 *  ie. Host hash  supports  hash guests
 	 *      Host radix supports  hash/radix guests
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+	if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
 		reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
 		if (early_radix_enabled())
 			reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
-- 
GitLab


From 351ea50df545a4acaf8f61784949ceedbe923f03 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 17 Jul 2017 13:48:58 +0200
Subject: [PATCH 1718/1958] Revert "serial: imx-serial - move DMA buffer
 configuration to DT"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit a3015affdf76ef279fbbb3710a220bab7e9ea04b as there
are complaints that it is incorrect.

Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Nandor Han <nandor.han@ge.com>
Cc: Romain Perier <romain.perier@collabora.com>
Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
---
 .../bindings/serial/fsl-imx-uart.txt          |  2 --
 drivers/tty/serial/imx.c                      | 25 ++++++-------------
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
index e6b572409cf50..574c3a2c77d5c 100644
--- a/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
+++ b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
@@ -9,7 +9,6 @@ Optional properties:
 - fsl,irda-mode : Indicate the uart supports irda mode
 - fsl,dte-mode : Indicate the uart works in DTE mode. The uart works
                   in DCE mode by default.
-- fsl,dma-size : Indicate the size of the DMA buffer and its periods
 
 Please check Documentation/devicetree/bindings/serial/serial.txt
 for the complete list of generic properties.
@@ -29,5 +28,4 @@ uart1: serial@73fbc000 {
 	interrupts = <31>;
 	uart-has-rtscts;
 	fsl,dte-mode;
-	fsl,dma-size = <1024 4>;
 };
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 9e3162bf3bd12..e33da75ceac57 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -186,11 +186,6 @@
 
 #define UART_NR 8
 
-/* RX DMA buffer periods */
-#define RX_DMA_PERIODS 4
-#define RX_BUF_SIZE	(PAGE_SIZE)
-
-
 /* i.MX21 type uart runs on all i.mx except i.MX1 and i.MX6q */
 enum imx_uart_type {
 	IMX1_UART,
@@ -226,7 +221,6 @@ struct imx_port {
 	struct dma_chan		*dma_chan_rx, *dma_chan_tx;
 	struct scatterlist	rx_sgl, tx_sgl[2];
 	void			*rx_buf;
-	unsigned int		rx_buf_size;
 	struct circ_buf		rx_ring;
 	unsigned int		rx_periods;
 	dma_cookie_t		rx_cookie;
@@ -967,6 +961,8 @@ static void imx_timeout(unsigned long data)
 	}
 }
 
+#define RX_BUF_SIZE	(PAGE_SIZE)
+
 /*
  * There are two kinds of RX DMA interrupts(such as in the MX6Q):
  *   [1] the RX DMA buffer is full.
@@ -1049,6 +1045,9 @@ static void dma_rx_callback(void *data)
 	}
 }
 
+/* RX DMA buffer periods */
+#define RX_DMA_PERIODS 4
+
 static int start_rx_dma(struct imx_port *sport)
 {
 	struct scatterlist *sgl = &sport->rx_sgl;
@@ -1059,8 +1058,9 @@ static int start_rx_dma(struct imx_port *sport)
 
 	sport->rx_ring.head = 0;
 	sport->rx_ring.tail = 0;
+	sport->rx_periods = RX_DMA_PERIODS;
 
-	sg_init_one(sgl, sport->rx_buf, sport->rx_buf_size);
+	sg_init_one(sgl, sport->rx_buf, RX_BUF_SIZE);
 	ret = dma_map_sg(dev, sgl, 1, DMA_FROM_DEVICE);
 	if (ret == 0) {
 		dev_err(dev, "DMA mapping error for RX.\n");
@@ -1171,7 +1171,7 @@ static int imx_uart_dma_init(struct imx_port *sport)
 		goto err;
 	}
 
-	sport->rx_buf = kzalloc(sport->rx_buf_size, GFP_KERNEL);
+	sport->rx_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!sport->rx_buf) {
 		ret = -ENOMEM;
 		goto err;
@@ -2036,7 +2036,6 @@ static int serial_imx_probe_dt(struct imx_port *sport,
 {
 	struct device_node *np = pdev->dev.of_node;
 	int ret;
-	u32 dma_buf_size[2];
 
 	sport->devdata = of_device_get_match_data(&pdev->dev);
 	if (!sport->devdata)
@@ -2060,14 +2059,6 @@ static int serial_imx_probe_dt(struct imx_port *sport,
 	if (of_get_property(np, "rts-gpios", NULL))
 		sport->have_rtsgpio = 1;
 
-	if (!of_property_read_u32_array(np, "fsl,dma-size", dma_buf_size, 2)) {
-		sport->rx_buf_size = dma_buf_size[0] * dma_buf_size[1];
-		sport->rx_periods = dma_buf_size[1];
-	} else {
-		sport->rx_buf_size = RX_BUF_SIZE;
-		sport->rx_periods = RX_DMA_PERIODS;
-	}
-
 	return 0;
 }
 #else
-- 
GitLab


From 514ab34dbad6c6fa824a1f56984c196e59082346 Mon Sep 17 00:00:00 2001
From: Ian Jamison <ian.dev@arkver.com>
Date: Fri, 14 Jul 2017 17:31:57 +0100
Subject: [PATCH 1719/1958] serial: imx: Prevent TX buffer PIO write when a DMA
 has been started

Function imx_transmit_buffer starts a TX DMA if DMA is enabled, since
commit 91a1a909f921 ("serial: imx: Support sw flow control in DMA mode").
It also carries on and attempts to write the same TX buffer using PIO.
This results in TX data corruption and double-incrementing xmit->tail
with the knock-on effect of tail passing head and a page of garbage
being sent out.

This seems to be triggered mostly when using RS485 half duplex on SMP
systems, but is probably not limited to just those.

Tested locally on an i.MX6Q with an RS485 half duplex transceiver on
UART3, and also by Clemens Gruber.

Tested-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Signed-off-by: Ian Jamison <ian.dev@arkver.com>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/imx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index e33da75ceac57..80934e7bd67f4 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -458,7 +458,7 @@ static inline void imx_transmit_buffer(struct imx_port *sport)
 		}
 	}
 
-	while (!uart_circ_empty(xmit) &&
+	while (!uart_circ_empty(xmit) && !sport->dma_is_txing &&
 	       !(readl(sport->port.membase + uts_reg(sport)) & UTS_TXFULL)) {
 		/* send xmit->buf[xmit->tail]
 		 * out the port here */
-- 
GitLab


From 3ee5447e8cd9a65d08fbb49fa9767cbf7fef6d91 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Tue, 11 Jul 2017 08:03:43 -0300
Subject: [PATCH 1720/1958] tty: serial: lpuart: Fix the logic for detecting
 the 32-bit type UART

Commit 0d6fce904452 ("tty: serial: lpuart: introduce lpuart_soc_data to
represent SoC property") introduced a buggy logic for detecting the 32-bit
type UART since the condition: "if (sport->port.iotype & UPIO_MEM32BE)"
is always true.

Performing such bitfield AND operation is not correct, because in the
case of Vybrid UART iotype is UPIO_MEM (2), so:

UPIO_MEM & UPIO_MEM32BE = 010 & 110 = 010, which is true.

Such logic tells the driver to always treat the UART operations as 32-bit,
leading to the driver misbehavior on Vybrid.

Fix the 32-bit type detection logic to avoid UART breakage on Vybrid.

While at it, introduce a lpuart_is_32() function to help readability.

Fixes: 0d6fce904452 ("tty: serial: lpuart: introduce lpuart_soc_data to represent SoC property")
Reported-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Tested-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 343de8c384b02..898dcb091a279 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -619,6 +619,12 @@ static unsigned int lpuart32_tx_empty(struct uart_port *port)
 		TIOCSER_TEMT : 0;
 }
 
+static bool lpuart_is_32(struct lpuart_port *sport)
+{
+	return sport->port.iotype == UPIO_MEM32 ||
+	       sport->port.iotype ==  UPIO_MEM32BE;
+}
+
 static irqreturn_t lpuart_txint(int irq, void *dev_id)
 {
 	struct lpuart_port *sport = dev_id;
@@ -627,7 +633,7 @@ static irqreturn_t lpuart_txint(int irq, void *dev_id)
 
 	spin_lock_irqsave(&sport->port.lock, flags);
 	if (sport->port.x_char) {
-		if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE))
+		if (lpuart_is_32(sport))
 			lpuart32_write(&sport->port, sport->port.x_char, UARTDATA);
 		else
 			writeb(sport->port.x_char, sport->port.membase + UARTDR);
@@ -635,14 +641,14 @@ static irqreturn_t lpuart_txint(int irq, void *dev_id)
 	}
 
 	if (uart_circ_empty(xmit) || uart_tx_stopped(&sport->port)) {
-		if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE))
+		if (lpuart_is_32(sport))
 			lpuart32_stop_tx(&sport->port);
 		else
 			lpuart_stop_tx(&sport->port);
 		goto out;
 	}
 
-	if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE))
+	if (lpuart_is_32(sport))
 		lpuart32_transmit_buffer(sport);
 	else
 		lpuart_transmit_buffer(sport);
@@ -1978,12 +1984,12 @@ static int __init lpuart_console_setup(struct console *co, char *options)
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
 	else
-		if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE))
+		if (lpuart_is_32(sport))
 			lpuart32_console_get_options(sport, &baud, &parity, &bits);
 		else
 			lpuart_console_get_options(sport, &baud, &parity, &bits);
 
-	if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE))
+	if (lpuart_is_32(sport))
 		lpuart32_setup_watermark(sport);
 	else
 		lpuart_setup_watermark(sport);
@@ -2118,7 +2124,7 @@ static int lpuart_probe(struct platform_device *pdev)
 	}
 	sport->port.irq = ret;
 	sport->port.iotype = sdata->iotype;
-	if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE))
+	if (lpuart_is_32(sport))
 		sport->port.ops = &lpuart32_pops;
 	else
 		sport->port.ops = &lpuart_pops;
@@ -2145,7 +2151,7 @@ static int lpuart_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, &sport->port);
 
-	if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE))
+	if (lpuart_is_32(sport))
 		lpuart_reg.cons = LPUART32_CONSOLE;
 	else
 		lpuart_reg.cons = LPUART_CONSOLE;
@@ -2198,7 +2204,7 @@ static int lpuart_suspend(struct device *dev)
 	struct lpuart_port *sport = dev_get_drvdata(dev);
 	unsigned long temp;
 
-	if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE)) {
+	if (lpuart_is_32(sport)) {
 		/* disable Rx/Tx and interrupts */
 		temp = lpuart32_read(&sport->port, UARTCTRL);
 		temp &= ~(UARTCTRL_TE | UARTCTRL_TIE | UARTCTRL_TCIE);
@@ -2249,7 +2255,7 @@ static int lpuart_resume(struct device *dev)
 	if (sport->port.suspended && !sport->port.irq_wake)
 		clk_prepare_enable(sport->clk);
 
-	if (sport->port.iotype & (UPIO_MEM32 | UPIO_MEM32BE)) {
+	if (lpuart_is_32(sport)) {
 		lpuart32_setup_watermark(sport);
 		temp = lpuart32_read(&sport->port, UARTCTRL);
 		temp |= (UARTCTRL_RIE | UARTCTRL_TIE | UARTCTRL_RE |
-- 
GitLab


From a2b18708ee14baec4ef9c0fba96070bba14d0081 Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos@android.com>
Date: Wed, 5 Jul 2017 13:46:01 -0700
Subject: [PATCH 1721/1958] Revert "android: binder: Sanity check at binder
 ioctl"

This reverts commit a906d6931f3ccaf7de805643190765ddd7378e27.

The patch introduced a race in the binder driver. An attempt to fix the
race was submitted in "[PATCH v2] android: binder: fix dangling pointer
comparison", however the conclusion in the discussion for that patch
was that the original patch should be reverted.

The reversion is being done as part of the fine-grained locking
patchset since the patch would need to be refactored when
proc->vmm_vm_mm is removed from struct binder_proc and added
in the binder allocator.

Signed-off-by: Todd Kjos <tkjos@google.com>
Cc: stable <stable@vger.kernel.org> # 4.6+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index aae4d8d4be361..157bd3e49ff4a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3247,10 +3247,6 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	/*pr_info("binder_ioctl: %d:%d %x %lx\n",
 			proc->pid, current->pid, cmd, arg);*/
 
-	if (unlikely(current->mm != proc->vma_vm_mm)) {
-		pr_err("current mm mismatch proc mm\n");
-		return -EINVAL;
-	}
 	trace_binder_ioctl(cmd, arg);
 
 	ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
@@ -3466,7 +3462,6 @@ static int binder_open(struct inode *nodp, struct file *filp)
 		return -ENOMEM;
 	get_task_struct(current);
 	proc->tsk = current;
-	proc->vma_vm_mm = current->mm;
 	INIT_LIST_HEAD(&proc->todo);
 	init_waitqueue_head(&proc->wait);
 	proc->default_priority = task_nice(current);
-- 
GitLab


From c4ea41ba195d01c9af66fb28711a16cc97caa9c5 Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos@android.com>
Date: Thu, 29 Jun 2017 12:01:36 -0700
Subject: [PATCH 1722/1958] binder: use group leader instead of open thread

The binder allocator assumes that the thread that
called binder_open will never die for the lifetime of
that proc. That thread is normally the group_leader,
however it may not be. Use the group_leader instead
of current.

Signed-off-by: Todd Kjos <tkjos@google.com>
Cc: stable <stable@vger.kernel.org> # 4.4+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 157bd3e49ff4a..9393924ae8e86 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3460,8 +3460,8 @@ static int binder_open(struct inode *nodp, struct file *filp)
 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 	if (proc == NULL)
 		return -ENOMEM;
-	get_task_struct(current);
-	proc->tsk = current;
+	get_task_struct(current->group_leader);
+	proc->tsk = current->group_leader;
 	INIT_LIST_HEAD(&proc->todo);
 	init_waitqueue_head(&proc->wait);
 	proc->default_priority = task_nice(current);
-- 
GitLab


From 00b40d613352c623aaae88a44e5ded7c912909d7 Mon Sep 17 00:00:00 2001
From: Riley Andrews <riandrews@google.com>
Date: Thu, 29 Jun 2017 12:01:37 -0700
Subject: [PATCH 1723/1958] binder: Use wake up hint for synchronous
 transactions.

Use wake_up_interruptible_sync() to hint to the scheduler binder
transactions are synchronous wakeups. Disable preemption while waking
to avoid ping-ponging on the binder lock.

Signed-off-by: Todd Kjos <tkjos@google.com>
Signed-off-by: Omprakash Dhyade <odhyade@codeaurora.org>
Cc: stable <stable@vger.kernel.org> # 4.4+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 9393924ae8e86..f7665c31fecaf 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2200,8 +2200,12 @@ static void binder_transaction(struct binder_proc *proc,
 	list_add_tail(&t->work.entry, target_list);
 	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
 	list_add_tail(&tcomplete->entry, &thread->todo);
-	if (target_wait)
-		wake_up_interruptible(target_wait);
+	if (target_wait) {
+		if (reply || !(t->flags & TF_ONE_WAY))
+			wake_up_interruptible_sync(target_wait);
+		else
+			wake_up_interruptible(target_wait);
+	}
 	return;
 
 err_translate_failed:
-- 
GitLab


From e67ae2b7b23b283e657865b498b151e6a17b919d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 10 Jul 2017 13:17:26 +0200
Subject: [PATCH 1724/1958] libceph: fix old style declaration warnings

The new macros don't follow the usual style for declarations,
which we get a warning for with 'make W=1':

In file included from fs/ceph/mds_client.c:16:0:
include/linux/ceph/ceph_features.h:74:1: error: 'static' is not at beginning of declaration [-Werror=old-style-declaration]

This moves the 'static' keyword to the front of the
declaration.

Fixes: f179d3ba8cb9 ("libceph: new features macros")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/ceph_features.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index f0f6c537b64cb..040dd105c3e72 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -10,14 +10,14 @@
 #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
 
 #define DEFINE_CEPH_FEATURE(bit, incarnation, name)			\
-	const static uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
-	const static uint64_t CEPH_FEATUREMASK_##name =			\
+	static const uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
+	static const uint64_t CEPH_FEATUREMASK_##name =			\
 		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
 
 /* this bit is ignored but still advertised by release *when* */
 #define DEFINE_CEPH_FEATURE_DEPRECATED(bit, incarnation, name, when) \
-	const static uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
-	const static uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
+	static const uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
+	static const uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
 		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
 
 /*
-- 
GitLab


From 00c8ebb360937bbc5da02929d62fcbf6a72801eb Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:45:17 +0300
Subject: [PATCH 1725/1958] libceph: NULL deref on osdmap_apply_incremental()
 error path

There are hidden gotos in the ceph_decode_* macros.  We need to set the
"err" variable on these error paths otherwise we end up returning
ERR_PTR(0) which is NULL.  It causes NULL dereferences in the callers.

Fixes: 6f428df47dae ("libceph: pg_upmap[_items] infrastructure")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
[idryomov@gmail.com: similar bug in osdmap_decode(), changelog tweak]
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 864789c5974e0..3ffc7a1fd8b15 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1544,7 +1544,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 	if (struct_v >= 3) {
 		/* erasure_code_profiles */
 		ceph_decode_skip_map_of_map(p, end, string, string, string,
-					    bad);
+					    e_inval);
 	}
 
 	if (struct_v >= 4) {
@@ -1825,9 +1825,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	if (struct_v >= 3) {
 		/* new_erasure_code_profiles */
 		ceph_decode_skip_map_of_map(p, end, string, string, string,
-					    bad);
+					    e_inval);
 		/* old_erasure_code_profiles */
-		ceph_decode_skip_set(p, end, string, bad);
+		ceph_decode_skip_set(p, end, string, e_inval);
 	}
 
 	if (struct_v >= 4) {
-- 
GitLab


From c2acfd95d0c7df0bf6826d9d1ca3796160728a42 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 13 Jul 2017 15:57:26 +0200
Subject: [PATCH 1726/1958] libceph: set -EINVAL in one place in crush_decode()

No sooner than Dan had fixed this issue in commit 293dffaad8d5
("libceph: NULL deref on crush_decode() error path"), I brought it
back.  Add a new label and set -EINVAL once, right before failing.

Fixes: 278b1d709c6a ("libceph: ceph_decode_skip_* helpers")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 3ffc7a1fd8b15..48e0ff82bde22 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -338,7 +338,7 @@ static void crush_finalize(struct crush_map *c)
 static struct crush_map *crush_decode(void *pbyval, void *end)
 {
 	struct crush_map *c;
-	int err = -EINVAL;
+	int err;
 	int i, j;
 	void **p = &pbyval;
 	void *start = pbyval;
@@ -407,7 +407,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 			size = sizeof(struct crush_bucket_straw2);
 			break;
 		default:
-			err = -EINVAL;
 			goto bad;
 		}
 		BUG_ON(size == 0);
@@ -439,31 +438,31 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 			err = crush_decode_uniform_bucket(p, end,
 				  (struct crush_bucket_uniform *)b);
 			if (err < 0)
-				goto bad;
+				goto fail;
 			break;
 		case CRUSH_BUCKET_LIST:
 			err = crush_decode_list_bucket(p, end,
 			       (struct crush_bucket_list *)b);
 			if (err < 0)
-				goto bad;
+				goto fail;
 			break;
 		case CRUSH_BUCKET_TREE:
 			err = crush_decode_tree_bucket(p, end,
 				(struct crush_bucket_tree *)b);
 			if (err < 0)
-				goto bad;
+				goto fail;
 			break;
 		case CRUSH_BUCKET_STRAW:
 			err = crush_decode_straw_bucket(p, end,
 				(struct crush_bucket_straw *)b);
 			if (err < 0)
-				goto bad;
+				goto fail;
 			break;
 		case CRUSH_BUCKET_STRAW2:
 			err = crush_decode_straw2_bucket(p, end,
 				(struct crush_bucket_straw2 *)b);
 			if (err < 0)
-				goto bad;
+				goto fail;
 			break;
 		}
 	}
@@ -474,7 +473,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		u32 yes;
 		struct crush_rule *r;
 
-		err = -EINVAL;
 		ceph_decode_32_safe(p, end, yes, bad);
 		if (!yes) {
 			dout("crush_decode NO rule %d off %x %p to %p\n",
@@ -489,7 +487,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		/* len */
 		ceph_decode_32_safe(p, end, yes, bad);
 #if BITS_PER_LONG == 32
-		err = -EINVAL;
 		if (yes > (ULONG_MAX - sizeof(*r))
 			  / sizeof(struct crush_rule_step))
 			goto bad;
@@ -557,7 +554,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	if (*p != end) {
 		err = decode_choose_args(p, end, c);
 		if (err)
-			goto bad;
+			goto fail;
 	}
 
 done:
@@ -567,10 +564,14 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 
 badmem:
 	err = -ENOMEM;
-bad:
+fail:
 	dout("crush_decode fail %d\n", err);
 	crush_destroy(c);
 	return ERR_PTR(err);
+
+bad:
+	err = -EINVAL;
+	goto fail;
 }
 
 int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
-- 
GitLab


From f5cc6898650210a90669437d2d9a3fd564ff7d88 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 7 Jul 2017 16:14:45 +0200
Subject: [PATCH 1727/1958] libceph: use alloc_pg_mapping() in
 __decode_pg_upmap_items()

... otherwise we die in insert_pg_mapping(), which wants pg->node to be
empty, i.e. initialized with RB_CLEAR_NODE.

Fixes: 6f428df47dae ("libceph: pg_upmap[_items] infrastructure")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 48e0ff82bde22..64ae9f89773a1 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1400,7 +1400,7 @@ static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
 		return ERR_PTR(-EINVAL);
 
 	ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
-	pg = kzalloc(sizeof(*pg) + 2 * len * sizeof(u32), GFP_NOIO);
+	pg = alloc_pg_mapping(2 * len * sizeof(u32));
 	if (!pg)
 		return ERR_PTR(-ENOMEM);
 
-- 
GitLab


From 914902af4f271884968f72c4fa144c723be2a699 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 14 Jul 2017 16:08:54 +0200
Subject: [PATCH 1728/1958] libceph: don't call encode_request_finish() on
 MOSDBackoff messages

encode_request_finish() is for MOSDOp messages.  Calling it on
MOSDBackoff ack-block messages corrupts them.

Fixes: a02a946dfe96 ("libceph: respect RADOS_BACKOFF backoffs")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 86a9737d8e3ff..901bb82213662 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5310,7 +5310,10 @@ static int invalidate_authorizer(struct ceph_connection *con)
 
 static void osd_reencode_message(struct ceph_msg *msg)
 {
-	encode_request_finish(msg);
+	int type = le16_to_cpu(msg->hdr.type);
+
+	if (type == CEPH_MSG_OSD_OP)
+		encode_request_finish(msg);
 }
 
 static int osd_sign_message(struct ceph_msg *msg)
-- 
GitLab


From 84583cfb973c4313955c6231cc9cb3772d280b15 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Thu, 6 Jul 2017 11:12:21 +0800
Subject: [PATCH 1729/1958] ceph: fix race in concurrent readdir

For a large directory, program needs to issue multiple readdir
syscalls to get all dentries. When there are multiple programs
read the directory concurrently. Following sequence of events
can happen.

 - program calls readdir with pos = 2. ceph sends readdir request
   to mds. The reply contains N1 entries. ceph adds these N1 entries
   to readdir cache.
 - program calls readdir with pos = N1+2. The readdir is satisfied
   by the readdir cache, N2 entries are returned. (Other program
   calls readdir in the middle, which fills the cache)
 - program calls readdir with pos = N1+N2+2. ceph sends readdir
   request to mds. The reply contains N3 entries and it reaches
   directory end. ceph adds these N3 entries to the readdir cache
   and marks directory complete.

The second readdir call does not update fi->readdir_cache_idx.
ceph add the last N3 entries to wrong places.

Cc: stable@vger.kernel.org # v4.3+
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/dir.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e071d23f61481..ef7240ace5767 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -271,6 +271,11 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 		if (ret < 0)
 			err = ret;
 		dput(last);
+		/* last_name no longer match cache index */
+		if (fi->readdir_cache_idx >= 0) {
+			fi->readdir_cache_idx = -1;
+			fi->dir_release_count = 0;
+		}
 	}
 	return err;
 }
-- 
GitLab


From 7c40b22f6f84c98a1d36e6d0a4346e58f05e45d8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 Jul 2017 11:13:35 +0300
Subject: [PATCH 1730/1958] libceph: potential NULL dereference in
 ceph_msg_data_create()

If kmem_cache_zalloc() returns NULL then the INIT_LIST_HEAD(&data->links);
will Oops.  The callers aren't really prepared for NULL returns so it
doesn't make a lot of difference in real life.

Fixes: 5240d9f95dfe ("libceph: replace message data pointer with list")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/messenger.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0c31035bbfee8..b7cc615d42efd 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3203,8 +3203,10 @@ static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
 		return NULL;
 
 	data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
-	if (data)
-		data->type = type;
+	if (!data)
+		return NULL;
+
+	data->type = type;
 	INIT_LIST_HEAD(&data->links);
 
 	return data;
-- 
GitLab


From d50daa2af2618dab6d21634e65a5fbcf4ae437d6 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Thu, 29 Jun 2017 14:46:44 -0700
Subject: [PATCH 1731/1958] spmi: Include OF based modalias in device uevent

Include the OF-based modalias in the uevent sent when registering SPMI
devices, so that user space has a chance to autoload the kernel module
for the device.

Tested-by: Rob Clark <robdclark@gmail.com>
Reported-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spmi/spmi.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/spmi/spmi.c b/drivers/spmi/spmi.c
index 2b9b0941d9eb5..6d23226e5f696 100644
--- a/drivers/spmi/spmi.c
+++ b/drivers/spmi/spmi.c
@@ -365,11 +365,23 @@ static int spmi_drv_remove(struct device *dev)
 	return 0;
 }
 
+static int spmi_drv_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	int ret;
+
+	ret = of_device_uevent_modalias(dev, env);
+	if (ret != -ENODEV)
+		return ret;
+
+	return 0;
+}
+
 static struct bus_type spmi_bus_type = {
 	.name		= "spmi",
 	.match		= spmi_device_match,
 	.probe		= spmi_drv_probe,
 	.remove		= spmi_drv_remove,
+	.uevent		= spmi_drv_uevent,
 };
 
 /**
-- 
GitLab


From 6b71016e4b253172545e8388ff646f0dcbda18a8 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 21 Jun 2017 11:47:50 -0700
Subject: [PATCH 1732/1958] MAINTAINERS: Add entry for SPMI subsystem

I have the hardware and I've been reviewing SPMI patches when
they come on the list. Add myself as a reviewer in this area and
add the linux-arm-msm list because people subscribed there also
have the hardware.

Cc: Kiran Gunda <kgunda@codeaurora.org>
Cc: Abhijeet Dharmapurikar <adharmap@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 205d3977ac46e..b49af4f695fc1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12527,6 +12527,15 @@ S:	Supported
 F:	Documentation/networking/spider_net.txt
 F:	drivers/net/ethernet/toshiba/spider_net*
 
+SPMI SUBSYSTEM
+R:	Stephen Boyd <sboyd@codeaurora.org>
+L:	linux-arm-msm@vger.kernel.org
+F:	Documentation/devicetree/bindings/spmi/
+F:	drivers/spmi/
+F:	include/dt-bindings/spmi/spmi.h
+F:	include/linux/spmi.h
+F:	include/trace/events/spmi.h
+
 SPU FILE SYSTEM
 M:	Jeremy Kerr <jk@ozlabs.org>
 L:	linuxppc-dev@lists.ozlabs.org
-- 
GitLab


From eba9718ed25b2f8a3c066bf985edd5046485a018 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Mon, 26 Jun 2017 19:17:46 -0700
Subject: [PATCH 1733/1958] spmi: pmic-arb: Always allocate ppid_to_apid table

After commit 7f1d4e58dabb ("spmi: pmic-arb: optimize table
lookups") we always need the ppid_to_apid table regardless of the
version of pmic arbiter we have. Otherwise, we will try to deref
the array when we don't allocate it on v2 hardware like the
msm8974 SoCs.

Cc: Abhijeet Dharmapurikar <adharmap@codeaurora.org>
Cc: Kiran Gunda <kgunda@codeaurora.org>
Fixes: 7f1d4e58dabb ("spmi: pmic-arb: optimize table lookups")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: Luca Weiss <luca@z3ntu.xyz>
Reviewed-by: Kiran Gunda <kgunda@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spmi/spmi-pmic-arb.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index 2afe3597982e7..f4b7a98a7913e 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -134,7 +134,6 @@ struct apid_data {
  * @spmic:		SPMI controller object
  * @ver_ops:		version dependent operations.
  * @ppid_to_apid	in-memory copy of PPID -> channel (APID) mapping table.
- *			v2 only.
  */
 struct spmi_pmic_arb {
 	void __iomem		*rd_base;
@@ -1016,6 +1015,13 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev)
 		goto err_put_ctrl;
 	}
 
+	pa->ppid_to_apid = devm_kcalloc(&ctrl->dev, PMIC_ARB_MAX_PPID,
+					sizeof(*pa->ppid_to_apid), GFP_KERNEL);
+	if (!pa->ppid_to_apid) {
+		err = -ENOMEM;
+		goto err_put_ctrl;
+	}
+
 	hw_ver = readl_relaxed(core + PMIC_ARB_VERSION);
 
 	if (hw_ver < PMIC_ARB_VERSION_V2_MIN) {
@@ -1048,15 +1054,6 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev)
 			err = PTR_ERR(pa->wr_base);
 			goto err_put_ctrl;
 		}
-
-		pa->ppid_to_apid = devm_kcalloc(&ctrl->dev,
-						PMIC_ARB_MAX_PPID,
-						sizeof(*pa->ppid_to_apid),
-						GFP_KERNEL);
-		if (!pa->ppid_to_apid) {
-			err = -ENOMEM;
-			goto err_put_ctrl;
-		}
 	}
 
 	dev_info(&ctrl->dev, "PMIC arbiter version %s (0x%x)\n",
-- 
GitLab


From 6463a4571ceefc43908df4b016d8d5d8b8e85357 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 25 Jun 2017 12:47:46 -0700
Subject: [PATCH 1734/1958] vmbus: re-enable channel tasklet

This problem shows up in 4.11 when netvsc driver is removed and reloaded.
The problem is that the channel is closed during module removal and the
tasklet for processing responses is disabled. When module is reloaded
the channel is reopened but the tasklet is marked as disabled.
The fix is to re-enable tasklet at the end of close which gets it back
to the initial state.

The issue is less urgent in 4.12 since network driver now uses NAPI
and not the tasklet; and other VMBUS devices are rarely unloaded/reloaded.

Fixes: dad72a1d2844 ("vmbus: remove hv_event_tasklet_disable/enable")

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index e9bf0bb87ac40..e57cc40cb768b 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -606,6 +606,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
 
 out:
+	/* re-enable tasklet for use on re-open */
+	tasklet_enable(&channel->callback_event);
 	return ret;
 }
 
-- 
GitLab


From 800161bd0209a8db77f66af283c379ff8d58d88d Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 29 Jun 2017 14:19:50 +0300
Subject: [PATCH 1735/1958] thunderbolt: Correct access permissions for active
 NVM contents

Firmware upgrade tools that decide which NVM image should be uploaded to
the Thunderbolt controller need to access active parts of the NVM even
if they are not run as root. The information in active NVM is not
considered security critical so we can use the default permissions set
by the NVMem framework.

Writing the NVM image is still left as root only operation.

While there mark the active NVM as read-only in the filesystem.

Reported-by: Yehezkel Bernat <yehezkel.bernat@intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andreas Noever <andreas.noever@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thunderbolt/switch.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index ab3e8f4104444..40219a7063099 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -281,9 +281,11 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id,
 	if (active) {
 		config.name = "nvm_active";
 		config.reg_read = tb_switch_nvm_read;
+		config.read_only = true;
 	} else {
 		config.name = "nvm_non_active";
 		config.reg_write = tb_switch_nvm_write;
+		config.root_only = true;
 	}
 
 	config.id = id;
@@ -292,7 +294,6 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id,
 	config.size = size;
 	config.dev = &sw->dev;
 	config.owner = THIS_MODULE;
-	config.root_only = true;
 	config.priv = sw;
 
 	return nvmem_register(&config);
-- 
GitLab


From f2ad99fc46d5454e097692c8c8202fd5bc46c809 Mon Sep 17 00:00:00 2001
From: Laurentiu Palcu <laurentiu.palcu@nxp.com>
Date: Wed, 28 Jun 2017 12:31:34 +0300
Subject: [PATCH 1736/1958] drm/imx: fix typo in ipu_plane_formats[]

The BGRA8888 appears twice in the ipu_plane_formats[] list. The
duplicate should be BGRX8888.

The original commit is:

commit 59d6b7189a96 ("drm/imx: ipuv3-plane: enable support for RGBX8888
and RGBA8888 pixel formats")

Signed-off-by: Laurentiu Palcu <laurentiu.palcu@nxp.com>
Fixes: 59d6b7189a96 ("drm/imx: ipuv3-plane: enable support for RGBX8888 and RGBA8888 pixel")
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/drm/imx/ipuv3-plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index d63e853a03006..7b1bb0ffcb8d3 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -54,7 +54,7 @@ static const uint32_t ipu_plane_formats[] = {
 	DRM_FORMAT_RGBA8888,
 	DRM_FORMAT_RGBX8888,
 	DRM_FORMAT_BGRA8888,
-	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_BGRX8888,
 	DRM_FORMAT_UYVY,
 	DRM_FORMAT_VYUY,
 	DRM_FORMAT_YUYV,
-- 
GitLab


From 799ee2970485dc206c3bf347d6e6827c04d5e4f9 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 12 Jun 2017 17:54:29 +0200
Subject: [PATCH 1737/1958] drm/imx: parallel-display: Accept
 drm_of_find_panel_or_bridge failure

The parallel panel driver should continue to work without having an
endpoint linking to an panel in DT for backwards compatibility.
With the recent switch to drm_of_find_panel_or_bridge, an absent
panel results in a failure with -ENODEV error return code. To restore
the old behaviour, ignore the -ENODEV return code.

Reported-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
Fixes: ebc944613567 ("drm: convert drivers to use drm_of_find_panel_or_bridge")
Tested-by: Chris Healy <cphealy@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/drm/imx/parallel-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index 636031a30e17d..8aca20209cb80 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -237,7 +237,7 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 
 	/* port@1 is the output port */
 	ret = drm_of_find_panel_or_bridge(np, 1, 0, &imxpd->panel, &imxpd->bridge);
-	if (ret)
+	if (ret && ret != -ENODEV)
 		return ret;
 
 	imxpd->dev = dev;
-- 
GitLab


From 496f8931b6460febac1dc91c03e86530f938483a Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Tue, 11 Jul 2017 17:00:39 +0930
Subject: [PATCH 1738/1958] fsi: core: register with postcore_initcall

When testing an i2c driver that is a fsi bus driver, I saw the following
oops:

 kernel BUG at drivers/base/driver.c:153!
 Internal error: Oops - BUG: 0 [#1] ARM

 [<8027cb1c>] (driver_register) from [<80344e88>] (fsi_driver_register+0x2c/0x38)
 [<80344e88>] (fsi_driver_register) from [<805f5ebc>] (fsi_i2c_driver_init+0x1c/0x24)
 [<805f5ebc>] (fsi_i2c_driver_init) from [<805d1f14>] (do_one_initcall+0xb4/0x170)
 [<805d1f14>] (do_one_initcall) from [<805d20f0>] (kernel_init_freeable+0x120/0x1dc)
 [<805d20f0>] (kernel_init_freeable) from [<8043f4a8>] (kernel_init+0x18/0x104)
 [<8043f4a8>] (kernel_init) from [<8000a5e8>] (ret_from_fork+0x14/0x2c)

This is because the fsi bus had not been registered. This fix registers the bus
with postcore_initcall instead, to ensure it is registered earlier on.

When the fsi core is used as a module this should not be a problem as the fsi
driver will depend on the fsi bus type symbol, and will therefore load the core
before the driver.

Fixes: 0508ad1fff11 ("drivers/fsi: Add empty fsi bus definitions")
Signed-off-by: Joel Stanley <joel@jms.id.au>
Acked-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fsi/fsi-core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index a485864cb5125..4019d3ca5eff7 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -883,17 +883,16 @@ struct bus_type fsi_bus_type = {
 };
 EXPORT_SYMBOL_GPL(fsi_bus_type);
 
-static int fsi_init(void)
+static int __init fsi_init(void)
 {
 	return bus_register(&fsi_bus_type);
 }
+postcore_initcall(fsi_init);
 
 static void fsi_exit(void)
 {
 	bus_unregister(&fsi_bus_type);
 }
-
-module_init(fsi_init);
 module_exit(fsi_exit);
 module_param(discard_errors, int, 0664);
 MODULE_LICENSE("GPL");
-- 
GitLab


From ceb8a12ff2d4b085f7cee1ac44523ee63ce51e20 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 20 Jun 2017 22:43:42 +0200
Subject: [PATCH 1739/1958] drivers/fsi: fix fsi_slave_mode prototype

gcc warns about the return type of this function:

drivers/fsi/fsi-core.c:535:8: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers]

This removes the 'const' attribute, as suggested by the warning.

Fixes: 2b37c3e285f9 ("drivers/fsi: Set slave SMODE to init communication")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fsi/fsi-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index 4019d3ca5eff7..06432d84cbf8c 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -532,7 +532,7 @@ static inline uint32_t fsi_smode_sid(int x)
 	return (x & FSI_SMODE_SID_MASK) << FSI_SMODE_SID_SHIFT;
 }
 
-static const uint32_t fsi_slave_smode(int id)
+static uint32_t fsi_slave_smode(int id)
 {
 	return FSI_SMODE_WSC | FSI_SMODE_ECRC
 		| fsi_smode_sid(id)
-- 
GitLab


From d6e4bd1b52bf35e7fc14b52be7dbe784896398e3 Mon Sep 17 00:00:00 2001
From: Frank Wang <frank.wang@rock-chips.com>
Date: Fri, 14 Jul 2017 16:38:43 +0800
Subject: [PATCH 1740/1958] nvmem: rockchip-efuse: amend compatible
 rk322x-efuse to rk3228-efuse

As the comments from Heiko Stuebner <heiko@sntech.de> that compatible
should not contain any placeholders, this patch fix it for rk3228 SoC.

Note that this is a fix for v4.13, due to fixing the current non-standard
binding name that should not become part of an official kernel release.

Signed-off-by: Frank Wang <frank.wang@rock-chips.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt | 2 +-
 drivers/nvmem/rockchip-efuse.c                             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
index 194926f77194c..1ff02afdc55a8 100644
--- a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
+++ b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
@@ -4,7 +4,7 @@ Required properties:
 - compatible: Should be one of the following.
   - "rockchip,rk3066a-efuse" - for RK3066a SoCs.
   - "rockchip,rk3188-efuse" - for RK3188 SoCs.
-  - "rockchip,rk322x-efuse" - for RK322x SoCs.
+  - "rockchip,rk3228-efuse" - for RK3228 SoCs.
   - "rockchip,rk3288-efuse" - for RK3288 SoCs.
   - "rockchip,rk3399-efuse" - for RK3399 SoCs.
 - reg: Should contain the registers location and exact eFuse size
diff --git a/drivers/nvmem/rockchip-efuse.c b/drivers/nvmem/rockchip-efuse.c
index a0d4ede9b8fc4..63e3eb55f3ac6 100644
--- a/drivers/nvmem/rockchip-efuse.c
+++ b/drivers/nvmem/rockchip-efuse.c
@@ -170,7 +170,7 @@ static const struct of_device_id rockchip_efuse_match[] = {
 		.data = (void *)&rockchip_rk3288_efuse_read,
 	},
 	{
-		.compatible = "rockchip,rk322x-efuse",
+		.compatible = "rockchip,rk3228-efuse",
 		.data = (void *)&rockchip_rk3288_efuse_read,
 	},
 	{
-- 
GitLab


From 4c19c0ec73241b29a12daf913d46f0c15aa33783 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Tue, 4 Jul 2017 10:22:44 +0200
Subject: [PATCH 1741/1958] mux: remove the Kconfig question for the subsystem

The MULTIPLEXER question in the Kconfig might be confusing and is
of dubious value. Remove it. This makes consumers responsible for
selecting MULTIPLEXER, which they already do.

Signed-off-by: Peter Rosin <peda@axentia.se>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mux/Kconfig | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/mux/Kconfig b/drivers/mux/Kconfig
index 7c754a0f14bb5..19e4e904c9bfc 100644
--- a/drivers/mux/Kconfig
+++ b/drivers/mux/Kconfig
@@ -2,20 +2,11 @@
 # Multiplexer devices
 #
 
-menuconfig MULTIPLEXER
-	tristate "Multiplexer subsystem"
-	help
-	  Multiplexer controller subsystem. Multiplexers are used in a
-	  variety of settings, and this subsystem abstracts their use
-	  so that the rest of the kernel sees a common interface. When
-	  multiple parallel multiplexers are controlled by one single
-	  multiplexer controller, this subsystem also coordinates the
-	  multiplexer accesses.
-
-	  To compile the subsystem as a module, choose M here: the module will
-	  be called mux-core.
+config MULTIPLEXER
+	tristate
 
-if MULTIPLEXER
+menu "Multiplexer drivers"
+	depends on MULTIPLEXER
 
 config MUX_ADG792A
 	tristate "Analog Devices ADG792A/ADG792G Multiplexers"
@@ -56,4 +47,4 @@ config MUX_MMIO
 	  To compile the driver as a module, choose M here: the module will
 	  be called mux-mmio.
 
-endif
+endmenu
-- 
GitLab


From 998849967c57705b2cb013d4aea439e7e76e2d8a Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Mon, 10 Jul 2017 14:45:34 +0200
Subject: [PATCH 1742/1958] mux: mux-core: unregister mux_class in mux_exit()

Fixes an obvious and nasty typo.

Fixes: a3b02a9c6591 ("mux: minimal mux subsystem")
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mux/mux-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mux/mux-core.c b/drivers/mux/mux-core.c
index 90b8995f07cba..2fe96c4701126 100644
--- a/drivers/mux/mux-core.c
+++ b/drivers/mux/mux-core.c
@@ -46,7 +46,7 @@ static int __init mux_init(void)
 
 static void __exit mux_exit(void)
 {
-	class_register(&mux_class);
+	class_unregister(&mux_class);
 	ida_destroy(&mux_ida);
 }
 
-- 
GitLab


From 2c927c0c73fd9f6de9ff576e08cd59f13b9d0ef7 Mon Sep 17 00:00:00 2001
From: "Alex A. Mihaylov" <minimumlaw@rambler.ru>
Date: Tue, 13 Jun 2017 18:57:56 +0300
Subject: [PATCH 1743/1958] w1: Fix slave count on 1-Wire bus (resend)

1-Wire bus have very fast algorith for exchange with single slave
device. Fix incorrect count of slave devices on connect second slave
device. This case on slave device probe() step we need use generic
(multislave) functions for read/write device.

Signed-off-by: Alex A. Mihaylov <minimumlaw@rambler.ru>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/w1/w1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index 95ea7e6b1d991..74471e7aa5cc4 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -728,6 +728,7 @@ int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn)
 	memcpy(&sl->reg_num, rn, sizeof(sl->reg_num));
 	atomic_set(&sl->refcnt, 1);
 	atomic_inc(&sl->master->refcnt);
+	dev->slave_count++;
 
 	/* slave modules need to be loaded in a context with unlocked mutex */
 	mutex_unlock(&dev->mutex);
@@ -747,11 +748,11 @@ int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn)
 
 	sl->family = f;
 
-
 	err = __w1_attach_slave_device(sl);
 	if (err < 0) {
 		dev_err(&dev->dev, "%s: Attaching %s failed.\n", __func__,
 			 sl->name);
+		dev->slave_count--;
 		w1_family_put(sl->family);
 		atomic_dec(&sl->master->refcnt);
 		kfree(sl);
@@ -759,7 +760,6 @@ int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn)
 	}
 
 	sl->ttl = dev->slave_ttl;
-	dev->slave_count++;
 
 	memcpy(msg.id.id, rn, sizeof(msg.id));
 	msg.type = W1_SLAVE_ADD;
-- 
GitLab


From cc84b824e489ef001597ea0ba0ddcec17fb1f05d Mon Sep 17 00:00:00 2001
From: "Alex A. Mihaylov" <minimumlaw@rambler.ru>
Date: Fri, 7 Jul 2017 18:38:22 +0300
Subject: [PATCH 1744/1958] regmap: regmap-w1: Fix build troubles

Fixes: cc5d0db390b0 ("regmap: Add 1-Wire bus support")
Commit de0d6dbdbdb2 ("w1: Add subsystem kernel public interface")
Fix place off w1.h header file

Cosmetic: Fix company name (local to international)

Signed-off-by: Alex A. Mihaylov <minimumlaw@rambler.ru>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Cc: Mark Brown <broonie@kernel.org>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/regmap/regmap-w1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/regmap/regmap-w1.c b/drivers/base/regmap/regmap-w1.c
index 5f04e7bf063e1..e6c64b0be5b21 100644
--- a/drivers/base/regmap/regmap-w1.c
+++ b/drivers/base/regmap/regmap-w1.c
@@ -1,7 +1,7 @@
 /*
  * Register map access API - W1 (1-Wire) support
  *
- * Copyright (C) 2017 OAO Radioavionica
+ * Copyright (c) 2017 Radioavionica Corporation
  * Author: Alex A. Mihaylov <minimumlaw@rambler.ru>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -11,7 +11,7 @@
 
 #include <linux/regmap.h>
 #include <linux/module.h>
-#include "../../w1/w1.h"
+#include <linux/w1.h>
 
 #include "internal.h"
 
-- 
GitLab


From c89876dda01841a6a485cb29b9d1843db34958a3 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Fri, 30 Jun 2017 17:44:02 -0500
Subject: [PATCH 1745/1958] w1: omap-hdq: fix error return code in
 omap_hdq_probe()

platform_get_irq() returns an error code, but the omap_hdq
driver ignores it and always returns -ENXIO. This is not correct,
and prevents -EPROBE_DEFER from being propagated properly.
Notice that platform_get_irq() no longer returns 0 on error.

Print error message and propagate the return value of
platform_get_irq on failure.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/w1/masters/omap_hdq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c
index 3612542b60444..83fc9aab34e87 100644
--- a/drivers/w1/masters/omap_hdq.c
+++ b/drivers/w1/masters/omap_hdq.c
@@ -704,7 +704,8 @@ static int omap_hdq_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq	< 0) {
-		ret = -ENXIO;
+		dev_dbg(&pdev->dev, "Failed to get IRQ: %d\n", irq);
+		ret = irq;
 		goto err_irq;
 	}
 
-- 
GitLab


From 5b20a436835a3e4d0a8118594a864bc88ea53f93 Mon Sep 17 00:00:00 2001
From: "minimumlaw@rambler.ru" <minimumlaw@rambler.ru>
Date: Thu, 6 Jul 2017 16:10:17 +0300
Subject: [PATCH 1746/1958] regmap: regmap-w1: Fix build troubles

Fixes: cc5d0db390b0 ("regmap: Add 1-Wire bus support")
Commit de0d6dbdbdb2 ("w1: Add subsystem kernel public interface")
Fix place off w1.h header file

Cosmetic: Fix company name (local to international)
Signed-off-by: Alex A. Mihaylov <minimumlaw@rambler.ru>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-w1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/regmap/regmap-w1.c b/drivers/base/regmap/regmap-w1.c
index 5f04e7bf063e1..e6c64b0be5b21 100644
--- a/drivers/base/regmap/regmap-w1.c
+++ b/drivers/base/regmap/regmap-w1.c
@@ -1,7 +1,7 @@
 /*
  * Register map access API - W1 (1-Wire) support
  *
- * Copyright (C) 2017 OAO Radioavionica
+ * Copyright (c) 2017 Radioavionica Corporation
  * Author: Alex A. Mihaylov <minimumlaw@rambler.ru>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -11,7 +11,7 @@
 
 #include <linux/regmap.h>
 #include <linux/module.h>
-#include "../../w1/w1.h"
+#include <linux/w1.h>
 
 #include "internal.h"
 
-- 
GitLab


From cf56c2f892a8a1870a8358114ad896772da7543a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 6 Jul 2017 23:17:44 +0200
Subject: [PATCH 1747/1958] netfilter: remove old pre-netns era hook api

no more users in the tree, remove this.

The old api is racy wrt. module removal, all users have been converted
to the netns-aware api.

The old api pretended we still have global hooks but that has not been
true for a long time.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h |   9 ---
 net/netfilter/core.c      | 143 --------------------------------------
 2 files changed, 152 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index a4b97be30b281..22f081065d496 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -61,8 +61,6 @@ typedef unsigned int nf_hookfn(void *priv,
 			       struct sk_buff *skb,
 			       const struct nf_hook_state *state);
 struct nf_hook_ops {
-	struct list_head	list;
-
 	/* User fills in from here down. */
 	nf_hookfn		*hook;
 	struct net_device	*dev;
@@ -160,13 +158,6 @@ int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 			     unsigned int n);
 
-int nf_register_hook(struct nf_hook_ops *reg);
-void nf_unregister_hook(struct nf_hook_ops *reg);
-int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
-void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
-int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
-void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
-
 /* Functions to register get/setsockopt ranges (non-inclusive).  You
    need to check permissions yourself! */
 int nf_register_sockopt(struct nf_sockopt_ops *reg);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 552d606e57ca4..368610dbc3c00 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -227,114 +227,6 @@ void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);
 
-static LIST_HEAD(nf_hook_list);
-
-static int _nf_register_hook(struct nf_hook_ops *reg)
-{
-	struct net *net, *last;
-	int ret;
-
-	for_each_net(net) {
-		ret = nf_register_net_hook(net, reg);
-		if (ret && ret != -ENOENT)
-			goto rollback;
-	}
-	list_add_tail(&reg->list, &nf_hook_list);
-
-	return 0;
-rollback:
-	last = net;
-	for_each_net(net) {
-		if (net == last)
-			break;
-		nf_unregister_net_hook(net, reg);
-	}
-	return ret;
-}
-
-int nf_register_hook(struct nf_hook_ops *reg)
-{
-	int ret;
-
-	rtnl_lock();
-	ret = _nf_register_hook(reg);
-	rtnl_unlock();
-
-	return ret;
-}
-EXPORT_SYMBOL(nf_register_hook);
-
-static void _nf_unregister_hook(struct nf_hook_ops *reg)
-{
-	struct net *net;
-
-	list_del(&reg->list);
-	for_each_net(net)
-		nf_unregister_net_hook(net, reg);
-}
-
-void nf_unregister_hook(struct nf_hook_ops *reg)
-{
-	rtnl_lock();
-	_nf_unregister_hook(reg);
-	rtnl_unlock();
-}
-EXPORT_SYMBOL(nf_unregister_hook);
-
-int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	unsigned int i;
-	int err = 0;
-
-	for (i = 0; i < n; i++) {
-		err = nf_register_hook(&reg[i]);
-		if (err)
-			goto err;
-	}
-	return err;
-
-err:
-	if (i > 0)
-		nf_unregister_hooks(reg, i);
-	return err;
-}
-EXPORT_SYMBOL(nf_register_hooks);
-
-/* Caller MUST take rtnl_lock() */
-int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	unsigned int i;
-	int err = 0;
-
-	for (i = 0; i < n; i++) {
-		err = _nf_register_hook(&reg[i]);
-		if (err)
-			goto err;
-	}
-	return err;
-
-err:
-	if (i > 0)
-		_nf_unregister_hooks(reg, i);
-	return err;
-}
-EXPORT_SYMBOL(_nf_register_hooks);
-
-void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	while (n-- > 0)
-		nf_unregister_hook(&reg[n]);
-}
-EXPORT_SYMBOL(nf_unregister_hooks);
-
-/* Caller MUST take rtnl_lock */
-void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
-{
-	while (n-- > 0)
-		_nf_unregister_hook(&reg[n]);
-}
-EXPORT_SYMBOL(_nf_unregister_hooks);
-
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
@@ -450,37 +342,6 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(nf_nat_decode_session_hook);
 #endif
 
-static int nf_register_hook_list(struct net *net)
-{
-	struct nf_hook_ops *elem;
-	int ret;
-
-	rtnl_lock();
-	list_for_each_entry(elem, &nf_hook_list, list) {
-		ret = nf_register_net_hook(net, elem);
-		if (ret && ret != -ENOENT)
-			goto out_undo;
-	}
-	rtnl_unlock();
-	return 0;
-
-out_undo:
-	list_for_each_entry_continue_reverse(elem, &nf_hook_list, list)
-		nf_unregister_net_hook(net, elem);
-	rtnl_unlock();
-	return ret;
-}
-
-static void nf_unregister_hook_list(struct net *net)
-{
-	struct nf_hook_ops *elem;
-
-	rtnl_lock();
-	list_for_each_entry(elem, &nf_hook_list, list)
-		nf_unregister_net_hook(net, elem);
-	rtnl_unlock();
-}
-
 static int __net_init netfilter_net_init(struct net *net)
 {
 	int i, h, ret;
@@ -500,16 +361,12 @@ static int __net_init netfilter_net_init(struct net *net)
 		return -ENOMEM;
 	}
 #endif
-	ret = nf_register_hook_list(net);
-	if (ret)
-		remove_proc_entry("netfilter", net->proc_net);
 
 	return ret;
 }
 
 static void __net_exit netfilter_net_exit(struct net *net)
 {
-	nf_unregister_hook_list(net);
 	remove_proc_entry("netfilter", net->proc_net);
 }
 
-- 
GitLab


From 97772bcd56efa21d9d8976db6f205574ea602f51 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 Jul 2017 13:07:17 +0200
Subject: [PATCH 1748/1958] netfilter: nat: fix src map lookup

When doing initial conversion to rhashtable I replaced the bucket
walk with a single rhashtable_lookup_fast().

When moving to rhlist I failed to properly walk the list of identical
tuples, but that is what is needed for this to work correctly.
The table contains the original tuples, so the reply tuples are all
distinct.

We currently decide that mapping is (not) in range only based on the
first entry, but in case its not we need to try the reply tuple of the
next entry until we either find an in-range mapping or we checked
all the entries.

This bug makes nat core attempt collision resolution while it might be
able to use the mapping as-is.

Fixes: 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable")
Reported-by: Jaco Kroon <jaco@uls.co.za>
Tested-by: Jaco Kroon <jaco@uls.co.za>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 832c5a08d9a58..eb541786ccb7c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -222,20 +222,21 @@ find_appropriate_src(struct net *net,
 		.tuple = tuple,
 		.zone = zone
 	};
-	struct rhlist_head *hl;
+	struct rhlist_head *hl, *h;
 
 	hl = rhltable_lookup(&nf_nat_bysource_table, &key,
 			     nf_nat_bysource_params);
-	if (!hl)
-		return 0;
 
-	ct = container_of(hl, typeof(*ct), nat_bysource);
+	rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
+		nf_ct_invert_tuplepr(result,
+				     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+		result->dst = tuple->dst;
 
-	nf_ct_invert_tuplepr(result,
-			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	result->dst = tuple->dst;
+		if (in_range(l3proto, l4proto, result, range))
+			return 1;
+	}
 
-	return in_range(l3proto, l4proto, result, range);
+	return 0;
 }
 
 /* For [FUTURE] fragmentation handling, we want the least-used
-- 
GitLab


From 974292defee033bc43ccfcb2fcefc3eba3905340 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 Jul 2017 13:29:03 +0200
Subject: [PATCH 1749/1958] netfilter: nf_tables: only allow in/output for arp
 packets

arp packets cannot be forwarded.

They can be bridged, but then they can be filtered using
either ebtables or nftables bridge family.

The bridge netfilter exposes a "call-arptables" switch which
pushes packets into arptables, but lets not expose this for nftables, so better
close this asap.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_tables_arp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 805c8ddfe8602..4bbc273b45e87 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -72,8 +72,7 @@ static const struct nf_chain_type filter_arp = {
 	.family		= NFPROTO_ARP,
 	.owner		= THIS_MODULE,
 	.hook_mask	= (1 << NF_ARP_IN) |
-			  (1 << NF_ARP_OUT) |
-			  (1 << NF_ARP_FORWARD),
+			  (1 << NF_ARP_OUT),
 };
 
 static int __init nf_tables_arp_init(void)
-- 
GitLab


From 36ac344e16e04e3e55e8fed7446095a6458c64e6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 10 Jul 2017 13:53:53 +0200
Subject: [PATCH 1750/1958] netfilter: expect: fix crash when putting uninited
 expectation

We crash in __nf_ct_expect_check, it calls nf_ct_remove_expect on the
uninitialised expectation instead of existing one, so del_timer chokes
on random memory address.

Fixes: ec0e3f01114ad32711243 ("netfilter: nf_ct_expect: Add nf_ct_remove_expect()")
Reported-by: Sergey Kvachonok <ravenexp@gmail.com>
Tested-by: Sergey Kvachonok <ravenexp@gmail.com>
Cc: Gao Feng <fgao@ikuai8.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_expect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e03d16ed550d6..899c2c36da136 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -422,7 +422,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	h = nf_ct_expect_dst_hash(net, &expect->tuple);
 	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
-			if (nf_ct_remove_expect(expect))
+			if (nf_ct_remove_expect(i))
 				break;
 		} else if (expect_clash(i, expect)) {
 			ret = -EBUSY;
-- 
GitLab


From 6509f3096263ca2714ec938439a832b302a3a65e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 20 Jun 2017 22:34:13 +0200
Subject: [PATCH 1751/1958] tty: hide unused pty_get_peer function

TIOCGPTPEER is only used for unix98 PTYs, and we get a warning
when those are disabled:

drivers/tty/pty.c:466:12: error: 'pty_get_peer' defined but not used [-Werror=unused-function]

This moves the respective functions inside of the existing #ifdef.

Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Aleksa Sarai <asarai@suse.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/pty.c | 85 +++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index d1399aac05a17..284749fb0f6b9 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -448,48 +448,6 @@ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty,
 	return retval;
 }
 
-/**
- *	pty_open_peer - open the peer of a pty
- *	@tty: the peer of the pty being opened
- *
- *	Open the cached dentry in tty->link, providing a safe way for userspace
- *	to get the slave end of a pty (where they have the master fd and cannot
- *	access or trust the mount namespace /dev/pts was mounted inside).
- */
-static struct file *pty_open_peer(struct tty_struct *tty, int flags)
-{
-	if (tty->driver->subtype != PTY_TYPE_MASTER)
-		return ERR_PTR(-EIO);
-	return dentry_open(tty->link->driver_data, flags, current_cred());
-}
-
-static int pty_get_peer(struct tty_struct *tty, int flags)
-{
-	int fd = -1;
-	struct file *filp = NULL;
-	int retval = -EINVAL;
-
-	fd = get_unused_fd_flags(0);
-	if (fd < 0) {
-		retval = fd;
-		goto err;
-	}
-
-	filp = pty_open_peer(tty, flags);
-	if (IS_ERR(filp)) {
-		retval = PTR_ERR(filp);
-		goto err_put;
-	}
-
-	fd_install(fd, filp);
-	return fd;
-
-err_put:
-	put_unused_fd(fd);
-err:
-	return retval;
-}
-
 static void pty_cleanup(struct tty_struct *tty)
 {
 	tty_port_put(tty->port);
@@ -646,9 +604,50 @@ static inline void legacy_pty_init(void) { }
 
 /* Unix98 devices */
 #ifdef CONFIG_UNIX98_PTYS
-
 static struct cdev ptmx_cdev;
 
+/**
+ *	pty_open_peer - open the peer of a pty
+ *	@tty: the peer of the pty being opened
+ *
+ *	Open the cached dentry in tty->link, providing a safe way for userspace
+ *	to get the slave end of a pty (where they have the master fd and cannot
+ *	access or trust the mount namespace /dev/pts was mounted inside).
+ */
+static struct file *pty_open_peer(struct tty_struct *tty, int flags)
+{
+	if (tty->driver->subtype != PTY_TYPE_MASTER)
+		return ERR_PTR(-EIO);
+	return dentry_open(tty->link->driver_data, flags, current_cred());
+}
+
+static int pty_get_peer(struct tty_struct *tty, int flags)
+{
+	int fd = -1;
+	struct file *filp = NULL;
+	int retval = -EINVAL;
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0) {
+		retval = fd;
+		goto err;
+	}
+
+	filp = pty_open_peer(tty, flags);
+	if (IS_ERR(filp)) {
+		retval = PTR_ERR(filp);
+		goto err_put;
+	}
+
+	fd_install(fd, filp);
+	return fd;
+
+err_put:
+	put_unused_fd(fd);
+err:
+	return retval;
+}
+
 static int pty_unix98_ioctl(struct tty_struct *tty,
 			    unsigned int cmd, unsigned long arg)
 {
-- 
GitLab


From c6325179238f1d4683edbec53d8322575d76d7e2 Mon Sep 17 00:00:00 2001
From: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Date: Mon, 17 Jul 2017 16:29:46 +0300
Subject: [PATCH 1752/1958] tty: Fix TIOCGPTPEER ioctl definition

This ioctl does nothing to justify an _IOC_READ or _IOC_WRITE flag
because it doesn't copy anything from/to userspace to access the
argument.

Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl")
Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Acked-by: Aleksa Sarai <asarai@suse.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/include/uapi/asm/ioctls.h   | 2 +-
 arch/mips/include/uapi/asm/ioctls.h    | 2 +-
 arch/parisc/include/uapi/asm/ioctls.h  | 2 +-
 arch/powerpc/include/uapi/asm/ioctls.h | 2 +-
 arch/sh/include/uapi/asm/ioctls.h      | 2 +-
 arch/sparc/include/uapi/asm/ioctls.h   | 2 +-
 arch/xtensa/include/uapi/asm/ioctls.h  | 2 +-
 include/uapi/asm-generic/ioctls.h      | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h
index ff67b8373bf78..1cd7dc7d48701 100644
--- a/arch/alpha/include/uapi/asm/ioctls.h
+++ b/arch/alpha/include/uapi/asm/ioctls.h
@@ -100,7 +100,7 @@
 #define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
-#define TIOCGPTPEER	_IOR('T', 0x41, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
 
 #define TIOCSERCONFIG	0x5453
 #define TIOCSERGWILD	0x5454
diff --git a/arch/mips/include/uapi/asm/ioctls.h b/arch/mips/include/uapi/asm/ioctls.h
index 68e19b689a00c..1609cb0907ace 100644
--- a/arch/mips/include/uapi/asm/ioctls.h
+++ b/arch/mips/include/uapi/asm/ioctls.h
@@ -91,7 +91,7 @@
 #define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
-#define TIOCGPTPEER	_IOR('T', 0x41, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
 
 /* I hope the range from 0x5480 on is free ... */
 #define TIOCSCTTY	0x5480		/* become controlling tty */
diff --git a/arch/parisc/include/uapi/asm/ioctls.h b/arch/parisc/include/uapi/asm/ioctls.h
index 674c68a5bbd03..d0e3321403bed 100644
--- a/arch/parisc/include/uapi/asm/ioctls.h
+++ b/arch/parisc/include/uapi/asm/ioctls.h
@@ -60,7 +60,7 @@
 #define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
-#define TIOCGPTPEER	_IOR('T', 0x41, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
 
 #define FIONCLEX	0x5450  /* these numbers need to be adjusted. */
 #define FIOCLEX		0x5451
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index bfd609a3e928f..e3b10469f7877 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -100,7 +100,7 @@
 #define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
-#define TIOCGPTPEER	_IOR('T', 0x41, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
 
 #define TIOCSERCONFIG	0x5453
 #define TIOCSERGWILD	0x5454
diff --git a/arch/sh/include/uapi/asm/ioctls.h b/arch/sh/include/uapi/asm/ioctls.h
index eec7901e9e658..787bac9f67da3 100644
--- a/arch/sh/include/uapi/asm/ioctls.h
+++ b/arch/sh/include/uapi/asm/ioctls.h
@@ -93,7 +93,7 @@
 #define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
-#define TIOCGPTPEER	_IOR('T', 0x41, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
 
 #define TIOCSERCONFIG	_IO('T', 83) /* 0x5453 */
 #define TIOCSERGWILD	_IOR('T', 84,  int) /* 0x5454 */
diff --git a/arch/sparc/include/uapi/asm/ioctls.h b/arch/sparc/include/uapi/asm/ioctls.h
index 6d27398632eac..f5df72b93bb27 100644
--- a/arch/sparc/include/uapi/asm/ioctls.h
+++ b/arch/sparc/include/uapi/asm/ioctls.h
@@ -88,7 +88,7 @@
 #define TIOCGPTN	_IOR('t', 134, unsigned int) /* Get Pty Number */
 #define TIOCSPTLCK	_IOW('t', 135, int) /* Lock/unlock PTY */
 #define TIOCSIG		_IOW('t', 136, int) /* Generate signal on Pty slave */
-#define TIOCGPTPEER	_IOR('t', 137, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('t', 137) /* Safely open the slave */
 
 /* Little f */
 #define FIOCLEX		_IO('f', 1)
diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h
index 98b004e24e852..47d82c09be7b0 100644
--- a/arch/xtensa/include/uapi/asm/ioctls.h
+++ b/arch/xtensa/include/uapi/asm/ioctls.h
@@ -105,7 +105,7 @@
 #define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
-#define TIOCGPTPEER	_IOR('T', 0x41, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
 
 #define TIOCSERCONFIG	_IO('T', 83)
 #define TIOCSERGWILD	_IOR('T', 84,  int)
diff --git a/include/uapi/asm-generic/ioctls.h b/include/uapi/asm-generic/ioctls.h
index 06d5f7ddf84e7..14baf9f23a14b 100644
--- a/include/uapi/asm-generic/ioctls.h
+++ b/include/uapi/asm-generic/ioctls.h
@@ -77,7 +77,7 @@
 #define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
-#define TIOCGPTPEER	_IOR('T', 0x41, int) /* Safely open the slave */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
 
 #define FIONCLEX	0x5450
 #define FIOCLEX		0x5451
-- 
GitLab


From 91647f4c2d66e16b30524613410a638c2c4532bf Mon Sep 17 00:00:00 2001
From: Dennis Dalessandro <dennis.dalessandro@intel.com>
Date: Mon, 29 May 2017 17:18:14 -0700
Subject: [PATCH 1753/1958] IB/hfi1: Ensure dd->gi_mask can not be overflowed

As the code stands today the array access in remap_intr() is OK. To
future proof the code though we should explicitly check to ensure the
index value is not outside of the valid range. This is not a straight
forward calculation so err on the side of caution.

Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/chip.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 2ba00b89df6a0..94b54850ec75b 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -12847,7 +12847,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
 	/* clear from the handled mask of the general interrupt */
 	m = isrc / 64;
 	n = isrc % 64;
-	dd->gi_mask[m] &= ~((u64)1 << n);
+	if (likely(m < CCE_NUM_INT_CSRS)) {
+		dd->gi_mask[m] &= ~((u64)1 << n);
+	} else {
+		dd_dev_err(dd, "remap interrupt err\n");
+		return;
+	}
 
 	/* direct the chip source to the given MSI-X interrupt */
 	m = isrc / 8;
-- 
GitLab


From 99975cd4fda52974a767aa44fe0b1a8f74950d9d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Mon, 24 Apr 2017 15:15:28 -0700
Subject: [PATCH 1754/1958] mlx5: Avoid that mlx5_ib_sg_to_klms() overflows the
 klms[] array

ib_map_mr_sg() can pass an SG-list to .map_mr_sg() that is larger
than what fits into a single MR. .map_mr_sg() must not attempt to
map more SG-list elements than what fits into a single MR.
Hence make sure that mlx5_ib_sg_to_klms() does not write outside
the MR klms[] array.

Fixes: b005d3164713 ("mlx5: Add arbitrary sg list support")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Leon Romanovsky <leonro@mellanox.com>
Cc: Israel Rukshin <israelr@mellanox.com>
Cc: <stable@vger.kernel.org>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 763bb5b36144b..2046a69874533 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1779,7 +1779,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
 	mr->ndescs = sg_nents;
 
 	for_each_sg(sgl, sg, sg_nents, i) {
-		if (unlikely(i > mr->max_descs))
+		if (unlikely(i >= mr->max_descs))
 			break;
 		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
 		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
-- 
GitLab


From 28b5b3a23ba67970f4f534b15c4e4d687136605a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 4 May 2017 20:38:20 -0500
Subject: [PATCH 1755/1958] RDMA/core: Document confusing code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While looking into Coverity ID 1351047 I ran into the following
piece of code at
drivers/infiniband/core/verbs.c:496:

ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
                                   ah_attr->dmac,
                                   wc->wc_flags & IB_WC_WITH_VLAN ?
                                   NULL : &vlan_id,
                                   &if_index, &hoplimit);

The issue here is that the position of arguments in the call to
rdma_addr_find_l2_eth_by_grh() function do not match the order of
the parameters:

&dgid is passed to sgid
&sgid is passed to dgid

This is the function prototype:

int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
 				 u8 *dmac, u16 *vlan_id, int *if_index,
 				 int *hoplimit)

My question here is if this is intentional?

Answer:
Yes. ib_init_ah_from_wc() creates ah from the incoming packet.
Incoming packet has dgid of the receiver node on which this code is
getting executed and sgid contains the GID of the sender.

When resolving mac address of destination, you use arrived dgid as
sgid and use sgid as dgid because sgid contains destinations GID whom to
respond to.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/verbs.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c973a83c898b4..47ee1f83c9a97 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -452,6 +452,19 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
 }
 EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
 
+/*
+ * This function creates ah from the incoming packet.
+ * Incoming packet has dgid of the receiver node on which this code is
+ * getting executed and, sgid contains the GID of the sender.
+ *
+ * When resolving mac address of destination, the arrived dgid is used
+ * as sgid and, sgid is used as dgid because sgid contains destinations
+ * GID whom to respond to.
+ *
+ * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the
+ * position of arguments dgid and sgid do not match the order of the
+ * parameters.
+ */
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 		       const struct ib_wc *wc, const struct ib_grh *grh,
 		       struct rdma_ah_attr *ah_attr)
-- 
GitLab


From c8c16d3bae967f1c7af541e8d016e5c51e4f010a Mon Sep 17 00:00:00 2001
From: Vladimir Neyelov <vladimirn@mellanox.com>
Date: Sun, 21 May 2017 19:17:31 +0300
Subject: [PATCH 1756/1958] IB/iser: Fix connection teardown race condition

Under heavy iser target(scst) start/stop stress during login/logout
on iser intitiator side happened trace call provided below.

The function iscsi_iser_slave_alloc iser_conn pointer could be NULL,
due to the fact that function iscsi_iser_conn_stop can be called before
and free iser connection. Let's protect that flow by introducing global mutex.

BUG: unable to handle kernel paging request at 0000000000001018
IP: [<ffffffffc0426f7e>] iscsi_iser_slave_alloc+0x1e/0x50 [ib_iser]
Call Trace:
? scsi_alloc_sdev+0x242/0x300
scsi_probe_and_add_lun+0x9e1/0xea0
? kfree_const+0x21/0x30
? kobject_set_name_vargs+0x76/0x90
? __pm_runtime_resume+0x5b/0x70
__scsi_scan_target+0xf6/0x250
scsi_scan_target+0xea/0x100
iscsi_user_scan_session.part.13+0x101/0x130 [scsi_transport_iscsi]
? iscsi_user_scan_session.part.13+0x130/0x130 [scsi_transport_iscsi]
iscsi_user_scan_session+0x1e/0x30 [scsi_transport_iscsi]
device_for_each_child+0x50/0x90
iscsi_user_scan+0x44/0x60 [scsi_transport_iscsi]
store_scan+0xa8/0x100
? common_file_perm+0x5d/0x1c0
dev_attr_store+0x18/0x30
sysfs_kf_write+0x37/0x40
kernfs_fop_write+0x12c/0x1c0
__vfs_write+0x18/0x40
vfs_write+0xb5/0x1a0
SyS_write+0x55/0xc0

Fixes: 318d311e8f01 ("iser: Accept arbitrary sg lists mapping if the device supports it")
Cc: <stable@vger.kernel.org> # v4.5+
Signed-off-by: Vladimir Neyelov <vladimirn@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimbeg.me>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 5a887efb4bdf1..37b33d708c2dd 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -83,6 +83,7 @@ static struct scsi_host_template iscsi_iser_sht;
 static struct iscsi_transport iscsi_iser_transport;
 static struct scsi_transport_template *iscsi_iser_scsi_transport;
 static struct workqueue_struct *release_wq;
+static DEFINE_MUTEX(unbind_iser_conn_mutex);
 struct iser_global ig;
 
 int iser_debug_level = 0;
@@ -550,12 +551,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 	 */
 	if (iser_conn) {
 		mutex_lock(&iser_conn->state_mutex);
+		mutex_lock(&unbind_iser_conn_mutex);
 		iser_conn_terminate(iser_conn);
 		iscsi_conn_stop(cls_conn, flag);
 
 		/* unbind */
 		iser_conn->iscsi_conn = NULL;
 		conn->dd_data = NULL;
+		mutex_unlock(&unbind_iser_conn_mutex);
 
 		complete(&iser_conn->stop_completion);
 		mutex_unlock(&iser_conn->state_mutex);
@@ -977,13 +980,21 @@ static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
 	struct iser_conn *iser_conn;
 	struct ib_device *ib_dev;
 
+	mutex_lock(&unbind_iser_conn_mutex);
+
 	session = starget_to_session(scsi_target(sdev))->dd_data;
 	iser_conn = session->leadconn->dd_data;
+	if (!iser_conn) {
+		mutex_unlock(&unbind_iser_conn_mutex);
+		return -ENOTCONN;
+	}
 	ib_dev = iser_conn->ib_conn.device->ib_device;
 
 	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
 		blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
 
+	mutex_unlock(&unbind_iser_conn_mutex);
+
 	return 0;
 }
 
-- 
GitLab


From bebb2a473a43c8f84a8210687d1cbdde503046d7 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 23 May 2017 10:48:44 +0300
Subject: [PATCH 1757/1958] IB/core: Namespace is mandatory input for address
 resolution

In function addr_resolve() the namespace is a required input parameter
and not an output. It is passed later for searching the routing table
and device addresses. Also, it shouldn't be copied back to the caller.

Fixes: 565edd1d5555 ('IB/addr: Pass network namespace as a parameter')
Cc: <stable@vger.kernel.org> # v4.3+
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/addr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index a6cb379a4ebcb..d78bc74bc9a96 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -509,6 +509,11 @@ static int addr_resolve(struct sockaddr *src_in,
 	struct dst_entry *dst;
 	int ret;
 
+	if (!addr->net) {
+		pr_warn_ratelimited("%s: missing namespace\n", __func__);
+		return -EINVAL;
+	}
+
 	if (src_in->sa_family == AF_INET) {
 		struct rtable *rt = NULL;
 		const struct sockaddr_in *dst_in4 =
@@ -546,7 +551,6 @@ static int addr_resolve(struct sockaddr *src_in,
 	}
 
 	addr->bound_dev_if = ndev->ifindex;
-	addr->net = dev_net(ndev);
 	dev_put(ndev);
 
 	return ret;
-- 
GitLab


From cbd09aebc2d62095b05797af5c9a315e3a71dcea Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 23 May 2017 10:48:45 +0300
Subject: [PATCH 1758/1958] IB/core: Don't resolve IP address to the loopback
 device

When resolving an IP address that is on the host of the caller the
result from querying the routing table is the loopback device. This is
not a valid response, because it doesn't represent the RDMA device and
the port.

Therefore, callers need to check the resolved device and if it is a
loopback device find an alternative way to resolve it. To avoid this we
make sure that the response from rdma_resolve_ip() will not be the
loopback device.

While that, we fix an static checker warning about dereferencing an
unintitialized pointer using the same solution as in commit abeffce90c7f
("net/mlx5e: Fix a -Wmaybe-uninitialized warning") as a reference.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/addr.c  | 40 ++++++++++++++++++++++++---------
 drivers/infiniband/core/cma.c   | 32 +++-----------------------
 drivers/infiniband/core/verbs.c |  5 -----
 3 files changed, 32 insertions(+), 45 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index d78bc74bc9a96..01236cef7bfb1 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -268,6 +268,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
 			return ret;
 
 		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		dev_addr->bound_dev_if = dev->ifindex;
 		if (vlan_id)
 			*vlan_id = rdma_vlan_dev_vlan_id(dev);
 		dev_put(dev);
@@ -280,6 +281,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
 					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
 					  dev, 1)) {
 				ret = rdma_copy_addr(dev_addr, dev, NULL);
+				dev_addr->bound_dev_if = dev->ifindex;
 				if (vlan_id)
 					*vlan_id = rdma_vlan_dev_vlan_id(dev);
 				break;
@@ -405,10 +407,10 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	fl4.saddr = src_ip;
 	fl4.flowi4_oif = addr->bound_dev_if;
 	rt = ip_route_output_key(addr->net, &fl4);
-	if (IS_ERR(rt)) {
-		ret = PTR_ERR(rt);
-		goto out;
-	}
+	ret = PTR_ERR_OR_ZERO(rt);
+	if (ret)
+		return ret;
+
 	src_in->sin_family = AF_INET;
 	src_in->sin_addr.s_addr = fl4.saddr;
 
@@ -423,8 +425,6 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 
 	*prt = rt;
 	return 0;
-out:
-	return ret;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -527,8 +527,12 @@ static int addr_resolve(struct sockaddr *src_in,
 		if (resolve_neigh)
 			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
 
-		ndev = rt->dst.dev;
-		dev_hold(ndev);
+		if (addr->bound_dev_if) {
+			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+		} else {
+			ndev = rt->dst.dev;
+			dev_hold(ndev);
+		}
 
 		ip_rt_put(rt);
 	} else {
@@ -544,13 +548,27 @@ static int addr_resolve(struct sockaddr *src_in,
 		if (resolve_neigh)
 			ret = addr_resolve_neigh(dst, dst_in, addr, seq);
 
-		ndev = dst->dev;
-		dev_hold(ndev);
+		if (addr->bound_dev_if) {
+			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+		} else {
+			ndev = dst->dev;
+			dev_hold(ndev);
+		}
 
 		dst_release(dst);
 	}
 
-	addr->bound_dev_if = ndev->ifindex;
+	if (ndev->flags & IFF_LOOPBACK) {
+		ret = rdma_translate_ip(dst_in, addr, NULL);
+		/*
+		 * Put the loopback device and get the translated
+		 * device instead.
+		 */
+		dev_put(ndev);
+		ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
+	} else {
+		addr->bound_dev_if = ndev->ifindex;
+	}
 	dev_put(ndev);
 
 	return ret;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 31bb82d8ecd7f..11aff923b633f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -623,22 +623,11 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
 	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
 		return ret;
 
-	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port))
 		ndev = dev_get_by_index(&init_net, bound_if_index);
-		if (ndev && ndev->flags & IFF_LOOPBACK) {
-			pr_info("detected loopback device\n");
-			dev_put(ndev);
-
-			if (!device->get_netdev)
-				return -EOPNOTSUPP;
-
-			ndev = device->get_netdev(device, port);
-			if (!ndev)
-				return -ENODEV;
-		}
-	} else {
+	else
 		gid_type = IB_GID_TYPE_IB;
-	}
+
 
 	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
 					 ndev, NULL);
@@ -2569,21 +2558,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 			goto err2;
 		}
 
-		if (ndev->flags & IFF_LOOPBACK) {
-			dev_put(ndev);
-			if (!id_priv->id.device->get_netdev) {
-				ret = -EOPNOTSUPP;
-				goto err2;
-			}
-
-			ndev = id_priv->id.device->get_netdev(id_priv->id.device,
-							      id_priv->id.port_num);
-			if (!ndev) {
-				ret = -ENODEV;
-				goto err2;
-			}
-		}
-
 		supported_gids = roce_gid_type_mask_support(id_priv->id.device,
 							    id_priv->id.port_num);
 		gid_type = cma_route_gid_type(addr->dev_addr.network,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 47ee1f83c9a97..644fa0d13f02c 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -520,11 +520,6 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 		}
 
 		resolved_dev = dev_get_by_index(&init_net, if_index);
-		if (resolved_dev->flags & IFF_LOOPBACK) {
-			dev_put(resolved_dev);
-			resolved_dev = idev;
-			dev_hold(resolved_dev);
-		}
 		rcu_read_lock();
 		if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
 								   resolved_dev))
-- 
GitLab


From 13c401f33e19c20431d9888a91d9ea82e5133bd9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 14 Jul 2017 23:03:49 -0700
Subject: [PATCH 1759/1958] jhash: fix -Wimplicit-fallthrough warnings

GCC 7 added a new -Wimplicit-fallthrough warning.  It's only enabled
with W=1, but since linux/jhash.h is included in over hundred places
(including other global headers) it seems worthwhile fixing this
warning.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jhash.h | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index 348c6f47e4cc3..8037850f31041 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -85,19 +85,18 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
 		k += 12;
 	}
 	/* Last block: affect all 32 bits of (c) */
-	/* All the case statements fall through */
 	switch (length) {
-	case 12: c += (u32)k[11]<<24;
-	case 11: c += (u32)k[10]<<16;
-	case 10: c += (u32)k[9]<<8;
-	case 9:  c += k[8];
-	case 8:  b += (u32)k[7]<<24;
-	case 7:  b += (u32)k[6]<<16;
-	case 6:  b += (u32)k[5]<<8;
-	case 5:  b += k[4];
-	case 4:  a += (u32)k[3]<<24;
-	case 3:  a += (u32)k[2]<<16;
-	case 2:  a += (u32)k[1]<<8;
+	case 12: c += (u32)k[11]<<24;	/* fall through */
+	case 11: c += (u32)k[10]<<16;	/* fall through */
+	case 10: c += (u32)k[9]<<8;	/* fall through */
+	case 9:  c += k[8];		/* fall through */
+	case 8:  b += (u32)k[7]<<24;	/* fall through */
+	case 7:  b += (u32)k[6]<<16;	/* fall through */
+	case 6:  b += (u32)k[5]<<8;	/* fall through */
+	case 5:  b += k[4];		/* fall through */
+	case 4:  a += (u32)k[3]<<24;	/* fall through */
+	case 3:  a += (u32)k[2]<<16;	/* fall through */
+	case 2:  a += (u32)k[1]<<8;	/* fall through */
 	case 1:  a += k[0];
 		 __jhash_final(a, b, c);
 	case 0: /* Nothing left to add */
@@ -131,10 +130,10 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
 		k += 3;
 	}
 
-	/* Handle the last 3 u32's: all the case statements fall through */
+	/* Handle the last 3 u32's */
 	switch (length) {
-	case 3: c += k[2];
-	case 2: b += k[1];
+	case 3: c += k[2];	/* fall through */
+	case 2: b += k[1];	/* fall through */
 	case 1: a += k[0];
 		__jhash_final(a, b, c);
 	case 0:	/* Nothing left to add */
-- 
GitLab


From eff793687792d3eed594d147aceef2000fb9ca3d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trondmy@primarydata.com>
Date: Sun, 16 Jul 2017 22:57:27 +0000
Subject: [PATCH 1760/1958] nfsd: Fix a memory scribble in the callback channel

The offset of the entry in struct rpc_version has to match the version
number.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Fixes: 1c5876ddbdb4 ("sunrpc: move p_count out of struct rpc_procinfo")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4callback.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index b45083c0f9ae8..49b0a9e7ff18b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -720,8 +720,8 @@ static const struct rpc_version nfs_cb_version4 = {
 	.counts			= nfs4_cb_counts,
 };
 
-static const struct rpc_version *nfs_cb_version[] = {
-	&nfs_cb_version4,
+static const struct rpc_version *nfs_cb_version[2] = {
+	[1] = &nfs_cb_version4,
 };
 
 static const struct rpc_program cb_program;
@@ -795,7 +795,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		.saddress	= (struct sockaddr *) &conn->cb_saddr,
 		.timeout	= &timeparms,
 		.program	= &cb_program,
-		.version	= 0,
+		.version	= 1,
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
 	};
 	struct rpc_clnt *client;
-- 
GitLab


From cb0fbbf22aa1e0ffcef3f4a055b75a5b17e01ca5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 16 Jul 2017 20:26:28 +0200
Subject: [PATCH 1761/1958] Blackfin: flat: Use %x to format u32

Several variables had their types changed from unsigned long to u32,
but the printk()-style format to print them wasn't updated, leading to:

    arch/blackfin/kernel/flat.c: In function 'bfin_get_addr_from_rp':
    arch/blackfin/kernel/flat.c:35:3: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'u32' [-Wformat]
    arch/blackfin/kernel/flat.c: In function 'bfin_put_addr_at_rp':
    arch/blackfin/kernel/flat.c:80:3: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'u32' [-Wformat]

Fixes: 468138d78510688f ("binfmt_flat: flat_{get,put}_addr_from_rp() should be able to fail")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/blackfin/kernel/flat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/blackfin/kernel/flat.c b/arch/blackfin/kernel/flat.c
index d29ab6a2e909f..8ebc54daaa8e9 100644
--- a/arch/blackfin/kernel/flat.c
+++ b/arch/blackfin/kernel/flat.c
@@ -32,7 +32,7 @@ unsigned long bfin_get_addr_from_rp(u32 *ptr,
 		break;
 
 	case FLAT_BFIN_RELOC_TYPE_32_BIT:
-		pr_debug("*ptr = %lx", get_unaligned(ptr));
+		pr_debug("*ptr = %x", get_unaligned(ptr));
 		val = get_unaligned(ptr);
 		break;
 
@@ -77,7 +77,7 @@ void bfin_put_addr_at_rp(u32 *ptr, u32 addr, u32 relval)
 
 	case FLAT_BFIN_RELOC_TYPE_32_BIT:
 		put_unaligned(addr, ptr);
-		pr_debug("new ptr =%lx", get_unaligned(ptr));
+		pr_debug("new ptr =%x", get_unaligned(ptr));
 		break;
 	}
 }
-- 
GitLab


From 4e3f0701f25ab194c5362576b1146a1e6cc6c2e7 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Fri, 7 Jul 2017 17:44:26 -0600
Subject: [PATCH 1762/1958] libnvdimm: fix badblock range handling of ARS range

__add_badblock_range() does not account sector alignment when
it sets 'num_sectors'.  Therefore, an ARS error record range
spanning across two sectors is set to a single sector length,
which leaves the 2nd sector unprotected.

Change __add_badblock_range() to set 'num_sectors' properly.

Cc: <stable@vger.kernel.org>
Fixes: 0caeef63e6d2 ("libnvdimm: Add a poison list and export badblocks")
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 7cd99b1f8596b..75bc08c6838cc 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -421,14 +421,15 @@ static void set_badblock(struct badblocks *bb, sector_t s, int num)
 static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
 {
 	const unsigned int sector_size = 512;
-	sector_t start_sector;
+	sector_t start_sector, end_sector;
 	u64 num_sectors;
 	u32 rem;
 
 	start_sector = div_u64(ns_offset, sector_size);
-	num_sectors = div_u64_rem(len, sector_size, &rem);
+	end_sector = div_u64_rem(ns_offset + len, sector_size, &rem);
 	if (rem)
-		num_sectors++;
+		end_sector++;
+	num_sectors = end_sector - start_sector;
 
 	if (unlikely(num_sectors > (u64)INT_MAX)) {
 		u64 remaining = num_sectors;
-- 
GitLab


From 43fe51e11c194a6576634585f81ba33e104194a5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 12 Jul 2017 13:42:37 -0700
Subject: [PATCH 1763/1958] device-dax: fix 'passing zero to ERR_PTR()' warning

Dan Carpenter reports:

    The patch 7b6be8444e0f: "dax: refactor dax-fs into a generic provider
    of 'struct dax_device' instances" from Apr 11, 2017, leads to the
    following static checker warning:

        drivers/dax/device.c:643 devm_create_dev_dax()
        warn: passing zero to 'ERR_PTR'

Fix the case where we inadvertently leak 0 to ERR_PTR() by setting at
every error case, and make it clear that 'count' is never 0.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/device.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 12943d19bfc43..44d72e5e64cc2 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -567,7 +567,10 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
 	struct inode *inode;
 	struct device *dev;
 	struct cdev *cdev;
-	int rc = 0, i;
+	int rc, i;
+
+	if (!count)
+		return ERR_PTR(-EINVAL);
 
 	dev_dax = kzalloc(sizeof(*dev_dax) + sizeof(*res) * count, GFP_KERNEL);
 	if (!dev_dax)
@@ -598,8 +601,10 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
 	 * device outside of mmap of the resulting character device.
 	 */
 	dax_dev = alloc_dax(dev_dax, NULL, NULL);
-	if (!dax_dev)
+	if (!dax_dev) {
+		rc = -ENOMEM;
 		goto err_dax;
+	}
 
 	/* from here on we're committed to teardown via dax_dev_release() */
 	dev = &dev_dax->dev;
-- 
GitLab


From 7e700d2c59e5853c9126642976b4f5768f64c9b3 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Wed, 31 May 2017 13:32:00 -0400
Subject: [PATCH 1764/1958] acpi/nfit: Fix memory corruption/Unregister mce
 decoder on failure

nfit_init() calls nfit_mce_register() on module load.  When the module
load fails the nfit mce decoder is not unregistered.  The module's
memory is freed leaving the decoder chain referencing junk.  This will
cause panics as future registrations will reference the free'd memory.

Unregister the nfit mce decoder on module init failure.

[v2]: register and then unregister mce handler to avoid losing mce events
[v3]: also cleanup nfit workqueue

Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
Cc: <stable@vger.kernel.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: "Lee, Chun-Yi" <joeyli.kernel@gmail.com>
Cc: Linda Knippers <linda.knippers@hpe.com>
Cc: lszubowi@redhat.com
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index b75b734ee73ad..19182d0915873 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3160,6 +3160,8 @@ static struct acpi_driver acpi_nfit_driver = {
 
 static __init int nfit_init(void)
 {
+	int ret;
+
 	BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
@@ -3187,8 +3189,14 @@ static __init int nfit_init(void)
 		return -ENOMEM;
 
 	nfit_mce_register();
+	ret = acpi_bus_register_driver(&acpi_nfit_driver);
+	if (ret) {
+		nfit_mce_unregister();
+		destroy_workqueue(nfit_wq);
+	}
+
+	return ret;
 
-	return acpi_bus_register_driver(&acpi_nfit_driver);
 }
 
 static __exit void nfit_exit(void)
-- 
GitLab


From 23b9babb50ff5ac8eb9208b978b2a630e99bf90b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 17 Jul 2017 09:58:51 -0700
Subject: [PATCH 1765/1958] MAINTAINERS: list drivers/acpi/nfit/ files for
 libnvdimm sub-system

Patches that update the drivers/acpi/nfit/ directory need to be copied
to the nvdimm mailing list. The drivers/acpi/nfit* glob has been broken
ever since the nfit driver source was refactored into multiple files
under the drivers/acpi/nfit/ directory.

Reported-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 205d3977ac46e..0e55c91fcd12a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7730,6 +7730,7 @@ Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
 S:	Supported
 F:	drivers/nvdimm/*
+F:	drivers/acpi/nfit/*
 F:	include/linux/nd.h
 F:	include/linux/libnvdimm.h
 F:	include/uapi/linux/ndctl.h
@@ -7741,7 +7742,6 @@ Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/blk.c
 F:	drivers/nvdimm/region_devs.c
-F:	drivers/acpi/nfit*
 
 LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
 M:	Vishal Verma <vishal.l.verma@intel.com>
-- 
GitLab


From a696712c3dd54eb58d2c5a807b4aaa27782d80d6 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 17 Jul 2017 19:47:02 +0200
Subject: [PATCH 1766/1958] genirq/PM: Properly pretend disabled state when
 force resuming interrupts

Interrupts with the IRQF_FORCE_RESUME flag set have also the
IRQF_NO_SUSPEND flag set. They are not disabled in the suspend path, but
must be forcefully resumed. That's used by XEN to keep IPIs enabled beyond
the suspension of device irqs. Force resume works by pretending that the
interrupt was disabled and then calling __irq_enable().

Incrementing the disabled depth counter was enough to do that, but with the
recent changes which use state flags to avoid unnecessary hardware access,
this is not longer sufficient. If the state flags are not set, then the
hardware callbacks are not invoked and the interrupt line stays disabled in
"hardware".

Set the disabled and masked state when pretending that an interrupt got
disabled by suspend.

Fixes: bf22ff45bed6 ("genirq: Avoid unnecessary low level irq function calls")
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Cc: boris.ostrovsky@oracle.com
Link: http://lkml.kernel.org/r/20170717174703.4603-2-jgross@suse.com
---
 kernel/irq/chip.c      | 10 ----------
 kernel/irq/internals.h | 10 ++++++++++
 kernel/irq/pm.c        |  2 ++
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d171bc57e1e01..a3cc37c0c85e2 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -170,21 +170,11 @@ static void irq_state_clr_disabled(struct irq_desc *desc)
 	irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED);
 }
 
-static void irq_state_set_disabled(struct irq_desc *desc)
-{
-	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
-}
-
 static void irq_state_clr_masked(struct irq_desc *desc)
 {
 	irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
-static void irq_state_set_masked(struct irq_desc *desc)
-{
-	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
-}
-
 static void irq_state_clr_started(struct irq_desc *desc)
 {
 	irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index dbfba9933ed25..a2c48058354c8 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -227,6 +227,16 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
 	return __irqd_to_state(d) & mask;
 }
 
+static inline void irq_state_set_disabled(struct irq_desc *desc)
+{
+	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
+}
+
+static inline void irq_state_set_masked(struct irq_desc *desc)
+{
+	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
+}
+
 #undef __irqd_to_state
 
 static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index cea1de0161f18..6bd9b58429ccb 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -149,6 +149,8 @@ static void resume_irq(struct irq_desc *desc)
 
 	/* Pretend that it got disabled ! */
 	desc->depth++;
+	irq_state_set_disabled(desc);
+	irq_state_set_masked(desc);
 resume:
 	desc->istate &= ~IRQS_SUSPENDED;
 	__enable_irq(desc);
-- 
GitLab


From df39a9f106d53532443a804352894480ca6ca5fd Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 17 Jul 2017 11:42:55 -0700
Subject: [PATCH 1767/1958] bpf: check NULL for sk_to_full_sk() return value

When req->rsk_listener is NULL, sk_to_full_sk() returns
NULL too, so we have to check its return value against
NULL here.

Fixes: 40304b2a1567 ("bpf: BPF support for sock_ops")
Reported-by: David Ahern <dsahern@gmail.com>
Tested-by: David Ahern <dsahern@gmail.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 360c082e885c7..d41d40ac3efdb 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -85,7 +85,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled && (sock_ops)->sk) {	       \
 		typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk);	       \
-		if (sk_fullsock(__sk))					       \
+		if (__sk && sk_fullsock(__sk))				       \
 			__ret = __cgroup_bpf_run_filter_sock_ops(__sk,	       \
 								 sock_ops,     \
 							 BPF_CGROUP_SOCK_OPS); \
-- 
GitLab


From 32f2fea6e77e64cd4045ec2d5deb879aada3b476 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Mon, 17 Jul 2017 21:00:44 +0300
Subject: [PATCH 1768/1958] clocksource/drivers/timer-of: Handle
 of_irq_get_byname() result correctly

of_irq_get_byname() may return a negative error number as well as 0 on
failure, while timer_irq_init() only checks for 0, blithely continuing with
the call to request_[percpu_]irq() -- those functions expect *unsigned int*,
so would probably fail anyway when a large IRQ number resulting from a
conversion of a negative error number is passed to them... This, however,
is incorrect behavior -- error number is not IRQ number.

Filter out the negative error numbers, complain, and return them to the
timer_irq_init()'s callers...

Fixes: dc11bae78529 ("clocksource/drivers: Add timer-of common init routine")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: http://lkml.kernel.org/r/20170717180114.678825147@cogentembedded.com
---
 drivers/clocksource/timer-of.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c
index f6e7491c873cd..d509b500a7b5f 100644
--- a/drivers/clocksource/timer-of.c
+++ b/drivers/clocksource/timer-of.c
@@ -41,8 +41,16 @@ static __init int timer_irq_init(struct device_node *np,
 	struct timer_of *to = container_of(of_irq, struct timer_of, of_irq);
 	struct clock_event_device *clkevt = &to->clkevt;
 
-	of_irq->irq = of_irq->name ? of_irq_get_byname(np, of_irq->name):
-		irq_of_parse_and_map(np, of_irq->index);
+	if (of_irq->name) {
+		of_irq->irq = ret = of_irq_get_byname(np, of_irq->name);
+		if (ret < 0) {
+			pr_err("Failed to get interrupt %s for %s\n",
+			       of_irq->name, np->full_name);
+			return ret;
+		}
+	} else	{
+		of_irq->irq = irq_of_parse_and_map(np, of_irq->index);
+	}
 	if (!of_irq->irq) {
 		pr_err("Failed to map interrupt for %s\n", np->full_name);
 		return -EINVAL;
-- 
GitLab


From a512c2fbef9c700ee1ee0e045b75e140fef8f5ee Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 23 May 2017 11:26:08 +0300
Subject: [PATCH 1769/1958] IB/core: Introduce modify QP operation with udata

This patch adds new function ib_modify_qp_with_udata so that
uverbs layer can avoid handling L2 mac address at verbs layer
and depend on the core layer to resolve the mac address consistently
for all required QPs.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/verbs.c | 32 ++++++++++++++++++++++++--------
 include/rdma/ib_verbs.h         | 16 ++++++++++++++++
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 644fa0d13f02c..7f8fe443df46f 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1276,20 +1276,36 @@ int ib_resolve_eth_dmac(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_resolve_eth_dmac);
 
-int ib_modify_qp(struct ib_qp *qp,
-		 struct ib_qp_attr *qp_attr,
-		 int qp_attr_mask)
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify.  On output,
+ *   the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ *   are being modified.
+ * @udata: pointer to user's input output buffer information
+ *   are being modified.
+ * It returns 0 on success and returns appropriate error code on error.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
+			    int attr_mask, struct ib_udata *udata)
 {
+	int ret;
 
-	if (qp_attr_mask & IB_QP_AV) {
-		int ret;
-
-		ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+	if (attr_mask & IB_QP_AV) {
+		ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
 		if (ret)
 			return ret;
 	}
+	return ib_security_modify_qp(qp, attr, attr_mask, udata);
+}
+EXPORT_SYMBOL(ib_modify_qp_with_udata);
 
-	return ib_security_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
+int ib_modify_qp(struct ib_qp *qp,
+		 struct ib_qp_attr *qp_attr,
+		 int qp_attr_mask)
+{
+	return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 356953d3dbd18..6e62c9e2c971a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2947,6 +2947,22 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
 struct ib_qp *ib_create_qp(struct ib_pd *pd,
 			   struct ib_qp_init_attr *qp_init_attr);
 
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify.  On output,
+ *   the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ *   are being modified.
+ * @udata: pointer to user's input output buffer information
+ *   are being modified.
+ * It returns 0 on success and returns appropriate error code on error.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *qp,
+			    struct ib_qp_attr *attr,
+			    int attr_mask,
+			    struct ib_udata *udata);
+
 /**
  * ib_modify_qp - Modifies the attributes for the specified QP and then
  *   transitions the QP to the given state.
-- 
GitLab


From f7c8f2e9ddc71db0ae344f3ffb19df03ef32b719 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 23 May 2017 11:26:09 +0300
Subject: [PATCH 1770/1958] IB/uverbs: Make use of ib_modify_qp variant to
 avoid resolving DMAC

This patch makes use of IB core's ib_modify_qp_with_udata function that
also resolves the DMAC and handles udata.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 8ba9bfb073d17..3f55d18a37914 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2005,28 +2005,13 @@ static int modify_qp(struct ib_uverbs_file *file,
 	rdma_ah_set_port_num(&attr->alt_ah_attr,
 			     cmd->base.alt_dest.port_num);
 
-	if (qp->real_qp == qp) {
-		if (cmd->base.attr_mask & IB_QP_AV) {
-			ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
-			if (ret)
-				goto release_qp;
-		}
-		ret = ib_security_modify_qp(qp,
-					    attr,
-					    modify_qp_mask(qp->qp_type,
-							   cmd->base.attr_mask),
-					    udata);
-	} else {
-		ret = ib_security_modify_qp(qp,
-					    attr,
-					    modify_qp_mask(qp->qp_type,
-							   cmd->base.attr_mask),
-					    NULL);
-	}
+	ret = ib_modify_qp_with_udata(qp, attr,
+				      modify_qp_mask(qp->qp_type,
+						     cmd->base.attr_mask),
+				      udata);
 
 release_qp:
 	uobj_put_obj_read(qp);
-
 out:
 	kfree(attr);
 
-- 
GitLab


From 98e77d9fd7dff05019436370e78c3ec0f9894e25 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 May 2017 11:29:42 +0300
Subject: [PATCH 1771/1958] IB: Convert msleep below 20ms to usleep_range

The msleep(1) may do not sleep 1 ms as expected
and will sleep longer. The simple conversion from
msleep to usleep_range between 1ms and 2ms can solve an
issue.

The full and comprehensive explanation can be found at [1] and [2].

[1] https://lkml.org/lkml/2007/8/3/250
[2] Documentation/timers/timers-howto.txt

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3 ++-
 drivers/infiniband/hw/mlx4/main.c          | 2 +-
 drivers/infiniband/hw/mlx4/mcg.c           | 2 +-
 drivers/infiniband/hw/nes/nes_hw.c         | 4 ++--
 drivers/infiniband/ulp/ipoib/ipoib_cm.c    | 4 ++--
 drivers/infiniband/ulp/ipoib/ipoib_ib.c    | 2 +-
 6 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 37d5d29597a44..729f8cc8738be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -995,7 +995,8 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
 			goto free_work;
 		}
 		ne -= ret;
-		msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+		usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000,
+			     (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000);
 	} while (ne && time_before_eq(jiffies, end));
 
 	if (ne != 0)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 75b2f7d4cd955..d1b43cbbfea77 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1155,7 +1155,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 			 * call to mlx4_ib_vma_close.
 			 */
 			put_task_struct(owning_process);
-			msleep(1);
+			usleep_range(1000, 2000);
 			owning_process = get_pid_task(ibcontext->tgid,
 						      PIDTYPE_PID);
 			if (!owning_process ||
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 3405e947dc1eb..b73f89700ef9e 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -1091,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
 		if (!count)
 			break;
 
-		msleep(1);
+		usleep_range(1000, 2000);
 	} while (time_after(end, jiffies));
 
 	flush_workqueue(ctx->mcg_wq);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 8f9d8b4ad5839..b0adf65e4bdbf 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -551,7 +551,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
 			if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
 			    || (0x0F000100 == (pcs_control_status1 & 0x0F000100)))
 				int_cnt++;
-			msleep(1);
+			usleep_range(1000, 2000);
 		}
 		if (int_cnt > 1) {
 			spin_lock_irqsave(&nesadapter->phy_lock, flags);
@@ -592,7 +592,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
 						break;
 					}
 				}
-				msleep(1);
+				usleep_range(1000, 2000);
 			}
 		}
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 7cbcfdac6529c..d574d41bdf61c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -954,7 +954,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
 			break;
 		}
 		spin_unlock_irq(&priv->lock);
-		msleep(1);
+		usleep_range(1000, 2000);
 		ipoib_drain_cq(dev);
 		spin_lock_irq(&priv->lock);
 	}
@@ -1206,7 +1206,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 				goto timeout;
 			}
 
-			msleep(1);
+			usleep_range(1000, 2000);
 		}
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index efe7402f48852..57a9655e844de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -770,7 +770,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
 
 		ipoib_drain_cq(dev);
 
-		msleep(1);
+		usleep_range(1000, 2000);
 	}
 
 	ipoib_dbg(priv, "All sends and receives done.\n");
-- 
GitLab


From ed7b521d8a98c3371e3c9300df8bf3cb774d8ea6 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Tue, 23 May 2017 11:42:52 +0300
Subject: [PATCH 1772/1958] IB/IPoIB: Forward MTU change to driver below

This patch checks if there is a driver below that
needs to be updated on the new MTU and calls it
accordingly.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Reviewed by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6e86eeee370e8..3e2b7988ead81 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -233,6 +233,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
 static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	int ret = 0;
 
 	/* dev->mtu > 2K ==> connected mode */
 	if (ipoib_cm_admin_enabled(dev)) {
@@ -256,9 +257,23 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 		ipoib_dbg(priv, "MTU must be smaller than the underlying "
 				"link layer MTU - 4 (%u)\n", priv->mcast_mtu);
 
-	dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+	new_mtu = min(priv->mcast_mtu, priv->admin_mtu);
 
-	return 0;
+	if (priv->rn_ops->ndo_change_mtu) {
+		bool carrier_status = netif_carrier_ok(dev);
+
+		netif_carrier_off(dev);
+
+		/* notify lower level on the real mtu */
+		ret = priv->rn_ops->ndo_change_mtu(dev, new_mtu);
+
+		if (carrier_status)
+			netif_carrier_on(dev);
+	} else {
+		dev->mtu = new_mtu;
+	}
+
+	return ret;
 }
 
 /* Called with an RCU read lock taken */
-- 
GitLab


From d83187dda9b930dc268ab05da265f3d5d7eca451 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 May 2017 14:38:13 +0300
Subject: [PATCH 1773/1958] IB/IPoIB: Convert IPoIB to memalloc_noio_* calls

Commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O
during memory allocation") added the memalloc_noio_(save|restore) functions
to enable people to modify the MM behavior by disabling I/O during memory
allocation. This was further extended in Fixes: 934f3072c17c ("mm: clear
__GFP_FS when PF_MEMALLOC_NOIO is set"). memalloc_noio_* functions prevent
allocation paths recursing back into the filesystem without explicitly
changing the flags for every allocation site.

However the IPoIB hasn't been keeping up with the changes and missed
completely these memalloc_noio_* calls. This led to update of
allocation site with special QP creation flag, see commit 09b93088d750
("IB: Add a QP creation flag to use GFP_NOIO allocations"), while this
flag is supported by small number of drivers in IB stack.

Let's change it by updating to memalloc_noio_* calls and allow
for every driver underneath enjoy NOIO allocations.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index d574d41bdf61c..f87d104837dcf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -39,6 +39,7 @@
 #include <linux/vmalloc.h>
 #include <linux/moduleparam.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
 
 #include "ipoib.h"
 
@@ -1047,9 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 		.sq_sig_type		= IB_SIGNAL_ALL_WR,
 		.qp_type		= IB_QPT_RC,
 		.qp_context		= tx,
-		.create_flags		= IB_QP_CREATE_USE_GFP_NOIO
+		.create_flags		= 0
 	};
-
 	struct ib_qp *tx_qp;
 
 	if (dev->features & NETIF_F_SG)
@@ -1057,10 +1057,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 			min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
 
 	tx_qp = ib_create_qp(priv->pd, &attr);
-	if (PTR_ERR(tx_qp) == -EINVAL) {
-		attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
-		tx_qp = ib_create_qp(priv->pd, &attr);
-	}
 	tx->max_send_sge = attr.cap.max_send_sge;
 	return tx_qp;
 }
@@ -1131,10 +1127,11 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 			    struct sa_path_rec *pathrec)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
+	unsigned int noio_flag;
 	int ret;
 
-	p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
-			       GFP_NOIO, PAGE_KERNEL);
+	noio_flag = memalloc_noio_save();
+	p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring));
 	if (!p->tx_ring) {
 		ret = -ENOMEM;
 		goto err_tx;
@@ -1142,9 +1139,10 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 	memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
 
 	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
+	memalloc_noio_restore(noio_flag);
 	if (IS_ERR(p->qp)) {
 		ret = PTR_ERR(p->qp);
-		ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
+		ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
 		goto err_qp;
 	}
 
-- 
GitLab


From 0f4d027c3b4240ecb314daa948238d459fdc3a00 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 May 2017 14:38:14 +0300
Subject: [PATCH 1774/1958] IB/{rdmavt, qib, hfi1}: Remove gfp flags argument

The caller to the driver marks GFP_NOIO allocations with help
of memalloc_noio-* calls now. This makes redundant to pass down
to the driver gfp flags, which can be GFP_KERNEL only.

The patch removes the gfp flags argument and updates all driver paths.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Acked-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/qp.c       |  7 ++--
 drivers/infiniband/hw/hfi1/qp.h       |  3 +-
 drivers/infiniband/hw/qib/qib_qp.c    | 15 ++++-----
 drivers/infiniband/hw/qib/qib_verbs.h |  4 +--
 drivers/infiniband/sw/rdmavt/qp.c     | 48 ++++++++-------------------
 include/rdma/rdma_vt.h                |  5 ++-
 6 files changed, 28 insertions(+), 54 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 650305cc03730..1a7af9f60c137 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -647,18 +647,17 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 		   qp->pid);
 }
 
-void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
-		    gfp_t gfp)
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv;
 
-	priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
+	priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
 	if (!priv)
 		return ERR_PTR(-ENOMEM);
 
 	priv->owner = qp;
 
-	priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp,
+	priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
 				   rdi->dparms.node);
 	if (!priv->s_ahg) {
 		kfree(priv);
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 1eb9cd7b8c197..6fe542b6a9275 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -123,8 +123,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp);
 /*
  * Functions provided by hfi1 driver for rdmavt to use
  */
-void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
-		    gfp_t gfp);
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 unsigned free_all_qps(struct rvt_dev_info *rdi);
 void notify_qp_reset(struct rvt_qp *qp);
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 5984981e7dd41..a343e3b5d4cbf 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -104,10 +104,9 @@ const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = {
 
 };
 
-static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
-			 gfp_t gfp)
+static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map)
 {
-	unsigned long page = get_zeroed_page(gfp);
+	unsigned long page = get_zeroed_page(GFP_KERNEL);
 
 	/*
 	 * Free the page if someone raced with us installing it.
@@ -126,7 +125,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
  */
 int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
-		  enum ib_qp_type type, u8 port, gfp_t gfp)
+		  enum ib_qp_type type, u8 port)
 {
 	u32 i, offset, max_scan, qpn;
 	struct rvt_qpn_map *map;
@@ -160,7 +159,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 	max_scan = qpt->nmaps - !offset;
 	for (i = 0;;) {
 		if (unlikely(!map->page)) {
-			get_map_page(qpt, map, gfp);
+			get_map_page(qpt, map);
 			if (unlikely(!map->page))
 				break;
 		}
@@ -317,16 +316,16 @@ u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
 	return ib_mtu_enum_to_int(pmtu);
 }
 
-void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp)
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
 	struct qib_qp_priv *priv;
 
-	priv = kzalloc(sizeof(*priv), gfp);
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return ERR_PTR(-ENOMEM);
 	priv->owner = qp;
 
-	priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp);
+	priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), GFP_KERNEL);
 	if (!priv->s_hdr) {
 		kfree(priv);
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index da0db5485ddc7..a52fc67b40d73 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -274,11 +274,11 @@ int qib_get_counters(struct qib_pportdata *ppd,
  * Functions provided by qib driver for rdmavt to use
  */
 unsigned qib_free_all_qps(struct rvt_dev_info *rdi);
-void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp);
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 void qib_notify_qp_reset(struct rvt_qp *qp);
 int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
-		  enum ib_qp_type type, u8 port, gfp_t gfp);
+		  enum ib_qp_type type, u8 port);
 void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 727e81cc2c8f6..459865439a0bf 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -118,10 +118,9 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
 EXPORT_SYMBOL(ib_rvt_state_ops);
 
 static void get_map_page(struct rvt_qpn_table *qpt,
-			 struct rvt_qpn_map *map,
-			 gfp_t gfp)
+			 struct rvt_qpn_map *map)
 {
-	unsigned long page = get_zeroed_page(gfp);
+	unsigned long page = get_zeroed_page(GFP_KERNEL);
 
 	/*
 	 * Free the page if someone raced with us installing it.
@@ -173,7 +172,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
 		    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
 	for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
 		if (!map->page) {
-			get_map_page(qpt, map, GFP_KERNEL);
+			get_map_page(qpt, map);
 			if (!map->page) {
 				ret = -ENOMEM;
 				break;
@@ -342,14 +341,14 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
  * Return: The queue pair number
  */
 static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
-		     enum ib_qp_type type, u8 port_num, gfp_t gfp)
+		     enum ib_qp_type type, u8 port_num)
 {
 	u32 i, offset, max_scan, qpn;
 	struct rvt_qpn_map *map;
 	u32 ret;
 
 	if (rdi->driver_f.alloc_qpn)
-		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);
+		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
 
 	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 		unsigned n;
@@ -374,7 +373,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 	max_scan = qpt->nmaps - !offset;
 	for (i = 0;;) {
 		if (unlikely(!map->page)) {
-			get_map_page(qpt, map, gfp);
+			get_map_page(qpt, map);
 			if (unlikely(!map->page))
 				break;
 		}
@@ -672,7 +671,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 	struct ib_qp *ret = ERR_PTR(-ENOMEM);
 	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
 	void *priv = NULL;
-	gfp_t gfp;
 	size_t sqsize;
 
 	if (!rdi)
@@ -680,18 +678,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 
 	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
 	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
-	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+	    init_attr->create_flags)
 		return ERR_PTR(-EINVAL);
 
-	/* GFP_NOIO is applicable to RC QP's only */
-
-	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
-	    init_attr->qp_type != IB_QPT_RC)
-		return ERR_PTR(-EINVAL);
-
-	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
-						GFP_NOIO : GFP_KERNEL;
-
 	/* Check receive queue parameters if no SRQ is specified. */
 	if (!init_attr->srq) {
 		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
@@ -719,14 +708,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		sz = sizeof(struct rvt_sge) *
 			init_attr->cap.max_send_sge +
 			sizeof(struct rvt_swqe);
-		if (gfp == GFP_NOIO)
-			swq = __vmalloc(
-				sqsize * sz,
-				gfp | __GFP_ZERO, PAGE_KERNEL);
-		else
-			swq = vzalloc_node(
-				sqsize * sz,
-				rdi->dparms.node);
+		swq = vzalloc_node(sqsize * sz, rdi->dparms.node);
 		if (!swq)
 			return ERR_PTR(-ENOMEM);
 
@@ -741,7 +723,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		} else if (init_attr->cap.max_recv_sge > 1)
 			sg_list_sz = sizeof(*qp->r_sg_list) *
 				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
+		qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
+				  rdi->dparms.node);
 		if (!qp)
 			goto bail_swq;
 
@@ -751,7 +734,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 				kzalloc_node(
 					sizeof(*qp->s_ack_queue) *
 					 rvt_max_atomic(rdi),
-					gfp,
+					GFP_KERNEL,
 					rdi->dparms.node);
 			if (!qp->s_ack_queue)
 				goto bail_qp;
@@ -766,7 +749,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		 * Driver needs to set up it's private QP structure and do any
 		 * initialization that is needed.
 		 */
-		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
+		priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
 		if (IS_ERR(priv)) {
 			ret = priv;
 			goto bail_qp;
@@ -786,11 +769,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 				qp->r_rq.wq = vmalloc_user(
 						sizeof(struct rvt_rwq) +
 						qp->r_rq.size * sz);
-			else if (gfp == GFP_NOIO)
-				qp->r_rq.wq = __vmalloc(
-						sizeof(struct rvt_rwq) +
-						qp->r_rq.size * sz,
-						gfp | __GFP_ZERO, PAGE_KERNEL);
 			else
 				qp->r_rq.wq = vzalloc_node(
 						sizeof(struct rvt_rwq) +
@@ -824,7 +802,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 
 		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
 				init_attr->qp_type,
-				init_attr->port_num, gfp);
+				init_attr->port_num);
 		if (err < 0) {
 			ret = ERR_PTR(err);
 			goto bail_rq_wq;
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 4878aaf7bdffd..55af692710539 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -229,8 +229,7 @@ struct rvt_driver_provided {
 	 * ERR_PTR(err).  The driver is free to return NULL or a valid
 	 * pointer.
 	 */
-	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
-				gfp_t gfp);
+	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 
 	/*
 	 * Free the driver's private qp structure.
@@ -319,7 +318,7 @@ struct rvt_driver_provided {
 
 	/* Let the driver pick the next queue pair number*/
 	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
-			 enum ib_qp_type type, u8 port_num, gfp_t gfp);
+			 enum ib_qp_type type, u8 port_num);
 
 	/* Determine if its safe or allowed to modify the qp */
 	int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
-- 
GitLab


From 8900b894e769dd88b53e519e3502e0e3c349fe95 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 May 2017 14:38:15 +0300
Subject: [PATCH 1775/1958] {net, IB}/mlx4: Remove gfp flags argument

The caller to the driver marks GFP_NOIO allocations with help
of memalloc_noio-* calls now. This makes redundant to pass down
to the driver gfp flags, which can be GFP_KERNEL only.

The patch removes the gfp flags argument and updates all driver paths.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx4/cq.c               |  6 +--
 drivers/infiniband/hw/mlx4/mlx4_ib.h          |  1 -
 drivers/infiniband/hw/mlx4/qp.c               | 40 ++++++++-----------
 drivers/infiniband/hw/mlx4/srq.c              |  8 ++--
 drivers/net/ethernet/mellanox/mlx4/alloc.c    | 29 +++++++-------
 drivers/net/ethernet/mellanox/mlx4/cq.c       |  4 +-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c    |  7 ++--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx4/icm.c      |  7 ++--
 drivers/net/ethernet/mellanox/mlx4/icm.h      |  3 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h     |  4 +-
 drivers/net/ethernet/mellanox/mlx4/mr.c       | 17 ++++----
 drivers/net/ethernet/mellanox/mlx4/qp.c       | 20 +++++-----
 .../ethernet/mellanox/mlx4/resource_tracker.c |  4 +-
 drivers/net/ethernet/mellanox/mlx4/srq.c      |  4 +-
 include/linux/mlx4/device.h                   | 10 ++---
 16 files changed, 76 insertions(+), 90 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 4f5a143fc0a72..ff931c580557b 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	int err;
 
 	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
-			     PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
+			     PAGE_SIZE * 2, &buf->buf);
 
 	if (err)
 		goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
+	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
 	if (err)
 		goto err_mtt;
 
@@ -219,7 +219,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 
 		uar = &to_mucontext(context)->uar;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
+		err = mlx4_db_alloc(dev->dev, &cq->db, 1);
 		if (err)
 			goto err_cq;
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c2b9cbf4da050..9db82e67e9591 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -185,7 +185,6 @@ enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
 	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
-	MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
 
 	/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
 	MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 996e9058e515a..75c0e6c5dd569 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -634,8 +634,8 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
 
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
-			    gfp_t gfp)
+			    struct ib_udata *udata, int sqpn,
+			    struct mlx4_ib_qp **caller_qp)
 {
 	int qpn;
 	int err;
@@ -691,14 +691,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
 		    (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
 				MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
-			sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
+			sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
 			if (!sqp)
 				return -ENOMEM;
 			qp = &sqp->qp;
 			qp->pri.vid = 0xFFFF;
 			qp->alt.vid = 0xFFFF;
 		} else {
-			qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
+			qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
 			if (!qp)
 				return -ENOMEM;
 			qp->pri.vid = 0xFFFF;
@@ -780,7 +780,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err;
 
 		if (qp_has_rq(init_attr)) {
-			err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
+			err = mlx4_db_alloc(dev->dev, &qp->db, 0);
 			if (err)
 				goto err;
 
@@ -788,7 +788,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		}
 
 		if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
-				   &qp->buf, gfp)) {
+				   &qp->buf)) {
 			memcpy(&init_attr->cap, &backup_cap,
 			       sizeof(backup_cap));
 			err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
@@ -797,7 +797,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 				goto err_db;
 
 			if (mlx4_buf_alloc(dev->dev, qp->buf_size,
-					   PAGE_SIZE * 2, &qp->buf, gfp)) {
+					   PAGE_SIZE * 2, &qp->buf)) {
 				err = -ENOMEM;
 				goto err_db;
 			}
@@ -808,20 +808,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
+		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
 		if (err)
 			goto err_mtt;
 
 		qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
-					gfp | __GFP_NOWARN);
+					GFP_KERNEL | __GFP_NOWARN);
 		if (!qp->sq.wrid)
 			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
-						gfp, PAGE_KERNEL);
+						GFP_KERNEL, PAGE_KERNEL);
 		qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
-					gfp | __GFP_NOWARN);
+					GFP_KERNEL | __GFP_NOWARN);
 		if (!qp->rq.wrid)
 			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
-						gfp, PAGE_KERNEL);
+						GFP_KERNEL, PAGE_KERNEL);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -859,7 +859,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
 		qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
 
-	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
+	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
 	if (err)
 		goto err_qpn;
 
@@ -1127,10 +1127,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	int err;
 	int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
 	u16 xrcdn = 0;
-	gfp_t gfp;
 
-	gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
-		GFP_NOIO : GFP_KERNEL;
 	/*
 	 * We only support LSO, vendor flag1, and multicast loopback blocking,
 	 * and only for kernel UD QPs.
@@ -1140,8 +1137,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 					MLX4_IB_SRIOV_TUNNEL_QP |
 					MLX4_IB_SRIOV_SQP |
 					MLX4_IB_QP_NETIF |
-					MLX4_IB_QP_CREATE_ROCE_V2_GSI |
-					MLX4_IB_QP_CREATE_USE_GFP_NOIO))
+					MLX4_IB_QP_CREATE_ROCE_V2_GSI))
 		return ERR_PTR(-EINVAL);
 
 	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1154,7 +1150,6 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 			return ERR_PTR(-EINVAL);
 
 		if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
-						 MLX4_IB_QP_CREATE_USE_GFP_NOIO |
 						 MLX4_IB_QP_CREATE_ROCE_V2_GSI  |
 						 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
 		     init_attr->qp_type != IB_QPT_UD) ||
@@ -1179,7 +1174,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_RAW_PACKET:
-		qp = kzalloc(sizeof *qp, gfp);
+		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 		if (!qp)
 			return ERR_PTR(-ENOMEM);
 		qp->pri.vid = 0xFFFF;
@@ -1188,7 +1183,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_UD:
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
-				       udata, 0, &qp, gfp);
+				       udata, 0, &qp);
 		if (err) {
 			kfree(qp);
 			return ERR_PTR(err);
@@ -1217,8 +1212,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 		}
 
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
-				       sqpn,
-				       &qp, gfp);
+				       sqpn, &qp);
 		if (err)
 			return ERR_PTR(err);
 
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index e32dd58937a82..0facaf5f6d237 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -135,14 +135,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
+		err = mlx4_db_alloc(dev->dev, &srq->db, 0);
 		if (err)
 			goto err_srq;
 
 		*srq->db.db = 0;
 
-		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
-				   GFP_KERNEL)) {
+		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2,
+				   &srq->buf)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -167,7 +167,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
+		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
 		if (err)
 			goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 249a4584401ad..d94b3744a5b91 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -578,7 +578,7 @@ u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u3
 }
 
 static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
-				 struct mlx4_buf *buf, gfp_t gfp)
+				 struct mlx4_buf *buf)
 {
 	dma_addr_t t;
 
@@ -587,7 +587,7 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
 	buf->page_shift   = get_order(size) + PAGE_SHIFT;
 	buf->direct.buf   =
 		dma_zalloc_coherent(&dev->persist->pdev->dev,
-				    size, &t, gfp);
+				    size, &t, GFP_KERNEL);
 	if (!buf->direct.buf)
 		return -ENOMEM;
 
@@ -607,10 +607,10 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
  *  multiple pages, so we don't require too much contiguous memory.
  */
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf, gfp_t gfp)
+		   struct mlx4_buf *buf)
 {
 	if (size <= max_direct) {
-		return mlx4_buf_direct_alloc(dev, size, buf, gfp);
+		return mlx4_buf_direct_alloc(dev, size, buf);
 	} else {
 		dma_addr_t t;
 		int i;
@@ -620,14 +620,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->npages	= buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
 		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
-					   gfp);
+					   GFP_KERNEL);
 		if (!buf->page_list)
 			return -ENOMEM;
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf =
 				dma_zalloc_coherent(&dev->persist->pdev->dev,
-						    PAGE_SIZE, &t, gfp);
+						    PAGE_SIZE, &t, GFP_KERNEL);
 			if (!buf->page_list[i].buf)
 				goto err_free;
 
@@ -663,12 +663,11 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
 
-static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
-						 gfp_t gfp)
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
 {
 	struct mlx4_db_pgdir *pgdir;
 
-	pgdir = kzalloc(sizeof *pgdir, gfp);
+	pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
 	if (!pgdir)
 		return NULL;
 
@@ -676,7 +675,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
 	pgdir->bits[0] = pgdir->order0;
 	pgdir->bits[1] = pgdir->order1;
 	pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-					    &pgdir->db_dma, gfp);
+					    &pgdir->db_dma, GFP_KERNEL);
 	if (!pgdir->db_page) {
 		kfree(pgdir);
 		return NULL;
@@ -716,7 +715,7 @@ static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir,
 	return 0;
 }
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp)
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_db_pgdir *pgdir;
@@ -728,7 +727,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp
 		if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
 			goto out;
 
-	pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp);
+	pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev);
 	if (!pgdir) {
 		ret = -ENOMEM;
 		goto out;
@@ -780,13 +779,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 {
 	int err;
 
-	err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL);
+	err = mlx4_db_alloc(dev, &wqres->db, 1);
 	if (err)
 		return err;
 
 	*wqres->db.db = 0;
 
-	err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL);
+	err = mlx4_buf_direct_alloc(dev, size, &wqres->buf);
 	if (err)
 		goto err_db;
 
@@ -795,7 +794,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL);
+	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
 	if (err)
 		goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index fa6d2354a0e91..c56a511b918e0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -224,11 +224,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
 	if (*cqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &cq_table->table, *cqn);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index e5fb89505a134..436f7689a0321 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -1042,7 +1042,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
 	if (!context)
 		return -ENOMEM;
 
-	err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
 	if (err) {
 		en_err(priv, "Failed to allocate qp #%x\n", qpn);
 		goto out;
@@ -1086,7 +1086,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
 		en_err(priv, "Failed reserving drop qpn\n");
 		return err;
 	}
-	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
 	if (err) {
 		en_err(priv, "Failed allocating drop qp\n");
 		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
@@ -1158,8 +1158,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	}
 
 	/* Configure RSS indirection qp */
-	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp,
-			    GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
 		goto rss_err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4f3a9b27ce4ad..73faa3d779214 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -111,7 +111,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 		goto err_hwq_res;
 	}
 
-	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 		goto err_reserve;
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index e1f9e7cebf8f7..5a7816e7c7b4e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -251,8 +251,7 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
 			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
-		   gfp_t gfp)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
 {
 	u32 i = (obj & (table->num_obj - 1)) /
 			(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
@@ -266,7 +265,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
 	}
 
 	table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
-				       (table->lowmem ? gfp : GFP_HIGHUSER) |
+				       (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
 				       __GFP_NOWARN, table->coherent);
 	if (!table->icm[i]) {
 		ret = -ENOMEM;
@@ -363,7 +362,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 	u32 i;
 
 	for (i = start; i <= end; i += inc) {
-		err = mlx4_table_get(dev, table, i, GFP_KERNEL);
+		err = mlx4_table_get(dev, table, i);
 		if (err)
 			goto fail;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index 0c73645501509..dee67fa39107f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -71,8 +71,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 				gfp_t gfp_mask, int coherent);
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
-		   gfp_t gfp);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			 u32 start, u32 end);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 30616cd0140d5..706d7f21ac5c1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -969,7 +969,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
 int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
 void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
@@ -977,7 +977,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
 void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
 int __mlx4_mpt_reserve(struct mlx4_dev *dev);
 void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
 u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
 void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index ce852ca22a968..24282cd017d37 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -479,14 +479,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
 	__mlx4_mpt_release(dev, index);
 }
 
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 {
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-	return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
+	return mlx4_table_get(dev, &mr_table->dmpt_table, index);
 }
 
-static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 {
 	u64 param = 0;
 
@@ -497,7 +497,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 							MLX4_CMD_TIME_CLASS_A,
 							MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_mpt_alloc_icm(dev, index, gfp);
+	return __mlx4_mpt_alloc_icm(dev, index);
 }
 
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
@@ -629,7 +629,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
 	if (err)
 		return err;
 
@@ -787,14 +787,13 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf, gfp_t gfp)
+		       struct mlx4_buf *buf)
 {
 	u64 *page_list;
 	int err;
 	int i;
 
-	page_list = kmalloc(buf->npages * sizeof *page_list,
-			    gfp);
+	page_list = kcalloc(buf->npages, sizeof(*page_list), GFP_KERNEL);
 	if (!page_list)
 		return -ENOMEM;
 
@@ -841,7 +840,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 5a310d313e94d..26747212526b0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -301,29 +301,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
 
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
 	int err;
 
-	err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
 	if (err)
 		goto err_put_qp;
 
-	err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
 	if (err)
 		goto err_put_auxc;
 
-	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
 	if (err)
 		goto err_put_altc;
 
-	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp);
+	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
 	if (err)
 		goto err_put_rdmarc;
 
@@ -345,7 +345,7 @@ int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 	return err;
 }
 
-static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
 {
 	u64 param = 0;
 
@@ -355,7 +355,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 				    MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_qp_alloc_icm(dev, qpn, gfp);
+	return __mlx4_qp_alloc_icm(dev, qpn);
 }
 
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
@@ -397,7 +397,7 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 	return qp;
 }
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
@@ -408,7 +408,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 
 	qp->qpn = qpn;
 
-	err = mlx4_qp_alloc_icm(dev, qpn, gfp);
+	err = mlx4_qp_alloc_icm(dev, qpn);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 8127838652057..215e21c3dc8a8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1822,7 +1822,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 			return err;
 
 		if (!fw_reserved(dev, qpn)) {
-			err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL);
+			err = __mlx4_qp_alloc_icm(dev, qpn);
 			if (err) {
 				res_abort_move(dev, slave, RES_QP, qpn);
 				return err;
@@ -1909,7 +1909,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 		if (err)
 			return err;
 
-		err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL);
+		err = __mlx4_mpt_alloc_icm(dev, mpt->key);
 		if (err) {
 			res_abort_move(dev, slave, RES_MPT, id);
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index f44d089e2ca60..bedf521268246 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -100,11 +100,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
 	if (*srqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &srq_table->table, *srqn);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL);
+	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index d5bed0875d309..aad5d81dfb444 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1068,7 +1068,7 @@ static inline int mlx4_is_eth(struct mlx4_dev *dev, int port)
 }
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf, gfp_t gfp);
+		   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
@@ -1105,10 +1105,9 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list);
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf, gfp_t gfp);
+		       struct mlx4_buf *buf);
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
-		  gfp_t gfp);
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
@@ -1124,8 +1123,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
 			  int *base, u8 flags);
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp,
-		  gfp_t gfp);
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
-- 
GitLab


From 7855f5842741e5835b9be073079780c444dca898 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 May 2017 14:38:16 +0300
Subject: [PATCH 1776/1958] IB/core: Remove NOIO QP create flag

There are no users for IB_QP_CREATE_USE_GFP_NOIO flag,
so let's remove it.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib_verbs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6e62c9e2c971a..b5732432bb297 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1056,7 +1056,7 @@ enum ib_qp_create_flags {
 	IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
 	IB_QP_CREATE_NETIF_QP			= 1 << 5,
 	IB_QP_CREATE_SIGNATURE_EN		= 1 << 6,
-	IB_QP_CREATE_USE_GFP_NOIO		= 1 << 7,
+	/* FREE					= 1 << 7, */
 	IB_QP_CREATE_SCATTER_FCS		= 1 << 8,
 	IB_QP_CREATE_CVLAN_STRIPPING		= 1 << 9,
 	/* reserve bits 26-31 for low level drivers' internal use */
-- 
GitLab


From 12cc1a027341338f54d8d3fcf5d188ae2b39c30d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 30 May 2017 09:44:48 +0300
Subject: [PATCH 1777/1958] IB/mlx5: Clean mr_cache debugfs in case of failure

The failure in creation of debugfs entries for mr_cache left entries,
which were already created.

It caused to mismatch and misguiding for the end users. The solution
is to clean mr_cache debugfs root, so no leftovers will be in the
system. In addition, let's document why the error is not needed to be
forwarded to user in case of failure.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 34 +++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 2046a69874533..8ab2f1360a456 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -582,6 +582,15 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 	}
 }
 
+static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+	if (!mlx5_debugfs_root)
+		return;
+
+	debugfs_remove_recursive(dev->cache.root);
+	dev->cache.root = NULL;
+}
+
 static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -600,38 +609,34 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 		sprintf(ent->name, "%d", ent->order);
 		ent->dir = debugfs_create_dir(ent->name,  cache->root);
 		if (!ent->dir)
-			return -ENOMEM;
+			goto err;
 
 		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
 						 &size_fops);
 		if (!ent->fsize)
-			return -ENOMEM;
+			goto err;
 
 		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
 						  &limit_fops);
 		if (!ent->flimit)
-			return -ENOMEM;
+			goto err;
 
 		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
 					       &ent->cur);
 		if (!ent->fcur)
-			return -ENOMEM;
+			goto err;
 
 		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
 						&ent->miss);
 		if (!ent->fmiss)
-			return -ENOMEM;
+			goto err;
 	}
 
 	return 0;
-}
-
-static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
-	if (!mlx5_debugfs_root)
-		return;
+err:
+	mlx5_mr_cache_debugfs_cleanup(dev);
 
-	debugfs_remove_recursive(dev->cache.root);
+	return -ENOMEM;
 }
 
 static void delay_time_func(unsigned long ctx)
@@ -692,6 +697,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 	if (err)
 		mlx5_ib_warn(dev, "cache debugfs failure\n");
 
+	/*
+	 * We don't want to fail driver if debugfs failed to initialize,
+	 * so we are not forwarding error to the user.
+	 */
+
 	return 0;
 }
 
-- 
GitLab


From 8fe8bacb92f249c91a1407b48aa1cb98067fe19d Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Tue, 30 May 2017 09:58:06 +0300
Subject: [PATCH 1778/1958] IB/core: Add ordered workqueue for RoCE GID
 management

Currently the RoCE GID management uses the ib_wq to do add and delete new GIDs
according to the netdev events.

The ib_wq isn't an ordered workqueue and thus two work elements can be executed
concurrently which will result in unexpected behavior and inconsistency of the
GIDs cache content.

Example:
ifconfig eth1 11.11.11.11/16 up

This command will invoke the following netdev events in the following order:
1. NETDEV_UP
2. NETDEV_DOWN
3. NETDEV_UP

If (2) and (3) will be executed concurrently or in reverse order, instead of
having a new GID with 11.11.11.11 IP, we will end up without any new GIDs.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/roce_gid_mgmt.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index db958d3207efc..94a9eefb3cfca 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -42,6 +42,8 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_addr.h>
 
+static struct workqueue_struct *gid_cache_wq;
+
 enum gid_op_type {
 	GID_DEL = 0,
 	GID_ADD
@@ -560,7 +562,7 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
 	}
 	INIT_WORK(&ndev_work->work, netdevice_event_work_handler);
 
-	queue_work(ib_wq, &ndev_work->work);
+	queue_work(gid_cache_wq, &ndev_work->work);
 
 	return NOTIFY_DONE;
 }
@@ -693,7 +695,7 @@ static int addr_event(struct notifier_block *this, unsigned long event,
 	dev_hold(ndev);
 	work->gid_attr.ndev   = ndev;
 
-	queue_work(ib_wq, &work->work);
+	queue_work(gid_cache_wq, &work->work);
 
 	return NOTIFY_DONE;
 }
@@ -740,6 +742,10 @@ static struct notifier_block nb_inet6addr = {
 
 int __init roce_gid_mgmt_init(void)
 {
+	gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
+	if (!gid_cache_wq)
+		return -ENOMEM;
+
 	register_inetaddr_notifier(&nb_inetaddr);
 	if (IS_ENABLED(CONFIG_IPV6))
 		register_inet6addr_notifier(&nb_inet6addr);
@@ -764,4 +770,5 @@ void __exit roce_gid_mgmt_cleanup(void)
 	 * ib-core is removed, all physical devices have been removed,
 	 * so no issue with remaining hardware contexts.
 	 */
+	destroy_workqueue(gid_cache_wq);
 }
-- 
GitLab


From b6c871e5875798e5ed3744c725622dcd3c92be92 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Mon, 12 Jun 2017 10:45:21 +0300
Subject: [PATCH 1779/1958] IB/ipoib: Let lower driver handle get_stats64 call

The driver checks if the lower level driver supports get_stats, and if
so calls it to get the updated statistics, otherwise takes from the
current netdevice stats object.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3e2b7988ead81..70dacaf9044ec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -276,6 +276,17 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 	return ret;
 }
 
+static void ipoib_get_stats(struct net_device *dev,
+			    struct rtnl_link_stats64 *stats)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+	if (priv->rn_ops->ndo_get_stats64)
+		priv->rn_ops->ndo_get_stats64(dev, stats);
+	else
+		netdev_stats_to_stats64(stats, &dev->stats);
+}
+
 /* Called with an RCU read lock taken */
 static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr,
 					struct net_device *dev)
@@ -1823,6 +1834,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
 	.ndo_get_vf_stats	 = ipoib_get_vf_stats,
 	.ndo_set_vf_guid	 = ipoib_set_vf_guid,
 	.ndo_set_mac_address	 = ipoib_set_mac,
+	.ndo_get_stats64	 = ipoib_get_stats,
 };
 
 static const struct net_device_ops ipoib_netdev_ops_vf = {
-- 
GitLab


From fda85ce912401750e1e80757627af2784c7cc5a7 Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Thu, 22 Jun 2017 17:09:59 +0300
Subject: [PATCH 1780/1958] IB/rxe: Fix kernel panic from skb destructor

In the time between rxe_send has finished and skb destructor
called, the QP's ref count might be 0, leading to a possible
QP destruction. This will lead to a kernel panic when the destructor
dereferences the QP.

The operation of incrementing QP ref count at rxe_send and decrementing
from skb destructor will prevent this crash.

BUG: unable to handle kernel NULL pointer dereference at 000000000000072c
IP: [<ffffffffa05df765>] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe]
PGD 0 [16240.211178]
Oops: 0002 [#1] SMP
CPU: 3 PID: 0 Comm: swapper/3 Tainted: G           OE   4.9.0-mlnx #1
Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011
task: ffff88042d6b1480 task.stack: ffffc90001904000
RIP: 0010:[<ffffffffa05df765>]  [<ffffffffa05df765>] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe]
RSP: 0018:ffff88043fcc3df0  EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff880429684700 RCX: ffff88042d248200
RDX: 00000000ffffffff RSI: 00000000fffffe01 RDI: ffff880429684700
RBP: ffff88043fcc3e00 R08: ffff88043fcda240 R09: 00000000ff2d1de6
R10: 0000000000000000 R11: 00000000f49cf6fe R12: ffff880429684700
R13: ffffffff81893f96 R14: ffffffff817d66f0 R15: ffff880427f74200
FS:  0000000000000000(0000) GS:ffff88043fcc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000000072c CR3: 000000041d3df000 CR4: 00000000000006e0
Stack:
 ffffffff817b29cf ffff880429684700 ffff88043fcc3e18 ffffffff817b42c2
 ffff880429684700 ffff88043fcc3e40 ffffffff817b4332 ffff880429684700
 ffff880427f74238 ffff880427f74228 ffff88043fcc3e58 ffffffff81893f96
Call Trace:
 <IRQ> [16240.336345]  [<ffffffff817b29cf>] ? skb_release_head_state+0x4f/0xb0
 [<ffffffff817b42c2>] skb_release_all+0x12/0x30
 [<ffffffff817b4332>] kfree_skb+0x32/0x90
 [<ffffffff81893f96>] ndisc_error_report+0x36/0x40
 [<ffffffff817d4de1>] neigh_invalidate+0x81/0xf0
 [<ffffffff817d68f7>] neigh_timer_handler+0x207/0x2b0
 [<ffffffff81109295>] call_timer_fn+0x35/0x120
 [<ffffffff81109db7>] run_timer_softirq+0x1d7/0x460
 [<ffffffff8106155e>] ? kvm_sched_clock_read+0x1e/0x30
 [<ffffffff810366b9>] ? sched_clock+0x9/0x10
 [<ffffffff810cfed2>] ? sched_clock_cpu+0x72/0xa0
 [<ffffffff818dd537>] __do_softirq+0xd7/0x289
 [<ffffffff810a6c95>] irq_exit+0xb5/0xc0
 [<ffffffff818dd372>] smp_apic_timer_interrupt+0x42/0x50
 [<ffffffff818dc682>] apic_timer_interrupt+0x82/0x90
 <EOI> [16240.395776]  [<ffffffff818da156>] ? native_safe_halt+0x6/0x10
 [<ffffffff818d9e6e>] default_idle+0x1e/0xd0
 [<ffffffff8103797f>] arch_cpu_idle+0xf/0x20
 [<ffffffff818da2c5>] default_idle_call+0x35/0x40
 [<ffffffff810e3eb5>] cpu_startup_entry+0x185/0x210
 [<ffffffff81050433>] start_secondary+0x103/0x130
RIP  [<ffffffffa05df765>] rxe_skb_tx_dtor+0x15/0x50 [rdma_rxe]

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_net.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index c3a140ed4df27..08f3f90d29123 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -441,6 +441,8 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
 	if (unlikely(qp->need_req_skb &&
 		     skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
 		rxe_run_task(&qp->req.task, 1);
+
+	rxe_drop_ref(qp);
 }
 
 int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
@@ -473,6 +475,7 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
 		return -EAGAIN;
 	}
 
+	rxe_add_ref(pkt->qp);
 	atomic_inc(&pkt->qp->skb_out);
 	kfree_skb(skb);
 
-- 
GitLab


From 56012e1cada54460f9e456cd77276e765e06ce6c Mon Sep 17 00:00:00 2001
From: yonatanc <yonatanc@mellanox.com>
Date: Thu, 22 Jun 2017 17:10:00 +0300
Subject: [PATCH 1781/1958] IB/rxe: Set dma_mask and coherent_dma_mask

The RXE coupled with dummy device causes to the kernel panic attached
below.  The panic happens when ib_register_device tries to set dma_mask
by accessing a NULLed parent device.

The RXE does not actually use DMA, so we can set the dma_mask
to architecture value.

[16240.199689] RIP: 0010:ib_register_device+0x468/0x5a0 [ib_core]
[16240.205289] RSP: 0018:ffffc9000220fc10 EFLAGS: 00010246
[16240.209909] RAX: 0000000000000024 RBX: ffff880220d1a2a8 RCX: 0000000000000000
[16240.212244] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009
[16240.214385] RBP: ffffc9000220fcb0 R08: 0000000000000000 R09: 000000000000023f
[16240.254465] R10: 0000000000000007 R11: 0000000000000000 R12: 0000000000000000
[16240.259467] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880220d1a2a8
[16240.263314] FS:  00007fd8ecca0740(0000) GS:ffff8802364c0000(0000) knlGS:0000000000000000
[16240.267292] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[16240.273503] CR2: 0000000000000218 CR3: 00000002253ba000 CR4: 00000000000006e0
[16240.277066] Call Trace:
[16240.281836]  ? __kmalloc+0x26f/0x280
[16240.286596]  rxe_register_device+0x297/0x300 [rdma_rxe]
[16240.291377]  rxe_add+0x535/0x5b0 [rdma_rxe]
[16240.297586]  rxe_net_add+0x3e/0xc0 [rdma_rxe]
[16240.302375]  rxe_param_set_add+0x65/0x144 [rdma_rxe]
[16240.307769]  param_attr_store+0x68/0xd0
[16240.311640]  module_attr_store+0x1d/0x30
[16240.316421]  sysfs_kf_write+0x3a/0x50
[16240.317802]  kernfs_fop_write+0xff/0x180
[16240.322989]  __vfs_write+0x37/0x140
[16240.328164]  ? handle_mm_fault+0xce/0x240
[16240.333340]  vfs_write+0xb2/0x1b0
[16240.335013]  SyS_write+0x55/0xc0
[16240.340632]  entry_SYSCALL_64_fastpath+0x1a/0xa9

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 073e66783f1dd..07511718d98d8 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1240,6 +1240,8 @@ int rxe_register_device(struct rxe_dev *rxe)
 	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
 			    rxe->ndev->dev_addr);
 	dev->dev.dma_ops = &dma_virt_ops;
+	dma_coerce_mask_and_coherent(&dev->dev,
+				     dma_get_required_mask(dev->dev.parent));
 
 	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
 	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
-- 
GitLab


From 5802883d4b7c544012a1857660f78af41f6c183a Mon Sep 17 00:00:00 2001
From: oulijun <oulijun@huawei.com>
Date: Sat, 10 Jun 2017 18:49:21 +0800
Subject: [PATCH 1782/1958] IB/hns: Fix the bug of polling cq failed for
 loopback Qps

In hip06 SoC, RoCE driver creates 8 reserved loopback QPs to
ensure zero wqe when free mr. However, if the enabled phy
port number is less than 6, it will fail in polling cqe with
8 reserved loopback QPs.

In order to solve this problem, the number of loopback Qps
will be adjusted based on the number of enabled phy port.

Signed-off-by: Shaobo Xu <xushaobo2@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 53 ++++++++++++++--------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 729f8cc8738be..c291b2a4c7cf9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -661,9 +661,11 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	union ib_gid dgid;
 	u64 subnet_prefix;
 	int attr_mask = 0;
-	int i;
+	int i, j;
 	int ret;
+	u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 };
 	u8 phy_port;
+	u8 port = 0;
 	u8 sl;
 
 	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
@@ -709,11 +711,27 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	attr.rnr_retry		= 7;
 	attr.timeout		= 0x12;
 	attr.path_mtu		= IB_MTU_256;
+	attr.ah_attr.type	= RDMA_AH_ATTR_TYPE_ROCE;
 	rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
 	rdma_ah_set_static_rate(&attr.ah_attr, 3);
 
 	subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
 	for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+		phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
+				(i % HNS_ROCE_MAX_PORTS);
+		sl = i / HNS_ROCE_MAX_PORTS;
+
+		for (j = 0; j < caps->num_ports; j++) {
+			if (hr_dev->iboe.phy_port[j] == phy_port) {
+				queue_en[i] = 1;
+				port = j;
+				break;
+			}
+		}
+
+		if (!queue_en[i])
+			continue;
+
 		free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
 		if (IS_ERR(free_mr->mr_free_qp[i])) {
 			dev_err(dev, "Create loop qp failed!\n");
@@ -721,15 +739,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 		}
 		hr_qp = free_mr->mr_free_qp[i];
 
-		sl = i / caps->num_ports;
-
-		if (caps->num_ports == HNS_ROCE_MAX_PORTS)
-			phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
-				(i % caps->num_ports);
-		else
-			phy_port = i % caps->num_ports;
-
-		hr_qp->port		= phy_port + 1;
+		hr_qp->port		= port;
 		hr_qp->phy_port		= phy_port;
 		hr_qp->ibqp.qp_type	= IB_QPT_RC;
 		hr_qp->ibqp.device	= &hr_dev->ib_dev;
@@ -739,23 +749,22 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 		hr_qp->ibqp.recv_cq	= cq;
 		hr_qp->ibqp.send_cq	= cq;
 
-		rdma_ah_set_port_num(&attr.ah_attr, phy_port + 1);
-		rdma_ah_set_sl(&attr.ah_attr, phy_port + 1);
-		attr.port_num		= phy_port + 1;
+		rdma_ah_set_port_num(&attr.ah_attr, port + 1);
+		rdma_ah_set_sl(&attr.ah_attr, sl);
+		attr.port_num		= port + 1;
 
 		attr.dest_qp_num	= hr_qp->qpn;
 		memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr),
-		       hr_dev->dev_addr[phy_port],
+		       hr_dev->dev_addr[port],
 		       MAC_ADDR_OCTET_NUM);
 
 		memcpy(&dgid.raw, &subnet_prefix, sizeof(u64));
-		memcpy(&dgid.raw[8], hr_dev->dev_addr[phy_port], 3);
-		memcpy(&dgid.raw[13], hr_dev->dev_addr[phy_port] + 3, 3);
+		memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3);
+		memcpy(&dgid.raw[13], hr_dev->dev_addr[port] + 3, 3);
 		dgid.raw[11] = 0xff;
 		dgid.raw[12] = 0xfe;
 		dgid.raw[8] ^= 2;
 		rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw);
-		attr_mask |= IB_QP_PORT;
 
 		ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
 					    IB_QPS_RESET, IB_QPS_INIT);
@@ -812,6 +821,9 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
 
 	for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
 		hr_qp = free_mr->mr_free_qp[i];
+		if (!hr_qp)
+			continue;
+
 		ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp);
 		if (ret)
 			dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
@@ -963,7 +975,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
 		msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
 	int i;
 	int ret;
-	int ne;
+	int ne = 0;
 
 	mr_work = container_of(work, struct hns_roce_mr_free_work, work);
 	hr_mr = (struct hns_roce_mr *)mr_work->mr;
@@ -976,6 +988,10 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
 
 	for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
 		hr_qp = free_mr->mr_free_qp[i];
+		if (!hr_qp)
+			continue;
+		ne++;
+
 		ret = hns_roce_v1_send_lp_wqe(hr_qp);
 		if (ret) {
 			dev_err(dev,
@@ -985,7 +1001,6 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
 		}
 	}
 
-	ne = HNS_ROCE_V1_RESV_QP;
 	do {
 		ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
 		if (ret < 0) {
-- 
GitLab


From 58c4f0d85f59c458074f016c13991c0a81105180 Mon Sep 17 00:00:00 2001
From: oulijun <oulijun@huawei.com>
Date: Sat, 10 Jun 2017 18:49:22 +0800
Subject: [PATCH 1783/1958] IB/hns: Fix the bug with wild pointer when destroy
 rc qp

When destroyed rc qp, the hr_qp will be used after freed. This patch
will fix it.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index c291b2a4c7cf9..2fe353001b043 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -3657,6 +3657,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
 	struct hns_roce_dev *hr_dev;
 	struct hns_roce_qp *hr_qp;
 	struct device *dev;
+	unsigned long qpn;
 	int ret;
 
 	qp_work_entry = container_of(work, struct hns_roce_qp_work, work);
@@ -3664,8 +3665,9 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
 	dev = &hr_dev->pdev->dev;
 	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
 	hr_qp = qp_work_entry->qp;
+	qpn = hr_qp->qpn;
 
-	dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn);
+	dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", qpn);
 
 	qp_work_entry->sche_cnt++;
 
@@ -3676,7 +3678,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
 					 &qp_work_entry->db_wait_stage);
 	if (ret) {
 		dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
-			hr_qp->qpn);
+			qpn);
 		return;
 	}
 
@@ -3690,7 +3692,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
 	ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
 				    IB_QPS_RESET);
 	if (ret) {
-		dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn);
+		dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", qpn);
 		return;
 	}
 
@@ -3699,14 +3701,14 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
 
 	if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
 		/* RC QP, release QPN */
-		hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+		hns_roce_release_range_qp(hr_dev, qpn, 1);
 		kfree(hr_qp);
 	} else
 		kfree(hr_to_hr_sqp(hr_qp));
 
 	kfree(qp_work_entry);
 
-	dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn);
+	dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn);
 }
 
 int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
-- 
GitLab


From 9de61d3fcdde06087f65b4022a1a966c10ab5803 Mon Sep 17 00:00:00 2001
From: oulijun <oulijun@huawei.com>
Date: Sat, 10 Jun 2017 18:49:23 +0800
Subject: [PATCH 1784/1958] IB/hns: Fix the bug with rdma operation

When opcode of work request is RDMA read and write, it
should use rdma_wr to get remote_addr and rkey. This
patch fixes it.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 2fe353001b043..c42e883a18a8e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -228,14 +228,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			switch (wr->opcode) {
 			case IB_WR_RDMA_READ:
 				ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ;
-				set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
-					      atomic_wr(wr)->rkey);
+				set_raddr_seg(wqe,  rdma_wr(wr)->remote_addr,
+					       rdma_wr(wr)->rkey);
 				break;
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
 				ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE;
-				set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
-					      atomic_wr(wr)->rkey);
+				set_raddr_seg(wqe,  rdma_wr(wr)->remote_addr,
+					      rdma_wr(wr)->rkey);
 				break;
 			case IB_WR_SEND:
 			case IB_WR_SEND_WITH_INV:
-- 
GitLab


From d322f004aaa647a5dc9dcddfe5ab1bff1e92f634 Mon Sep 17 00:00:00 2001
From: oulijun <oulijun@huawei.com>
Date: Sat, 10 Jun 2017 18:49:24 +0800
Subject: [PATCH 1785/1958] IB/hns: Fix the bug with modifying the MAC address
 without removing the driver

When modified the MAC address used hns_roce_mac function, we release and create
reserved qp again, It is not necessary to use spin_lock_bh and spin_unlock_bh in
handle_en_event, Otherwise, it will occur a error. This patch mainly fixes it.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hns/hns_roce_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c3b41f95e70a5..d9777b662eba9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -125,8 +125,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
 		return -ENODEV;
 	}
 
-	spin_lock_bh(&hr_dev->iboe.lock);
-
 	switch (event) {
 	case NETDEV_UP:
 	case NETDEV_CHANGE:
@@ -144,7 +142,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
 		break;
 	}
 
-	spin_unlock_bh(&hr_dev->iboe.lock);
 	return 0;
 }
 
-- 
GitLab


From 5f110ac4bed8693adb21146067149a48c2b9bd07 Mon Sep 17 00:00:00 2001
From: oulijun <oulijun@huawei.com>
Date: Sat, 10 Jun 2017 18:49:25 +0800
Subject: [PATCH 1786/1958] IB/hns: Fix for checkpatch.pl comment style
 warnings

This patch correct the comment style warnings caught by
checkpatch.pl script.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index c42e883a18a8e..23fad6d969440 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2197,7 +2197,7 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
 		}
 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
 		++wq->tail;
-		} else {
+	} else {
 		/* RQ conrespond to CQE */
 		wc->byte_len = le32_to_cpu(cqe->byte_cnt);
 		opcode = roce_get_field(cqe->cqe_byte_4,
@@ -3549,10 +3549,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
 					old_cnt = roce_get_field(old_send,
 					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
-					if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+					if (cur_cnt - old_cnt >
+					    SDB_ST_CMP_VAL) {
 						success_flags = 1;
-					else {
-					    send_ptr = roce_get_field(old_send,
+					} else {
+						send_ptr =
+							roce_get_field(old_send,
 					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
 					    roce_get_field(sdb_retry_cnt,
-- 
GitLab


From ebc9ca43e1d52a85c72fc2d343f353386ed6c188 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Mon, 29 May 2017 17:20:53 -0700
Subject: [PATCH 1787/1958] IB/core: Allow QP state transition from reset to
 error

Playing with IP-O-IB interface can trigger a warning message:
"ib0: Failed to modify QP to ERROR state" to be logged.
This happens when the QP is in IB_QPS_RESET state and the stack
is trying to transition it to IB_QPS_ERR state in ipoib_ib_dev_stop().

According to the IB spec, Table 91 - "QP State Transition Properties"
it looks like the transition from reset to error is valid:

Transition: Any State to Error
Required Attributes: None
Optional Attributes: None allowed
Actions: Queue processing is stopped. Work Requests pending or in
process are completed in error, when possible.

This patch allows the transition and quiets the message.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 7f8fe443df46f..fb98ed67d5bc6 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -895,6 +895,7 @@ static const struct {
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = {
 		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR] =   { .valid = 1 },
 		[IB_QPS_INIT]  = {
 			.valid = 1,
 			.req_param = {
-- 
GitLab


From 101dd590a7fa37954540cf3149a1c502c0acc524 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 10 Jul 2017 16:19:38 +1000
Subject: [PATCH 1788/1958] powerpc/perf: Avoid spurious PMU interrupts after
 idle

POWER9 DD2 can see spurious PMU interrupts after state-loss idle in
some conditions.

A solution is to save and reload MMCR0 over state-loss idle.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/idle_book3s.S | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 5adb390e773bd..516ebef905c06 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -30,6 +30,7 @@
  * Use unused space in the interrupt stack to save and restore
  * registers for winkle support.
  */
+#define _MMCR0	GPR0
 #define _SDR1	GPR3
 #define _PTCR	GPR3
 #define _RPR	GPR4
@@ -272,6 +273,14 @@ power_enter_stop:
 	b 	pnv_wakeup_noloss
 
 .Lhandle_esl_ec_set:
+	/*
+	 * POWER9 DD2 can incorrectly set PMAO when waking up after a
+	 * state-loss idle. Saving and restoring MMCR0 over idle is a
+	 * workaround.
+	 */
+	mfspr	r4,SPRN_MMCR0
+	std	r4,_MMCR0(r1)
+
 /*
  * Check if the requested state is a deep idle state.
  */
@@ -450,10 +459,14 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 pnv_restore_hyp_resource_arch300:
 	/*
 	 * Workaround for POWER9, if we lost resources, the ERAT
-	 * might have been mixed up and needs flushing.
+	 * might have been mixed up and needs flushing. We also need
+	 * to reload MMCR0 (see comment above).
 	 */
 	blt	cr3,1f
 	PPC_INVALIDATE_ERAT
+	ld	r1,PACAR1(r13)
+	ld	r4,_MMCR0(r1)
+	mtspr	SPRN_MMCR0,r4
 1:
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
-- 
GitLab


From 299aa41a45fe21977532fbf992e4f7c5eef73bf0 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 13 Jul 2017 17:45:21 -0700
Subject: [PATCH 1789/1958] f2fs: include seq_file.h for sysfs.c

This patch includes seq_file.h to avoid compile error.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/sysfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 9adc202fcd6f7..71191d89917d8 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -11,6 +11,7 @@
  */
 #include <linux/proc_fs.h>
 #include <linux/f2fs_fs.h>
+#include <linux/seq_file.h>
 
 #include "f2fs.h"
 #include "segment.h"
-- 
GitLab


From 4db08d016ccedb5b97869724a096990acad59685 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 14 Jul 2017 11:45:21 -0700
Subject: [PATCH 1790/1958] f2fs: avoid cpu lockup

Before retrying to flush data or dentry pages, we need to release cpu in order
to prevent watchdog.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 56bbf592e4875..5b876f6d3f6b1 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -879,6 +879,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
 	struct inode *inode;
 	struct f2fs_inode_info *fi;
 	bool is_dir = (type == DIR_INODE);
+	unsigned long ino = 0;
 
 	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
 				get_pages(sbi, is_dir ?
@@ -901,8 +902,17 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
 	inode = igrab(&fi->vfs_inode);
 	spin_unlock(&sbi->inode_lock[type]);
 	if (inode) {
+		unsigned long cur_ino = inode->i_ino;
+
 		filemap_fdatawrite(inode->i_mapping);
 		iput(inode);
+		/* We need to give cpu to another writers. */
+		if (ino == cur_ino) {
+			congestion_wait(BLK_RW_ASYNC, HZ/50);
+			cond_resched();
+		} else {
+			ino = cur_ino;
+		}
 	} else {
 		/*
 		 * We should submit bio, since it exists several
-- 
GitLab


From 307bc11fcdf9f911a1adacf9e77fe1c490041ee3 Mon Sep 17 00:00:00 2001
From: Minas Harutyunyan <Minas.Harutyunyan@synopsys.com>
Date: Tue, 11 Jul 2017 14:25:13 +0400
Subject: [PATCH 1791/1958] usb: dwc2: gadget: On USB RESET reset device
 address to zero

Reseted DEVADDR field in DCFG to zero on USB RESET.

Device address in DCFG register does not reset to zero,
which required to pass enumeration, after disconnect and
reconnect.

Acked-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Minas Harutyunyan <hminas@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc2/gadget.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index bc3b3fda5000d..c4066cd77e473 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -3573,6 +3573,9 @@ static irqreturn_t dwc2_hsotg_irq(int irq, void *pw)
 		/* Report disconnection if it is not already done. */
 		dwc2_hsotg_disconnect(hsotg);
 
+		/* Reset device address to zero */
+		__bic32(hsotg->regs + DCFG, DCFG_DEVADDR_MASK);
+
 		if (usb_status & GOTGCTL_BSESVLD && connected)
 			dwc2_hsotg_core_init_disconnected(hsotg, true);
 	}
-- 
GitLab


From ab1d53d598be211968c5b4eeee4c5b67d6e21602 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Fri, 23 Jun 2017 19:20:21 +0800
Subject: [PATCH 1792/1958] usb: gadget: udc: start_udc() can be static

Fixes the following Sparse warnings:

>> drivers/usb/gadget/udc/snps_udc_plat.c:31:6: sparse: symbol 'start_udc' was not declared. Should it be static?
>> drivers/usb/gadget/udc/snps_udc_plat.c:41:6: sparse: symbol 'stop_udc' was not declared. Should it be static?
>> drivers/usb/gadget/udc/snps_udc_plat.c:79:6: sparse: symbol 'udc_drd_work' was not declared. Should it be static?

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/snps_udc_plat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/udc/snps_udc_plat.c b/drivers/usb/gadget/udc/snps_udc_plat.c
index 2e11f19e07ae8..f7b4d0f159e4e 100644
--- a/drivers/usb/gadget/udc/snps_udc_plat.c
+++ b/drivers/usb/gadget/udc/snps_udc_plat.c
@@ -28,7 +28,7 @@
 /* description */
 #define UDC_MOD_DESCRIPTION     "Synopsys UDC platform driver"
 
-void start_udc(struct udc *udc)
+static void start_udc(struct udc *udc)
 {
 	if (udc->driver) {
 		dev_info(udc->dev, "Connecting...\n");
@@ -38,7 +38,7 @@ void start_udc(struct udc *udc)
 	}
 }
 
-void stop_udc(struct udc *udc)
+static void stop_udc(struct udc *udc)
 {
 	int tmp;
 	u32 reg;
@@ -76,7 +76,7 @@ void stop_udc(struct udc *udc)
 	dev_info(udc->dev, "Device disconnected\n");
 }
 
-void udc_drd_work(struct work_struct *work)
+static void udc_drd_work(struct work_struct *work)
 {
 	struct udc *udc;
 
-- 
GitLab


From 8bd226f9a7dc18740a916dcba3112f2bfc3ad9e8 Mon Sep 17 00:00:00 2001
From: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Date: Sun, 25 Jun 2017 16:23:45 +0300
Subject: [PATCH 1793/1958] include: usb: audio: specify exact endiannes of
 descriptors

USB spec says that multiple byte fields are stored in
little-endian order (see chapter 8.1 of USB2.0 spec and
chapter 7.1 of USB3.0 spec), thus mark such fields as LE
for UAC1 and UAC2 headers

Signed-off-by: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/linux/usb/audio-v2.h   | 14 +++++++-------
 include/uapi/linux/usb/audio.h |  6 +++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index c5f2158ab00e4..fd73bc0e90274 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -115,13 +115,13 @@ struct uac2_input_terminal_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bTerminalID;
-	__u16 wTerminalType;
+	__le16 wTerminalType;
 	__u8 bAssocTerminal;
 	__u8 bCSourceID;
 	__u8 bNrChannels;
-	__u32 bmChannelConfig;
+	__le32 bmChannelConfig;
 	__u8 iChannelNames;
-	__u16 bmControls;
+	__le16 bmControls;
 	__u8 iTerminal;
 } __attribute__((packed));
 
@@ -132,11 +132,11 @@ struct uac2_output_terminal_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bTerminalID;
-	__u16 wTerminalType;
+	__le16 wTerminalType;
 	__u8 bAssocTerminal;
 	__u8 bSourceID;
 	__u8 bCSourceID;
-	__u16 bmControls;
+	__le16 bmControls;
 	__u8 iTerminal;
 } __attribute__((packed));
 
@@ -164,9 +164,9 @@ struct uac2_as_header_descriptor {
 	__u8 bTerminalLink;
 	__u8 bmControls;
 	__u8 bFormatType;
-	__u32 bmFormats;
+	__le32 bmFormats;
 	__u8 bNrChannels;
-	__u32 bmChannelConfig;
+	__le32 bmChannelConfig;
 	__u8 iChannelNames;
 } __attribute__((packed));
 
diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h
index d2314be4f0c03..a4680a5bf5dd4 100644
--- a/include/uapi/linux/usb/audio.h
+++ b/include/uapi/linux/usb/audio.h
@@ -333,7 +333,7 @@ struct uac_processing_unit_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bUnitID;
-	__u16 wProcessType;
+	__le16 wProcessType;
 	__u8 bNrInPins;
 	__u8 baSourceID[];
 } __attribute__ ((packed));
@@ -491,8 +491,8 @@ struct uac_format_type_ii_ext_descriptor {
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
 	__u8 bFormatType;
-	__u16 wMaxBitRate;
-	__u16 wSamplesPerFrame;
+	__le16 wMaxBitRate;
+	__le16 wSamplesPerFrame;
 	__u8 bHeaderLength;
 	__u8 bSideBandProtocol;
 } __attribute__((packed));
-- 
GitLab


From 42370b821168e6730ec4c7d988aeadc1260c7b4d Mon Sep 17 00:00:00 2001
From: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Date: Sun, 25 Jun 2017 16:23:46 +0300
Subject: [PATCH 1794/1958] usb: gadget: f_uac1: endianness fixes.

As per USB spec, multiple-bytes fields are stored
in little-endian order. Use CPU<->LE helpers for
such fields.

Signed-off-by: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_uac1.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uac1.c b/drivers/usb/gadget/function/f_uac1.c
index 8656f84e17d95..29efbedc91f9b 100644
--- a/drivers/usb/gadget/function/f_uac1.c
+++ b/drivers/usb/gadget/function/f_uac1.c
@@ -92,9 +92,9 @@ static struct uac_input_terminal_descriptor usb_out_it_desc = {
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype =	UAC_INPUT_TERMINAL,
 	.bTerminalID =		USB_OUT_IT_ID,
-	.wTerminalType =	UAC_TERMINAL_STREAMING,
+	.wTerminalType =	cpu_to_le16(UAC_TERMINAL_STREAMING),
 	.bAssocTerminal =	0,
-	.wChannelConfig =	0x3,
+	.wChannelConfig =	cpu_to_le16(0x3),
 };
 
 #define IO_OUT_OT_ID	2
@@ -103,7 +103,7 @@ static struct uac1_output_terminal_descriptor io_out_ot_desc = {
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype	= UAC_OUTPUT_TERMINAL,
 	.bTerminalID		= IO_OUT_OT_ID,
-	.wTerminalType		= UAC_OUTPUT_TERMINAL_SPEAKER,
+	.wTerminalType		= cpu_to_le16(UAC_OUTPUT_TERMINAL_SPEAKER),
 	.bAssocTerminal		= 0,
 	.bSourceID		= USB_OUT_IT_ID,
 };
@@ -114,9 +114,9 @@ static struct uac_input_terminal_descriptor io_in_it_desc = {
 	.bDescriptorType	= USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype	= UAC_INPUT_TERMINAL,
 	.bTerminalID		= IO_IN_IT_ID,
-	.wTerminalType		= UAC_INPUT_TERMINAL_MICROPHONE,
+	.wTerminalType		= cpu_to_le16(UAC_INPUT_TERMINAL_MICROPHONE),
 	.bAssocTerminal		= 0,
-	.wChannelConfig		= 0x3,
+	.wChannelConfig		= cpu_to_le16(0x3),
 };
 
 #define USB_IN_OT_ID	4
@@ -125,7 +125,7 @@ static struct uac1_output_terminal_descriptor usb_in_ot_desc = {
 	.bDescriptorType =	USB_DT_CS_INTERFACE,
 	.bDescriptorSubtype =	UAC_OUTPUT_TERMINAL,
 	.bTerminalID =		USB_IN_OT_ID,
-	.wTerminalType =	UAC_TERMINAL_STREAMING,
+	.wTerminalType =	cpu_to_le16(UAC_TERMINAL_STREAMING),
 	.bAssocTerminal =	0,
 	.bSourceID =		IO_IN_IT_ID,
 };
@@ -174,7 +174,7 @@ static struct uac1_as_header_descriptor as_out_header_desc = {
 	.bDescriptorSubtype =	UAC_AS_GENERAL,
 	.bTerminalLink =	USB_OUT_IT_ID,
 	.bDelay =		1,
-	.wFormatTag =		UAC_FORMAT_TYPE_I_PCM,
+	.wFormatTag =		cpu_to_le16(UAC_FORMAT_TYPE_I_PCM),
 };
 
 static struct uac1_as_header_descriptor as_in_header_desc = {
@@ -183,7 +183,7 @@ static struct uac1_as_header_descriptor as_in_header_desc = {
 	.bDescriptorSubtype =	UAC_AS_GENERAL,
 	.bTerminalLink =	USB_IN_OT_ID,
 	.bDelay =		1,
-	.wFormatTag =		UAC_FORMAT_TYPE_I_PCM,
+	.wFormatTag =		cpu_to_le16(UAC_FORMAT_TYPE_I_PCM),
 };
 
 DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(1);
@@ -606,8 +606,8 @@ static int f_audio_bind(struct usb_configuration *c, struct usb_function *f)
 	if (status)
 		goto fail;
 
-	audio->out_ep_maxpsize = as_out_ep_desc.wMaxPacketSize;
-	audio->in_ep_maxpsize = as_in_ep_desc.wMaxPacketSize;
+	audio->out_ep_maxpsize = le16_to_cpu(as_out_ep_desc.wMaxPacketSize);
+	audio->in_ep_maxpsize = le16_to_cpu(as_in_ep_desc.wMaxPacketSize);
 	audio->params.c_chmask = audio_opts->c_chmask;
 	audio->params.c_srate = audio_opts->c_srate;
 	audio->params.c_ssize = audio_opts->c_ssize;
-- 
GitLab


From 14e1d56cbea6c02d29da945741a35c7e90a86e17 Mon Sep 17 00:00:00 2001
From: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Date: Sun, 25 Jun 2017 16:23:47 +0300
Subject: [PATCH 1795/1958] usb: gadget: f_uac2: endianness fixes.

As per USB spec, multiple-bytes fields are stored
in little-endian order. Use CPU<->LE helpers for
such fields.

Signed-off-by: Ruslan Bilovol <ruslan.bilovol@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_uac2.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 9082ce261e70a..f05c3f3e6103c 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -168,7 +168,7 @@ static struct uac2_input_terminal_descriptor usb_out_it_desc = {
 	.bAssocTerminal = 0,
 	.bCSourceID = USB_OUT_CLK_ID,
 	.iChannelNames = 0,
-	.bmControls = (CONTROL_RDWR << COPY_CTRL),
+	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
 
 /* Input Terminal for I/O-In */
@@ -182,7 +182,7 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = {
 	.bAssocTerminal = 0,
 	.bCSourceID = USB_IN_CLK_ID,
 	.iChannelNames = 0,
-	.bmControls = (CONTROL_RDWR << COPY_CTRL),
+	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
 
 /* Ouput Terminal for USB_IN */
@@ -196,7 +196,7 @@ static struct uac2_output_terminal_descriptor usb_in_ot_desc = {
 	.bAssocTerminal = 0,
 	.bSourceID = IO_IN_IT_ID,
 	.bCSourceID = USB_IN_CLK_ID,
-	.bmControls = (CONTROL_RDWR << COPY_CTRL),
+	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
 
 /* Ouput Terminal for I/O-Out */
@@ -210,7 +210,7 @@ static struct uac2_output_terminal_descriptor io_out_ot_desc = {
 	.bAssocTerminal = 0,
 	.bSourceID = USB_OUT_IT_ID,
 	.bCSourceID = USB_OUT_CLK_ID,
-	.bmControls = (CONTROL_RDWR << COPY_CTRL),
+	.bmControls = cpu_to_le16(CONTROL_RDWR << COPY_CTRL),
 };
 
 static struct uac2_ac_header_descriptor ac_hdr_desc = {
@@ -220,9 +220,10 @@ static struct uac2_ac_header_descriptor ac_hdr_desc = {
 	.bDescriptorSubtype = UAC_MS_HEADER,
 	.bcdADC = cpu_to_le16(0x200),
 	.bCategory = UAC2_FUNCTION_IO_BOX,
-	.wTotalLength = sizeof in_clk_src_desc + sizeof out_clk_src_desc
-			 + sizeof usb_out_it_desc + sizeof io_in_it_desc
-			+ sizeof usb_in_ot_desc + sizeof io_out_ot_desc,
+	.wTotalLength = cpu_to_le16(sizeof in_clk_src_desc
+			+ sizeof out_clk_src_desc + sizeof usb_out_it_desc
+			+ sizeof io_in_it_desc + sizeof usb_in_ot_desc
+			+ sizeof io_out_ot_desc),
 	.bmControls = 0,
 };
 
@@ -569,10 +570,12 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 		return ret;
 	}
 
-	agdev->in_ep_maxpsize = max(fs_epin_desc.wMaxPacketSize,
-					hs_epin_desc.wMaxPacketSize);
-	agdev->out_ep_maxpsize = max(fs_epout_desc.wMaxPacketSize,
-					hs_epout_desc.wMaxPacketSize);
+	agdev->in_ep_maxpsize = max_t(u16,
+				le16_to_cpu(fs_epin_desc.wMaxPacketSize),
+				le16_to_cpu(hs_epin_desc.wMaxPacketSize));
+	agdev->out_ep_maxpsize = max_t(u16,
+				le16_to_cpu(fs_epout_desc.wMaxPacketSize),
+				le16_to_cpu(hs_epout_desc.wMaxPacketSize));
 
 	hs_epout_desc.bEndpointAddress = fs_epout_desc.bEndpointAddress;
 	hs_epin_desc.bEndpointAddress = fs_epin_desc.bEndpointAddress;
-- 
GitLab


From 566e1ce22e04426fa52328b2adcdf1df49acd98e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 15 Jul 2017 11:32:08 -0400
Subject: [PATCH 1796/1958] staging: lustre: ko2iblnd: check
 copy_from_iter/copy_to_iter return code

We now get a helpful warning for code that calls copy_{from,to}_iter
without checking the return value, introduced by commit aa28de275a24
("iov_iter/hardening: move object size checks to inlined part").

drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c: In function 'kiblnd_send':
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c:1643:2: error: ignoring return value of 'copy_from_iter', declared with attribute warn_unused_result [-Werror=unused-result]
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c: In function 'kiblnd_recv':
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c:1744:3: error: ignoring return value of 'copy_to_iter', declared with attribute warn_unused_result [-Werror=unused-result]

In case we get short copies here, we may get incorrect behavior.
I've added failure handling for both rx and tx now, returning
-EFAULT as expected.

Cc: stable@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c    | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 85b242ec5f9b2..8fc191d999270 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1640,8 +1640,13 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 	ibmsg = tx->tx_msg;
 	ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
 
-	copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, IBLND_MSG_SIZE,
-		       &from);
+	rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob,
+			    &from);
+	if (rc != payload_nob) {
+		kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
+		return -EFAULT;
+	}
+
 	nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
 	kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
 
@@ -1741,8 +1746,14 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 			break;
 		}
 
-		copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload,
-			     IBLND_MSG_SIZE, to);
+		rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen,
+				  to);
+		if (rc != rlen) {
+			rc = -EFAULT;
+			break;
+		}
+
+		rc = 0;
 		lnet_finalize(ni, lntmsg, 0);
 		break;
 
-- 
GitLab


From 76fc0cfcc5b0f520062ca6d7225b224d4a8aa828 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 18 Jul 2017 15:32:44 +1000
Subject: [PATCH 1797/1958] powerpc/64s: Fix hypercall entry clobbering r12
 input

A previous optimisation incorrectly assumed the PAPR hcall does
not use r12, and clobbers it upon entry. In fact it is used as
an input. This can result in KVM guests crashing (observed with
PR KVM).

Instead of using r12 to save r13, tihs patch saves r13 in ctr.
This is more costly, but not as slow as using the SPRG.

Fixes: acd7d8cef0153 ("powerpc/64s: Optimize hypercall/syscall entry")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/exceptions-64s.S | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c18a5fbb4bbf..124091d306ff5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -824,7 +824,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  * r3 volatile parameter and return value for status
  * r4-r10 volatile input and output value
  * r11 volatile hypercall number and output value
- * r12 volatile
+ * r12 volatile input and output value
  * r13-r31 nonvolatile
  * LR nonvolatile
  * CTR volatile
@@ -834,25 +834,26 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  * Other registers nonvolatile
  *
  * The intersection of volatile registers that don't contain possible
- * inputs is: r12, cr0, xer, ctr. We may use these as scratch regs
- * upon entry without saving.
+ * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
+ * without saving, though xer is not a good idea to use, as hardware may
+ * interpret some bits so it may be costly to change them.
  */
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 	/*
 	 * There is a little bit of juggling to get syscall and hcall
-	 * working well. Save r10 in ctr to be restored in case it is a
-	 * hcall.
+	 * working well. Save r13 in ctr to avoid using SPRG scratch
+	 * register.
 	 *
 	 * Userspace syscalls have already saved the PPR, hcalls must save
 	 * it before setting HMT_MEDIUM.
 	 */
 #define SYSCALL_KVMTEST							\
-	mr	r12,r13;						\
+	mtctr	r13;							\
 	GET_PACA(r13);							\
-	mtctr	r10;							\
+	std	r10,PACA_EXGEN+EX_R10(r13);				\
 	KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
 	HMT_MEDIUM;							\
-	mr	r9,r12;							\
+	mfctr	r9;
 
 #else
 #define SYSCALL_KVMTEST							\
@@ -935,8 +936,8 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
 	 * This is a hcall, so register convention is as above, with these
 	 * differences:
 	 * r13 = PACA
-	 * r12 = orig r13
-	 * ctr = orig r10
+	 * ctr = orig r13
+	 * orig r10 saved in PACA
 	 */
 TRAMP_KVM_BEGIN(do_kvm_0xc00)
 	 /*
@@ -944,14 +945,13 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
 	  * HMT_MEDIUM. That allows the KVM code to save that value into the
 	  * guest state (it is the guest's PPR value).
 	  */
-	OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR)
+	OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
 	HMT_MEDIUM
-	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR)
+	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
 	mfctr	r10
-	SET_SCRATCH0(r12)
+	SET_SCRATCH0(r10)
 	std	r9,PACA_EXGEN+EX_R9(r13)
 	mfcr	r9
-	std	r10,PACA_EXGEN+EX_R10(r13)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
 #endif
 
-- 
GitLab


From 740c433ec35187b45abe08bb6c45a321a791be8e Mon Sep 17 00:00:00 2001
From: Teddy Wang <teddy.wang@siliconmotion.com>
Date: Fri, 30 Jun 2017 21:57:43 +0100
Subject: [PATCH 1798/1958] staging: sm750fb: avoid conflicting vesafb

If vesafb is enabled in the config then /dev/fb0 is created by vesa
and this sm750 driver gets fb1, fb2. But we need to be fb0 and fb1 to
effectively work with xorg.
So if it has been alloted fb1, then try to remove the other fb0.

In the previous send, why #ifdef is used was asked.
https://lkml.org/lkml/2017/6/25/57

Answered at: https://lkml.org/lkml/2017/6/25/69
Also pasting here for reference.

'Did a quick research into "why".
The patch d8801e4df91e ("x86/PCI: Set IORESOURCE_ROM_SHADOW only for the
default VGA device") has started setting IORESOURCE_ROM_SHADOW in flags
for a default VGA device and that is being done only for x86.
And so, we will need that #ifdef to check IORESOURCE_ROM_SHADOW as that
needs to be checked only for a x86 and not for other arch.'

Cc: <stable@vger.kernel.org> # v4.4+
Signed-off-by: Teddy Wang <teddy.wang@siliconmotion.com>
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/sm750fb/sm750.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c
index 3aa4128703d53..67207b0554cd4 100644
--- a/drivers/staging/sm750fb/sm750.c
+++ b/drivers/staging/sm750fb/sm750.c
@@ -1053,6 +1053,26 @@ static int sm750fb_frambuffer_alloc(struct sm750_dev *sm750_dev, int fbidx)
 	return err;
 }
 
+static int lynxfb_kick_out_firmware_fb(struct pci_dev *pdev)
+{
+	struct apertures_struct *ap;
+	bool primary = false;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return -ENOMEM;
+
+	ap->ranges[0].base = pci_resource_start(pdev, 0);
+	ap->ranges[0].size = pci_resource_len(pdev, 0);
+#ifdef CONFIG_X86
+	primary = pdev->resource[PCI_ROM_RESOURCE].flags &
+					IORESOURCE_ROM_SHADOW;
+#endif
+	remove_conflicting_framebuffers(ap, "sm750_fb1", primary);
+	kfree(ap);
+	return 0;
+}
+
 static int lynxfb_pci_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *ent)
 {
@@ -1061,6 +1081,10 @@ static int lynxfb_pci_probe(struct pci_dev *pdev,
 	int fbidx;
 	int err;
 
+	err = lynxfb_kick_out_firmware_fb(pdev);
+	if (err)
+		return err;
+
 	/* enable device */
 	err = pcim_enable_device(pdev);
 	if (err)
-- 
GitLab


From 216ce2962089b6218708bf87d96e6b1fbadba1d7 Mon Sep 17 00:00:00 2001
From: Okash Khawaja <okash.khawaja@gmail.com>
Date: Sun, 16 Jul 2017 10:28:21 +0100
Subject: [PATCH 1799/1958] staging: speakup: safely close tty

Speakup opens tty using tty_open_by_driver. When closing, it calls
tty_ldisc_release but doesn't close and remove the tty itself. As a
result, that tty cannot be opened from user space. This patch calls
tty_release_struct which ensures that tty is safely removed and freed
up. It also calls tty_ldisc_release, so speakup doesn't need to call it.

Signed-off-by: Okash Khawaja <okash.khawaja@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/speakup/spk_ttyio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c
index ed8e96b06ead2..924034a3de4b7 100644
--- a/drivers/staging/speakup/spk_ttyio.c
+++ b/drivers/staging/speakup/spk_ttyio.c
@@ -300,7 +300,7 @@ void spk_ttyio_release(void)
 
 	tty_ldisc_flush(speakup_tty);
 	tty_unlock(speakup_tty);
-	tty_ldisc_release(speakup_tty);
+	tty_release_struct(speakup_tty, speakup_tty->index);
 }
 EXPORT_SYMBOL_GPL(spk_ttyio_release);
 
-- 
GitLab


From 9f8dced2085c33a633d2f0a1abbf13ff5a7ed8c9 Mon Sep 17 00:00:00 2001
From: Okash Khawaja <okash.khawaja@gmail.com>
Date: Sun, 16 Jul 2017 17:18:25 +0100
Subject: [PATCH 1800/1958] staging: speakup: add functions to register and
 unregister ldisc

This patch adds the above two functions and makes them available to
main.c where they will be called during init and exit functions of
main speakup module. Following patch will make use of them.

Signed-off-by: Okash Khawaja <okash.khawaja@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/speakup/spk_priv.h  |  2 ++
 drivers/staging/speakup/spk_ttyio.c | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/staging/speakup/spk_priv.h b/drivers/staging/speakup/spk_priv.h
index 87b6a0a4c54dc..046040ac074c6 100644
--- a/drivers/staging/speakup/spk_priv.h
+++ b/drivers/staging/speakup/spk_priv.h
@@ -48,6 +48,8 @@ void spk_stop_serial_interrupt(void);
 int spk_wait_for_xmitr(struct spk_synth *in_synth);
 void spk_serial_release(void);
 void spk_ttyio_release(void);
+void spk_ttyio_register_ldisc(void);
+void spk_ttyio_unregister_ldisc(void);
 
 void synth_buffer_skip_nonlatin1(void);
 u16 synth_buffer_getc(void);
diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c
index 924034a3de4b7..9b02345f66cc2 100644
--- a/drivers/staging/speakup/spk_ttyio.c
+++ b/drivers/staging/speakup/spk_ttyio.c
@@ -200,6 +200,18 @@ static int spk_ttyio_initialise_ldisc(struct spk_synth *synth)
 	return ret;
 }
 
+void spk_ttyio_register_ldisc(void)
+{
+	if (tty_register_ldisc(N_SPEAKUP, &spk_ttyio_ldisc_ops))
+		pr_warn("speakup: Error registering line discipline. Most synths won't work.\n");
+}
+
+void spk_ttyio_unregister_ldisc(void)
+{
+	if (tty_unregister_ldisc(N_SPEAKUP))
+		pr_warn("speakup: Couldn't unregister ldisc\n");
+}
+
 static int spk_ttyio_out(struct spk_synth *in_synth, const char ch)
 {
 	if (in_synth->alive && speakup_tty && speakup_tty->ops->write) {
-- 
GitLab


From e23a9b439ce9bd9cbd3d92e4c15db086d3e11410 Mon Sep 17 00:00:00 2001
From: Okash Khawaja <okash.khawaja@gmail.com>
Date: Sun, 16 Jul 2017 17:18:26 +0100
Subject: [PATCH 1801/1958] staging: speakup: safely register and unregister
 ldisc

This patch makes use of functions added in the previous patch. It
registers ldisc during init of main speakup module and unregisters it
during exit. It also removes the code to register ldisc every time a
synth module is loaded. This way we only register the ldisc once when
main speakup module is loaded. Since main speakup module is required by
all synth modules, it is only unloaded when all synths have been
unloaded. Therefore we unregister the ldisc once, when all speakup
related references to the ldisc have returned. In unlikely scenario of
something outside speakup using the ldisc, the ldisc refcount check in
tty_unregister_ldisc will ensure that it is not unregistered while in
use.

The function to register ldisc doesn't cause speakup init function to
fail. That is different from current behaviour where failure to register
ldisc results in failure to load the specific synth module. This is
because speakup module is also required by those synths which don't use
tty and ldisc. We don't want to prevent those modules from loading when
ldisc fails to register. The synth modules will correctly fail when
trying to set N_SPEAKUP to tty, if ldisc registrationi had failed.

Signed-off-by: Okash Khawaja <okash.khawaja@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/speakup/main.c      | 2 ++
 drivers/staging/speakup/spk_ttyio.c | 8 ++------
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c
index 82e5de248947f..67956e24779ce 100644
--- a/drivers/staging/speakup/main.c
+++ b/drivers/staging/speakup/main.c
@@ -2314,6 +2314,7 @@ static void __exit speakup_exit(void)
 	mutex_lock(&spk_mutex);
 	synth_release();
 	mutex_unlock(&spk_mutex);
+	spk_ttyio_unregister_ldisc();
 
 	speakup_kobj_exit();
 
@@ -2376,6 +2377,7 @@ static int __init speakup_init(void)
 	if (err)
 		goto error_kobjects;
 
+	spk_ttyio_register_ldisc();
 	synth_init(synth_name);
 	speakup_register_devsynth();
 	/*
diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c
index 9b02345f66cc2..fe340b07c482f 100644
--- a/drivers/staging/speakup/spk_ttyio.c
+++ b/drivers/staging/speakup/spk_ttyio.c
@@ -154,12 +154,6 @@ static int spk_ttyio_initialise_ldisc(struct spk_synth *synth)
 	struct ktermios tmp_termios;
 	dev_t dev;
 
-	ret = tty_register_ldisc(N_SPEAKUP, &spk_ttyio_ldisc_ops);
-	if (ret) {
-		pr_err("Error registering line discipline.\n");
-		return ret;
-	}
-
 	ret = get_dev_to_use(synth, &dev);
 	if (ret)
 		return ret;
@@ -196,6 +190,8 @@ static int spk_ttyio_initialise_ldisc(struct spk_synth *synth)
 	tty_unlock(tty);
 
 	ret = tty_set_ldisc(tty, N_SPEAKUP);
+	if (ret)
+		pr_err("speakup: Failed to set N_SPEAKUP on tty\n");
 
 	return ret;
 }
-- 
GitLab


From 5a1d4c5dd4eb2f1f8a9b30e61762f3b3b564df70 Mon Sep 17 00:00:00 2001
From: Michael Gugino <michael.gugino.2@gmail.com>
Date: Mon, 17 Jul 2017 13:29:09 -0400
Subject: [PATCH 1802/1958] staging: rtl8188eu: add TL-WN722N v2 support

Add support for USB Device TP-Link TL-WN722N v2.
VendorID: 0x2357, ProductID: 0x010c

Signed-off-by: Michael Gugino <michael.gugino.2@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
index 963235fd72921..d283341cfe435 100644
--- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c
+++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
@@ -43,6 +43,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = {
 	{USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */
 	{USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */
 	{USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */
+	{USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */
 	{USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */
 	{}	/* Terminating entry */
 };
-- 
GitLab


From ddac9c5bfa75029ed4b58e81d8d3c49694598ff6 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Tue, 18 Jul 2017 08:39:21 +0100
Subject: [PATCH 1803/1958] irqchip/mips-gic: Remove population of irq domain
 names

Since commit d59f6617eef0f ("genirq: Allow fwnode to carry name
information only") the irqdomain core sets the names of irq domains.
When the name is allocated the new IRQ_DOMAIN_NAME_ALLOCATED flag is
set. Replacing the allocated name with a constant one is not a good
idea, since calling the new irq_domain_update_bus_token() API, added to
the MIPS GIC driver by commit 96f0d93a487e1 ("irqchip/MSI: Use
irq_domain_update_bus_token instead of an open coded access") will
attempt to kfree the pointer, and result in a kernel OOPS.

Fix this by removing the names, now that they are set by the irqdomain
core. This effectively reverts commit 21c57fd13589 ("irqchip/mips-gic:
Populate irq_domain names").

Fixes: d59f6617eef0f ("genirq: Allow fwnode to carry name information only")
Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-mips@linux-mips.org
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1500363561-32213-1-git-send-email-matt.redfearn@imgtec.com
---
 drivers/irqchip/irq-mips-gic.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 832ebf4062f70..6ab1d3afec02b 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -950,7 +950,6 @@ static void __init __gic_init(unsigned long gic_base_addr,
 					       &gic_irq_domain_ops, NULL);
 	if (!gic_irq_domain)
 		panic("Failed to add GIC IRQ domain");
-	gic_irq_domain->name = "mips-gic-irq";
 
 	gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
 						  IRQ_DOMAIN_FLAG_IPI_PER_CPU,
@@ -959,7 +958,6 @@ static void __init __gic_init(unsigned long gic_base_addr,
 	if (!gic_ipi_domain)
 		panic("Failed to add GIC IPI domain");
 
-	gic_ipi_domain->name = "mips-gic-ipi";
 	irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI);
 
 	if (node &&
-- 
GitLab


From 4c07f9046e48c3126978911239e20a5622ad0ad6 Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan@virtuozzo.com>
Date: Mon, 17 Jul 2017 12:49:07 +0300
Subject: [PATCH 1804/1958] x86/mm, KVM: Fix warning when !CONFIG_PREEMPT_COUNT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A recent commit:

  d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant")

introduced a VM_WARN_ON(!in_atomic()) which generates false positives
on every VM entry on !CONFIG_PREEMPT_COUNT kernels.

Replace it with a test for preemptible(), which appears to match the
original intent and works across different CONFIG_PREEMPT* variations.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Nadav Amit <namit@vmware.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kvm@vger.kernel.org
Cc: linux-mm@kvack.org
Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index ecfcb6643c9b4..265c907d7d4c9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -293,7 +293,7 @@ static inline unsigned long __get_current_cr3_fast(void)
 	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
 
 	/* For now, be very restrictive about when this can be called. */
-	VM_WARN_ON(in_nmi() || !in_atomic());
+	VM_WARN_ON(in_nmi() || preemptible());
 
 	VM_BUG_ON(cr3 != __read_cr3());
 	return cr3;
-- 
GitLab


From b134bd90286dc9f2952c35a91ab405474ca9374c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 Jul 2017 16:51:21 +1000
Subject: [PATCH 1805/1958] powerpc/mm/radix: Refactor radix__mark_rodata_ro()

Move the core logic into a helper, so we can use it for changing permissions
other than _PAGE_WRITE.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-radix.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 8c13e4282308a..336e52ec652cf 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -112,10 +112,9 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void radix__mark_rodata_ro(void)
+void radix__change_memory_range(unsigned long start, unsigned long end,
+				unsigned long clear)
 {
-	unsigned long start = (unsigned long)_stext;
-	unsigned long end = (unsigned long)__init_begin;
 	unsigned long idx;
 	pgd_t *pgdp;
 	pud_t *pudp;
@@ -125,7 +124,8 @@ void radix__mark_rodata_ro(void)
 	start = ALIGN_DOWN(start, PAGE_SIZE);
 	end = PAGE_ALIGN(end); // aligns up
 
-	pr_devel("marking ro start %lx, end %lx\n", start, end);
+	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
+		 start, end, clear);
 
 	for (idx = start; idx < end; idx += PAGE_SIZE) {
 		pgdp = pgd_offset_k(idx);
@@ -147,11 +147,21 @@ void radix__mark_rodata_ro(void)
 		if (!ptep)
 			continue;
 update_the_pte:
-		radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0);
+		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
 	}
 
 	radix__flush_tlb_kernel_range(start, end);
 }
+
+void radix__mark_rodata_ro(void)
+{
+	unsigned long start, end;
+
+	start = (unsigned long)_stext;
+	end = (unsigned long)__init_begin;
+
+	radix__change_memory_range(start, end, _PAGE_WRITE);
+}
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
 static inline void __meminit print_mapping(unsigned long start,
-- 
GitLab


From fa7f9189e017213bad63b93a76de5c715cd62a96 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 Jul 2017 16:51:22 +1000
Subject: [PATCH 1806/1958] powerpc/mm/hash: Refactor hash__mark_rodata_ro()

Move the core logic into a helper, so we can use it for changing other
permissions.

We also change the logic to align start down, and end up. This means
calling the function with a range will expand that range to be at
least 1 mmu_linear_psize page in size. We need that so we can use it
on __init_begin ...  __init_end which is not a full page in size.

This should always work for _stext/__init_begin, because we align
__init_begin to _stext + 16M in the linker script.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-hash64.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 188b4107584d2..73019c52141ff 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -425,33 +425,39 @@ int hash__has_transparent_hugepage(void)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void hash__mark_rodata_ro(void)
+static bool hash__change_memory_range(unsigned long start, unsigned long end,
+				      unsigned long newpp)
 {
-	unsigned long start = (unsigned long)_stext;
-	unsigned long end = (unsigned long)__init_begin;
 	unsigned long idx;
 	unsigned int step, shift;
-	unsigned long newpp = PP_RXXX;
 
 	shift = mmu_psize_defs[mmu_linear_psize].shift;
 	step = 1 << shift;
 
-	start = ((start + step - 1) >> shift) << shift;
-	end = (end >> shift) << shift;
+	start = ALIGN_DOWN(start, step);
+	end = ALIGN(end, step); // aligns up
 
-	pr_devel("marking ro start %lx, end %lx, step %x\n",
-			start, end, step);
+	if (start >= end)
+		return false;
 
-	if (start == end) {
-		pr_warn("could not set rodata ro, relocate the start"
-			" of the kernel to a 0x%x boundary\n", step);
-		return;
-	}
+	pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
+		 start, end, newpp, step);
 
 	for (idx = start; idx < end; idx += step)
 		/* Not sure if we can do much with the return value */
 		mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
 							mmu_kernel_ssize);
 
+	return true;
+}
+
+void hash__mark_rodata_ro(void)
+{
+	unsigned long start, end;
+
+	start = (unsigned long)_stext;
+	end = (unsigned long)__init_begin;
+
+	WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
 }
 #endif
-- 
GitLab


From fe26569eb9197d845d73abe7dd20f603d79eb031 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?=
 <ernesto.mnd.fernandez@gmail.com>
Date: Wed, 12 Jul 2017 06:54:19 -0300
Subject: [PATCH 1807/1958] ext2: preserve i_mode if ext2_set_acl() fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When changing a file's acl mask, ext2_set_acl() will first set the group
bits of i_mode to the value of the mask, and only then set the actual
extended attribute representing the new acl.

If the second part fails (due to lack of space, for example) and the file
had no acl attribute to begin with, the system will from now on assume
that the mask permission bits are actual group permission bits, potentially
granting access to the wrong users.

Prevent this by only changing the inode mode after the acl has been set.

[JK: Rebased on top of "ext2: Don't clear SGID when inheriting ACLs"]
Signed-off-by: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/acl.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 069c0dceda01e..51f0aea70cb43 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -218,15 +218,22 @@ int
 ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	int error;
+	int update_mode = 0;
+	umode_t mode = inode->i_mode;
 
 	if (type == ACL_TYPE_ACCESS && acl) {
-		error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+		error = posix_acl_update_mode(inode, &mode, &acl);
 		if (error)
 			return error;
+		update_mode = 1;
+	}
+	error = __ext2_set_acl(inode, acl, type);
+	if (!error && update_mode) {
+		inode->i_mode = mode;
 		inode->i_ctime = current_time(inode);
 		mark_inode_dirty(inode);
 	}
-	return __ext2_set_acl(inode, acl, type);
+	return error;
 }
 
 /*
-- 
GitLab


From fcea8aed91f53b51f9b943dc01f12d8aa666c720 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?=
 <ernesto.mnd.fernandez@gmail.com>
Date: Mon, 17 Jul 2017 18:42:41 +0200
Subject: [PATCH 1808/1958] reiserfs: preserve i_mode if __reiserfs_set_acl()
 fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When changing a file's acl mask, reiserfs_set_acl() will first set the
group bits of i_mode to the value of the mask, and only then set the
actual extended attribute representing the new acl.

If the second part fails (due to lack of space, for example) and the
file had no acl attribute to begin with, the system will from now on
assume that the mask permission bits are actual group permission bits,
potentially granting access to the wrong users.

Prevent this by only changing the inode mode after the acl has been set.

Signed-off-by: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/xattr_acl.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index d92a1dc6ee70f..54415f0e3d186 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -23,7 +23,8 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	struct reiserfs_transaction_handle th;
 	size_t jcreate_blocks;
 	int size = acl ? posix_acl_xattr_size(acl->a_count) : 0;
-
+	int update_mode = 0;
+	umode_t mode = inode->i_mode;
 
 	/*
 	 * Pessimism: We can't assume that anything from the xattr root up
@@ -38,12 +39,14 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	reiserfs_write_unlock(inode->i_sb);
 	if (error == 0) {
 		if (type == ACL_TYPE_ACCESS && acl) {
-			error = posix_acl_update_mode(inode, &inode->i_mode,
-						      &acl);
+			error = posix_acl_update_mode(inode, &mode, &acl);
 			if (error)
 				goto unlock;
+			update_mode = 1;
 		}
 		error = __reiserfs_set_acl(&th, inode, type, acl);
+		if (!error && update_mode)
+			inode->i_mode = mode;
 unlock:
 		reiserfs_write_lock(inode->i_sb);
 		error2 = journal_end(&th);
-- 
GitLab


From 029d9252b116fa52a95150819e62af1f6e420fe5 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 Jul 2017 16:51:23 +1000
Subject: [PATCH 1809/1958] powerpc/mm: Mark __init memory no-execute when
 STRICT_KERNEL_RWX=y

Currently even with STRICT_KERNEL_RWX we leave the __init text marked
executable after init, which is bad.

Add a hook to mark it NX (no-execute) before we free it, and implement
it for radix and hash.

Note that we use __init_end as the end address, not _einittext,
because overlaps_kernel_text() uses __init_end, because there are
additional executable sections other than .init.text between
__init_begin and __init_end.

Tested on radix and hash with:

  0:mon> p $__init_begin
  *** 400 exception occurred

Fixes: 1e0fc9d1eb2b ("powerpc/Kconfig: Enable STRICT_KERNEL_RWX for some configs")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash.h    |  1 +
 arch/powerpc/include/asm/book3s/64/pgtable.h |  1 +
 arch/powerpc/include/asm/book3s/64/radix.h   |  1 +
 arch/powerpc/include/asm/pgtable.h           |  7 +++++++
 arch/powerpc/mm/mem.c                        |  1 +
 arch/powerpc/mm/pgtable-hash64.c             | 12 ++++++++++++
 arch/powerpc/mm/pgtable-radix.c              |  8 ++++++++
 arch/powerpc/mm/pgtable_64.c                 |  8 ++++++++
 8 files changed, 39 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 0ce513f2926f1..36fc7bfe9e114 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -91,6 +91,7 @@ static inline int hash__pgd_bad(pgd_t pgd)
 }
 #ifdef CONFIG_STRICT_KERNEL_RWX
 extern void hash__mark_rodata_ro(void);
+extern void hash__mark_initmem_nx(void);
 #endif
 
 extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c0737c86a3627..d1da415e283cd 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1192,5 +1192,6 @@ static inline const int pud_pfn(pud_t pud)
 	BUILD_BUG();
 	return 0;
 }
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 487709ff6875b..544440b5aff39 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -118,6 +118,7 @@
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 extern void radix__mark_rodata_ro(void);
+extern void radix__mark_initmem_nx(void);
 #endif
 
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index dd01212935aca..afae9a336136a 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -80,6 +80,13 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_initmem_nx(void);
+#else
+static inline void mark_initmem_nx(void) { }
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8541f18694a4a..46b4e67d23723 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -402,6 +402,7 @@ void __init mem_init(void)
 void free_initmem(void)
 {
 	ppc_md.progress = ppc_printk_progress;
+	mark_initmem_nx();
 	free_initmem_default(POISON_FREE_INITMEM);
 }
 
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 73019c52141ff..443a2c66a3046 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -460,4 +460,16 @@ void hash__mark_rodata_ro(void)
 
 	WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
 }
+
+void hash__mark_initmem_nx(void)
+{
+	unsigned long start, end, pp;
+
+	start = (unsigned long)__init_begin;
+	end = (unsigned long)__init_end;
+
+	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL));
+
+	WARN_ON(!hash__change_memory_range(start, end, pp));
+}
 #endif
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 336e52ec652cf..5cc50d47ce3f9 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -162,6 +162,14 @@ void radix__mark_rodata_ro(void)
 
 	radix__change_memory_range(start, end, _PAGE_WRITE);
 }
+
+void radix__mark_initmem_nx(void)
+{
+	unsigned long start = (unsigned long)__init_begin;
+	unsigned long end = (unsigned long)__init_end;
+
+	radix__change_memory_range(start, end, _PAGE_EXEC);
+}
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
 static inline void __meminit print_mapping(unsigned long start,
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 5c0b795d656c4..0736e94c7615a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -505,4 +505,12 @@ void mark_rodata_ro(void)
 	else
 		hash__mark_rodata_ro();
 }
+
+void mark_initmem_nx(void)
+{
+	if (radix_enabled())
+		radix__mark_initmem_nx();
+	else
+		hash__mark_initmem_nx();
+}
 #endif
-- 
GitLab


From 34363c057b368114d8b93376ec6b65ef5f36a55b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 18 Jul 2017 12:27:56 +0200
Subject: [PATCH 1810/1958] isofs: Fix off-by-one in 'session' mount option
 parsing

According to ECMA-130 standard maximum valid track number is 99. Since
'session' mount option starts indexing at 0 (and we add 1 to the passed
number), we should refuse value 99. Also the condition in
isofs_get_last_session() unnecessarily repeats the check - remove it.

Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/isofs/inode.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 8cf898a59730d..217a5e7815da6 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -410,7 +410,11 @@ static int parse_options(char *options, struct iso9660_options *popt)
 			if (match_int(&args[0], &option))
 				return 0;
 			n = option;
-			if (n > 99)
+			/*
+			 * Track numbers are supposed to be in range 1-99, the
+			 * mount option starts indexing at 0.
+			 */
+			if (n >= 99)
 				return 0;
 			popt->session = n + 1;
 			break;
@@ -543,7 +547,7 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
 
 	vol_desc_start=0;
 	ms_info.addr_format=CDROM_LBA;
-	if(session >= 0 && session <= 99) {
+	if (session > 0) {
 		struct cdrom_tocentry Te;
 		Te.cdte_track=session;
 		Te.cdte_format=CDROM_LBA;
-- 
GitLab


From 5c10b048c37cc08a21fa97a0575eccf4948948ca Mon Sep 17 00:00:00 2001
From: Harry Pan <harry.pan@intel.com>
Date: Mon, 17 Jul 2017 18:37:49 +0800
Subject: [PATCH 1811/1958] perf/x86/intel: Enable C-state residency events for
 Apollo Lake

Goldmont microarchitecture supports C1/C3/C6, PC2/PC3/PC6/PC10 state
residency counters, the patch enables them for Apollo Lake platform.

The MSR information is based on Intel Software Developers' Manual,
Vol. 4, Order No. 335592, Table 2-6 and 2-12.

Signed-off-by: Harry Pan <harry.pan@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: bp@suse.de
Cc: davidcc@google.com
Cc: gs0622@gmail.com
Cc: lukasz.odzioba@intel.com
Cc: piotr.luc@intel.com
Cc: srinivas.pandruvada@linux.intel.com
Link: http://lkml.kernel.org/r/20170717103749.24337-1-harry.pan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/cstate.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 238ae3248ba55..4cf100ff2a374 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,16 +40,16 @@
  * Model specific counters:
  *	MSR_CORE_C1_RES: CORE C1 Residency Counter
  *			 perf code: 0x00
- *			 Available model: SLM,AMT
+ *			 Available model: SLM,AMT,GLM
  *			 Scope: Core (each processor core has a MSR)
  *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *			       perf code: 0x01
- *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM
  *			       Scope: Core
  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *						SKL,KNL
+ *						SKL,KNL,GLM
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
@@ -57,16 +57,17 @@
  *			       Scope: Core
  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *			       perf code: 0x00
- *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL
+ *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *			       perf code: 0x01
  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
+ *						GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *						SKL,KNL
+ *						SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *			       perf code: 0x03
@@ -82,7 +83,7 @@
  *			       Scope: Package (physical package)
  *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *			       perf code: 0x06
- *			       Available model: HSW ULT only
+ *			       Available model: HSW ULT, GLM
  *			       Scope: Package (physical package)
  *
  */
@@ -504,6 +505,17 @@ static const struct cstate_model knl_cstates __initconst = {
 };
 
 
+static const struct cstate_model glm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
 
 #define X86_CSTATES_MODEL(model, states)				\
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
@@ -546,6 +558,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
-- 
GitLab


From dd0b06b551f6b14da19582e301814746d838965a Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Wed, 12 Jul 2017 09:44:23 -0400
Subject: [PATCH 1812/1958] perf/x86/intel: Add Goldmont Plus CPU PMU support

Add perf core PMU support for Intel Goldmont Plus CPU cores:

 - The init code is based on Goldmont.
 - There is a new cache event list, based on the Goldmont cache event
   list.
 - All four general-purpose performance counters support PEBS.
 - The first general-purpose performance counter is for reduced skid
   PEBS mechanism. Using :ppp to indicate the event which want to do
   reduced skid PEBS.
 - Goldmont Plus has 4-wide pipeline for Topdown

Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Link: http://lkml.kernel.org/r/20170712134423.17766-1-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c | 158 +++++++++++++++++++++++++++++++++++
 arch/x86/events/intel/ds.c   |   6 ++
 arch/x86/events/perf_event.h |   2 +
 3 files changed, 166 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa62437d1aa14..ede97710c2f4f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1708,6 +1708,120 @@ static __initconst const u64 glm_hw_cache_extra_regs
 	},
 };
 
+static __initconst const u64 glp_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
+			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
+			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+};
+
+static __initconst const u64 glp_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -3016,6 +3130,9 @@ static int hsw_hw_config(struct perf_event *event)
 	return 0;
 }
 
+static struct event_constraint counter0_constraint =
+			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
+
 static struct event_constraint counter2_constraint =
 			EVENT_CONSTRAINT(0, 0x4, 0);
 
@@ -3037,6 +3154,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	return c;
 }
 
+static struct event_constraint *
+glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	/* :ppp means to do reduced skid PEBS which is PMC0 only. */
+	if (event->attr.precise_ip == 3)
+		return &counter0_constraint;
+
+	c = intel_get_event_constraints(cpuc, idx, event);
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
@@ -3838,6 +3970,32 @@ __init int intel_pmu_init(void)
 		pr_cont("Goldmont events, ");
 		break;
 
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_glm_extra_regs;
+		/*
+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+		 * for precise cycles.
+		 */
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.get_event_constraints = glp_get_event_constraints;
+		x86_pmu.cpu_events = glm_events_attrs;
+		/* Goldmont Plus has 4-wide pipeline */
+		event_attr_td_total_slots_scale_glm.event_str = "4";
+		pr_cont("Goldmont plus events, ");
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d1..2ca4d2d7b5fc5 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -651,6 +651,12 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glp_pebs_event_constraints[] = {
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53728eea1bede..476aec3a4cabd 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -879,6 +879,8 @@ extern struct event_constraint intel_slm_pebs_event_constraints[];
 
 extern struct event_constraint intel_glm_pebs_event_constraints[];
 
+extern struct event_constraint intel_glp_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
-- 
GitLab


From dc853e26f73e903e0c87e24f2695b5dcf33b3bc1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 14 Jul 2017 18:35:51 +0200
Subject: [PATCH 1813/1958] perf/x86/intel: Fix debug_store reset field for
 freq events

There's a bug in PEBs event enabling code, that prevents PEBS
freq events to work properly after non freq PEBS event was run.

freq events - perf_event_attr::freq set
              -F <freq> option of perf record

PEBS events - perf_event_attr::precise_ip > 0
              default for perf record

Like in following example with CPU 0 busy, we expect ~10000 samples
for following perf tool run:

  # perf record -F 10000 -C 0 sleep 1
  [ perf record: Woken up 2 times to write data ]
  [ perf record: Captured and wrote 0.640 MB perf.data (10031 samples) ]

Everything's fine, but once we run non freq PEBS event like:

  # perf record -c 10000 -C 0 sleep 1
  [ perf record: Woken up 4 times to write data ]
  [ perf record: Captured and wrote 1.053 MB perf.data (20061 samples) ]

the freq events start to fail like this:

  # perf record -F 10000 -C 0 sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.185 MB perf.data (40 samples) ]

The issue is in non freq PEBs event initialization of debug_store reset
field, which value is used to auto-reload the counter value after PEBS
event drain. This value is not being used for PEBS freq events, but once
we run non freq event it stays in debug_store data and screws the
sample_freq counting for PEBS freq events.

Setting the reset field to 0 for freq events.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170714163551.19459-1-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/ds.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2ca4d2d7b5fc5..6dc8a59e1bfb8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -895,6 +895,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+	} else {
+		ds->pebs_event_reset[hwc->idx] = 0;
 	}
 }
 
-- 
GitLab


From 80584efcc6bc99871433cc5b8f639cc0154962ea Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 18 Jul 2017 21:26:40 +0900
Subject: [PATCH 1814/1958] usb: gadget: udc: renesas_usb3: fix free size in
 renesas_usb3_dma_free_prd()

The commit 2d4aa21a73ba ("usb: gadget: udc: renesas_usb3: add support
for dedicated DMAC") has a bug in the renesas_usb3_dma_free_prd().
The size of dma_free_coherent() should be the same with dma_alloc_coherent()
Otherwise, this code causes a WARNING by mm/page_alloc.c when
renesas_usb3_dma_free_prd() is called. So, this patch fixes it.

Fixes: 2d4aa21a73ba ("usb: gadget: udc: renesas_usb3: add support for dedicated DMAC")
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/renesas_usb3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index d8278322d5ac1..923ad5acc4824 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -1369,7 +1369,7 @@ static int renesas_usb3_dma_free_prd(struct renesas_usb3 *usb3,
 
 	usb3_for_each_dma(usb3, dma, i) {
 		if (dma->prd) {
-			dma_free_coherent(dev, USB3_DMA_MAX_XFER_SIZE,
+			dma_free_coherent(dev, USB3_DMA_PRD_SIZE,
 					  dma->prd, dma->prd_dma);
 			dma->prd = NULL;
 		}
-- 
GitLab


From ebe6b2b81464a2c083542feb1a1a6c78a2268151 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 18 Jul 2017 21:26:41 +0900
Subject: [PATCH 1815/1958] usb: gadget: udc: renesas_usb3: fix zlp transfer by
 the dmac

The dedicated dmac can transfer a zero-length-packet (zlp) if some bits
of the USB_COM_CON register. However, the commit 2d4aa21a73ba ("usb:
gadget: udc: renesas_usb3: add support for dedicated DMAC") didn't set
the bits to 1. So, this patch fixes it.

Fixes: 2d4aa21a73b ("usb: gadget: udc: renesas_usb3: add support for dedicated DMAC)
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/renesas_usb3.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 923ad5acc4824..1cc5f0deefba2 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -89,6 +89,9 @@
 
 /* USB_COM_CON */
 #define USB_COM_CON_CONF		BIT(24)
+#define USB_COM_CON_PN_WDATAIF_NL	BIT(23)
+#define USB_COM_CON_PN_RDATAIF_NL	BIT(22)
+#define USB_COM_CON_PN_LSTTR_PP		BIT(21)
 #define USB_COM_CON_SPD_MODE		BIT(17)
 #define USB_COM_CON_EP0_EN		BIT(16)
 #define USB_COM_CON_DEV_ADDR_SHIFT	8
@@ -686,6 +689,9 @@ static void renesas_usb3_init_controller(struct renesas_usb3 *usb3)
 {
 	usb3_init_axi_bridge(usb3);
 	usb3_init_epc_registers(usb3);
+	usb3_set_bit(usb3, USB_COM_CON_PN_WDATAIF_NL |
+		     USB_COM_CON_PN_RDATAIF_NL | USB_COM_CON_PN_LSTTR_PP,
+		     USB3_USB_COM_CON);
 	usb3_write(usb3, USB_OTG_IDMON, USB3_USB_OTG_INT_STA);
 	usb3_write(usb3, USB_OTG_IDMON, USB3_USB_OTG_INT_ENA);
 
-- 
GitLab


From 781001ff9678e5df048cca3d4290c1638a28cafa Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 18 Jul 2017 21:26:42 +0900
Subject: [PATCH 1816/1958] usb: gadget: udc: renesas_usb3: protect
 usb3_ep->started in usb3_start_pipen()

This patch fixes an issue that unexpected behavior happens when
both the interrupt handler and renesas_usb3_ep_enable() are called.
In this case, since usb3_start_pipen() checked the usb3_ep->started,
but the flags was not protected. So, this patch protects the flag
by usb3->lock. Since renesas_usb3_ep_enable() for EP0 will be not called,
this patch doesn't take care of usb3_start_pipe0().

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/renesas_usb3.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 1cc5f0deefba2..62dc9c7798e78 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -1415,12 +1415,12 @@ static void usb3_start_pipen(struct renesas_usb3_ep *usb3_ep,
 	int ret = -EAGAIN;
 	u32 enable_bits = 0;
 
+	spin_lock_irqsave(&usb3->lock, flags);
 	if (usb3_ep->halt || usb3_ep->started)
-		return;
+		goto out;
 	if (usb3_req != usb3_req_first)
-		return;
+		goto out;
 
-	spin_lock_irqsave(&usb3->lock, flags);
 	if (usb3_pn_change(usb3, usb3_ep->num) < 0)
 		goto out;
 
-- 
GitLab


From 84969465ddc4f8aeb3b993123b571aa01c5f2683 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 21 Jun 2017 15:02:47 +0200
Subject: [PATCH 1817/1958] hfsplus: Don't clear SGID when inheriting ACLs

When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit
set, DIR1 is expected to have SGID bit set (and owning group equal to
the owning group of 'DIR0'). However when 'DIR0' also has some default
ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on
'DIR1' to get cleared if user is not member of the owning group.

Fix the problem by creating __hfsplus_set_posix_acl() function that does
not call posix_acl_update_mode() and use it when inheriting ACLs. That
prevents SGID bit clearing and the mode has been properly set by
posix_acl_create() anyway.

Fixes: 073931017b49d9458aa351605b43a7e34598caef
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/hfsplus/posix_acl.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
index 9b92058a12409..6bb5d7c42888f 100644
--- a/fs/hfsplus/posix_acl.c
+++ b/fs/hfsplus/posix_acl.c
@@ -51,8 +51,8 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
 	return acl;
 }
 
-int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
-		int type)
+static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
+				   int type)
 {
 	int err;
 	char *xattr_name;
@@ -64,12 +64,6 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
-		if (acl) {
-			err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
-			if (err)
-				return err;
-		}
-		err = 0;
 		break;
 
 	case ACL_TYPE_DEFAULT:
@@ -105,6 +99,18 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
 	return err;
 }
 
+int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	int err;
+
+	if (type == ACL_TYPE_ACCESS && acl) {
+		err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+		if (err)
+			return err;
+	}
+	return __hfsplus_set_posix_acl(inode, acl, type);
+}
+
 int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
 {
 	int err = 0;
@@ -122,15 +128,15 @@ int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
 		return err;
 
 	if (default_acl) {
-		err = hfsplus_set_posix_acl(inode, default_acl,
-					    ACL_TYPE_DEFAULT);
+		err = __hfsplus_set_posix_acl(inode, default_acl,
+					      ACL_TYPE_DEFAULT);
 		posix_acl_release(default_acl);
 	}
 
 	if (acl) {
 		if (!err)
-			err = hfsplus_set_posix_acl(inode, acl,
-						    ACL_TYPE_ACCESS);
+			err = __hfsplus_set_posix_acl(inode, acl,
+						      ACL_TYPE_ACCESS);
 		posix_acl_release(acl);
 	}
 	return err;
-- 
GitLab


From e5dadc65f9e0177eb649bcd9d333f1ebf871223e Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Mon, 17 Jul 2017 18:34:42 +0800
Subject: [PATCH 1818/1958] ppp: Fix false xmit recursion detect with two ppp
 devices

The global percpu variable ppp_xmit_recursion is used to detect the ppp
xmit recursion to avoid the deadlock, which is caused by one CPU tries to
lock the xmit lock twice. But it would report false recursion when one CPU
wants to send the skb from two different PPP devices, like one L2TP on the
PPPoE. It is a normal case actually.

Now use one percpu member of struct ppp instead of the gloable variable to
detect the xmit recursion of one ppp device.

Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit")
Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Liu Jianying <jianying.liu@ikuai8.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_generic.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 13028833bee39..bd4303944e440 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -120,6 +120,7 @@ struct ppp {
 	int		n_channels;	/* how many channels are attached 54 */
 	spinlock_t	rlock;		/* lock for receive side 58 */
 	spinlock_t	wlock;		/* lock for transmit side 5c */
+	int		*xmit_recursion __percpu; /* xmit recursion detect */
 	int		mru;		/* max receive unit 60 */
 	unsigned int	flags;		/* control bits 64 */
 	unsigned int	xstate;		/* transmit state bits 68 */
@@ -1025,6 +1026,7 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
 	struct ppp *ppp = netdev_priv(dev);
 	int indx;
 	int err;
+	int cpu;
 
 	ppp->dev = dev;
 	ppp->ppp_net = src_net;
@@ -1039,6 +1041,15 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
 	INIT_LIST_HEAD(&ppp->channels);
 	spin_lock_init(&ppp->rlock);
 	spin_lock_init(&ppp->wlock);
+
+	ppp->xmit_recursion = alloc_percpu(int);
+	if (!ppp->xmit_recursion) {
+		err = -ENOMEM;
+		goto err1;
+	}
+	for_each_possible_cpu(cpu)
+		(*per_cpu_ptr(ppp->xmit_recursion, cpu)) = 0;
+
 #ifdef CONFIG_PPP_MULTILINK
 	ppp->minseq = -1;
 	skb_queue_head_init(&ppp->mrq);
@@ -1050,11 +1061,15 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
 
 	err = ppp_unit_register(ppp, conf->unit, conf->ifname_is_set);
 	if (err < 0)
-		return err;
+		goto err2;
 
 	conf->file->private_data = &ppp->file;
 
 	return 0;
+err2:
+	free_percpu(ppp->xmit_recursion);
+err1:
+	return err;
 }
 
 static const struct nla_policy ppp_nl_policy[IFLA_PPP_MAX + 1] = {
@@ -1400,18 +1415,16 @@ static void __ppp_xmit_process(struct ppp *ppp)
 	ppp_xmit_unlock(ppp);
 }
 
-static DEFINE_PER_CPU(int, ppp_xmit_recursion);
-
 static void ppp_xmit_process(struct ppp *ppp)
 {
 	local_bh_disable();
 
-	if (unlikely(__this_cpu_read(ppp_xmit_recursion)))
+	if (unlikely(*this_cpu_ptr(ppp->xmit_recursion)))
 		goto err;
 
-	__this_cpu_inc(ppp_xmit_recursion);
+	(*this_cpu_ptr(ppp->xmit_recursion))++;
 	__ppp_xmit_process(ppp);
-	__this_cpu_dec(ppp_xmit_recursion);
+	(*this_cpu_ptr(ppp->xmit_recursion))--;
 
 	local_bh_enable();
 
@@ -1905,7 +1918,7 @@ static void __ppp_channel_push(struct channel *pch)
 		read_lock(&pch->upl);
 		ppp = pch->ppp;
 		if (ppp)
-			__ppp_xmit_process(ppp);
+			ppp_xmit_process(ppp);
 		read_unlock(&pch->upl);
 	}
 }
@@ -1914,9 +1927,7 @@ static void ppp_channel_push(struct channel *pch)
 {
 	local_bh_disable();
 
-	__this_cpu_inc(ppp_xmit_recursion);
 	__ppp_channel_push(pch);
-	__this_cpu_dec(ppp_xmit_recursion);
 
 	local_bh_enable();
 }
@@ -3057,6 +3068,7 @@ static void ppp_destroy_interface(struct ppp *ppp)
 #endif /* CONFIG_PPP_FILTER */
 
 	kfree_skb(ppp->xmit_pending);
+	free_percpu(ppp->xmit_recursion);
 
 	free_netdev(ppp->dev);
 }
-- 
GitLab


From 18bcf2907df935981266532e1e0d052aff2e6fae Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Mon, 17 Jul 2017 12:35:58 +0200
Subject: [PATCH 1819/1958] ipv4: ipv6: initialize treq->txhash in
 cookie_v[46]_check()

KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(),
which originated from the TCP request socket created in
cookie_v6_check():

 ==================================================================
 BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0
 CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies.  Check SNMP counters.
 Call Trace:
  <IRQ>
  __dump_stack lib/dump_stack.c:16
  dump_stack+0x172/0x1c0 lib/dump_stack.c:52
  kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
  __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
  skb_set_hash_from_sk ./include/net/sock.h:2011
  tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983
  tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493
  tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284
  tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309
  call_timer_fn+0x240/0x520 kernel/time/timer.c:1268
  expire_timers kernel/time/timer.c:1307
  __run_timers+0xc13/0xf10 kernel/time/timer.c:1601
  run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614
  __do_softirq+0x485/0x942 kernel/softirq.c:284
  invoke_softirq kernel/softirq.c:364
  irq_exit+0x1fa/0x230 kernel/softirq.c:405
  exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657
  smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966
  apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489
 RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36
 RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77
 RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440
 RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10
 RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 0000000000000005
 RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770
 RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004
 R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810
 R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4
  </IRQ>
  poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293
  SYSC_select+0x4b4/0x4e0 fs/select.c:653
  SyS_select+0x76/0xa0 fs/select.c:634
  entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204
 RIP: 0033:0x4597e7
 RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017
 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
 RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059
 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20
 R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003
 chained origin:
  save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
  kmsan_save_stack mm/kmsan/kmsan.c:317
  kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547
  __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259
  tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472
  tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103
  tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212
  cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245
  tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
  tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
  tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
  ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
  NF_HOOK ./include/linux/netfilter.h:257
  ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
  dst_input ./include/net/dst.h:492
  ip6_rcv_finish net/ipv6/ip6_input.c:69
  NF_HOOK ./include/linux/netfilter.h:257
  ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
  __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
  __netif_receive_skb net/core/dev.c:4246
  process_backlog+0x667/0xba0 net/core/dev.c:4866
  napi_poll net/core/dev.c:5268
  net_rx_action+0xc95/0x1590 net/core/dev.c:5333
  __do_softirq+0x485/0x942 kernel/softirq.c:284
 origin:
  save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
  kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
  kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337
  kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766
  reqsk_alloc ./include/net/request_sock.h:87
  inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200
  cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169
  tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
  tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
  tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
  ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
  NF_HOOK ./include/linux/netfilter.h:257
  ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
  dst_input ./include/net/dst.h:492
  ip6_rcv_finish net/ipv6/ip6_input.c:69
  NF_HOOK ./include/linux/netfilter.h:257
  ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
  __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
  __netif_receive_skb net/core/dev.c:4246
  process_backlog+0x667/0xba0 net/core/dev.c:4866
  napi_poll net/core/dev.c:5268
  net_rx_action+0xc95/0x1590 net/core/dev.c:5333
  __do_softirq+0x485/0x942 kernel/softirq.c:284
 ==================================================================

Similar error is reported for cookie_v4_check().

Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets")
Signed-off-by: Alexander Potapenko <glider@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 1 +
 net/ipv6/syncookies.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0905cf04c2a4e..03ad8778c3953 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -335,6 +335,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	treq->ts_off		= 0;
+	treq->txhash		= net_tx_rndhash();
 	req->mss		= mss;
 	ireq->ir_num		= ntohs(th->dest);
 	ireq->ir_rmt_port	= th->source;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7b75b06207308..4e7817abc0b93 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -216,6 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
 	treq->ts_off = 0;
+	treq->txhash = net_tx_rndhash();
 
 	/*
 	 * We need to lookup the dst_entry to get the correct window size.
-- 
GitLab


From 799f917233f6ed242ee9416bf80b14819f0c97f3 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Mon, 17 Jul 2017 19:42:41 +0200
Subject: [PATCH 1820/1958] atm: zatm: Fix an error handling path in
 'zatm_init_one()'

If 'dma_set_mask_and_coherent()' fails, we must undo the previous
'pci_request_regions()' call.
Adjust corresponding 'goto' to jump at the right place of the error
handling path.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/zatm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 292dec18ffb87..07bdd51b3b9aa 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1613,7 +1613,7 @@ static int zatm_init_one(struct pci_dev *pci_dev,
 
 	ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
 	if (ret < 0)
-		goto out_disable;
+		goto out_release;
 
 	zatm_dev->pci_dev = pci_dev;
 	dev->dev_data = zatm_dev;
-- 
GitLab


From 0ddf3fb2c43d2e65aee5de158ed694ea11ef229d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 18 Jul 2017 11:57:55 +0200
Subject: [PATCH 1821/1958] udp: preserve skb->dst if required for IP options
 processing

Eric noticed that in udp_recvmsg() we still need to access
skb->dst while processing the IP options.
Since commit 0a463c78d25b ("udp: avoid a cache miss on dequeue")
skb->dst is no more available at recvmsg() time and bad things
will happen if we enter the relevant code path.

This commit address the issue, avoid clearing skb->dst if
any IP options are present into the relevant skb.
Since the IP CB is contained in the first skb cacheline, we can
test it to decide to leverage the consume_stateless_skb()
optimization, without measurable additional cost in the faster
path.

v1 -> v2: updated commit message tags

Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 25294d43e1470..b057653ceca92 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1388,6 +1388,11 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		unlock_sock_fast(sk, slow);
 	}
 
+	/* we cleared the head states previously only if the skb lacks any IP
+	 * options, see __udp_queue_rcv_skb().
+	 */
+	if (unlikely(IPCB(skb)->opt.optlen > 0))
+		skb_release_head_state(skb);
 	consume_stateless_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_consume_udp);
@@ -1779,8 +1784,12 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id_once(sk, skb);
 	}
 
-	/* clear all pending head states while they are hot in the cache */
-	skb_release_head_state(skb);
+	/* At recvmsg() time we need skb->dst to process IP options-related
+	 * cmsg, elsewhere can we clear all pending head states while they are
+	 * hot in the cache
+	 */
+	if (likely(IPCB(skb)->opt.optlen == 0))
+		skb_release_head_state(skb);
 
 	rc = __udp_enqueue_schedule_skb(sk, skb);
 	if (rc < 0) {
-- 
GitLab


From 512f9e790897e84d5b802436768508ee4628fc16 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sat, 15 Jul 2017 22:07:40 +0200
Subject: [PATCH 1822/1958] irqchip/gic/realview: Drop unnecessary static

Drop static on a local variable, when the variable is initialized before
any possible use.  Thus, the static has no benefit.

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@bad exists@
position p;
identifier x;
type T;
@@
static T x@p;
...
x = <+...x...+>

@@
identifier x;
expression e;
type T;
position p != bad.p;
@@
-static
 T x@p;
 ... when != x
     when strict
?x = e;
// </smpl>


Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: kernel-janitors@vger.kernel.org
Cc: keescook@chromium.org
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1500149266-32357-6-git-send-email-Julia.Lawall@lip6.fr
---
 drivers/irqchip/irq-gic-realview.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c
index 54c296401525c..18d58d2b4ffeb 100644
--- a/drivers/irqchip/irq-gic-realview.c
+++ b/drivers/irqchip/irq-gic-realview.c
@@ -43,7 +43,7 @@ static const struct of_device_id syscon_pldset_of_match[] = {
 static int __init
 realview_gic_of_init(struct device_node *node, struct device_node *parent)
 {
-	static struct regmap *map;
+	struct regmap *map;
 	struct device_node *np;
 	const struct of_device_id *gic_id;
 	u32 pld1_ctrl;
-- 
GitLab


From 82faeffa7e130e2ae43aa681a34c02d56dabd177 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sat, 15 Jul 2017 22:07:41 +0200
Subject: [PATCH 1823/1958] irqchip/mips-cpu: Drop unnecessary static

Drop static on a local variable, when the variable is initialized before
any possible use.  Thus, the static has no benefit.

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@bad exists@
position p;
identifier x;
type T;
@@
static T x@p;
...
x = <+...x...+>

@@
identifier x;
expression e;
type T;
position p != bad.p;
@@
-static
 T x@p;
 ... when != x
     when strict
?x = e;
// </smpl>


Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: kernel-janitors@vger.kernel.org
Cc: keescook@chromium.org
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1500149266-32357-7-git-send-email-Julia.Lawall@lip6.fr
---
 drivers/irqchip/irq-mips-cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c
index 0a8ed1c05518a..14461cbfab2fa 100644
--- a/drivers/irqchip/irq-mips-cpu.c
+++ b/drivers/irqchip/irq-mips-cpu.c
@@ -154,7 +154,7 @@ asmlinkage void __weak plat_irq_dispatch(void)
 static int mips_cpu_intc_map(struct irq_domain *d, unsigned int irq,
 			     irq_hw_number_t hw)
 {
-	static struct irq_chip *chip;
+	struct irq_chip *chip;
 
 	if (hw < 2 && cpu_has_mipsmt) {
 		/* Software interrupts are used for MT/CMT IPI */
-- 
GitLab


From acc80c39929b9f2ff8b45fcfe103385a3e45c1a7 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sat, 15 Jul 2017 22:07:45 +0200
Subject: [PATCH 1824/1958] irqchip/digicolor: Drop unnecessary static

Drop static on a local variable, when the variable is initialized before
any possible use.  Thus, the static has no benefit.

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@bad exists@
position p;
identifier x;
type T;
@@
static T x@p;
...
x = <+...x...+>

@@
identifier x;
expression e;
type T;
position p != bad.p;
@@
-static
 T x@p;
 ... when != x
     when strict
?x = e;
// </smpl>


Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Baruch Siach <baruch@tkos.co.il>
Cc: keescook@chromium.org
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: kernel-janitors@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1500149266-32357-11-git-send-email-Julia.Lawall@lip6.fr
---
 drivers/irqchip/irq-digicolor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-digicolor.c b/drivers/irqchip/irq-digicolor.c
index dad85e74c37c0..3aae015469a51 100644
--- a/drivers/irqchip/irq-digicolor.c
+++ b/drivers/irqchip/irq-digicolor.c
@@ -71,7 +71,7 @@ static void __init digicolor_set_gc(void __iomem *reg_base, unsigned irq_base,
 static int __init digicolor_of_init(struct device_node *node,
 				struct device_node *parent)
 {
-	static void __iomem *reg_base;
+	void __iomem *reg_base;
 	unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
 	struct regmap *ucregs;
 	int ret;
-- 
GitLab


From 073dd5ad34b1d3aaadaa7e5e8cbe576d9545f163 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 18 Jul 2017 22:38:56 +0300
Subject: [PATCH 1825/1958] netfilter: fix netfilter_net_init() return

We accidentally return an uninitialized variable.

Fixes: cf56c2f892a8 ("netfilter: remove old pre-netns era hook api")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 368610dbc3c00..974cf2a3795aa 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -344,7 +344,7 @@ EXPORT_SYMBOL(nf_nat_decode_session_hook);
 
 static int __net_init netfilter_net_init(struct net *net)
 {
-	int i, h, ret;
+	int i, h;
 
 	for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
@@ -362,7 +362,7 @@ static int __net_init netfilter_net_init(struct net *net)
 	}
 #endif
 
-	return ret;
+	return 0;
 }
 
 static void __net_exit netfilter_net_exit(struct net *net)
-- 
GitLab


From bbb3be170ac2891526ad07b18af7db226879a8e7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 18 Jul 2017 17:49:14 -0700
Subject: [PATCH 1826/1958] device-dax: fix sysfs duplicate warnings

Fix warnings of the form...

     WARNING: CPU: 10 PID: 4983 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x62/0x80
     sysfs: cannot create duplicate filename '/class/dax/dax12.0'
     Call Trace:
      dump_stack+0x63/0x86
      __warn+0xcb/0xf0
      warn_slowpath_fmt+0x5a/0x80
      ? kernfs_path_from_node+0x4f/0x60
      sysfs_warn_dup+0x62/0x80
      sysfs_do_create_link_sd.isra.2+0x97/0xb0
      sysfs_create_link+0x25/0x40
      device_add+0x266/0x630
      devm_create_dax_dev+0x2cf/0x340 [dax]
      dax_pmem_probe+0x1f5/0x26e [dax_pmem]
      nvdimm_bus_probe+0x71/0x120

...by reusing the namespace id for the device-dax instance name.

Now that we have decided that there will never by more than one
device-dax instance per libnvdimm-namespace parent device [1], we can
directly reuse the namepace ids. There are some possible follow-on
cleanups, but those are saved for a later patch to simplify the -stable
backport.

[1]: https://lists.01.org/pipermail/linux-nvdimm/2016-December/008266.html

Fixes: 98a29c39dc68 ("libnvdimm, namespace: allow creation of multiple pmem...")
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: <stable@vger.kernel.org>
Reported-by: Dariusz Dokupil <dariusz.dokupil@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/device-dax.h |  2 +-
 drivers/dax/device.c     | 24 ++++++++++++++++--------
 drivers/dax/pmem.c       | 12 +++++++-----
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/dax/device-dax.h b/drivers/dax/device-dax.h
index fdcd9769ffded..688b051750bd7 100644
--- a/drivers/dax/device-dax.h
+++ b/drivers/dax/device-dax.h
@@ -21,5 +21,5 @@ struct dax_region *alloc_dax_region(struct device *parent,
 		int region_id, struct resource *res, unsigned int align,
 		void *addr, unsigned long flags);
 struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
-		struct resource *res, int count);
+		int id, struct resource *res, int count);
 #endif /* __DEVICE_DAX_H__ */
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 44d72e5e64cc2..e9f3b3e4bbf45 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -529,7 +529,8 @@ static void dev_dax_release(struct device *dev)
 	struct dax_region *dax_region = dev_dax->region;
 	struct dax_device *dax_dev = dev_dax->dax_dev;
 
-	ida_simple_remove(&dax_region->ida, dev_dax->id);
+	if (dev_dax->id >= 0)
+		ida_simple_remove(&dax_region->ida, dev_dax->id);
 	dax_region_put(dax_region);
 	put_dax(dax_dev);
 	kfree(dev_dax);
@@ -559,7 +560,7 @@ static void unregister_dev_dax(void *dev)
 }
 
 struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
-		struct resource *res, int count)
+		int id, struct resource *res, int count)
 {
 	struct device *parent = dax_region->dev;
 	struct dax_device *dax_dev;
@@ -590,10 +591,16 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
 	if (i < count)
 		goto err_id;
 
-	dev_dax->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
-	if (dev_dax->id < 0) {
-		rc = dev_dax->id;
-		goto err_id;
+	if (id < 0) {
+		id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
+		dev_dax->id = id;
+		if (id < 0) {
+			rc = id;
+			goto err_id;
+		}
+	} else {
+		/* region provider owns @id lifetime */
+		dev_dax->id = -1;
 	}
 
 	/*
@@ -625,7 +632,7 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
 	dev->parent = parent;
 	dev->groups = dax_attribute_groups;
 	dev->release = dev_dax_release;
-	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
+	dev_set_name(dev, "dax%d.%d", dax_region->id, id);
 
 	rc = cdev_device_add(cdev, dev);
 	if (rc) {
@@ -641,7 +648,8 @@ struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
 	return dev_dax;
 
  err_dax:
-	ida_simple_remove(&dax_region->ida, dev_dax->id);
+	if (dev_dax->id >= 0)
+		ida_simple_remove(&dax_region->ida, dev_dax->id);
  err_id:
 	kfree(dev_dax);
 
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 9f2a0b4fd8012..8d8c852ba8f20 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -58,13 +58,12 @@ static void dax_pmem_percpu_kill(void *data)
 
 static int dax_pmem_probe(struct device *dev)
 {
-	int rc;
 	void *addr;
 	struct resource res;
+	int rc, id, region_id;
 	struct nd_pfn_sb *pfn_sb;
 	struct dev_dax *dev_dax;
 	struct dax_pmem *dax_pmem;
-	struct nd_region *nd_region;
 	struct nd_namespace_io *nsio;
 	struct dax_region *dax_region;
 	struct nd_namespace_common *ndns;
@@ -123,14 +122,17 @@ static int dax_pmem_probe(struct device *dev)
 	/* adjust the dax_region resource to the start of data */
 	res.start += le64_to_cpu(pfn_sb->dataoff);
 
-	nd_region = to_nd_region(dev->parent);
-	dax_region = alloc_dax_region(dev, nd_region->id, &res,
+	rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
+	if (rc != 2)
+		return -EINVAL;
+
+	dax_region = alloc_dax_region(dev, region_id, &res,
 			le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
 	if (!dax_region)
 		return -ENOMEM;
 
 	/* TODO: support for subdividing a dax region... */
-	dev_dax = devm_create_dev_dax(dax_region, &res, 1);
+	dev_dax = devm_create_dev_dax(dax_region, id, &res, 1);
 
 	/* child dev_dax instances now own the lifetime of the dax_region */
 	dax_region_put(dax_region);
-- 
GitLab


From 59a0879a0e17b2e43ecdc5e3299da85b8410d7ce Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 19 Jul 2017 16:16:54 +0900
Subject: [PATCH 1827/1958] usb: renesas_usbhs: fix usbhsc_resume() for
 !USBHSF_RUNTIME_PWCTRL

This patch fixes an issue that some registers may be not initialized
after resume if the USBHSF_RUNTIME_PWCTRL is not set. Otherwise,
if a cable is not connected, the driver will not enable INTENB0.VBSE
after resume. And then, the driver cannot detect the VBUS.

Fixes: ca8a282a5373 ("usb: gadget: renesas_usbhs: add suspend/resume support")
Cc: <stable@vger.kernel.org> # v3.2+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/renesas_usbhs/common.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
index 623c513003930..f0ce304c5aaf5 100644
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -752,8 +752,10 @@ static int usbhsc_resume(struct device *dev)
 	struct usbhs_priv *priv = dev_get_drvdata(dev);
 	struct platform_device *pdev = usbhs_priv_to_pdev(priv);
 
-	if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL))
+	if (!usbhsc_flags_has(priv, USBHSF_RUNTIME_PWCTRL)) {
 		usbhsc_power_ctrl(priv, 1);
+		usbhs_mod_autonomy_mode(priv);
+	}
 
 	usbhs_platform_call(priv, phy_reset, pdev);
 
-- 
GitLab


From b8b9c974afee685789fcbb191b52d1790be3608c Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 19 Jul 2017 16:16:55 +0900
Subject: [PATCH 1828/1958] usb: renesas_usbhs: gadget: disable all eps when
 the driver stops

A gadget driver will not disable eps immediately when ->disconnect()
is called. But, since this driver assumes all eps stop after
the ->disconnect(), unexpected behavior happens (especially in system
suspend).
So, this patch disables all eps in usbhsg_try_stop(). After disabling
eps by renesas_usbhs driver, since some functions will be called by
both a gadget and renesas_usbhs driver, renesas_usbhs driver should
protect uep->pipe. To protect uep->pipe easily, this patch adds a new
lock in struct usbhsg_uep.

Fixes: 2f98382dc ("usb: renesas_usbhs: Add Renesas USBHS Gadget")
Cc: <stable@vger.kernel.org> # v3.0+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/renesas_usbhs/mod_gadget.c | 31 ++++++++++++++++++++------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 5bc7a6138855e..93fba9033b00a 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -37,6 +37,7 @@ struct usbhsg_gpriv;
 struct usbhsg_uep {
 	struct usb_ep		 ep;
 	struct usbhs_pipe	*pipe;
+	spinlock_t		lock;	/* protect the pipe */
 
 	char ep_name[EP_NAME_SIZE];
 
@@ -636,10 +637,16 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
 static int usbhsg_ep_disable(struct usb_ep *ep)
 {
 	struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep);
-	struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep);
+	struct usbhs_pipe *pipe;
+	unsigned long flags;
+	int ret = 0;
 
-	if (!pipe)
-		return -EINVAL;
+	spin_lock_irqsave(&uep->lock, flags);
+	pipe = usbhsg_uep_to_pipe(uep);
+	if (!pipe) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	usbhsg_pipe_disable(uep);
 	usbhs_pipe_free(pipe);
@@ -647,6 +654,9 @@ static int usbhsg_ep_disable(struct usb_ep *ep)
 	uep->pipe->mod_private	= NULL;
 	uep->pipe		= NULL;
 
+out:
+	spin_unlock_irqrestore(&uep->lock, flags);
+
 	return 0;
 }
 
@@ -696,8 +706,11 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req)
 {
 	struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep);
 	struct usbhsg_request *ureq = usbhsg_req_to_ureq(req);
-	struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep);
+	struct usbhs_pipe *pipe;
+	unsigned long flags;
 
+	spin_lock_irqsave(&uep->lock, flags);
+	pipe = usbhsg_uep_to_pipe(uep);
 	if (pipe)
 		usbhs_pkt_pop(pipe, usbhsg_ureq_to_pkt(ureq));
 
@@ -706,6 +719,7 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req)
 	 * even if the pipe is NULL.
 	 */
 	usbhsg_queue_pop(uep, ureq, -ECONNRESET);
+	spin_unlock_irqrestore(&uep->lock, flags);
 
 	return 0;
 }
@@ -852,10 +866,10 @@ static int usbhsg_try_stop(struct usbhs_priv *priv, u32 status)
 {
 	struct usbhsg_gpriv *gpriv = usbhsg_priv_to_gpriv(priv);
 	struct usbhs_mod *mod = usbhs_mod_get_current(priv);
-	struct usbhsg_uep *dcp = usbhsg_gpriv_to_dcp(gpriv);
+	struct usbhsg_uep *uep;
 	struct device *dev = usbhs_priv_to_dev(priv);
 	unsigned long flags;
-	int ret = 0;
+	int ret = 0, i;
 
 	/********************  spin lock ********************/
 	usbhs_lock(priv, flags);
@@ -887,7 +901,9 @@ static int usbhsg_try_stop(struct usbhs_priv *priv, u32 status)
 	usbhs_sys_set_test_mode(priv, 0);
 	usbhs_sys_function_ctrl(priv, 0);
 
-	usbhsg_ep_disable(&dcp->ep);
+	/* disable all eps */
+	usbhsg_for_each_uep_with_dcp(uep, gpriv, i)
+		usbhsg_ep_disable(&uep->ep);
 
 	dev_dbg(dev, "stop gadget\n");
 
@@ -1069,6 +1085,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv)
 		ret = -ENOMEM;
 		goto usbhs_mod_gadget_probe_err_gpriv;
 	}
+	spin_lock_init(&uep->lock);
 
 	gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED);
 	dev_info(dev, "%stransceiver found\n",
-- 
GitLab


From f85c758dbee54cc3612a6e873ef7cecdb66ebee5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 Jul 2017 11:14:26 +0300
Subject: [PATCH 1829/1958] KVM: x86: masking out upper bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kvm_read_cr3() returns an unsigned long and gfn is a u64.  We intended
to mask out the bottom 5 bits but because of the type issue we mask the
top 32 bits as well.  I don't know if this is a real problem, but it
causes static checker warnings.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5b8f07889f6a5..82a63c59f77b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -597,8 +597,8 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
 		      (unsigned long *)&vcpu->arch.regs_avail))
 		return true;
 
-	gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
-	offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
+	gfn = (kvm_read_cr3(vcpu) & ~31ul) >> PAGE_SHIFT;
+	offset = (kvm_read_cr3(vcpu) & ~31ul) & (PAGE_SIZE - 1);
 	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
 				       PFERR_USER_MASK | PFERR_WRITE_MASK);
 	if (r < 0)
-- 
GitLab


From 848618857d2535176037bdc085f8d012d907071f Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Wed, 12 Jul 2017 19:14:16 -0700
Subject: [PATCH 1830/1958] tracing/ring_buffer: Try harder to allocate

ftrace can fail to allocate per-CPU ring buffer on systems with a large
number of CPUs coupled while large amounts of cache happening in the
page cache. Currently the ring buffer allocation doesn't retry in the VM
implementation even if direct-reclaim made some progress but still
wasn't able to find a free page. On retrying I see that the allocations
almost always succeed. The retry doesn't happen because __GFP_NORETRY is
used in the tracer to prevent the case where we might OOM, however if we
drop __GFP_NORETRY, we risk destabilizing the system if OOM killer is
triggered. To prevent this situation, use the __GFP_RETRY_MAYFAIL flag
introduced recently [1].

Tested the following still succeeds without destabilizing a system with
1GB memory.
echo 300000 > /sys/kernel/debug/tracing/buffer_size_kb

[1] https://marc.info/?l=linux-mm&m=149820805124906&w=2

Link: http://lkml.kernel.org/r/20170713021416.8897-1-joelaf@google.com

Cc: Tim Murray <timmurray@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4ae268e687fe1..529cc50d7243d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1136,12 +1136,12 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
 		/*
-		 * __GFP_NORETRY flag makes sure that the allocation fails
-		 * gracefully without invoking oom-killer and the system is
-		 * not destabilized.
+		 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+		 * gracefully without invoking oom-killer and the system is not
+		 * destabilized.
 		 */
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
-				    GFP_KERNEL | __GFP_NORETRY,
+				    GFP_KERNEL | __GFP_RETRY_MAYFAIL,
 				    cpu_to_node(cpu));
 		if (!bpage)
 			goto free_pages;
@@ -1149,7 +1149,7 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
 		list_add(&bpage->list, pages);
 
 		page = alloc_pages_node(cpu_to_node(cpu),
-					GFP_KERNEL | __GFP_NORETRY, 0);
+					GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
 		if (!page)
 			goto free_pages;
 		bpage->page = page_address(page);
-- 
GitLab


From 975e83cfb8dc16e7a2fdc58188c77c0c605876c2 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <Sudeep.Holla@arm.com>
Date: Fri, 14 Jul 2017 11:51:48 +0100
Subject: [PATCH 1831/1958] PM / Domains: defer dev_pm_domain_set() until
 genpd->attach_dev succeeds if present

If the genpd->attach_dev or genpd->power_on fails, genpd_dev_pm_attach
may return -EPROBE_DEFER initially. However genpd_alloc_dev_data sets
the PM domain for the device unconditionally.

When subsequent attempts are made to call genpd_dev_pm_attach, it may
return -EEXISTS checking dev->pm_domain without re-attempting to call
attach_dev or power_on.

platform_drv_probe then attempts to call drv->probe as the return value
-EEXIST != -EPROBE_DEFER, which may end up in a situation where the
device is accessed without it's power domain switched on.

Fixes: f104e1e5ef57 (PM / Domains: Re-order initialization of generic_pm_domain_data)
Cc: 4.4+ <stable@vger.kernel.org> # v4.4+
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 2ac906288a6fe..91ba82688f4c3 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1222,8 +1222,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
 
 	spin_unlock_irq(&dev->power.lock);
 
-	dev_pm_domain_set(dev, &genpd->domain);
-
 	return gpd_data;
 
  err_free:
@@ -1237,8 +1235,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
 static void genpd_free_dev_data(struct device *dev,
 				struct generic_pm_domain_data *gpd_data)
 {
-	dev_pm_domain_set(dev, NULL);
-
 	spin_lock_irq(&dev->power.lock);
 
 	dev->power.subsys_data->domain_data = NULL;
@@ -1275,6 +1271,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 	if (ret)
 		goto out;
 
+	dev_pm_domain_set(dev, &genpd->domain);
+
 	genpd->device_count++;
 	genpd->max_off_time_changed = true;
 
@@ -1336,6 +1334,8 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
 	if (genpd->detach_dev)
 		genpd->detach_dev(genpd, dev);
 
+	dev_pm_domain_set(dev, NULL);
+
 	list_del_init(&pdd->list_node);
 
 	genpd_unlock(genpd);
-- 
GitLab


From 4c4a6f790ee862ee9f0dc8b35c71f55bcf792b71 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 14 Jul 2017 13:36:11 +0200
Subject: [PATCH 1832/1958] KVM: nVMX: track NMI blocking state separately for
 each VMCS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vmx_recover_nmi_blocking is using a cached value of the guest
interruptibility info, which is stored in vmx->nmi_known_unmasked.
vmx_recover_nmi_blocking is run for both normal and nested guests,
so the cached value must be per-VMCS.

This fixes eventinj.flat in a nested non-EPT environment.  With EPT it
works, because the EPT violation handler doesn't have the
vmx->nmi_known_unmasked optimization (it is unnecessary because, unlike
vmx_recover_nmi_blocking, it can just look at the exit qualification).

Thanks to Wanpeng Li for debugging the testcase and providing an initial
patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 84e62acf2dd86..a0c472bcb2a2f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -198,7 +198,8 @@ struct loaded_vmcs {
 	struct vmcs *vmcs;
 	struct vmcs *shadow_vmcs;
 	int cpu;
-	int launched;
+	bool launched;
+	bool nmi_known_unmasked;
 	struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -5510,10 +5511,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (!is_guest_mode(vcpu)) {
-		++vcpu->stat.nmi_injections;
-		vmx->nmi_known_unmasked = false;
-	}
+	++vcpu->stat.nmi_injections;
+	vmx->loaded_vmcs->nmi_known_unmasked = false;
 
 	if (vmx->rmode.vm86_active) {
 		if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
@@ -5527,16 +5526,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
-	if (to_vmx(vcpu)->nmi_known_unmasked)
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	bool masked;
+
+	if (vmx->loaded_vmcs->nmi_known_unmasked)
 		return false;
-	return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)	& GUEST_INTR_STATE_NMI;
+	masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
+	vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+	return masked;
 }
 
 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	vmx->nmi_known_unmasked = !masked;
+	vmx->loaded_vmcs->nmi_known_unmasked = !masked;
 	if (masked)
 		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 			      GUEST_INTR_STATE_NMI);
@@ -8736,7 +8740,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 
 	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
-	if (vmx->nmi_known_unmasked)
+	if (vmx->loaded_vmcs->nmi_known_unmasked)
 		return;
 	/*
 	 * Can't use vmx->exit_intr_info since we're not sure what
@@ -8760,7 +8764,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 			      GUEST_INTR_STATE_NMI);
 	else
-		vmx->nmi_known_unmasked =
+		vmx->loaded_vmcs->nmi_known_unmasked =
 			!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
 			  & GUEST_INTR_STATE_NMI);
 }
-- 
GitLab


From b3f1dfb6e818a2352c38e3e37bb983eae98621b5 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Mon, 17 Jul 2017 12:00:34 -0700
Subject: [PATCH 1833/1958] KVM: nVMX: Disallow VM-entry in MOV-SS shadow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Immediately following MOV-to-SS/POP-to-SS, VM-entry is
disallowed. This check comes after the check for a valid VMCS. When
this check fails, the instruction pointer should fall through to the
next instruction, the ALU flags should be set to indicate VMfailValid,
and the VM-instruction error should be set to 26 ("VM entry with
events blocked by MOV SS").

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a0c472bcb2a2f..791c018a00343 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10492,6 +10492,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 {
 	struct vmcs12 *vmcs12;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
 	u32 exit_qual;
 	int ret;
 
@@ -10516,6 +10517,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 * for misconfigurations which will anyway be caught by the processor
 	 * when using the merged vmcs02.
 	 */
+	if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) {
+		nested_vmx_failValid(vcpu,
+				     VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
+		goto out;
+	}
+
 	if (vmcs12->launch_state == launch) {
 		nested_vmx_failValid(vcpu,
 			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
-- 
GitLab


From c2ce3f5d89d57301e2756ac325fe2ebc33bfec30 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:53:04 +0200
Subject: [PATCH 1834/1958] x86: add MULTIUSER dependency for KVM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KVM tries to select 'TASKSTATS', which had additional dependencies:

warning: (KVM) selects TASKSTATS which has unmet direct dependencies (NET && MULTIUSER)

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 760433b2574a5..2688c7dc53234 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,7 +22,7 @@ config KVM
 	depends on HAVE_KVM
 	depends on HIGH_RES_TIMERS
 	# for TASKSTATS/TASK_DELAY_ACCT:
-	depends on NET
+	depends on NET && MULTIUSER
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
-- 
GitLab


From b0659ae5e30074ede1dc08f2c6d64f0c11d64e0f Mon Sep 17 00:00:00 2001
From: Shu Wang <shuwang@redhat.com>
Date: Tue, 18 Jul 2017 14:37:24 +0800
Subject: [PATCH 1835/1958] audit: fix memleak in auditd_send_unicast_skb.

Found this issue by kmemleak report, auditd_send_unicast_skb
did not free skb if rcu_dereference(auditd_conn) returns null.

unreferenced object 0xffff88082568ce00 (size 256):
comm "auditd", pid 1119, jiffies 4294708499
backtrace:
[<ffffffff8176166a>] kmemleak_alloc+0x4a/0xa0
[<ffffffff8121820c>] kmem_cache_alloc_node+0xcc/0x210
[<ffffffff8161b99d>] __alloc_skb+0x5d/0x290
[<ffffffff8113c614>] audit_make_reply+0x54/0xd0
[<ffffffff8113dfa7>] audit_receive_msg+0x967/0xd70
----------------
(gdb) list *audit_receive_msg+0x967
0xffffffff8113dff7 is in audit_receive_msg (kernel/audit.c:1133).
1132    skb = audit_make_reply(0, AUDIT_REPLACE, 0,
                                0, &pvnr, sizeof(pvnr));
---------------
[<ffffffff8113e402>] audit_receive+0x52/0xa0
[<ffffffff8166c561>] netlink_unicast+0x181/0x240
[<ffffffff8166c8e2>] netlink_sendmsg+0x2c2/0x3b0
[<ffffffff816112e8>] sock_sendmsg+0x38/0x50
[<ffffffff816117a2>] SYSC_sendto+0x102/0x190
[<ffffffff81612f4e>] SyS_sendto+0xe/0x10
[<ffffffff8176d337>] entry_SYSCALL_64_fastpath+0x1a/0xa5
[<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: Shu Wang <shuwang@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/audit.c b/kernel/audit.c
index 7cad70214b81a..07def5e49cc9e 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -641,6 +641,7 @@ static int auditd_send_unicast_skb(struct sk_buff *skb)
 	ac = rcu_dereference(auditd_conn);
 	if (!ac) {
 		rcu_read_unlock();
+		kfree_skb(skb);
 		rc = -ECONNREFUSED;
 		goto err;
 	}
-- 
GitLab


From 89a6814d9b665b196aa3a102f96b6dc7e8cb669e Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Thu, 29 Jun 2017 11:48:26 -0400
Subject: [PATCH 1836/1958] mount: copy the port field into the cloned
 nfs_server structure.

Doing this copy eliminates the "port=0" entry in
the /proc/mounts entries

Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=69241

Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee5ddbd36088e..efebe6cf4378e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -820,6 +820,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
 	target->caps = source->caps;
 	target->options = source->options;
 	target->auth_info = source->auth_info;
+	target->port = source->port;
 }
 EXPORT_SYMBOL_GPL(nfs_server_copy_userdata);
 
-- 
GitLab


From e39928f942ff7a9d1cb9005423e9e35a0a4bb2e0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 12 Jul 2017 19:10:56 -0400
Subject: [PATCH 1837/1958] NFS: Fix a COMMIT race in pNFS

We must make sure that cinfo->ds->nwritten is in sync with the
commit list, since it is checked as part of pnfs_scan_commit_lists().

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pnfs_nfs.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index d40755a0984bb..3e6de85faf428 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -159,13 +159,18 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
 {
 	struct pnfs_commit_bucket *b;
 	struct pnfs_layout_segment *freeme;
+	int nwritten;
 	int i;
 
 	lockdep_assert_held(&cinfo->inode->i_lock);
 restart:
 	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
-		if (pnfs_generic_transfer_commit_list(&b->written, dst,
-						      cinfo, 0)) {
+		nwritten = pnfs_generic_transfer_commit_list(&b->written,
+				dst, cinfo, 0);
+		if (!nwritten)
+			continue;
+		cinfo->ds->nwritten -= nwritten;
+		if (list_empty(&b->written)) {
 			freeme = b->wlseg;
 			b->wlseg = NULL;
 			spin_unlock(&cinfo->inode->i_lock);
@@ -174,7 +179,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
 			goto restart;
 		}
 	}
-	cinfo->ds->nwritten = 0;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
 
-- 
GitLab


From 41181886459b281ed1346369f27b3c3bb8e2dde3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 12 Jul 2017 19:10:57 -0400
Subject: [PATCH 1838/1958] NFS: Fix another COMMIT race in pNFS

We must make sure that cinfo->ds->ncommitting is in sync with the
commit list, since it is checked as part of pnfs_commit_list().

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pnfs_nfs.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 3e6de85faf428..7ceb86627e547 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -187,6 +187,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
 	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
 	struct pnfs_commit_bucket *bucket;
 	struct pnfs_layout_segment *freeme;
+	struct list_head *pos;
 	LIST_HEAD(pages);
 	int i;
 
@@ -197,6 +198,8 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
 			continue;
 		freeme = bucket->clseg;
 		bucket->clseg = NULL;
+		list_for_each(pos, &bucket->committing)
+			cinfo->ds->ncommitting--;
 		list_splice_init(&bucket->committing, &pages);
 		spin_unlock(&cinfo->inode->i_lock);
 		nfs_retry_commit(&pages, freeme, cinfo, i);
@@ -247,9 +250,12 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
 		struct nfs_commit_info *cinfo)
 {
 	struct pnfs_commit_bucket *bucket;
+	struct list_head *pos;
 
 	bucket = &cinfo->ds->buckets[data->ds_commit_index];
 	spin_lock(&cinfo->inode->i_lock);
+	list_for_each(pos, &bucket->committing)
+		cinfo->ds->ncommitting--;
 	list_splice_init(&bucket->committing, pages);
 	data->lseg = bucket->clseg;
 	bucket->clseg = NULL;
@@ -334,7 +340,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 		}
 	}
 out:
-	cinfo->ds->ncommitting = 0;
 	return PNFS_ATTEMPTED;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
-- 
GitLab


From 4b75053e9bb6db4b700526d2d67c67a0d07f867e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 12 Jul 2017 19:10:58 -0400
Subject: [PATCH 1839/1958] pNFS/flexfiles: Handle expired layout segments in
 ff_layout_initiate_commit()

If the layout has expired due to a fencing event, then we should not
attempt to commit to the DS.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 1f2ac3dd0fe5c..b0fa83a607541 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1842,6 +1842,10 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
 	int vers, ret;
 	struct nfs_fh *fh;
 
+	if (!lseg || !(pnfs_is_valid_lseg(lseg) ||
+	    test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)))
+		goto out_err;
+
 	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
 	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
 	if (!ds)
-- 
GitLab


From 213297369cf4900eba906dd32ce845074e30f487 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 12 Jul 2017 19:10:59 -0400
Subject: [PATCH 1840/1958] Revert commit 722f0b891198 ("pNFS: Don't send
 COMMITs to the DSes if...")

Doing the test without taking any locks is racy, and so really it makes
more sense to do it in the flexfiles code (which is the only case that
cares).

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/pnfs_nfs.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7ceb86627e547..25f28fa64c575 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -224,13 +224,6 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
 		if (list_empty(&bucket->committing))
 			continue;
-		/*
-		 * If the layout segment is invalid, then let
-		 * pnfs_generic_retry_commit() clean up the bucket.
-		 */
-		if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) &&
-		    !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
-			break;
 		data = nfs_commitdata_alloc(false);
 		if (!data)
 			break;
-- 
GitLab


From 98de4e0ea47d106846fc0e30ce4e644283fa7fc2 Mon Sep 17 00:00:00 2001
From: "Levin, Alexander" <alexander.levin@verizon.com>
Date: Tue, 18 Jul 2017 04:23:16 +0000
Subject: [PATCH 1841/1958] wireless: wext: terminate ifr name coming from
 userspace

ifr name is assumed to be a valid string by the kernel, but nothing
was forcing username to pass a valid string.

In turn, this would cause panics as we tried to access the string
past it's valid memory.

Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 82fd4c9c4a1bf..7657ad6bc13d1 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -424,6 +424,8 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		if (copy_from_user(&iwr, arg, sizeof(iwr)))
 			return -EFAULT;
 
+		iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+
 		return wext_handle_ioctl(net, &iwr, cmd, arg);
 	}
 
-- 
GitLab


From 63679112c536289826fec61c917621de95ba2ade Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 19 Jul 2017 13:33:24 -0700
Subject: [PATCH 1842/1958] net: Zero terminate ifr_name in dev_ifname().

The ifr.ifr_name is passed around and assumed to be NULL terminated.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7657ad6bc13d1..06b147d7d9e2e 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
+	ifr.ifr_name[IFNAMSIZ-1] = 0;
 
 	error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
 	if (error)
-- 
GitLab


From 90f522a20e3d16d153e5a5f84cf4ff92281ee417 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Tue, 18 Jul 2017 17:07:15 +0300
Subject: [PATCH 1843/1958] NET: dwmac: Make dwmac reset unconditional

Unconditional reset dwmac before HW init if reset controller is present.

In existing implementation we reset dwmac only after second module
probing:
(module load -> unload -> load again [reset happens])

Now we reset dwmac at every module load:
(module load [reset happens] -> unload -> load again [reset happens])

Also some reset controllers have only reset callback instead of
assert + deassert callbacks pair, so handle this case.

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1853f7ff66575..1763e48c84e20 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4120,8 +4120,15 @@ int stmmac_dvr_probe(struct device *device,
 	if ((phyaddr >= 0) && (phyaddr <= 31))
 		priv->plat->phy_addr = phyaddr;
 
-	if (priv->plat->stmmac_rst)
+	if (priv->plat->stmmac_rst) {
+		ret = reset_control_assert(priv->plat->stmmac_rst);
 		reset_control_deassert(priv->plat->stmmac_rst);
+		/* Some reset controllers have only reset callback instead of
+		 * assert + deassert callbacks pair.
+		 */
+		if (ret == -ENOTSUPP)
+			reset_control_reset(priv->plat->stmmac_rst);
+	}
 
 	/* Init MAC and get the capabilities */
 	ret = stmmac_hw_init(priv);
-- 
GitLab


From beaec533fc2701a28a4d667f67c9f59c6e4e0d13 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Wed, 19 Jul 2017 20:27:30 +0200
Subject: [PATCH 1844/1958] llist: clang: introduce member_address_is_nonnull()

Currently llist_for_each_entry() and llist_for_each_entry_safe() iterate
until &pos->member != NULL.  But when building the kernel with Clang,
the compiler assumes &pos->member cannot be NULL if the member's offset
is greater than 0 (which would be equivalent to the object being
non-contiguous in memory).  Therefore the loop condition is always true,
and the loops become infinite.

To work around this, introduce the member_address_is_nonnull() macro,
which casts object pointer to uintptr_t, thus letting the member pointer
to be NULL.

Signed-off-by: Alexander Potapenko <glider@google.com>
Tested-by: Sodagudi Prasad <psodagud@codeaurora.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/llist.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/include/linux/llist.h b/include/linux/llist.h
index d11738110a7ae..1957635e6d5f7 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -92,6 +92,23 @@ static inline void init_llist_head(struct llist_head *list)
 #define llist_entry(ptr, type, member)		\
 	container_of(ptr, type, member)
 
+/**
+ * member_address_is_nonnull - check whether the member address is not NULL
+ * @ptr:	the object pointer (struct type * that contains the llist_node)
+ * @member:	the name of the llist_node within the struct.
+ *
+ * This macro is conceptually the same as
+ *	&ptr->member != NULL
+ * but it works around the fact that compilers can decide that taking a member
+ * address is never a NULL pointer.
+ *
+ * Real objects that start at a high address and have a member at NULL are
+ * unlikely to exist, but such pointers may be returned e.g. by the
+ * container_of() macro.
+ */
+#define member_address_is_nonnull(ptr, member)	\
+	((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0)
+
 /**
  * llist_for_each - iterate over some deleted entries of a lock-less list
  * @pos:	the &struct llist_node to use as a loop cursor
@@ -145,7 +162,7 @@ static inline void init_llist_head(struct llist_head *list)
  */
 #define llist_for_each_entry(pos, node, member)				\
 	for ((pos) = llist_entry((node), typeof(*(pos)), member);	\
-	     &(pos)->member != NULL;					\
+	     member_address_is_nonnull(pos, member);			\
 	     (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
 
 /**
@@ -167,7 +184,7 @@ static inline void init_llist_head(struct llist_head *list)
  */
 #define llist_for_each_entry_safe(pos, n, node, member)			       \
 	for (pos = llist_entry((node), typeof(*pos), member);		       \
-	     &pos->member != NULL &&					       \
+	     member_address_is_nonnull(pos, member) &&			       \
 	        (n = llist_entry(pos->member.next, typeof(*n), member), true); \
 	     pos = n)
 
-- 
GitLab


From 65e3c766359992f6cbe3fa1ef0a19546ecbdf380 Mon Sep 17 00:00:00 2001
From: Arun Parameswaran <arun.parameswaran@broadcom.com>
Date: Thu, 6 Jul 2017 10:37:57 -0700
Subject: [PATCH 1845/1958] dt-binding: ptp: Add SoC compatibility strings for
 dte ptp clock

Add SoC specific compatibility strings to the Broadcom DTE
based PTP clock binding document.

Fixed the document heading and node name.

Fixes: 80d6076140b2 ("dt-binding: ptp: add bindings document for dte based ptp clock")
Signed-off-by: Arun Parameswaran <arun.parameswaran@broadcom.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/ptp/brcm,ptp-dte.txt      | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt b/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt
index 07590bcdad15a..7c04e22a5d6af 100644
--- a/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt
+++ b/Documentation/devicetree/bindings/ptp/brcm,ptp-dte.txt
@@ -1,13 +1,20 @@
-* Broadcom Digital Timing Engine(DTE) based PTP clock driver
+* Broadcom Digital Timing Engine(DTE) based PTP clock
 
 Required properties:
-- compatible: should be "brcm,ptp-dte"
+- compatible: should contain the core compatibility string
+              and the SoC compatibility string. The SoC
+              compatibility string is to handle SoC specific
+              hardware differences.
+              Core compatibility string:
+                 "brcm,ptp-dte"
+              SoC compatibility strings:
+                 "brcm,iproc-ptp-dte" - for iproc based SoC's
 - reg: address and length of the DTE block's NCO registers
 
 Example:
 
-ptp_dte: ptp_dte@180af650 {
-	compatible = "brcm,ptp-dte";
+ptp: ptp-dte@180af650 {
+	compatible = "brcm,iproc-ptp-dte", "brcm,ptp-dte";
 	reg = <0x180af650 0x10>;
 	status = "okay";
 };
-- 
GitLab


From bb0a2675f72b458e64f47071e8aabdb225a6af4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= <mnhu@prevas.dk>
Date: Wed, 19 Jul 2017 08:17:02 +0200
Subject: [PATCH 1846/1958] net: dsa: mv88e6xxx: Enable CMODE config support
 for 6390X
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390
ports 9 & 10') added support for setting the CMODE for the 6390X family,
but only enabled it for 9290 and 6390 - and left out 6390X.

Fix support for setting the CMODE on 6390X also by assigning
mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer in
mv88e6390x_ops too.

Fixes: f39908d3b1c4 ("net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 ports 9 & 10")
Signed-off-by: Martin Hundebøll <mnhu@prevas.dk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 53b088166c283..5bcdd33101b00 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3178,6 +3178,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
 	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
 	.port_pause_limit = mv88e6390_port_pause_limit,
+	.port_set_cmode = mv88e6390x_port_set_cmode,
 	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
 	.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
 	.stats_snapshot = mv88e6390_g1_stats_snapshot,
-- 
GitLab


From 3753654e541938717b13f2b25791c3171a3a06aa Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 19 Jul 2017 10:22:40 -0700
Subject: [PATCH 1847/1958] Revert "rtnetlink: Do not generate notifications
 for CHANGEADDR event"

This reverts commit cd8966e75ed3c6b41a37047a904617bc44fa481f.

The duplicate CHANGEADDR event message is sent regardless of link
status whereas the setlink changes only generate a notification when
the link is up. Not sending a notification when the link is down breaks
dhcpcd which only processes hwaddr changes when the link is down.

Fixes reported regression:
    https://bugzilla.kernel.org/show_bug.cgi?id=196355

Reported-by: Yaroslav Isakov <yaroslav.isakov@gmail.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1ba90980be13..11b25fbf3dd29 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4241,6 +4241,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 
 	switch (event) {
 	case NETDEV_REBOOT:
+	case NETDEV_CHANGEADDR:
 	case NETDEV_CHANGENAME:
 	case NETDEV_FEAT_CHANGE:
 	case NETDEV_BONDING_FAILOVER:
-- 
GitLab


From 1e6c22aef28364dcc5f03c04a05ec463bc2b3431 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 19 Jul 2017 18:46:59 +0100
Subject: [PATCH 1848/1958] net: tehuti: don't process data if it has not been
 copied from userspace

The array data is only populated with valid information from userspace
if cmd != SIOCDEVPRIVATE, other cases the array contains garbage on
the stack. The subsequent switch statement acts on a subcommand in
data[0] which could be any garbage value if cmd is SIOCDEVPRIVATE which
seems incorrect to me.  Instead, just return EOPNOTSUPP for the case
where cmd == SIOCDEVPRIVATE to avoid this issue.

As a side note, I suspect that the original intention of the code
was for this ioctl to work just for cmd == SIOCDEVPRIVATE (and the
current logic is reversed). However, I don't wont to change the current
semantics in case any userspace code relies on this existing behaviour.

Detected by CoverityScan, CID#139647 ("Uninitialized scalar variable")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/tehuti/tehuti.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 711fbbbc4b1f7..163d8d16bc245 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -654,6 +654,8 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
 			RET(-EFAULT);
 		}
 		DBG("%d 0x%x 0x%x\n", data[0], data[1], data[2]);
+	} else {
+		return -EOPNOTSUPP;
 	}
 
 	if (!capable(CAP_SYS_RAWIO))
-- 
GitLab


From 6399f1fae4ec29fab5ec76070435555e256ca3a6 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 19 Jul 2017 22:28:55 +0200
Subject: [PATCH 1849/1958] ipv6: avoid overflow of offset in
 ip6_find_1stfragopt

In some cases, offset can overflow and can cause an infinite loop in
ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and
cap it at IPV6_MAXPLEN, since packets larger than that should be invalid.

This problem has been here since before the beginning of git history.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/output_core.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index e9065b8d3af85..abb2c307fbe83 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident);
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
-	u16 offset = sizeof(struct ipv6hdr);
+	unsigned int offset = sizeof(struct ipv6hdr);
 	unsigned int packet_len = skb_tail_pointer(skb) -
 		skb_network_header(skb);
 	int found_rhdr = 0;
@@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 	while (offset <= packet_len) {
 		struct ipv6_opt_hdr *exthdr;
+		unsigned int len;
 
 		switch (**nexthdr) {
 
@@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 						 offset);
-		offset += ipv6_optlen(exthdr);
+		len = ipv6_optlen(exthdr);
+		if (len + offset >= IPV6_MAXPLEN)
+			return -EINVAL;
+		offset += len;
 		*nexthdr = &exthdr->nexthdr;
 	}
 
-- 
GitLab


From 3bda69c1c3993a2bddbae01397d12bfef6054011 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 18 Jul 2017 14:08:34 +0300
Subject: [PATCH 1850/1958] perf/core: Fix scheduling regression of pinned
 groups

Vince Weaver reported:

> I was tracking down some regressions in my perf_event_test testsuite.
> Some of the tests broke in the 4.11-rc1 timeframe.
>
> I've bisected one of them, this report is about
>	tests/overflow/simul_oneshot_group_overflow
> This test creates an event group containing two sampling events, set
> to overflow to a signal handler (which disables and then refreshes the
> event).
>
> On a good kernel you get the following:
> 	Event perf::instructions with period 1000000
> 	Event perf::instructions with period 2000000
> 		fd 3 overflows: 946 (perf::instructions/1000000)
> 		fd 4 overflows: 473 (perf::instructions/2000000)
> 	Ending counts:
> 		Count 0: 946379875
> 		Count 1: 946365218
>
> With the broken kernels you get:
> 	Event perf::instructions with period 1000000
> 	Event perf::instructions with period 2000000
> 		fd 3 overflows: 938 (perf::instructions/1000000)
> 		fd 4 overflows: 318 (perf::instructions/2000000)
> 	Ending counts:
> 		Count 0: 946373080
> 		Count 1: 653373058

The root cause of the bug is that the following commit:

  487f05e18a ("perf/core: Optimize event rescheduling on active contexts")

erronously assumed that event's 'pinned' setting determines whether the
event belongs to a pinned group or not, but in fact, it's the group
leader's pinned state that matters.

This was discovered by Vince in the test case described above, where two instruction
counters are grouped, the group leader is pinned, but the other event is not;
in the regressed case the counters were off by 33% (the difference between events'
periods), but should be the same within the error margin.

Fix the problem by looking at the group leader's pinning.

Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: 487f05e18a ("perf/core: Optimize event rescheduling on active contexts")
Link: http://lkml.kernel.org/r/87lgnmvw7h.fsf@ashishki-desk.ger.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9747e422ab204..c9cdbd3967709 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1452,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event)
 
 	lockdep_assert_held(&ctx->lock);
 
+	/*
+	 * It's 'group type', really, because if our group leader is
+	 * pinned, so are we.
+	 */
+	if (event->group_leader != event)
+		event = event->group_leader;
+
 	event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
 	if (!ctx->task)
 		event_type |= EVENT_CPU;
-- 
GitLab


From 448421b5e93b9177c5698f0cf6f5e72d2995eeca Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Wed, 19 Jul 2017 14:43:28 +0300
Subject: [PATCH 1851/1958] drm/mst: Fix error handling during MST sideband
 message reception

Handle any error due to partial reads, timeouts etc. to avoid parsing
uninitialized data subsequently. Also bail out if the parsing itself
fails.

Cc: Dave Airlie <airlied@redhat.com>
Cc: Lyude <lyude@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Lyude <lyude@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20170719114330.26540-2-imre.deak@intel.com
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index bfd237c15e76e..ce0e58442939b 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2196,11 +2196,17 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
 		ret = drm_dp_dpcd_read(mgr->aux, basereg + curreply,
 				    replyblock, len);
 		if (ret != len) {
-			DRM_DEBUG_KMS("failed to read a chunk\n");
+			DRM_DEBUG_KMS("failed to read a chunk (len %d, ret %d)\n",
+				      len, ret);
+			return;
 		}
+
 		ret = drm_dp_sideband_msg_build(msg, replyblock, len, false);
-		if (ret == false)
+		if (!ret) {
 			DRM_DEBUG_KMS("failed to build sideband msg\n");
+			return;
+		}
+
 		curreply += len;
 		replylen -= len;
 	}
-- 
GitLab


From 7f8b3987da54cb4d41ad2545cd4d7958b9a36bdf Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Wed, 19 Jul 2017 14:43:29 +0300
Subject: [PATCH 1852/1958] drm/mst: Avoid dereferencing a NULL mstb in
 drm_dp_mst_handle_up_req()

In case of an unknown broadcast message is sent mstb will remain unset,
so check for this.

Cc: Dave Airlie <airlied@redhat.com>
Cc: Lyude <lyude@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Lyude <lyude@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20170719114330.26540-3-imre.deak@intel.com
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index ce0e58442939b..7a6201133eb97 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2324,7 +2324,9 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
 			DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", msg.u.resource_stat.port_number, msg.u.resource_stat.available_pbn);
 		}
 
-		drm_dp_put_mst_branch_device(mstb);
+		if (mstb)
+			drm_dp_put_mst_branch_device(mstb);
+
 		memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
 	}
 	return ret;
-- 
GitLab


From 636c4c3e762b62aa93632c645ca65879285b16e3 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Wed, 19 Jul 2017 16:46:32 +0300
Subject: [PATCH 1853/1958] drm/mst: Avoid processing partially received
 up/down message transactions

Currently we may process up/down message transactions containing
uninitialized data. This can happen if there was an error during the
reception of any message in the transaction, but we happened to receive
the last message correctly with the end-of-message flag set.

To avoid this abort the reception of the transaction when the first
error is detected, rejecting any messages until a message with the
start-of-message flag is received (which will start a new transaction).
This is also what the DP 1.4 spec 2.11.8.2 calls for in this case.

In addtion this also prevents receiving bogus transactions without the
first message with the the start-of-message flag set.

v2:
- unchanged
v3:
- git add the part that actually skips messages after an error in
  drm_dp_sideband_msg_build()

Cc: Dave Airlie <airlied@redhat.com>
Cc: Lyude <lyude@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Lyude <lyude@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20170719134632.13366-1-imre.deak@intel.com
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 31 +++++++++++++++++++++------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 7a6201133eb97..ae5f068955628 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -330,6 +330,13 @@ static bool drm_dp_sideband_msg_build(struct drm_dp_sideband_msg_rx *msg,
 			return false;
 		}
 
+		/*
+		 * ignore out-of-order messages or messages that are part of a
+		 * failed transaction
+		 */
+		if (!recv_hdr.somt && !msg->have_somt)
+			return false;
+
 		/* get length contained in this portion */
 		msg->curchunk_len = recv_hdr.msg_len;
 		msg->curchunk_hdrlen = hdrlen;
@@ -2164,7 +2171,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume);
 
-static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
+static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
 {
 	int len;
 	u8 replyblock[32];
@@ -2179,12 +2186,12 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
 			       replyblock, len);
 	if (ret != len) {
 		DRM_DEBUG_KMS("failed to read DPCD down rep %d %d\n", len, ret);
-		return;
+		return false;
 	}
 	ret = drm_dp_sideband_msg_build(msg, replyblock, len, true);
 	if (!ret) {
 		DRM_DEBUG_KMS("sideband msg build failed %d\n", replyblock[0]);
-		return;
+		return false;
 	}
 	replylen = msg->curchunk_len + msg->curchunk_hdrlen;
 
@@ -2198,25 +2205,30 @@ static void drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
 		if (ret != len) {
 			DRM_DEBUG_KMS("failed to read a chunk (len %d, ret %d)\n",
 				      len, ret);
-			return;
+			return false;
 		}
 
 		ret = drm_dp_sideband_msg_build(msg, replyblock, len, false);
 		if (!ret) {
 			DRM_DEBUG_KMS("failed to build sideband msg\n");
-			return;
+			return false;
 		}
 
 		curreply += len;
 		replylen -= len;
 	}
+	return true;
 }
 
 static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
 {
 	int ret = 0;
 
-	drm_dp_get_one_sb_msg(mgr, false);
+	if (!drm_dp_get_one_sb_msg(mgr, false)) {
+		memset(&mgr->down_rep_recv, 0,
+		       sizeof(struct drm_dp_sideband_msg_rx));
+		return 0;
+	}
 
 	if (mgr->down_rep_recv.have_eomt) {
 		struct drm_dp_sideband_msg_tx *txmsg;
@@ -2272,7 +2284,12 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
 static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
 {
 	int ret = 0;
-	drm_dp_get_one_sb_msg(mgr, true);
+
+	if (!drm_dp_get_one_sb_msg(mgr, true)) {
+		memset(&mgr->up_req_recv, 0,
+		       sizeof(struct drm_dp_sideband_msg_rx));
+		return 0;
+	}
 
 	if (mgr->up_req_recv.have_eomt) {
 		struct drm_dp_sideband_msg_req_body msg;
-- 
GitLab


From dad5ab0db8deac535d03e3fe3d8f2892173fa6a4 Mon Sep 17 00:00:00 2001
From: Seunghun Han <kkamagui@gmail.com>
Date: Tue, 18 Jul 2017 20:03:51 +0900
Subject: [PATCH 1854/1958] x86/acpi: Prevent out of bound access caused by
 broken ACPI tables

The bus_irq argument of mp_override_legacy_irq() is used as the index into
the isa_irq_to_gsi[] array. The bus_irq argument originates from
ACPI_MADT_TYPE_IO_APIC and ACPI_MADT_TYPE_INTERRUPT items in the ACPI
tables, but is nowhere sanity checked.

That allows broken or malicious ACPI tables to overwrite memory, which
might cause malfunction, panic or arbitrary code execution.

Add a sanity check and emit a warning when that triggers.

[ tglx: Added warning and rewrote changelog ]

Signed-off-by: Seunghun Han <kkamagui@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: security@kernel.org
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: stable@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/acpi/boot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6bb6806710886..7491e73d92530 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -346,6 +346,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 	int pin;
 	struct mpc_intsrc mp_irq;
 
+	/*
+	 * Check bus_irq boundary.
+	 */
+	if (bus_irq >= NR_IRQS_LEGACY) {
+		pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq);
+		return;
+	}
+
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
-- 
GitLab


From e708e35ba6d89ff785b225cd07dcccab04fa954a Mon Sep 17 00:00:00 2001
From: Seunghun Han <kkamagui@gmail.com>
Date: Tue, 18 Jul 2017 18:20:44 +0900
Subject: [PATCH 1855/1958] x86/ioapic: Pass the correct data to
 unmask_ioapic_irq()

One of the rarely executed code pathes in check_timer() calls
unmask_ioapic_irq() passing irq_get_chip_data(0) as argument.

That's wrong as unmask_ioapic_irq() expects a pointer to the irq data of
interrupt 0. irq_get_chip_data(0) returns NULL, so the following
dereference in unmask_ioapic_irq() causes a kernel panic.

The issue went unnoticed in the first place because irq_get_chip_data()
returns a void pointer so the compiler cannot do a type check on the
argument. The code path was added for machines with broken configuration,
but it seems that those machines are either not running current kernels or
simply do not longer exist.

Hand in irq_get_irq_data(0) as argument which provides the correct data.

[ tglx: Rewrote changelog ]

Fixes: 4467715a44cc ("x86/irq: Move irq_cfg.irq_2_pin into io_apic.c")
Signed-off-by: Seunghun Han <kkamagui@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1500369644-45767-1-git-send-email-kkamagui@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b4f5f73febdb8..237e9c2341c71 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2093,7 +2093,7 @@ static inline void __init check_timer(void)
 			int idx;
 			idx = find_irq_entry(apic1, pin1, mp_INT);
 			if (idx != -1 && irq_trigger(idx))
-				unmask_ioapic_irq(irq_get_chip_data(0));
+				unmask_ioapic_irq(irq_get_irq_data(0));
 		}
 		irq_domain_deactivate_irq(irq_data);
 		irq_domain_activate_irq(irq_data);
-- 
GitLab


From 0e7f0b6c2371d41e9bd902fe863be2ec4b6d4e31 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 20 Jul 2017 07:01:25 +0200
Subject: [PATCH 1856/1958] x86/defconfig: Remove stale, old Kconfig options

Remove old, dead Kconfig options (in order appearing in this commit):

 - EXPERIMENTAL is gone since v3.9;
 - IP_NF_TARGET_ULOG: commit d4da843e6fad ("netfilter: kill remnants of ulog targets");
 - USB_LIBUSUAL: commit f61870ee6f8c ("usb: remove libusual");

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1500526885-4341-1-git-send-email-krzk@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 3 ---
 arch/x86/configs/x86_64_defconfig | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 6cf79e1a68301..0eb9f92f37179 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_64BIT is not set
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -125,7 +124,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
 CONFIG_NF_NAT=y
 CONFIG_IP_NF_TARGET_MASQUERADE=y
 CONFIG_IP_NF_MANGLE=y
@@ -255,7 +253,6 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
 CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index de45f57b410de..4a4b16e56d354 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -124,7 +123,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=y
 CONFIG_NF_NAT=y
 CONFIG_IP_NF_TARGET_MASQUERADE=y
 CONFIG_IP_NF_MANGLE=y
@@ -251,7 +249,6 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
 CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
 CONFIG_EDAC=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
-- 
GitLab


From 11d8b05855f3749bcb6c57e2c4052921b9605c77 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:52:59 +0200
Subject: [PATCH 1857/1958] perf/x86: Shut up false-positive
 -Wmaybe-uninitialized warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The intialization function checks for various failure scenarios, but
unfortunately the compiler gets a little confused about the possible
combinations, leading to a false-positive build warning when
-Wmaybe-uninitialized is set:

  arch/x86/events/core.c: In function ‘init_hw_perf_events’:
  arch/x86/events/core.c:264:3: warning: ‘reg_fail’ may be used uninitialized in this function [-Wmaybe-uninitialized]
  arch/x86/events/core.c:264:3: warning: ‘val_fail’ may be used uninitialized in this function [-Wmaybe-uninitialized]
     pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",

We can't actually run into this case, so this shuts up the warning
by initializing the variables to a known-invalid state.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170719125310.2487451-2-arnd@arndb.de
Link: https://patchwork.kernel.org/patch/9392595/
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index ff1ea2fb97055..8e3db8f642a7a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -191,8 +191,8 @@ static void release_pmc_hardware(void) {}
 
 static bool check_hw_exists(void)
 {
-	u64 val, val_fail, val_new= ~0;
-	int i, reg, reg_fail, ret = 0;
+	u64 val, val_fail = -1, val_new= ~0;
+	int i, reg, reg_fail = -1, ret = 0;
 	int bios_fail = 0;
 	int reg_safe = -1;
 
-- 
GitLab


From 75e2f0a6b16141cb347f442033ec907380d4d66e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:53:00 +0200
Subject: [PATCH 1858/1958] x86/fpu/math-emu: Fix possible uninitialized
 variable use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building the kernel with "make EXTRA_CFLAGS=...", this overrides
the "PARANOID" preprocessor macro defined in arch/x86/math-emu/Makefile,
and we run into a build warning:

  arch/x86/math-emu/reg_compare.c: In function ‘compare_i_st_st’:
  arch/x86/math-emu/reg_compare.c:254:6: error: ‘f’ may be used uninitialized in this function [-Werror=maybe-uninitialized]

This fixes the implementation to work correctly even without the PARANOID
flag, and also fixes the Makefile to not use the EXTRA_CFLAGS variable
but instead use the ccflags-y variable in the Makefile that is meant
for this purpose.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Bill Metzenthen <billm@melbpc.org.au>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170719125310.2487451-3-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/math-emu/Makefile      |  4 ++--
 arch/x86/math-emu/reg_compare.c | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile
index 9b0c63b603029..1b2dac1743213 100644
--- a/arch/x86/math-emu/Makefile
+++ b/arch/x86/math-emu/Makefile
@@ -5,8 +5,8 @@
 #DEBUG	= -DDEBUGGING
 DEBUG	=
 PARANOID = -DPARANOID
-EXTRA_CFLAGS	:= $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
-EXTRA_AFLAGS	:= $(PARANOID)
+ccflags-y += $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
+asflags-y += $(PARANOID)
 
 # From 'C' language sources:
 C_OBJS =fpu_entry.o errors.o \
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index b77360fdbf4a8..19b33b50adfac 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -168,7 +168,7 @@ static int compare(FPU_REG const *b, int tagb)
 /* This function requires that st(0) is not empty */
 int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 {
-	int f = 0, c;
+	int f, c;
 
 	c = compare(loaded_data, loaded_tag);
 
@@ -189,12 +189,12 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 		case COMP_No_Comp:
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#ifdef PARANOID
 		default:
+#ifdef PARANOID
 			EXCEPTION(EX_INTERNAL | 0x121);
+#endif /* PARANOID */
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#endif /* PARANOID */
 		}
 	setcc(f);
 	if (c & COMP_Denormal) {
@@ -205,7 +205,7 @@ int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
 
 static int compare_st_st(int nr)
 {
-	int f = 0, c;
+	int f, c;
 	FPU_REG *st_ptr;
 
 	if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
@@ -235,12 +235,12 @@ static int compare_st_st(int nr)
 		case COMP_No_Comp:
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#ifdef PARANOID
 		default:
+#ifdef PARANOID
 			EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
 			f = SW_C3 | SW_C2 | SW_C0;
 			break;
-#endif /* PARANOID */
 		}
 	setcc(f);
 	if (c & COMP_Denormal) {
@@ -283,12 +283,12 @@ static int compare_i_st_st(int nr)
 	case COMP_No_Comp:
 		f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
 		break;
-#ifdef PARANOID
 	default:
+#ifdef PARANOID
 		EXCEPTION(EX_INTERNAL | 0x122);
+#endif /* PARANOID */
 		f = 0;
 		break;
-#endif /* PARANOID */
 	}
 	FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
 	if (c & COMP_Denormal) {
-- 
GitLab


From 5623452a0eaec1d44cc9f0770444a48847c9953f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:53:01 +0200
Subject: [PATCH 1859/1958] x86/fpu/math-emu: Avoid bogus -Wint-in-bool-context
 warning

gcc-7.1.1 produces this warning:

  arch/x86/math-emu/reg_add_sub.c: In function 'FPU_add':
  arch/x86/math-emu/reg_add_sub.c:80:48: error: ?: using integer constants in boolean context [-Werror=int-in-bool-context]

This appears to be a bug in gcc-7.1.1, and I have reported it as
PR81484. The compiler suggests that code written as

	if (a & b ? c : d)

is usually incorrect and should have been

	if (a & (b ? c : d))

However, in this case, we correctly write

	if ((a & b) ? c : d)

and should not get a warning for it.

This adds a dirty workaround for the problem, adding a comparison with
zero inside of the macro. The warning is currently disabled in the kernel,
so we may decide not to apply the patch, and instead wait for future gcc
releases to fix the problem. On the other hand, it seems to be the
only instance of this particular problem.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Bill Metzenthen <billm@melbpc.org.au>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170719125310.2487451-4-arnd@arndb.de
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81484
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/math-emu/fpu_emu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index afbc4d805d66f..c9c320dccca13 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -157,7 +157,7 @@ extern u_char const data_sizes_16[32];
 
 #define signbyte(a) (((u_char *)(a))[9])
 #define getsign(a) (signbyte(a) & 0x80)
-#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
+#define setsign(a,b) { if ((b) != 0) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
 #define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
                         else signbyte(b) &= 0x7f; }
 #define changesign(a) { signbyte(a) ^= 0x80; }
-- 
GitLab


From 7206f9bf108eb9513d170c73f151367a1bdf3dbf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:53:02 +0200
Subject: [PATCH 1860/1958] x86/io: Add "memory" clobber to
 insb/insw/insl/outsb/outsw/outsl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The x86 version of insb/insw/insl uses an inline assembly that does
not have the target buffer listed as an output. This can confuse
the compiler, leading it to think that a subsequent access of the
buffer is uninitialized:

  drivers/net/wireless/wl3501_cs.c: In function ‘wl3501_mgmt_scan_confirm’:
  drivers/net/wireless/wl3501_cs.c:665:9: error: ‘sig.status’ is used uninitialized in this function [-Werror=uninitialized]
  drivers/net/wireless/wl3501_cs.c:668:12: error: ‘sig.cap_info’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
  drivers/net/sb1000.c: In function 'sb1000_rx':
  drivers/net/sb1000.c:775:9: error: 'st[0]' is used uninitialized in this function [-Werror=uninitialized]
  drivers/net/sb1000.c:776:10: error: 'st[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  drivers/net/sb1000.c:784:11: error: 'st[1]' may be used uninitialized in this function [-Werror=maybe-uninitialized]

I tried to mark the exact input buffer as an output here, but couldn't
figure it out. As suggested by Linus, marking all memory as clobbered
however is good enough too. For the outs operations, I also add the
memory clobber, to force the input to be written to local variables.
This is probably already guaranteed by the "asm volatile", but it can't
hurt to do this for symmetry.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Link: http://lkml.kernel.org/r/20170719125310.2487451-5-arnd@arndb.de
Link: https://lkml.org/lkml/2017/7/12/605
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/io.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 7afb0e2f07f40..48febf07e8280 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -328,13 +328,13 @@ static inline unsigned type in##bwl##_p(int port)			\
 static inline void outs##bwl(int port, const void *addr, unsigned long count) \
 {									\
 	asm volatile("rep; outs" #bwl					\
-		     : "+S"(addr), "+c"(count) : "d"(port));		\
+		     : "+S"(addr), "+c"(count) : "d"(port) : "memory");	\
 }									\
 									\
 static inline void ins##bwl(int port, void *addr, unsigned long count)	\
 {									\
 	asm volatile("rep; ins" #bwl					\
-		     : "+D"(addr), "+c"(count) : "d"(port));		\
+		     : "+D"(addr), "+c"(count) : "d"(port) : "memory");	\
 }
 
 BUILDIO(b, b, char)
-- 
GitLab


From d460131dd50599e0e9405d5f4ae02c27d529a44a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:53:03 +0200
Subject: [PATCH 1861/1958] x86/build: Silence the build with "make -s"

Every kernel build on x86 will result in some output:

  Setup is 13084 bytes (padded to 13312 bytes).
  System is 4833 kB
  CRC 6d35fa35
  Kernel: arch/x86/boot/bzImage is ready  (#2)

This shuts it up, so that 'make -s' is truely silent as long as
everything works. Building without '-s' should produce unchanged
output.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170719125310.2487451-6-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 0d810fb15eac8..d88a2fddba8c7 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -73,12 +73,13 @@ UBSAN_SANITIZE := n
 $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD   $@
+silent_redirect_image = >/dev/null
 cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
-			       $(obj)/zoffset.h $@
+			       $(obj)/zoffset.h $@ $($(quiet)redirect_image)
 
 $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
-	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+	@$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
 OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
 $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
-- 
GitLab


From d689c64d189e43d782fec5649fb0afe303c5b3f9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:53:05 +0200
Subject: [PATCH 1862/1958] x86/platform: Add PCI dependency for
 PUNIT_ATOM_DEBUG

The IOSF_MBI option requires PCI support, without it we get a harmless
Kconfig warning when it gets selected by PUNIT_ATOM_DEBUG:

  warning: (X86_INTEL_LPSS && SND_SST_IPC_ACPI && MMC_SDHCI_ACPI && PUNIT_ATOM_DEBUG) selects IOSF_MBI which has unmet direct dependencies (PCI)

This adds another dependency to avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170719125310.2487451-8-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig.debug | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fcb7604172cec..cd20ca0b40434 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -348,6 +348,7 @@ config X86_DEBUG_FPU
 
 config PUNIT_ATOM_DEBUG
 	tristate "ATOM Punit debug driver"
+	depends on PCI
 	select DEBUG_FS
 	select IOSF_MBI
 	---help---
-- 
GitLab


From 0bc73048d7baecf94117d1a948853a627e6ba5c8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jul 2017 14:53:06 +0200
Subject: [PATCH 1863/1958] x86/platform/intel-mid: Fix a format string
 overflow warning

We have space for exactly three characters for the index in "max7315_%d_base",
but as GCC points out having more would cause an string overflow:

  arch/x86/platform/intel-mid/device_libs/platform_max7315.c: In function 'max7315_platform_data':
  arch/x86/platform/intel-mid/device_libs/platform_max7315.c:41:26: error: '%d' directive writing between 1 and 11 bytes into a region of size 9 [-Werror=format-overflow=]
     sprintf(base_pin_name, "max7315_%d_base", nr);
                          ^~~~~~~~~~~~~~~~~
  arch/x86/platform/intel-mid/device_libs/platform_max7315.c:41:26: note: directive argument in the range [-2147483647, 2147483647]
  arch/x86/platform/intel-mid/device_libs/platform_max7315.c:41:3: note: 'sprintf' output between 15 and 25 bytes into a destination of size 17
     sprintf(base_pin_name, "max7315_%d_base", nr);

This makes it use an snprintf() to truncate the string if that happened
rather than overflowing the stack. In practice, this is safe, because
there won't be a large number of max7315 devices in the systems, and
both the format and the length are defined by the firmware interface.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170719125310.2487451-9-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-mid/device_libs/platform_max7315.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
index 6e075afa7877b..58337b2bc6823 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -38,8 +38,10 @@ static void __init *max7315_platform_data(void *info)
 	 */
 	strcpy(i2c_info->type, "max7315");
 	if (nr++) {
-		sprintf(base_pin_name, "max7315_%d_base", nr);
-		sprintf(intr_pin_name, "max7315_%d_int", nr);
+		snprintf(base_pin_name, sizeof(base_pin_name),
+			 "max7315_%d_base", nr);
+		snprintf(intr_pin_name, sizeof(intr_pin_name),
+			 "max7315_%d_int", nr);
 	} else {
 		strcpy(base_pin_name, "max7315_base");
 		strcpy(intr_pin_name, "max7315_int");
-- 
GitLab


From 1d88f183734c0d916428911df006e645a6162cab Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 20 Jul 2017 11:08:21 +0200
Subject: [PATCH 1864/1958] ovl: fix xattr get and set with selinux

inode_doinit_with_dentry() in SELinux wants to read the upper inode's xattr
to get security label, and ovl_xattr_get() calls ovl_dentry_real(), which
depends on dentry->d_inode, but d_inode is null and not initialized yet at
this point resulting in an Oops.

Fix by getting the upperdentry info from the inode directly in this case.

Reported-by: Eryu Guan <eguan@redhat.com>
Fixes: 09d8b586731b ("ovl: move __upperdentry to ovl_inode")
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     | 32 +++++++++++++++++---------------
 fs/overlayfs/overlayfs.h |  7 ++++---
 fs/overlayfs/super.c     |  8 ++++----
 fs/overlayfs/util.c      |  7 ++++++-
 4 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 69f4fc26ee398..5bc71642b2260 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -202,37 +202,38 @@ bool ovl_is_private_xattr(const char *name)
 		       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
 }
 
-int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
-		  size_t size, int flags)
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+		  const void *value, size_t size, int flags)
 {
 	int err;
-	struct path realpath;
-	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
 	const struct cred *old_cred;
 
 	err = ovl_want_write(dentry);
 	if (err)
 		goto out;
 
-	if (!value && !OVL_TYPE_UPPER(type)) {
-		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
+	if (!value && !upperdentry) {
+		err = vfs_getxattr(realdentry, name, NULL, 0);
 		if (err < 0)
 			goto out_drop_write;
 	}
 
-	err = ovl_copy_up(dentry);
-	if (err)
-		goto out_drop_write;
+	if (!upperdentry) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			goto out_drop_write;
 
-	if (!OVL_TYPE_UPPER(type))
-		ovl_path_upper(dentry, &realpath);
+		realdentry = ovl_dentry_upper(dentry);
+	}
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	if (value)
-		err = vfs_setxattr(realpath.dentry, name, value, size, flags);
+		err = vfs_setxattr(realdentry, name, value, size, flags);
 	else {
 		WARN_ON(flags != XATTR_REPLACE);
-		err = vfs_removexattr(realpath.dentry, name);
+		err = vfs_removexattr(realdentry, name);
 	}
 	revert_creds(old_cred);
 
@@ -242,12 +243,13 @@ int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
 	return err;
 }
 
-int ovl_xattr_get(struct dentry *dentry, const char *name,
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
 		  void *value, size_t size)
 {
-	struct dentry *realdentry = ovl_dentry_real(dentry);
 	ssize_t res;
 	const struct cred *old_cred;
+	struct dentry *realdentry =
+		ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	res = vfs_getxattr(realdentry, name, value, size);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 032120a761c4c..e927a62c97ae3 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -200,6 +200,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
 struct dentry *ovl_dentry_upper(struct dentry *dentry);
 struct dentry *ovl_dentry_lower(struct dentry *dentry);
 struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct dentry *ovl_i_dentry_upper(struct inode *inode);
 struct inode *ovl_inode_upper(struct inode *inode);
 struct inode *ovl_inode_lower(struct inode *inode);
 struct inode *ovl_inode_real(struct inode *inode);
@@ -271,9 +272,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr);
 int ovl_getattr(const struct path *path, struct kstat *stat,
 		u32 request_mask, unsigned int flags);
 int ovl_permission(struct inode *inode, int mask);
-int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
-		  size_t size, int flags);
-int ovl_xattr_get(struct dentry *dentry, const char *name,
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+		  const void *value, size_t size, int flags);
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
 		  void *value, size_t size);
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
 struct posix_acl *ovl_get_acl(struct inode *inode, int type);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c88493b01d8db..d86e89f972016 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -692,7 +692,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
 			struct dentry *dentry, struct inode *inode,
 			const char *name, void *buffer, size_t size)
 {
-	return ovl_xattr_get(dentry, handler->name, buffer, size);
+	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
 }
 
 static int __maybe_unused
@@ -742,7 +742,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 			return err;
 	}
 
-	err = ovl_xattr_set(dentry, handler->name, value, size, flags);
+	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
 	if (!err)
 		ovl_copyattr(ovl_inode_real(inode), inode);
 
@@ -772,7 +772,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
 			       struct dentry *dentry, struct inode *inode,
 			       const char *name, void *buffer, size_t size)
 {
-	return ovl_xattr_get(dentry, name, buffer, size);
+	return ovl_xattr_get(dentry, inode, name, buffer, size);
 }
 
 static int ovl_other_xattr_set(const struct xattr_handler *handler,
@@ -780,7 +780,7 @@ static int ovl_other_xattr_set(const struct xattr_handler *handler,
 			       const char *name, const void *value,
 			       size_t size, int flags)
 {
-	return ovl_xattr_set(dentry, name, value, size, flags);
+	return ovl_xattr_set(dentry, inode, name, value, size, flags);
 }
 
 static const struct xattr_handler __maybe_unused
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index c492ba75c6595..f46ad75dc96af 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -157,9 +157,14 @@ struct dentry *ovl_dentry_real(struct dentry *dentry)
 	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
 }
 
+struct dentry *ovl_i_dentry_upper(struct inode *inode)
+{
+	return ovl_upperdentry_dereference(OVL_I(inode));
+}
+
 struct inode *ovl_inode_upper(struct inode *inode)
 {
-	struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode));
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
 
 	return upperdentry ? d_inode(upperdentry) : NULL;
 }
-- 
GitLab


From 61b674710cd9afa2a8b17bdd1ac80670c9b79f1d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 18 Jul 2017 21:07:42 +0300
Subject: [PATCH 1865/1958] ovl: do not cleanup directory and whiteout index
 entries

Directory index entries are going to be used for looking up
redirected upper dirs by lower dir fh when decoding an overlay
file handle of a merge dir.

Whiteout index entries are going to be used as an indication that
an exported overlay file handle should be treated as stale (i.e.
after unlink of the overlay inode).

We don't know the verification rules for directory and whiteout
index entries, because they have not been implemented yet, so fail
to mount overlay rw if those entries are found to avoid corrupting
an index that was created by a newer kernel.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c   | 19 +++++++++++++++----
 fs/overlayfs/readdir.c |  5 ++++-
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 9bc0e580a5b3f..229a88ff335c4 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -397,8 +397,19 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack,
 	if (!d_inode(index))
 		return 0;
 
-	err = -EISDIR;
-	if (d_is_dir(index))
+	/*
+	 * Directory index entries are going to be used for looking up
+	 * redirected upper dirs by lower dir fh when decoding an overlay
+	 * file handle of a merge dir. Whiteout index entries are going to be
+	 * used as an indication that an exported overlay file handle should
+	 * be treated as stale (i.e. after unlink of the overlay inode).
+	 * We don't know the verification rules for directory and whiteout
+	 * index entries, because they have not been implemented yet, so return
+	 * EROFS if those entries are found to avoid corrupting an index that
+	 * was created by a newer kernel.
+	 */
+	err = -EROFS;
+	if (d_is_dir(index) || ovl_is_whiteout(index))
 		goto fail;
 
 	err = -EINVAL;
@@ -436,8 +447,8 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack,
 	return err;
 
 fail:
-	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n",
-			    index, err);
+	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+			    index, d_inode(index)->i_mode & S_IFMT, err);
 	goto out;
 }
 
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 0298463cf9c3f..3d424a51cabbf 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -703,7 +703,10 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
 			err = PTR_ERR(index);
 			break;
 		}
-		if (ovl_verify_index(index, lowerstack, numlower)) {
+		err = ovl_verify_index(index, lowerstack, numlower);
+		if (err) {
+			if (err == -EROFS)
+				break;
 			err = ovl_cleanup(dir, index);
 			if (err)
 				break;
-- 
GitLab


From 0e082555cec9510d276965fe391f709acb32c0f4 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 18 Jul 2017 21:07:43 +0300
Subject: [PATCH 1866/1958] ovl: check for bad and whiteout index on lookup

Index should always be of the same file type as origin, except for
the case of a whiteout index.  A whiteout index should only exist
if all lower aliases have been unlinked, which means that finding
a lower origin on lookup whose index is a whiteout should be treated
as a lookup error.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 229a88ff335c4..8aef2b304b2d2 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -513,6 +513,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
 		goto out;
 	}
 
+	inode = d_inode(index);
 	if (d_is_negative(index)) {
 		if (upper && d_inode(origin)->i_nlink > 1) {
 			pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
@@ -522,11 +523,22 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
 
 		dput(index);
 		index = NULL;
-	} else if (upper && d_inode(index) != d_inode(upper)) {
-		inode = d_inode(index);
-		pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n",
-				    d_inode(index)->i_ino,
-				    d_inode(upper)->i_ino);
+	} else if (upper && d_inode(upper) != inode) {
+		pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n",
+				    index, inode->i_ino, d_inode(upper)->i_ino);
+		goto fail;
+	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
+		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
+		/*
+		 * Index should always be of the same file type as origin
+		 * except for the case of a whiteout index. A whiteout
+		 * index should only exist if all lower aliases have been
+		 * unlinked, which means that finding a lower origin on lookup
+		 * whose index is a whiteout should be treated as an error.
+		 */
+		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+				    index, d_inode(index)->i_mode & S_IFMT,
+				    d_inode(origin)->i_mode & S_IFMT);
 		goto fail;
 	}
 
-- 
GitLab


From 325cdacd03c12629aa5f9ee2ace49b1f3dc184a8 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Sat, 15 Jul 2017 00:10:58 -0500
Subject: [PATCH 1867/1958] debug: Fix WARN_ON_ONCE() for modules

Mike Galbraith reported a situation where a WARN_ON_ONCE() call in DRM
code turned into an oops.  As it turns out, WARN_ON_ONCE() seems to be
completely broken when called from a module.

The bug was introduced with the following commit:

  19d436268dde ("debug: Add _ONCE() logic to report_bug()")

That commit changed WARN_ON_ONCE() to move its 'once' logic into the bug
trap handler.  It requires a writable bug table so that the BUGFLAG_DONE
bit can be written to the flags to indicate the first warning has
occurred.

The bug table was made writable for vmlinux, which relies on
vmlinux.lds.S and vmlinux.lds.h for laying out the sections.  However,
it wasn't made writable for modules, which rely on the ELF section
header flags.

Reported-by: Mike Galbraith <efault@gmx.de>
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 19d436268dde ("debug: Add _ONCE() logic to report_bug()")
Link: http://lkml.kernel.org/r/a53b04235a65478dd9afc51f5b329fdc65c84364.1500095401.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/bug.h      | 2 +-
 arch/arm64/include/asm/bug.h    | 2 +-
 arch/blackfin/include/asm/bug.h | 4 ++--
 arch/mn10300/include/asm/bug.h  | 2 +-
 arch/parisc/include/asm/bug.h   | 6 +++---
 arch/powerpc/include/asm/bug.h  | 8 ++++----
 arch/s390/include/asm/bug.h     | 4 ++--
 arch/sh/include/asm/bug.h       | 4 ++--
 arch/x86/include/asm/bug.h      | 4 ++--
 9 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index 4e6e88a6b2f4b..2244a94ed9c9d 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -37,7 +37,7 @@ do {								\
 		".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \
 		"2:\t.asciz " #__file "\n" 			\
 		".popsection\n" 				\
-		".pushsection __bug_table,\"a\"\n"		\
+		".pushsection __bug_table,\"aw\"\n"		\
 		".align 2\n"					\
 		"3:\t.word 1b, 2b\n"				\
 		"\t.hword " #__line ", 0\n"			\
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 366448eb0fb7a..a02a57186f568 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -36,7 +36,7 @@
 #ifdef CONFIG_GENERIC_BUG
 
 #define __BUG_ENTRY(flags) 				\
-		".pushsection __bug_table,\"a\"\n\t"	\
+		".pushsection __bug_table,\"aw\"\n\t"	\
 		".align 2\n\t"				\
 	"0:	.long 1f - 0b\n\t"			\
 _BUGVERBOSE_LOCATION(__FILE__, __LINE__)		\
diff --git a/arch/blackfin/include/asm/bug.h b/arch/blackfin/include/asm/bug.h
index 8d9b1eba89c48..76b2e82ee7309 100644
--- a/arch/blackfin/include/asm/bug.h
+++ b/arch/blackfin/include/asm/bug.h
@@ -21,7 +21,7 @@
 #define _BUG_OR_WARN(flags)						\
 	asm volatile(							\
 		"1:	.hword	%0\n"					\
-		"	.section __bug_table,\"a\",@progbits\n"		\
+		"	.section __bug_table,\"aw\",@progbits\n"	\
 		"2:	.long	1b\n"					\
 		"	.long	%1\n"					\
 		"	.short	%2\n"					\
@@ -38,7 +38,7 @@
 #define _BUG_OR_WARN(flags)						\
 	asm volatile(							\
 		"1:	.hword	%0\n"					\
-		"	.section __bug_table,\"a\",@progbits\n"		\
+		"	.section __bug_table,\"aw\",@progbits\n"	\
 		"2:	.long	1b\n"					\
 		"	.short	%1\n"					\
 		"	.org	2b + %2\n"				\
diff --git a/arch/mn10300/include/asm/bug.h b/arch/mn10300/include/asm/bug.h
index aa6a388863914..811414fb002de 100644
--- a/arch/mn10300/include/asm/bug.h
+++ b/arch/mn10300/include/asm/bug.h
@@ -21,7 +21,7 @@ do {								\
 	asm volatile(						\
 		"	syscall 15			\n"	\
 		"0:					\n"	\
-		"	.section __bug_table,\"a\"	\n"	\
+		"	.section __bug_table,\"aw\"	\n"	\
 		"	.long 0b,%0,%1			\n"	\
 		"	.previous			\n"	\
 		:						\
diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h
index d2742273a685d..07ea467f22fcd 100644
--- a/arch/parisc/include/asm/bug.h
+++ b/arch/parisc/include/asm/bug.h
@@ -27,7 +27,7 @@
 	do {								\
 		asm volatile("\n"					\
 			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
-			     "\t.pushsection __bug_table,\"a\"\n"	\
+			     "\t.pushsection __bug_table,\"aw\"\n"	\
 			     "2:\t" ASM_WORD_INSN "1b, %c0\n"		\
 			     "\t.short %c1, %c2\n"			\
 			     "\t.org 2b+%c3\n"				\
@@ -50,7 +50,7 @@
 	do {								\
 		asm volatile("\n"					\
 			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
-			     "\t.pushsection __bug_table,\"a\"\n"	\
+			     "\t.pushsection __bug_table,\"aw\"\n"	\
 			     "2:\t" ASM_WORD_INSN "1b, %c0\n"		\
 			     "\t.short %c1, %c2\n"			\
 			     "\t.org 2b+%c3\n"				\
@@ -64,7 +64,7 @@
 	do {								\
 		asm volatile("\n"					\
 			     "1:\t" PARISC_BUG_BREAK_ASM "\n"		\
-			     "\t.pushsection __bug_table,\"a\"\n"	\
+			     "\t.pushsection __bug_table,\"aw\"\n"	\
 			     "2:\t" ASM_WORD_INSN "1b\n"		\
 			     "\t.short %c0\n"				\
 			     "\t.org 2b+%c1\n"				\
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 0151af6c2a505..87fcc19488177 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -18,7 +18,7 @@
 #include <asm/asm-offsets.h>
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 .macro EMIT_BUG_ENTRY addr,file,line,flags
-	 .section __bug_table,"a"
+	 .section __bug_table,"aw"
 5001:	 PPC_LONG \addr, 5002f
 	 .short \line, \flags
 	 .org 5001b+BUG_ENTRY_SIZE
@@ -29,7 +29,7 @@
 .endm
 #else
 .macro EMIT_BUG_ENTRY addr,file,line,flags
-	 .section __bug_table,"a"
+	 .section __bug_table,"aw"
 5001:	 PPC_LONG \addr
 	 .short \flags
 	 .org 5001b+BUG_ENTRY_SIZE
@@ -42,14 +42,14 @@
    sizeof(struct bug_entry), respectively */
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define _EMIT_BUG_ENTRY				\
-	".section __bug_table,\"a\"\n"		\
+	".section __bug_table,\"aw\"\n"		\
 	"2:\t" PPC_LONG "1b, %0\n"		\
 	"\t.short %1, %2\n"			\
 	".org 2b+%3\n"				\
 	".previous\n"
 #else
 #define _EMIT_BUG_ENTRY				\
-	".section __bug_table,\"a\"\n"		\
+	".section __bug_table,\"aw\"\n"		\
 	"2:\t" PPC_LONG "1b\n"			\
 	"\t.short %2\n"				\
 	".org 2b+%3\n"				\
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 1bbd9dbfe4e0a..ce9cc123988b9 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -14,7 +14,7 @@
 		".section .rodata.str,\"aMS\",@progbits,1\n"	\
 		"2:	.asciz	\""__FILE__"\"\n"		\
 		".previous\n"					\
-		".section __bug_table,\"a\"\n"			\
+		".section __bug_table,\"aw\"\n"			\
 		"3:	.long	1b-3b,2b-3b\n"			\
 		"	.short	%0,%1\n"			\
 		"	.org	3b+%2\n"			\
@@ -30,7 +30,7 @@
 	asm volatile(					\
 		"0:	j	0b+2\n"			\
 		"1:\n"					\
-		".section __bug_table,\"a\"\n"		\
+		".section __bug_table,\"aw\"\n"		\
 		"2:	.long	1b-2b\n"		\
 		"	.short	%0\n"			\
 		"	.org	2b+%1\n"		\
diff --git a/arch/sh/include/asm/bug.h b/arch/sh/include/asm/bug.h
index 1b77f068be2b1..986c8781d89f3 100644
--- a/arch/sh/include/asm/bug.h
+++ b/arch/sh/include/asm/bug.h
@@ -24,14 +24,14 @@
  */
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define _EMIT_BUG_ENTRY				\
-	"\t.pushsection __bug_table,\"a\"\n"	\
+	"\t.pushsection __bug_table,\"aw\"\n"	\
 	"2:\t.long 1b, %O1\n"			\
 	"\t.short %O2, %O3\n"			\
 	"\t.org 2b+%O4\n"			\
 	"\t.popsection\n"
 #else
 #define _EMIT_BUG_ENTRY				\
-	"\t.pushsection __bug_table,\"a\"\n"	\
+	"\t.pushsection __bug_table,\"aw\"\n"	\
 	"2:\t.long 1b\n"			\
 	"\t.short %O3\n"			\
 	"\t.org 2b+%O4\n"			\
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 39e702d90cdbd..aa6b2023d8f8b 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -35,7 +35,7 @@
 #define _BUG_FLAGS(ins, flags)						\
 do {									\
 	asm volatile("1:\t" ins "\n"					\
-		     ".pushsection __bug_table,\"a\"\n"			\
+		     ".pushsection __bug_table,\"aw\"\n"		\
 		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
 		     "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"	\
 		     "\t.word %c1"        "\t# bug_entry::line\n"	\
@@ -52,7 +52,7 @@ do {									\
 #define _BUG_FLAGS(ins, flags)						\
 do {									\
 	asm volatile("1:\t" ins "\n"					\
-		     ".pushsection __bug_table,\"a\"\n"			\
+		     ".pushsection __bug_table,\"aw\"\n"		\
 		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
 		     "\t.word %c0"        "\t# bug_entry::flags\n"	\
 		     "\t.org 2b+%c1\n"					\
-- 
GitLab


From 4b895868bb2da60a386a17cde3bf9ecbc70c79f4 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 20 Jul 2017 14:48:26 +0300
Subject: [PATCH 1868/1958] xhci: Fix NULL pointer dereference when cleaning up
 streams for removed host

This off by one in stream_id indexing caused NULL pointer dereference and
soft lockup on machines with USB attached SCSI devices connected to a
hotpluggable xhci controller.

The code that cleans up pending URBs for dead hosts tried to dereference
a stream ring at the invalid stream_id 0.
ep->stream_info->stream_rings[0] doesn't point to a ring.

Start looping stream_id from 1 like in all the other places in the driver,
and check that the ring exists before trying to kill URBs on it.

Reported-by: rocko r <rockorequin@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index c50c902d009ed..cc368ad2b51e4 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -864,13 +864,16 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci,
 			(ep->ep_state & EP_GETTING_NO_STREAMS)) {
 		int stream_id;
 
-		for (stream_id = 0; stream_id < ep->stream_info->num_streams;
+		for (stream_id = 1; stream_id < ep->stream_info->num_streams;
 				stream_id++) {
+			ring = ep->stream_info->stream_rings[stream_id];
+			if (!ring)
+				continue;
+
 			xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
 					"Killing URBs for slot ID %u, ep index %u, stream %u",
-					slot_id, ep_index, stream_id + 1);
-			xhci_kill_ring_urbs(xhci,
-					ep->stream_info->stream_rings[stream_id]);
+					slot_id, ep_index, stream_id);
+			xhci_kill_ring_urbs(xhci, ring);
 		}
 	} else {
 		ring = ep->ring;
-- 
GitLab


From 9da5a1092b13468839b1a864b126cacfb72ad016 Mon Sep 17 00:00:00 2001
From: Jiahau Chang <jiahau@gmail.com>
Date: Thu, 20 Jul 2017 14:48:27 +0300
Subject: [PATCH 1869/1958] xhci: Bad Ethernet performance plugged in ASM1042A
 host

When USB Ethernet is plugged in ASMEDIA ASM1042A xHCI host, bad
performance was manifesting in Web browser use (like download
large file such as ISO image). It is known limitation of
ASM1042A that is not compatible with driver scheduling,
As a workaround we can modify flow control handling of ASM1042A.
The register we modify is changes the behavior

[use quirk bit 28, usleep_range 40-60us, empty non-pci function -Mathias]
Cc: <stable@vger.kernel.org>
Signed-off-by: Jiahau Chang <Lars_chang@asmedia.com.tw>
Signed-off-by: Ian Pilcher <arequipeno@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/pci-quirks.c | 54 +++++++++++++++++++++++++++++++++++
 drivers/usb/host/pci-quirks.h |  2 ++
 drivers/usb/host/xhci-pci.c   |  6 ++++
 drivers/usb/host/xhci.c       |  6 ++++
 drivers/usb/host/xhci.h       |  1 +
 5 files changed, 69 insertions(+)

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index a9a1e4c40480c..c8989c62a2621 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -77,6 +77,16 @@
 #define USB_INTEL_USB3_PSSEN   0xD8
 #define USB_INTEL_USB3PRM      0xDC
 
+/* ASMEDIA quirk use */
+#define ASMT_DATA_WRITE0_REG	0xF8
+#define ASMT_DATA_WRITE1_REG	0xFC
+#define ASMT_CONTROL_REG	0xE0
+#define ASMT_CONTROL_WRITE_BIT	0x02
+#define ASMT_WRITEREG_CMD	0x10423
+#define ASMT_FLOWCTL_ADDR	0xFA30
+#define ASMT_FLOWCTL_DATA	0xBA
+#define ASMT_PSEUDO_DATA	0
+
 /*
  * amd_chipset_gen values represent AMD different chipset generations
  */
@@ -412,6 +422,50 @@ void usb_amd_quirk_pll_disable(void)
 }
 EXPORT_SYMBOL_GPL(usb_amd_quirk_pll_disable);
 
+static int usb_asmedia_wait_write(struct pci_dev *pdev)
+{
+	unsigned long retry_count;
+	unsigned char value;
+
+	for (retry_count = 1000; retry_count > 0; --retry_count) {
+
+		pci_read_config_byte(pdev, ASMT_CONTROL_REG, &value);
+
+		if (value == 0xff) {
+			dev_err(&pdev->dev, "%s: check_ready ERROR", __func__);
+			return -EIO;
+		}
+
+		if ((value & ASMT_CONTROL_WRITE_BIT) == 0)
+			return 0;
+
+		usleep_range(40, 60);
+	}
+
+	dev_warn(&pdev->dev, "%s: check_write_ready timeout", __func__);
+	return -ETIMEDOUT;
+}
+
+void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev)
+{
+	if (usb_asmedia_wait_write(pdev) != 0)
+		return;
+
+	/* send command and address to device */
+	pci_write_config_dword(pdev, ASMT_DATA_WRITE0_REG, ASMT_WRITEREG_CMD);
+	pci_write_config_dword(pdev, ASMT_DATA_WRITE1_REG, ASMT_FLOWCTL_ADDR);
+	pci_write_config_byte(pdev, ASMT_CONTROL_REG, ASMT_CONTROL_WRITE_BIT);
+
+	if (usb_asmedia_wait_write(pdev) != 0)
+		return;
+
+	/* send data to device */
+	pci_write_config_dword(pdev, ASMT_DATA_WRITE0_REG, ASMT_FLOWCTL_DATA);
+	pci_write_config_dword(pdev, ASMT_DATA_WRITE1_REG, ASMT_PSEUDO_DATA);
+	pci_write_config_byte(pdev, ASMT_CONTROL_REG, ASMT_CONTROL_WRITE_BIT);
+}
+EXPORT_SYMBOL_GPL(usb_asmedia_modifyflowcontrol);
+
 void usb_amd_quirk_pll_enable(void)
 {
 	usb_amd_quirk_pll(0);
diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index 0222195bd5b0e..6559944801987 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h
@@ -11,6 +11,7 @@ bool usb_amd_prefetch_quirk(void);
 void usb_amd_dev_put(void);
 void usb_amd_quirk_pll_disable(void);
 void usb_amd_quirk_pll_enable(void);
+void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev);
 void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev);
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
@@ -18,6 +19,7 @@ void sb800_prefetch(struct device *dev, int on);
 struct pci_dev;
 static inline void usb_amd_quirk_pll_disable(void) {}
 static inline void usb_amd_quirk_pll_enable(void) {}
+static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {}
 static inline void usb_amd_dev_put(void) {}
 static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {}
 static inline void sb800_prefetch(struct device *dev, int on) {}
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 53882e2babbb2..5b0fa553c8bc9 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -59,6 +59,8 @@
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_2			0x43bb
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_1			0x43bc
 
+#define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI		0x1142
+
 static const char hcd_name[] = "xhci_hcd";
 
 static struct hc_driver __read_mostly xhci_pci_hc_driver;
@@ -217,6 +219,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 			pdev->device == 0x1142)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 
+	if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
+		pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI)
+		xhci->quirks |= XHCI_ASMEDIA_MODIFY_FLOWCONTROL;
+
 	if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241)
 		xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7;
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 56f85df013dbd..51326425f9cc7 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -198,6 +198,9 @@ int xhci_reset(struct xhci_hcd *xhci)
 	if (ret)
 		return ret;
 
+	if (xhci->quirks & XHCI_ASMEDIA_MODIFY_FLOWCONTROL)
+		usb_asmedia_modifyflowcontrol(to_pci_dev(xhci_to_hcd(xhci)->self.controller));
+
 	xhci_dbg_trace(xhci, trace_xhci_dbg_init,
 			 "Wait for controller to be ready for doorbell rings");
 	/*
@@ -1085,6 +1088,9 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 	if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !comp_timer_running)
 		compliance_mode_recovery_timer_init(xhci);
 
+	if (xhci->quirks & XHCI_ASMEDIA_MODIFY_FLOWCONTROL)
+		usb_asmedia_modifyflowcontrol(to_pci_dev(hcd->self.controller));
+
 	/* Re-enable port polling. */
 	xhci_dbg(xhci, "%s: starting port polling.\n", __func__);
 	set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 3c6da1f93c845..e3e935291ed6f 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1820,6 +1820,7 @@ struct xhci_hcd {
 #define XHCI_BROKEN_PORT_PED	(1 << 25)
 #define XHCI_LIMIT_ENDPOINT_INTERVAL_7	(1 << 26)
 #define XHCI_U2_DISABLE_WAKE	(1 << 27)
+#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL	(1 << 28)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
-- 
GitLab


From 28a2369f7d72ece55089f33e7d7b9c1223673cc3 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Thu, 20 Jul 2017 14:48:28 +0300
Subject: [PATCH 1870/1958] usb: xhci: Issue stop EP command only when the EP
 state is running

on AMD platforms with SNPS 3.1 USB controller if stop endpoint command is
issued the controller does not respond, when the EP is not in running
state. HW completes the command execution and reports
"Context State Error" completion code. This is as per the spec. However
HW on receiving the second command additionally marks EP to Flow control
state in HW which is RTL bug. This bug causes the HW not to respond
to any further doorbells that are rung by the driver. This makes the EP
to not functional anymore and causes gross functional failures.

As a workaround, not to hit this problem, it's better to check the EP state
and issue a stop EP command only when the EP is in running state.

As a sidenote, even with this patch there is still a possibility of
triggering the RTL bug if the context state races with the stop endpoint
command as described in xHCI spec 4.6.9

[code simplification and reworded sidenote in commit message -Mathias]
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Nehal Shah <Nehal-bakulchandra.Shah@amd.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 1adae9eab8313..364f602750435 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -398,14 +398,21 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
 	spin_lock_irqsave(&xhci->lock, flags);
 	for (i = LAST_EP_INDEX; i > 0; i--) {
 		if (virt_dev->eps[i].ring && virt_dev->eps[i].ring->dequeue) {
+			struct xhci_ep_ctx *ep_ctx;
 			struct xhci_command *command;
+
+			ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->out_ctx, i);
+
+			/* Check ep is running, required by AMD SNPS 3.1 xHC */
+			if (GET_EP_CTX_STATE(ep_ctx) != EP_STATE_RUNNING)
+				continue;
+
 			command = xhci_alloc_command(xhci, false, false,
 						     GFP_NOWAIT);
 			if (!command) {
 				spin_unlock_irqrestore(&xhci->lock, flags);
 				xhci_free_command(xhci, cmd);
 				return -ENOMEM;
-
 			}
 			xhci_queue_stop_endpoint(xhci, command, slot_id, i,
 						 suspend);
-- 
GitLab


From a54408d0a004757789863d74e29c2297edae0b4d Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 20 Jul 2017 14:48:29 +0300
Subject: [PATCH 1871/1958] xhci: fix 20000ms port resume timeout

A uncleared PLC (port link change) bit will prevent furuther port event
interrupts for that port. Leaving it uncleared caused get_port_status()
to timeout after 20000ms while waiting to get the final port event
interrupt for resume -> U0 state change.

This is a targeted fix for a specific case where we get a port resume event
racing with xhci resume. The port event interrupt handler notices xHC is
not yet running and bails out early, leaving PLC uncleared.

The whole xhci port resuming needs more attention, but while working on it
it anyways makes sense to always ensure PLC is cleared in get_port_status
before setting a new link state and waiting for its completion.

Cc: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 364f602750435..9ef5f68e32131 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -904,6 +904,9 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 			clear_bit(wIndex, &bus_state->resuming_ports);
 
 			set_bit(wIndex, &bus_state->rexit_ports);
+
+			xhci_test_and_clear_bit(xhci, port_array, wIndex,
+						PORT_PLC);
 			xhci_set_link_state(xhci, port_array, wIndex,
 					XDEV_U0);
 
-- 
GitLab


From 576d55460e7f209139545a348746c2fcadf61bc3 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Thu, 20 Jul 2017 14:48:30 +0300
Subject: [PATCH 1872/1958] usb: xhci: fix spinlock recursion for USB2 test
 mode

Both xhci_hub_control and xhci_disable_slot tries to hold spinlock, the
spinlock recursion occurs when enters USB2 test mode. Fix it by unlock
spinlock before calling xhci_disable_slot.

Cc: <stable@vger.kernel.org>
Fixes: 0f1d832ed1fb ("usb: xhci: Add port test modes support for usb2")
Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 9ef5f68e32131..00721e8807ab4 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -610,12 +610,14 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci,
 
 	/* Disable all Device Slots */
 	xhci_dbg(xhci, "Disable all slots\n");
+	spin_unlock_irqrestore(&xhci->lock, *flags);
 	for (i = 1; i <= HCS_MAX_SLOTS(xhci->hcs_params1); i++) {
 		retval = xhci_disable_slot(xhci, NULL, i);
 		if (retval)
 			xhci_err(xhci, "Failed to disable slot %d, %d. Enter test mode anyway\n",
 				 i, retval);
 	}
+	spin_lock_irqsave(&xhci->lock, *flags);
 	/* Put all ports to the Disable state by clear PP */
 	xhci_dbg(xhci, "Disable all port (PP = 0)\n");
 	/* Power off USB3 ports*/
-- 
GitLab


From d6f5f071f1e13cadecf8aef1faa7e5d6fbc9f33b Mon Sep 17 00:00:00 2001
From: Shu Wang <shuwang@redhat.com>
Date: Thu, 20 Jul 2017 14:48:31 +0300
Subject: [PATCH 1873/1958] xhci: fix memleak in xhci_run()

Found this issue by kmemleak.
xhci_run() did not check return val and free command for
xhci_queue_vendor_command()

unreferenced object 0xffff88011c0be500 (size 64):
  comm "kworker/0:1", pid 58, jiffies 4294670908 (age 50.420s)
  hex dump (first 32 bytes):
  backtrace:
    [<ffffffff8176166a>] kmemleak_alloc+0x4a/0xa0
    [<ffffffff8121801a>] kmem_cache_alloc_trace+0xca/0x1d0
    [<ffffffff81576bf4>] xhci_alloc_command+0x44/0x130
    [<ffffffff8156f1cc>] xhci_run+0x4cc/0x630
    [<ffffffff8153b84b>] usb_add_hcd+0x3bb/0x950
    [<ffffffff8154eac8>] usb_hcd_pci_probe+0x188/0x500
    [<ffffffff815851ac>] xhci_pci_probe+0x2c/0x220
    [<ffffffff813d2ca5>] local_pci_probe+0x45/0xa0
    [<ffffffff810a54e4>] work_for_cpu_fn+0x14/0x20
    [<ffffffff810a8409>] process_one_work+0x149/0x360
    [<ffffffff810a8d08>] worker_thread+0x1d8/0x3c0
    [<ffffffff810ae7d9>] kthread+0x109/0x140
    [<ffffffff8176d585>] ret_from_fork+0x25/0x30
    [<ffffffffffffffff>] 0xffffffffffffffff

Cc: <stable@vger.kernel.org>
Signed-off-by: Shu Wang <shuwang@redhat.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 51326425f9cc7..b2ff1ff1a02fa 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -625,8 +625,10 @@ int xhci_run(struct usb_hcd *hcd)
 		if (!command)
 			return -ENOMEM;
 
-		xhci_queue_vendor_command(xhci, command, 0, 0, 0,
+		ret = xhci_queue_vendor_command(xhci, command, 0, 0, 0,
 				TRB_TYPE(TRB_NEC_GET_FW));
+		if (ret)
+			xhci_free_command(xhci, command);
 	}
 	xhci_dbg_trace(xhci, trace_xhci_dbg_init,
 			"Finished xhci_run for USB2 roothub");
-- 
GitLab


From db9108e054700c96322b0f0028546aa4e643cf0b Mon Sep 17 00:00:00 2001
From: Chunyu Hu <chuhu@redhat.com>
Date: Thu, 20 Jul 2017 18:36:09 +0800
Subject: [PATCH 1874/1958] tracing: Fix kmemleak in instance_rmdir

Hit the kmemleak when executing instance_rmdir, it forgot releasing
mem of tracing_cpumask. With this fix, the warn does not appear any
more.

unreferenced object 0xffff93a8dfaa7c18 (size 8):
  comm "mkdir", pid 1436, jiffies 4294763622 (age 9134.308s)
  hex dump (first 8 bytes):
    ff ff ff ff ff ff ff ff                          ........
  backtrace:
    [<ffffffff88b6567a>] kmemleak_alloc+0x4a/0xa0
    [<ffffffff8861ea41>] __kmalloc_node+0xf1/0x280
    [<ffffffff88b505d3>] alloc_cpumask_var_node+0x23/0x30
    [<ffffffff88b5060e>] alloc_cpumask_var+0xe/0x10
    [<ffffffff88571ab0>] instance_mkdir+0x90/0x240
    [<ffffffff886e5100>] tracefs_syscall_mkdir+0x40/0x70
    [<ffffffff886565c9>] vfs_mkdir+0x109/0x1b0
    [<ffffffff8865b1d0>] SyS_mkdir+0xd0/0x100
    [<ffffffff88403857>] do_syscall_64+0x67/0x150
    [<ffffffff88b710e7>] return_from_SYSCALL_64+0x0/0x6a
    [<ffffffffffffffff>] 0xffffffffffffffff

Link: http://lkml.kernel.org/r/1500546969-12594-1-git-send-email-chuhu@redhat.com

Cc: stable@vger.kernel.org
Fixes: ccfe9e42e451 ("tracing: Make tracing_cpumask available for all instances")
Signed-off-by: Chunyu Hu <chuhu@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2d0ffcc49dba0..42b9355033d45 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7774,6 +7774,7 @@ static int instance_rmdir(const char *name)
 	}
 	kfree(tr->topts);
 
+	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
 	kfree(tr);
 
-- 
GitLab


From f86f418059b94aa01f9342611a272ca60c583e89 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <zhang.chunyan@linaro.org>
Date: Wed, 7 Jun 2017 16:12:51 +0800
Subject: [PATCH 1875/1958] trace: fix the errors caused by incompatible type
 of RCU variables

The variables which are processed by RCU functions should be annotated
as RCU, otherwise sparse will report the errors like below:

"error: incompatible types in comparison expression (different
address spaces)"

Link: http://lkml.kernel.org/r/1496823171-7758-1-git-send-email-zhang.chunyan@linaro.org

Signed-off-by: Chunyan Zhang <zhang.chunyan@linaro.org>
[ Updated to not be 100% 80 column strict ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h       |  6 +++---
 include/linux/trace_events.h |  2 +-
 kernel/trace/ftrace.c        | 41 ++++++++++++++++++++++++------------
 kernel/trace/trace.h         |  6 +++---
 4 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5857390ac35aa..6383115e9d2c5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -145,8 +145,8 @@ enum {
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
-	struct ftrace_hash		*notrace_hash;
-	struct ftrace_hash		*filter_hash;
+	struct ftrace_hash __rcu	*notrace_hash;
+	struct ftrace_hash __rcu	*filter_hash;
 	struct mutex			regex_lock;
 };
 
@@ -168,7 +168,7 @@ static inline void ftrace_free_init_mem(void) { }
  */
 struct ftrace_ops {
 	ftrace_func_t			func;
-	struct ftrace_ops		*next;
+	struct ftrace_ops __rcu		*next;
 	unsigned long			flags;
 	void				*private;
 	ftrace_func_t			saved_func;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index f73cedfa2e0b9..536c80ff7ad96 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -338,7 +338,7 @@ enum {
 struct trace_event_file {
 	struct list_head		list;
 	struct trace_event_call		*event_call;
-	struct event_filter		*filter;
+	struct event_filter __rcu	*filter;
 	struct dentry			*dir;
 	struct trace_array		*tr;
 	struct trace_subsystem_dir	*system;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 53f6b6401cf05..02004ae918608 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -113,7 +113,7 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
 
@@ -169,8 +169,11 @@ int ftrace_nr_registered_ops(void)
 
 	mutex_lock(&ftrace_lock);
 
-	for (ops = ftrace_ops_list;
-	     ops != &ftrace_list_end; ops = ops->next)
+	for (ops = rcu_dereference_protected(ftrace_ops_list,
+					     lockdep_is_held(&ftrace_lock));
+	     ops != &ftrace_list_end;
+	     ops = rcu_dereference_protected(ops->next,
+					     lockdep_is_held(&ftrace_lock)))
 		cnt++;
 
 	mutex_unlock(&ftrace_lock);
@@ -275,10 +278,11 @@ static void update_ftrace_function(void)
 	 * If there's only one ftrace_ops registered, the ftrace_ops_list
 	 * will point to the ops we want.
 	 */
-	set_function_trace_op = ftrace_ops_list;
+	set_function_trace_op = rcu_dereference_protected(ftrace_ops_list,
+						lockdep_is_held(&ftrace_lock));
 
 	/* If there's no ftrace_ops registered, just call the stub function */
-	if (ftrace_ops_list == &ftrace_list_end) {
+	if (set_function_trace_op == &ftrace_list_end) {
 		func = ftrace_stub;
 
 	/*
@@ -286,7 +290,8 @@ static void update_ftrace_function(void)
 	 * recursion safe and not dynamic and the arch supports passing ops,
 	 * then have the mcount trampoline call the function directly.
 	 */
-	} else if (ftrace_ops_list->next == &ftrace_list_end) {
+	} else if (rcu_dereference_protected(ftrace_ops_list->next,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		func = ftrace_ops_get_list_func(ftrace_ops_list);
 
 	} else {
@@ -348,9 +353,11 @@ int using_ftrace_ops_list_func(void)
 	return ftrace_trace_function == ftrace_ops_list_func;
 }
 
-static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static void add_ftrace_ops(struct ftrace_ops __rcu **list,
+			   struct ftrace_ops *ops)
 {
-	ops->next = *list;
+	rcu_assign_pointer(ops->next, *list);
+
 	/*
 	 * We are entering ops into the list but another
 	 * CPU might be walking that list. We need to make sure
@@ -360,7 +367,8 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	rcu_assign_pointer(*list, ops);
 }
 
-static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static int remove_ftrace_ops(struct ftrace_ops __rcu **list,
+			     struct ftrace_ops *ops)
 {
 	struct ftrace_ops **p;
 
@@ -368,7 +376,10 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	 * If we are removing the last function, then simply point
 	 * to the ftrace_stub.
 	 */
-	if (*list == ops && ops->next == &ftrace_list_end) {
+	if (rcu_dereference_protected(*list,
+			lockdep_is_held(&ftrace_lock)) == ops &&
+	    rcu_dereference_protected(ops->next,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		*list = &ftrace_list_end;
 		return 0;
 	}
@@ -1569,8 +1580,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 		return 0;
 #endif
 
-	hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash);
-	hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash);
+	rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash);
+	rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash);
 
 	if (hash_contains_ip(ip, &hash))
 		ret = 1;
@@ -2840,7 +2851,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	 * If there's no more ops registered with ftrace, run a
 	 * sanity check to make sure all rec flags are cleared.
 	 */
-	if (ftrace_ops_list == &ftrace_list_end) {
+	if (rcu_dereference_protected(ftrace_ops_list,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		struct ftrace_page *pg;
 		struct dyn_ftrace *rec;
 
@@ -6453,7 +6465,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 	if (ftrace_enabled) {
 
 		/* we are starting ftrace again */
-		if (ftrace_ops_list != &ftrace_list_end)
+		if (rcu_dereference_protected(ftrace_ops_list,
+			lockdep_is_held(&ftrace_lock)) != &ftrace_list_end)
 			update_ftrace_function();
 
 		ftrace_startup_sysctl();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6ade1c55cc3ac..490ba229931d7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1210,9 +1210,9 @@ struct ftrace_event_field {
 struct event_filter {
 	int			n_preds;	/* Number assigned */
 	int			a_preds;	/* allocated */
-	struct filter_pred	*preds;
-	struct filter_pred	*root;
-	char			*filter_string;
+	struct filter_pred __rcu	*preds;
+	struct filter_pred __rcu	*root;
+	char				*filter_string;
 };
 
 struct event_subsystem {
-- 
GitLab


From dc1a0afbacaeaced8f5679a99047c0467f1099e9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 14 Jul 2017 11:12:09 +0200
Subject: [PATCH 1876/1958] nvme: fix byte swapping in the streams code

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c |  2 +-
 include/linux/nvme.h     | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cb96f4a7ae3a9..3b77cfe5aa1eb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
 
 	c.directive.opcode = nvme_admin_directive_recv;
 	c.directive.nsid = cpu_to_le32(nsid);
-	c.directive.numd = sizeof(*s);
+	c.directive.numd = cpu_to_le32(sizeof(*s));
 	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
 	c.directive.dtype = NVME_DIR_STREAMS;
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6b8ee9e628e18..bc74da018bdcd 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -963,14 +963,14 @@ struct nvme_dbbuf {
 };
 
 struct streams_directive_params {
-	__u16	msl;
-	__u16	nssa;
-	__u16	nsso;
+	__le16	msl;
+	__le16	nssa;
+	__le16	nsso;
 	__u8	rsvd[10];
-	__u32	sws;
-	__u16	sgs;
-	__u16	nsa;
-	__u16	nso;
+	__le32	sws;
+	__le16	sgs;
+	__le16	nsa;
+	__le16	nso;
 	__u8	rsvd2[6];
 };
 
-- 
GitLab


From 7722ecdc54a4019eaeeebfdac53915bf0c68a7ff Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 14 Jul 2017 11:14:46 +0200
Subject: [PATCH 1877/1958] nvmet-fc: fix byte swapping in
 nvmet_fc_ls_create_association

We always need to do non-equal comparisms on the native endian versions
to get the correct result.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 1e6dcc241b3cf..d5801c150b1cb 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1174,14 +1174,14 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 	 */
 	if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN)
 		ret = VERR_CR_ASSOC_LEN;
-	else if (rqst->desc_list_len <
-			cpu_to_be32(FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN))
+	else if (be32_to_cpu(rqst->desc_list_len) <
+			FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN)
 		ret = VERR_CR_ASSOC_RQST_LEN;
 	else if (rqst->assoc_cmd.desc_tag !=
 			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD))
 		ret = VERR_CR_ASSOC_CMD;
-	else if (rqst->assoc_cmd.desc_len <
-			cpu_to_be32(FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN))
+	else if (be32_to_cpu(rqst->assoc_cmd.desc_len) <
+			FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN)
 		ret = VERR_CR_ASSOC_CMD_LEN;
 	else if (!rqst->assoc_cmd.ersp_ratio ||
 		 (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >=
-- 
GitLab


From f99cb7af40f99703bacf1640dc8a4b09062c1f0f Mon Sep 17 00:00:00 2001
From: David Wayne Fugate <david.fugate@intel.com>
Date: Mon, 10 Jul 2017 12:39:59 -0600
Subject: [PATCH 1878/1958] nvme-pci: add another device ID with stripe quirk

Adds a fourth Intel controller which has the "stripe" quirk.

Signed-off-by: David Wayne Fugate <david.fugate@intel.com>
Acked-by: Keith Busch <keith.busch@intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d10d2f279d19a..454f97bbbb2cb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2466,6 +2466,9 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x0a54),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
 				NVME_QUIRK_DEALLOCATE_ZEROES, },
+	{ PCI_VDEVICE(INTEL, 0x0a55),
+		.driver_data = NVME_QUIRK_STRIPE_SIZE |
+				NVME_QUIRK_DEALLOCATE_ZEROES, },
 	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
-- 
GitLab


From 86eea2895d11dde9bf43fa2046331e84154e00f4 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 12 Jul 2017 15:59:07 -0400
Subject: [PATCH 1879/1958] nvme-pci: Remove nvme_setup_prps BUG_ON

This patch replaces the invalid nvme SGL kernel panic with a warning,
and returns an appropriate error. The warning will occur only on the
first occurance, and sgl details will be printed to help debug how the
request was allowed to form.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 454f97bbbb2cb..1e5c9f2ddba67 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -539,7 +539,7 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 }
 #endif
 
-static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
+static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct dma_pool *pool;
@@ -556,7 +556,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 
 	length -= (page_size - offset);
 	if (length <= 0)
-		return true;
+		return BLK_STS_OK;
 
 	dma_len -= (page_size - offset);
 	if (dma_len) {
@@ -569,7 +569,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 
 	if (length <= page_size) {
 		iod->first_dma = dma_addr;
-		return true;
+		return BLK_STS_OK;
 	}
 
 	nprps = DIV_ROUND_UP(length, page_size);
@@ -585,7 +585,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 	if (!prp_list) {
 		iod->first_dma = dma_addr;
 		iod->npages = -1;
-		return false;
+		return BLK_STS_RESOURCE;
 	}
 	list[0] = prp_list;
 	iod->first_dma = prp_dma;
@@ -595,7 +595,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 			__le64 *old_prp_list = prp_list;
 			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
 			if (!prp_list)
-				return false;
+				return BLK_STS_RESOURCE;
 			list[iod->npages++] = prp_list;
 			prp_list[0] = old_prp_list[i - 1];
 			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
@@ -609,13 +609,29 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 			break;
 		if (dma_len > 0)
 			continue;
-		BUG_ON(dma_len < 0);
+		if (unlikely(dma_len < 0))
+			goto bad_sgl;
 		sg = sg_next(sg);
 		dma_addr = sg_dma_address(sg);
 		dma_len = sg_dma_len(sg);
 	}
 
-	return true;
+	return BLK_STS_OK;
+
+ bad_sgl:
+	if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
+				blk_rq_payload_bytes(req), iod->nents)) {
+		for_each_sg(iod->sg, sg, iod->nents, i) {
+			dma_addr_t phys = sg_phys(sg);
+			pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+			       "dma_address:%pad dma_length:%d\n", i, &phys,
+					sg->offset, sg->length,
+					&sg_dma_address(sg),
+					sg_dma_len(sg));
+		}
+	}
+	return BLK_STS_IOERR;
+
 }
 
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
@@ -637,7 +653,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 				DMA_ATTR_NO_WARN))
 		goto out;
 
-	if (!nvme_setup_prps(dev, req))
+	ret = nvme_setup_prps(dev, req);
+	if (ret != BLK_STS_OK)
 		goto out_unmap;
 
 	ret = BLK_STS_IOERR;
-- 
GitLab


From b00c9b7aa06786fc5469783965ff3e2a705a1dec Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 16 Jul 2017 10:39:03 +0200
Subject: [PATCH 1880/1958] nvme-pci: Fix an error handling path in
 'nvme_probe()'

Release resources in the correct order in order not to miss a
'put_device()' if 'nvme_dev_map()' fails.

Fixes: b00a726a9fd8 ("NVMe: Don't unmap controller registers on reset")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 1e5c9f2ddba67..8569ee7712693 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2299,7 +2299,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	result = nvme_dev_map(dev);
 	if (result)
-		goto free;
+		goto put_pci;
 
 	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
@@ -2308,7 +2308,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	result = nvme_setup_prp_pools(dev);
 	if (result)
-		goto put_pci;
+		goto unmap;
 
 	quirks |= check_dell_samsung_bug(pdev);
 
@@ -2325,9 +2325,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
  release_pools:
 	nvme_release_prp_pools(dev);
+ unmap:
+	nvme_dev_unmap(dev);
  put_pci:
 	put_device(dev->dev);
-	nvme_dev_unmap(dev);
  free:
 	kfree(dev->queues);
 	kfree(dev);
-- 
GitLab


From 41528f8036f835e44884f4b0ce0d18191aa0a98f Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 14 Jul 2017 15:36:54 +0200
Subject: [PATCH 1881/1958] nvmet: prefix version configfs file with attr

The NVMe target's attribute files need an attr prefix in order to have
nvmetcli recognize them. Add this attribute.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/configfs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index a358ecd93e110..ceee57bb0c247 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -650,7 +650,7 @@ static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
 
-static ssize_t nvmet_subsys_version_show(struct config_item *item,
+static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
 					      char *page)
 {
 	struct nvmet_subsys *subsys = to_subsys(item);
@@ -666,7 +666,7 @@ static ssize_t nvmet_subsys_version_show(struct config_item *item,
 				(int)NVME_MINOR(subsys->ver));
 }
 
-static ssize_t nvmet_subsys_version_store(struct config_item *item,
+static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
 					       const char *page, size_t count)
 {
 	struct nvmet_subsys *subsys = to_subsys(item);
@@ -684,11 +684,11 @@ static ssize_t nvmet_subsys_version_store(struct config_item *item,
 
 	return count;
 }
-CONFIGFS_ATTR(nvmet_subsys_, version);
+CONFIGFS_ATTR(nvmet_subsys_, attr_version);
 
 static struct configfs_attribute *nvmet_subsys_attrs[] = {
 	&nvmet_subsys_attr_attr_allow_any_host,
-	&nvmet_subsys_attr_version,
+	&nvmet_subsys_attr_attr_version,
 	NULL,
 };
 
-- 
GitLab


From 2e7f5d2af2155084c6f7c86328d36e698cd84954 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 14 Jul 2017 15:36:55 +0200
Subject: [PATCH 1882/1958] nvmet: Move serial number from controller to
 subsystem

The NVMe specification defines the serial number as:

"Serial Number (SN): Contains the serial number for the NVM subsystem
that is assigned by the vendor as an ASCII string. Refer to section
7.10 for unique identifier requirements. Refer to section 1.5 for ASCII
string requirements"

So move it from the controller to the subsystem, where it belongs.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/admin-cmd.c | 2 +-
 drivers/nvme/target/core.c      | 5 ++---
 drivers/nvme/target/nvmet.h     | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 35f930db3c02c..f7ba006d6a656 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -185,7 +185,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->ssvid = 0;
 
 	memset(id->sn, ' ', sizeof(id->sn));
-	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
+	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->subsys->serial);
 
 	memset(id->mn, ' ', sizeof(id->mn));
 	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b5b4ac1037484..f4b02bb4a1a89 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -767,9 +767,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
 	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
 
-	/* generate a random serial number as our controllers are ephemeral: */
-	get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
-
 	kref_init(&ctrl->ref);
 	ctrl->subsys = subsys;
 
@@ -928,6 +925,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 		return NULL;
 
 	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
+	/* generate a random serial number as our controllers are ephemeral: */
+	get_random_bytes(&subsys->serial, sizeof(subsys->serial));
 
 	switch (type) {
 	case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 747bbdb4f9c61..e3b244c7e443e 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -112,7 +112,6 @@ struct nvmet_ctrl {
 
 	struct mutex		lock;
 	u64			cap;
-	u64			serial;
 	u32			cc;
 	u32			csts;
 
@@ -152,6 +151,7 @@ struct nvmet_subsys {
 	u16			max_qid;
 
 	u64			ver;
+	u64			serial;
 	char			*subsysnqn;
 
 	struct config_group	group;
-- 
GitLab


From fcbc545959676282e7f46be5c8d8aea26a89ea47 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 14 Jul 2017 15:36:56 +0200
Subject: [PATCH 1883/1958] nvmet: preserve controller serial number between
 reboots

The NVMe target has no way to preserve controller serial
IDs across reboots which breaks udev scripts doing
SYMLINK+="dev/disk/by-id/nvme-$env{ID_SERIAL}-part%n.

Export the randomly generated serial number via configfs and allow
setting of a serial via configfs to mitigate this breakage.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/configfs.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ceee57bb0c247..0a0067e771f59 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -686,9 +686,31 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
 }
 CONFIGFS_ATTR(nvmet_subsys_, attr_version);
 
+static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item,
+					     char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	return snprintf(page, PAGE_SIZE, "%llx\n", subsys->serial);
+}
+
+static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
+					      const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	down_write(&nvmet_config_sem);
+	sscanf(page, "%llx\n", &subsys->serial);
+	up_write(&nvmet_config_sem);
+
+	return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_serial);
+
 static struct configfs_attribute *nvmet_subsys_attrs[] = {
 	&nvmet_subsys_attr_attr_allow_any_host,
 	&nvmet_subsys_attr_attr_version,
+	&nvmet_subsys_attr_attr_serial,
 	NULL,
 };
 
-- 
GitLab


From 42de82a8b544fa55670feef7d6f85085fba48fc0 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Fri, 14 Jul 2017 00:25:31 +0200
Subject: [PATCH 1884/1958] nvmet: don't report 0-bytes in serial number

The NVME standard mandates that the SN, MN, and FR fields of the Identify
Controller Data Structure be "ASCII strings".  That means that they may
not contain 0-bytes, not even string terminators.

Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
[hch: fixed for the move of the serial field, updated description]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/admin-cmd.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index f7ba006d6a656..2d7a98ab53fbf 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -168,11 +168,21 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
+static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len)
+{
+	int len = min(src_len, dst_len);
+
+	memcpy(dst, src, len);
+	if (dst_len > len)
+		memset(dst + len, ' ', dst_len - len);
+}
+
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvme_id_ctrl *id;
 	u16 status = 0;
+	const char model[] = "Linux";
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
 	if (!id) {
@@ -184,8 +194,10 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->vid = 0;
 	id->ssvid = 0;
 
-	memset(id->sn, ' ', sizeof(id->sn));
-	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->subsys->serial);
+	bin2hex(id->sn, &ctrl->subsys->serial,
+		min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
+	copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1);
+	copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE));
 
 	memset(id->mn, ' ', sizeof(id->mn));
 	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
-- 
GitLab


From f244deed7a254b4a61333e6886575ce0102e8356 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 20 Jul 2017 01:11:54 -0700
Subject: [PATCH 1885/1958] KVM: VMX: Fix invalid guest state detection after
 task-switch emulation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can be reproduced by EPT=1, unrestricted_guest=N, emulate_invalid_state=Y
or EPT=0, the trace of kvm-unit-tests/taskswitch2.flat is like below, it tries
to emulate invalid guest state task-switch:

kvm_exit: reason TASK_SWITCH rip 0x0 info 40000058 0
kvm_emulate_insn: 42000:0:0f 0b (0x2)
kvm_emulate_insn: 42000:0:0f 0b (0x2) failed
kvm_inj_exception: #UD (0x0)
kvm_entry: vcpu 0
kvm_exit: reason TASK_SWITCH rip 0x0 info 40000058 0
kvm_emulate_insn: 42000:0:0f 0b (0x2)
kvm_emulate_insn: 42000:0:0f 0b (0x2) failed
kvm_inj_exception: #UD (0x0)
......................

It appears that the task-switch emulation updates rflags (and vm86
flag) only after the segments are loaded, causing vmx->emulation_required
to be set, when in fact invalid guest state emulation is not needed.

This patch fixes it by updating vmx->emulation_required after the
rflags (and vm86 flag) is updated in task-switch emulation.

Thanks Radim for moving the update to vmx__set_flags and adding Paolo's
suggestion for the check.

Suggested-by: Nadav Amit <nadav.amit@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 791c018a00343..29fd8af5c347a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2327,6 +2327,11 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 	__vmx_load_host_state(to_vmx(vcpu));
 }
 
+static bool emulation_required(struct kvm_vcpu *vcpu)
+{
+	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
 
 /*
@@ -2364,6 +2369,8 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+	unsigned long old_rflags = vmx_get_rflags(vcpu);
+
 	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
 	to_vmx(vcpu)->rflags = rflags;
 	if (to_vmx(vcpu)->rmode.vm86_active) {
@@ -2371,6 +2378,9 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 	}
 	vmcs_writel(GUEST_RFLAGS, rflags);
+
+	if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
+		to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
 }
 
 static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
@@ -3858,11 +3868,6 @@ static __init int alloc_kvm_area(void)
 	return 0;
 }
 
-static bool emulation_required(struct kvm_vcpu *vcpu)
-{
-	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
-}
-
 static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
 		struct kvm_segment *save)
 {
-- 
GitLab


From f1ff89ec4447c4e39d275a1ca3de43eed2a92745 Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan@virtuozzo.com>
Date: Thu, 20 Jul 2017 17:26:40 +0300
Subject: [PATCH 1886/1958] kvm: x86: hyperv: avoid livelock in oneshot SynIC
 timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the SynIC timer message delivery fails due to SINT message slot being
busy, there's no point to attempt starting the timer again until we're
notified of the slot being released by the guest (via EOM or EOI).

Even worse, when a oneshot timer fails to deliver its message, its
re-arming with an expiration time in the past leads to immediate retry
of the delivery, and so on, without ever letting the guest vcpu to run
and release the slot, which results in a livelock.

To avoid that, only start the timer when there's no timer message
pending delivery.  When there is, meaning the slot is busy, the
processing will be restarted upon notification from the guest that the
slot is released.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/hyperv.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 2695a34fa1c51..337b6d2730fa1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -649,9 +649,10 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
 				}
 
 				if ((stimer->config & HV_STIMER_ENABLE) &&
-				    stimer->count)
-					stimer_start(stimer);
-				else
+				    stimer->count) {
+					if (!stimer->msg_pending)
+						stimer_start(stimer);
+				} else
 					stimer_cleanup(stimer);
 			}
 		}
-- 
GitLab


From be8822db62ddda6d316d2dd682679732ed2f0abf Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Fri, 23 Jun 2017 16:03:55 -0500
Subject: [PATCH 1887/1958] i40iw: Fix order of cleanup in close

The order for calling i40iw_destroy_pble_pool is incorrect.
Also, add PBLE_CHUNK_MEM init state to track pble pool
creation and destruction.

Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index e0f47cc2effc3..8fc61b3bd2237 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1474,6 +1474,9 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
 			unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
 		}
 		/* fallthrough */
+	case PBLE_CHUNK_MEM:
+		i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
+		/* fallthrough */
 	case CEQ_CREATED:
 		i40iw_dele_ceqs(iwdev, reset);
 		/* fallthrough */
@@ -1489,9 +1492,6 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
 	case CCQ_CREATED:
 		i40iw_destroy_ccq(iwdev, reset);
 		/* fallthrough */
-	case PBLE_CHUNK_MEM:
-		i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
-		/* fallthrough */
 	case HMC_OBJS_CREATED:
 		i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
 		/* fallthrough */
@@ -1670,6 +1670,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
 		status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
 		if (status)
 			break;
+		iwdev->init_state = PBLE_CHUNK_MEM;
 		iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
 		i40iw_register_notifiers();
 		iwdev->init_state = INET_NOTIFIER;
-- 
GitLab


From 415920aa174666c0ac8c47eee974acc9f49efec4 Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Fri, 23 Jun 2017 16:03:56 -0500
Subject: [PATCH 1888/1958] i40iw: Do not poll CCQ after it is destroyed

Control Queue Pair (CQP) OPs, in this case - Update SDs,
cannot poll the Control Completion Queue (CCQ) after CCQ is
destroyed. Instead, poll via registers.

Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index a027e2072477a..9ec1ae9a82c98 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -1970,6 +1970,8 @@ static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq,
 		ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
 	}
 
+	cqp->process_cqp_sds = i40iw_update_sds_noccq;
+
 	return ret_code;
 }
 
-- 
GitLab


From 6c1d94de4e75160d3ea5af3bf51d290341db1d44 Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 23 Jun 2017 16:03:57 -0500
Subject: [PATCH 1889/1958] i40iw: Utilize iwdev->reset during PCI function
 reset

Utilize iwdev->reset on a PCI function reset notification
instead of passing in reset flag for resource clean-up.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_main.c | 51 +++++++++++-------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 8fc61b3bd2237..3bad7d967abe3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -274,13 +274,12 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
 /**
  * i40iw_destroy_aeq - destroy aeq
  * @iwdev: iwarp device
- * @reset: true if called before reset
  *
  * Issue a destroy aeq request and
  * free the resources associated with the aeq
  * The function is called during driver unload
  */
-static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
+static void i40iw_destroy_aeq(struct i40iw_device *iwdev)
 {
 	enum i40iw_status_code status = I40IW_ERR_NOT_READY;
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
@@ -288,7 +287,7 @@ static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
 
 	if (!iwdev->msix_shared)
 		i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev);
-	if (reset)
+	if (iwdev->reset)
 		goto exit;
 
 	if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1))
@@ -304,19 +303,17 @@ static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
  * i40iw_destroy_ceq - destroy ceq
  * @iwdev: iwarp device
  * @iwceq: ceq to be destroyed
- * @reset: true if called before reset
  *
  * Issue a destroy ceq request and
  * free the resources associated with the ceq
  */
 static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
-			      struct i40iw_ceq *iwceq,
-			      bool reset)
+			      struct i40iw_ceq *iwceq)
 {
 	enum i40iw_status_code status;
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 
-	if (reset)
+	if (iwdev->reset)
 		goto exit;
 
 	status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1);
@@ -335,12 +332,11 @@ static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
 /**
  * i40iw_dele_ceqs - destroy all ceq's
  * @iwdev: iwarp device
- * @reset: true if called before reset
  *
  * Go through all of the device ceq's and for each ceq
  * disable the ceq interrupt and destroy the ceq
  */
-static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
+static void i40iw_dele_ceqs(struct i40iw_device *iwdev)
 {
 	u32 i = 0;
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
@@ -349,32 +345,31 @@ static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
 
 	if (iwdev->msix_shared) {
 		i40iw_disable_irq(dev, msix_vec, (void *)iwdev);
-		i40iw_destroy_ceq(iwdev, iwceq, reset);
+		i40iw_destroy_ceq(iwdev, iwceq);
 		iwceq++;
 		i++;
 	}
 
 	for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) {
 		i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
-		i40iw_destroy_ceq(iwdev, iwceq, reset);
+		i40iw_destroy_ceq(iwdev, iwceq);
 	}
 }
 
 /**
  * i40iw_destroy_ccq - destroy control cq
  * @iwdev: iwarp device
- * @reset: true if called before reset
  *
  * Issue destroy ccq request and
  * free the resources associated with the ccq
  */
-static void i40iw_destroy_ccq(struct i40iw_device *iwdev, bool reset)
+static void i40iw_destroy_ccq(struct i40iw_device *iwdev)
 {
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 	struct i40iw_ccq *ccq = &iwdev->ccq;
 	enum i40iw_status_code status = 0;
 
-	if (!reset)
+	if (!iwdev->reset)
 		status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true);
 	if (status)
 		i40iw_pr_err("ccq destroy failed %d\n", status);
@@ -810,7 +805,7 @@ static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
 		iwceq->msix_idx = msix_vec->idx;
 		status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec);
 		if (status) {
-			i40iw_destroy_ceq(iwdev, iwceq, false);
+			i40iw_destroy_ceq(iwdev, iwceq);
 			break;
 		}
 		i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
@@ -912,7 +907,7 @@ static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev)
 
 	status = i40iw_configure_aeq_vector(iwdev);
 	if (status) {
-		i40iw_destroy_aeq(iwdev, false);
+		i40iw_destroy_aeq(iwdev);
 		return status;
 	}
 
@@ -1442,12 +1437,11 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
 /**
  * i40iw_deinit_device - clean up the device resources
  * @iwdev: iwarp device
- * @reset: true if called before reset
  *
  * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
  * destroy the device queues and free the pble and the hmc objects
  */
-static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
+static void i40iw_deinit_device(struct i40iw_device *iwdev)
 {
 	struct i40e_info *ldev = iwdev->ldev;
 
@@ -1464,7 +1458,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
 		i40iw_destroy_rdma_device(iwdev->iwibdev);
 		/* fallthrough */
 	case IP_ADDR_REGISTERED:
-		if (!reset)
+		if (!iwdev->reset)
 			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
 		/* fallthrough */
 	case INET_NOTIFIER:
@@ -1478,22 +1472,22 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
 		i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
 		/* fallthrough */
 	case CEQ_CREATED:
-		i40iw_dele_ceqs(iwdev, reset);
+		i40iw_dele_ceqs(iwdev);
 		/* fallthrough */
 	case AEQ_CREATED:
-		i40iw_destroy_aeq(iwdev, reset);
+		i40iw_destroy_aeq(iwdev);
 		/* fallthrough */
 	case IEQ_CREATED:
-		i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+		i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, iwdev->reset);
 		/* fallthrough */
 	case ILQ_CREATED:
-		i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+		i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, iwdev->reset);
 		/* fallthrough */
 	case CCQ_CREATED:
-		i40iw_destroy_ccq(iwdev, reset);
+		i40iw_destroy_ccq(iwdev);
 		/* fallthrough */
 	case HMC_OBJS_CREATED:
-		i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
+		i40iw_del_hmc_objects(dev, dev->hmc_info, true, iwdev->reset);
 		/* fallthrough */
 	case CQP_CREATED:
 		i40iw_destroy_cqp(iwdev, true);
@@ -1694,7 +1688,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
 	} while (0);
 
 	i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
-	i40iw_deinit_device(iwdev, false);
+	i40iw_deinit_device(iwdev);
 	return -ERESTART;
 }
 
@@ -1775,9 +1769,12 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool
 	iwdev = &hdl->device;
 	iwdev->closing = true;
 
+	if (reset)
+		iwdev->reset = true;
+
 	i40iw_cm_disconnect_all(iwdev);
 	destroy_workqueue(iwdev->virtchnl_wq);
-	i40iw_deinit_device(iwdev, reset);
+	i40iw_deinit_device(iwdev);
 }
 
 /**
-- 
GitLab


From 6327cb09dfda103f7255ef218ac18697b293554a Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 23 Jun 2017 16:03:58 -0500
Subject: [PATCH 1890/1958] i40iw: Release cm_id ref on PCI function reset

On PCI function reset, cm_id reference is not released
which causes an application hang, as it waits on the
cm_id to be released on rdma_destroy.

To fix this, call i40iw_cm_disconn during a PCI function
reset to clean-up resources and release cm_id reference.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_cm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 6ae98aa7f74eb..5a2fa743676ca 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -3487,7 +3487,8 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
 	if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) ||
 	     (original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) ||
 	     (last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) ||
-	     (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+	     (last_ae == I40IW_AE_LLP_CONNECTION_RESET) ||
+	      iwdev->reset)) {
 		issue_close = 1;
 		iwqp->cm_id = NULL;
 		if (!iwqp->flush_issued) {
@@ -4265,6 +4266,8 @@ void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
 		cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
 		attr.qp_state = IB_QPS_ERR;
 		i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+		if (iwdev->reset)
+			i40iw_cm_disconn(cm_node->iwqp);
 		i40iw_rem_ref_cm_node(cm_node);
 	}
 }
-- 
GitLab


From b5e452a04a10f12763f9836d3d3999f3bb1e56fb Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 23 Jun 2017 16:03:59 -0500
Subject: [PATCH 1891/1958] i40iw: Free QP resources on CQP destroy QP failure

Current flow leaves software QP structures in memory if
Control Queue Pair (CQP) destroy QP OP fails. To fix this,
free QP resources on fail of CQP destroy QP OP.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_utils.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 56d986924a4c1..ded8e48ed3c82 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -546,8 +546,12 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
 	cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
 	cqp_info->in.u.qp_destroy.remove_hash_idx = true;
 	status = i40iw_handle_cqp_op(iwdev, cqp_request);
-	if (status)
-		i40iw_pr_err("CQP-OP Destroy QP fail");
+	if (!status)
+		return;
+
+	i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+	i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+	i40iw_rem_devusecount(iwdev);
 }
 
 /**
-- 
GitLab


From c5c9d27e6c79ab3ab36092fe67fb7f2c6a120171 Mon Sep 17 00:00:00 2001
From: Henry Orosco <henry.orosco@intel.com>
Date: Fri, 23 Jun 2017 16:04:00 -0500
Subject: [PATCH 1892/1958] i40iw: Add missing memory barrier

Add missing write memory barrier before writing the
header containing valid bit to the WQE in i40iw_puda_send.

Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_puda.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index db41ab40da9ce..1bb16814560f4 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -408,6 +408,9 @@ enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
 	set_64bit_val(wqe, 0, info->paddr);
 	set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN));
 	set_64bit_val(wqe, 16, header[0]);
+
+	/* Ensure all data is written before writing valid bit */
+	wmb();
 	set_64bit_val(wqe, 24, header[1]);
 
 	i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32);
-- 
GitLab


From c709d7f229a273c7c5664e9dfe5432b031842d0c Mon Sep 17 00:00:00 2001
From: Henry Orosco <henry.orosco@intel.com>
Date: Fri, 23 Jun 2017 16:04:01 -0500
Subject: [PATCH 1893/1958] i40iw: Update list correctly

To avoid infinite loop, in i40iw_ieq_handle_exception, update
plist inside while loop.

Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_puda.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 1bb16814560f4..71050c5d29a05 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -1414,10 +1414,10 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
 
 	if (!list_empty(rxlist)) {
 		tmpbuf = (struct i40iw_puda_buf *)rxlist->next;
-		plist = &tmpbuf->list;
 		while ((struct list_head *)tmpbuf != rxlist) {
 			if ((int)(buf->seqnum - tmpbuf->seqnum) < 0)
 				break;
+			plist = &tmpbuf->list;
 			tmpbuf = (struct i40iw_puda_buf *)plist->next;
 		}
 		/* Insert buf before tmpbuf */
-- 
GitLab


From 44b99f88cdd5b47046c511aa64ae71ad2c9e5b1e Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 23 Jun 2017 16:04:02 -0500
Subject: [PATCH 1894/1958] i40iw: Avoid memory leak of CQP request objects

Control Queue Pair (CQP) request objects, which have
not received a completion upon interface close, remain
in memory.

To fix this, identify and free all pending CQP request
objects during destroy CQP OP.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Henry Orosco <henry.orosco@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw.h       |  1 +
 drivers/infiniband/hw/i40iw/i40iw_main.c  |  2 +
 drivers/infiniband/hw/i40iw/i40iw_utils.c | 52 +++++++++++++++++++++++
 3 files changed, 55 insertions(+)

diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index da2eb5a281fa1..9b1566468744e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -527,6 +527,7 @@ enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
 int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
 void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq);
 
+void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev);
 void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
 void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
 void i40iw_rem_devusecount(struct i40iw_device *iwdev);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 3bad7d967abe3..ae8463ff59a76 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -243,6 +243,8 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
 	if (free_hwcqp)
 		dev->cqp_ops->cqp_destroy(dev->cqp);
 
+	i40iw_cleanup_pending_cqp_op(iwdev);
+
 	i40iw_free_dma_mem(dev->hw, &cqp->sq);
 	kfree(cqp->scratch_array);
 	iwdev->cqp.scratch_array = NULL;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index ded8e48ed3c82..e311ec559f4eb 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -337,6 +337,7 @@ struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait
  */
 void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request)
 {
+	struct i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp);
 	unsigned long flags;
 
 	if (cqp_request->dynamic) {
@@ -350,6 +351,7 @@ void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp
 		list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
 		spin_unlock_irqrestore(&cqp->req_lock, flags);
 	}
+	wake_up(&iwdev->close_wq);
 }
 
 /**
@@ -364,6 +366,56 @@ void i40iw_put_cqp_request(struct i40iw_cqp *cqp,
 		i40iw_free_cqp_request(cqp, cqp_request);
 }
 
+/**
+ * i40iw_free_pending_cqp_request -free pending cqp request objs
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+static void i40iw_free_pending_cqp_request(struct i40iw_cqp *cqp,
+					   struct i40iw_cqp_request *cqp_request)
+{
+	struct i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp);
+
+	if (cqp_request->waiting) {
+		cqp_request->compl_info.error = true;
+		cqp_request->request_done = true;
+		wake_up(&cqp_request->waitq);
+	}
+	i40iw_put_cqp_request(cqp, cqp_request);
+	wait_event_timeout(iwdev->close_wq,
+			   !atomic_read(&cqp_request->refcount),
+			   1000);
+}
+
+/**
+ * i40iw_cleanup_pending_cqp_op - clean-up cqp with no completions
+ * @iwdev: iwarp device
+ */
+void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_cqp *cqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request = NULL;
+	struct cqp_commands_info *pcmdinfo = NULL;
+	u32 i, pending_work, wqe_idx;
+
+	pending_work = I40IW_RING_WORK_AVAILABLE(cqp->sc_cqp.sq_ring);
+	wqe_idx = I40IW_RING_GETCURRENT_TAIL(cqp->sc_cqp.sq_ring);
+	for (i = 0; i < pending_work; i++) {
+		cqp_request = (struct i40iw_cqp_request *)(unsigned long)cqp->scratch_array[wqe_idx];
+		if (cqp_request)
+			i40iw_free_pending_cqp_request(cqp, cqp_request);
+		wqe_idx = (wqe_idx + 1) % I40IW_RING_GETSIZE(cqp->sc_cqp.sq_ring);
+	}
+
+	while (!list_empty(&dev->cqp_cmd_head)) {
+		pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
+		cqp_request = container_of(pcmdinfo, struct i40iw_cqp_request, info);
+		if (cqp_request)
+			i40iw_free_pending_cqp_request(cqp, cqp_request);
+	}
+}
+
 /**
  * i40iw_free_qp - callback after destroy cqp completes
  * @cqp_request: cqp request for destroy qp
-- 
GitLab


From af56e53ccd29bda062a1ae75276dc9c0f8eedf47 Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <Tatyana.E.Nikolova@intel.com>
Date: Wed, 5 Jul 2017 21:25:33 -0500
Subject: [PATCH 1895/1958] i40iw: Free QP PBLEs when the QP is destroyed

If the physical buffer list entries (PBLEs) of a QP are freed
up at i40iw_dereg_mr, they can be assigned to a newly
created QP before the previous QP is destroyed. Fix this
by freeing PBLEs only when the QP is destroyed.

Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_verbs.c | 15 +++++++++++----
 drivers/infiniband/hw/i40iw/i40iw_verbs.h |  2 +-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4dbe61ec7a77c..4aa0264ccb3ab 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -426,9 +426,13 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
 			     struct i40iw_qp *iwqp,
 			     u32 qp_num)
 {
+	struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
+
 	i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
 	if (qp_num)
 		i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
+	if (iwpbl->pbl_allocated)
+		i40iw_free_pble(iwdev->pble_rsrc, &iwpbl->pble_alloc);
 	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
 	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
 	kfree(iwqp->kqp.wrid_mem);
@@ -483,7 +487,7 @@ static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
 			       struct i40iw_qp *iwqp,
 			       struct i40iw_qp_init_info *init_info)
 {
-	struct i40iw_pbl *iwpbl = iwqp->iwpbl;
+	struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
 	struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
 
 	iwqp->page = qpmr->sq_page;
@@ -688,19 +692,22 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
 			ucontext = to_ucontext(ibpd->uobject->context);
 
 			if (req.user_wqe_buffers) {
+				struct i40iw_pbl *iwpbl;
+
 				spin_lock_irqsave(
 				    &ucontext->qp_reg_mem_list_lock, flags);
-				iwqp->iwpbl = i40iw_get_pbl(
+				iwpbl = i40iw_get_pbl(
 				    (unsigned long)req.user_wqe_buffers,
 				    &ucontext->qp_reg_mem_list);
 				spin_unlock_irqrestore(
 				    &ucontext->qp_reg_mem_list_lock, flags);
 
-				if (!iwqp->iwpbl) {
+				if (!iwpbl) {
 					err_code = -ENODATA;
 					i40iw_pr_err("no pbl info\n");
 					goto error;
 				}
+				memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
 			}
 		}
 		err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
@@ -2063,7 +2070,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
 			ucontext = to_ucontext(ibpd->uobject->context);
 			i40iw_del_memlist(iwmr, ucontext);
 		}
-		if (iwpbl->pbl_allocated)
+		if (iwpbl->pbl_allocated && iwmr->type != IW_MEMREG_TYPE_QP)
 			i40iw_free_pble(iwdev->pble_rsrc, palloc);
 		kfree(iwmr);
 		return 0;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 07c3fec77de6a..9067443cd3115 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -170,7 +170,7 @@ struct i40iw_qp {
 	struct i40iw_qp_kmode kqp;
 	struct i40iw_dma_mem host_ctx;
 	struct timer_list terminate_timer;
-	struct i40iw_pbl *iwpbl;
+	struct i40iw_pbl iwpbl;
 	struct i40iw_dma_mem q2_ctx_mem;
 	struct i40iw_dma_mem ietf_mem;
 	struct completion sq_drained;
-- 
GitLab


From 653f0a71daf1a71d55d3af368c28c8114c11c607 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 10 Jul 2017 10:22:47 +0300
Subject: [PATCH 1896/1958] RDMA/bnxt_re: checking for NULL instead of IS_ERR()

bnxt_re_alloc_mw() doesn't return NULL, it returns error pointers.

Fixes: 9152e0b722b2 ("RDMA/bnxt_re: HW workarounds for handling specific conditions")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index c7bd68311d0c5..e794b0fa4ae65 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -588,10 +588,10 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
 
 	/* Create a fence MW only for kernel consumers */
 	mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
-	if (!mw) {
+	if (IS_ERR(mw)) {
 		dev_err(rdev_to_dev(rdev),
 			"Failed to create fence-MW for PD: %p\n", pd);
-		rc = -EINVAL;
+		rc = PTR_ERR(mw);
 		goto fail;
 	}
 	fence->mw = mw;
-- 
GitLab


From 5c8857b653e71a9850a02837e1268e3198abbd1a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:45:48 +0300
Subject: [PATCH 1897/1958] IB/IPoIB: Fix error code in ipoib_add_port()

We accidentally don't see the error code on some of these error paths.
It means we return ERR_PTR(0) which is NULL and it results in a NULL
dereference in the caller.

This bug dates to pre-git days.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 70dacaf9044ec..4ce315c92b480 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2239,6 +2239,7 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto register_failed;
 	}
 
+	result = -ENOMEM;
 	if (ipoib_cm_add_mode_attr(priv->dev))
 		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(priv->dev))
-- 
GitLab


From 6031e079aa4656743298ea235b894ee883f45c71 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:47:22 +0300
Subject: [PATCH 1898/1958] IB/i40iw: Fix error code in i40iw_create_cq()

We accidentally forgot to set the error code if ib_copy_from_udata()
fails.  It means we return ERR_PTR(0) which is NULL and results in a
NULL dereference in the callers.

Fixes: d37498417947 ("i40iw: add files for iwarp interface")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_verbs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4aa0264ccb3ab..02d871db7ca56 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1168,8 +1168,10 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
 		memset(&req, 0, sizeof(req));
 		iwcq->user_mode = true;
 		ucontext = to_ucontext(context);
-		if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req)))
+		if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) {
+			err_code = -EFAULT;
 			goto cq_free_resources;
+		}
 
 		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
 		iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
-- 
GitLab


From 6ebedacbb44602d4dec3348dee5ec31dd9b09521 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:47:40 +0300
Subject: [PATCH 1899/1958] cxgb4: Fix error codes in c4iw_create_cq()

If one of these kmalloc() calls fails then we return ERR_PTR(0) which is
NULL.  It results in a NULL dereference in the callers.

Fixes: cfdda9d76436 ("RDMA/cxgb4: Add driver for Chelsio T4 RNIC")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index e16fcaf6b5a3f..be07da1997e68 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -963,6 +963,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		goto err3;
 
 	if (ucontext) {
+		ret = -ENOMEM;
 		mm = kmalloc(sizeof *mm, GFP_KERNEL);
 		if (!mm)
 			goto err4;
-- 
GitLab


From 9064d6055c14f700aa13f7c72fd3e63d12bee643 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:48:00 +0300
Subject: [PATCH 1900/1958] IB/cxgb3: Fix error codes in iwch_alloc_mr()

We accidentally don't set the error code on some error paths.  It means
return ERR_PTR(0) which is NULL and results in a NULL dereference in the
caller.

Fixes: 13a239330abd ("RDMA/cxgb3: Don't ignore insert_handle() failures")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb3/iwch_provider.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 29d30744d6c9e..0cd0c1fa27d45 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -718,7 +718,7 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
 	struct iwch_mr *mhp;
 	u32 mmid;
 	u32 stag = 0;
-	int ret = 0;
+	int ret = -ENOMEM;
 
 	if (mr_type != IB_MR_TYPE_MEM_REG ||
 	    max_num_sg > T3_MAX_FASTREG_DEPTH)
@@ -731,10 +731,8 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
 		goto err;
 
 	mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
-	if (!mhp->pages) {
-		ret = -ENOMEM;
+	if (!mhp->pages)
 		goto pl_err;
-	}
 
 	mhp->rhp = rhp;
 	ret = iwch_alloc_pbl(mhp, max_num_sg);
@@ -751,7 +749,8 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
 	mhp->attr.state = 1;
 	mmid = (stag) >> 8;
 	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
+	ret = insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+	if (ret)
 		goto err3;
 
 	pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
-- 
GitLab


From dd75cfa6d3216c79c695f5af13e52208afe374ad Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:46:14 +0300
Subject: [PATCH 1901/1958] RDMA/ocrdma: Fix an error code in ocrdma_alloc_pd()

We should preserve the original "status" error code instead of resetting
it to zero.  Returning ERR_PTR(0) is the same as NULL and results in a
NULL dereference in the callers.  I added a printk() on error instead.

Fixes: 45e86b33ec8b ("RDMA/ocrdma: Cache recv DB until QP moved to RTR")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 2f30bda8457a9..cc317e8580408 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -744,7 +744,8 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
 	if (is_uctx_pd) {
 		ocrdma_release_ucontext_pd(uctx);
 	} else {
-		status = _ocrdma_dealloc_pd(dev, pd);
+		if (_ocrdma_dealloc_pd(dev, pd))
+			pr_err("%s: _ocrdma_dealloc_pd() failed\n", __func__);
 	}
 exit:
 	return ERR_PTR(status);
-- 
GitLab


From f0c6e88288d65c93bbc7da4fb6f7d51b2733228a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:46:49 +0300
Subject: [PATCH 1902/1958] RDMA/ocrdma: Fix error codes in ocrdma_create_srq()

If either of these allocations fail then we return ERR_PTR(0).  That's
equivalent to NULL and results in a NULL pointer dereference in the
caller.

Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index cc317e8580408..27d5e8d9f08d3 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1902,6 +1902,7 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
 		goto err;
 
 	if (udata == NULL) {
+		status = -ENOMEM;
 		srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
 			    GFP_KERNEL);
 		if (srq->rqe_wr_id_tbl == NULL)
-- 
GitLab


From 396551eb00e46aa8f843c448bced0c76971ec58c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 14 Jun 2017 13:20:09 +0300
Subject: [PATCH 1903/1958] IB/mlx5: Fix a warning message

"umem" is a valid pointer.  We intended to print "*umem" or even just
"err" instead.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8ab2f1360a456..2c40a2e989d26 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -835,7 +835,7 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
 			    access_flags, 0);
 	err = PTR_ERR_OR_ZERO(*umem);
 	if (err < 0) {
-		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
+		mlx5_ib_err(dev, "umem get failed (%d)\n", err);
 		return err;
 	}
 
-- 
GitLab


From b3b2c7c5506bd8d15214136f8b11a2e6c9728033 Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:08 -0700
Subject: [PATCH 1904/1958] RDMA/bnxt_re: Free doorbell page index (DPI) during
 dealloc ucontext

The driver must free the DPI during the dealloc_ucontext
instead of freeing it during dealloc_pd. However, the DPI
allocation scheme remains unchanged.

Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 58 ++++++++++++------------
 drivers/infiniband/hw/bnxt_re/ib_verbs.h |  3 +-
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index e794b0fa4ae65..e743ffd392c62 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -612,30 +612,13 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 	int rc;
 
 	bnxt_re_destroy_fence_mr(pd);
-	if (ib_pd->uobject && pd->dpi.dbr) {
-		struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
-		struct bnxt_re_ucontext *ucntx;
 
-		/* Free DPI only if this is the first PD allocated by the
-		 * application and mark the context dpi as NULL
-		 */
-		ucntx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
-
-		rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
-					    &rdev->qplib_res.dpi_tbl,
-					    &pd->dpi);
+	if (pd->qplib_pd.id) {
+		rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+					   &rdev->qplib_res.pd_tbl,
+					   &pd->qplib_pd);
 		if (rc)
-			dev_err(rdev_to_dev(rdev), "Failed to deallocate HW DPI");
-			/* Don't fail, continue*/
-		ucntx->dpi = NULL;
-	}
-
-	rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
-				   &rdev->qplib_res.pd_tbl,
-				   &pd->qplib_pd);
-	if (rc) {
-		dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
-		return rc;
+			dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
 	}
 
 	kfree(pd);
@@ -667,23 +650,22 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
 	if (udata) {
 		struct bnxt_re_pd_resp resp;
 
-		if (!ucntx->dpi) {
+		if (!ucntx->dpi.dbr) {
 			/* Allocate DPI in alloc_pd to avoid failing of
 			 * ibv_devinfo and family of application when DPIs
 			 * are depleted.
 			 */
 			if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
-						 &pd->dpi, ucntx)) {
+						 &ucntx->dpi, ucntx)) {
 				rc = -ENOMEM;
 				goto dbfail;
 			}
-			ucntx->dpi = &pd->dpi;
 		}
 
 		resp.pdid = pd->qplib_pd.id;
 		/* Still allow mapping this DBR to the new user PD. */
-		resp.dpi = ucntx->dpi->dpi;
-		resp.dbr = (u64)ucntx->dpi->umdbr;
+		resp.dpi = ucntx->dpi.dpi;
+		resp.dbr = (u64)ucntx->dpi.umdbr;
 
 		rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
 		if (rc) {
@@ -960,7 +942,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 		qplib_qp->rq.nmap = umem->nmap;
 	}
 
-	qplib_qp->dpi = cntx->dpi;
+	qplib_qp->dpi = &cntx->dpi;
 	return 0;
 rqfail:
 	ib_umem_release(qp->sumem);
@@ -2403,7 +2385,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 		}
 		cq->qplib_cq.sghead = cq->umem->sg_head.sgl;
 		cq->qplib_cq.nmap = cq->umem->nmap;
-		cq->qplib_cq.dpi = uctx->dpi;
+		cq->qplib_cq.dpi = &uctx->dpi;
 	} else {
 		cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
 		cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe),
@@ -3388,8 +3370,26 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
 	struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
 						   struct bnxt_re_ucontext,
 						   ib_uctx);
+
+	struct bnxt_re_dev *rdev = uctx->rdev;
+	int rc = 0;
+
 	if (uctx->shpg)
 		free_page((unsigned long)uctx->shpg);
+
+	if (uctx->dpi.dbr) {
+		/* Free DPI only if this is the first PD allocated by the
+		 * application and mark the context dpi as NULL
+		 */
+		rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+					    &rdev->qplib_res.dpi_tbl,
+					    &uctx->dpi);
+		if (rc)
+			dev_err(rdev_to_dev(rdev), "Deallocte HW DPI failed!");
+			/* Don't fail, continue*/
+		uctx->dpi.dbr = NULL;
+	}
+
 	kfree(uctx);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 6c160f6a53987..a0bb7e33d7ca4 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -59,7 +59,6 @@ struct bnxt_re_pd {
 	struct bnxt_re_dev	*rdev;
 	struct ib_pd		ib_pd;
 	struct bnxt_qplib_pd	qplib_pd;
-	struct bnxt_qplib_dpi	dpi;
 	struct bnxt_re_fence_data fence;
 };
 
@@ -127,7 +126,7 @@ struct bnxt_re_mw {
 struct bnxt_re_ucontext {
 	struct bnxt_re_dev	*rdev;
 	struct ib_ucontext	ib_uctx;
-	struct bnxt_qplib_dpi	*dpi;
+	struct bnxt_qplib_dpi	dpi;
 	void			*shpg;
 	spinlock_t		sh_lock;	/* protect shpg */
 };
-- 
GitLab


From ab69d4c8da38024191f3514c1296e9e8deea4e98 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:09 -0700
Subject: [PATCH 1905/1958] RDMA/bnxt_re: Fix WQE Size posted to HW to prevent
 it from throwing error

Posting WQE size of 2 results in a WQE_FORMAT_ERROR
thrown by the HW as it requires host to supply WQE Size with room
for atleast one SGE so that the resulting WQE size be atleast 3.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/qplib_fp.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index f05500bcdcf1e..8ef39df2f44fa 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1128,6 +1128,11 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 		}
 		/* Each SGE entry = 1 WQE size16 */
 		wqe_size16 = wqe->num_sge;
+		/* HW requires wqe size has room for atleast one SGE even if
+		 * none was supplied by ULP
+		 */
+		if (!wqe->num_sge)
+			wqe_size16++;
 	}
 
 	/* Specifics */
@@ -1364,6 +1369,11 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 	rqe->flags = wqe->flags;
 	rqe->wqe_size = wqe->num_sge +
 			((offsetof(typeof(*rqe), data) + 15) >> 4);
+	/* HW requires wqe size has room for atleast one SGE even if none
+	 * was supplied by ULP
+	 */
+	if (!wqe->num_sge)
+		rqe->wqe_size++;
 
 	/* Supply the rqe->wr_id index to the wr_id_tbl for now */
 	rqe->wr_id[0] = cpu_to_le32(sw_prod);
-- 
GitLab


From 4a62c5e9e2e1c15ceb1654715d9284d97f921119 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:11 -0700
Subject: [PATCH 1906/1958] RDMA/bnxt_re: Do not free the ctx_tbl entry if
 delete GID fails

This fix is added only to avoid system crash in some a
specific scenario. When bnxt_re driver is loaded and if
user tries to change interface mac address, delete GID
fails because QP1 is still associated with existing MAC
(default GID). If the above command fails GID tables are
not modified in the h/w or driver, but the GID context memory
is freed. Now, if the user changes the mac back to the original
value, another add_gid comes to the driver where the driver
reports that the GID is already present in its table
and tries to access the context which was already freed.

So, in this case, in order to  avoid NULL pointer de-reference,
this patch removes the context memory free  if delete_gid fails
and the same context memory is re-used in new add_gid.
Memory cleanup will be taken care during driver unload, while
deleting the GID table.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index e743ffd392c62..0cd8372989ce4 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -390,15 +390,17 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
 			return -EINVAL;
 		ctx->refcnt--;
 		if (!ctx->refcnt) {
-			rc = bnxt_qplib_del_sgid
-					(sgid_tbl,
-					 &sgid_tbl->tbl[ctx->idx], true);
-			if (rc)
+			rc = bnxt_qplib_del_sgid(sgid_tbl,
+						 &sgid_tbl->tbl[ctx->idx],
+						 true);
+			if (rc) {
 				dev_err(rdev_to_dev(rdev),
 					"Failed to remove GID: %#x", rc);
-			ctx_tbl = sgid_tbl->ctx;
-			ctx_tbl[ctx->idx] = NULL;
-			kfree(ctx);
+			} else {
+				ctx_tbl = sgid_tbl->ctx;
+				ctx_tbl[ctx->idx] = NULL;
+				kfree(ctx);
+			}
 		}
 	} else {
 		return -EINVAL;
-- 
GitLab


From 58d4a671d0eac45db1c7f27c8684c277249ac127 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:12 -0700
Subject: [PATCH 1907/1958] RDMA/bnxt_re: Report supported value to IB stack in
 query_device

- Report supported value for max_mr_size to IB stack in query_device.
   Also, check and log if MR size requested by application in
   reg_user_mr() is greater than value currently supported by driver.
 - Report only 4K page size support for now
 - Fix Max_QP value returned by ibv_devinfo -vv.
   In case of PF, FW reserves 129 QPs for creating QP1s of VFs
   and PF. So the max_qp value reported by FW for PF doesn'tt include
   the QP1. Fixing this issue by adding 1 with the value reported
   by FW.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h  |  2 ++
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 12 ++++++++----
 drivers/infiniband/hw/bnxt_re/qplib_sp.c |  2 ++
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 08772836fded4..4cb257dc61be6 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -51,6 +51,8 @@
 #define BNXT_RE_PAGE_SIZE_8M		BIT(23)
 #define BNXT_RE_PAGE_SIZE_1G		BIT(30)
 
+#define BNXT_RE_MAX_MR_SIZE		BIT(30)
+
 #define BNXT_RE_MAX_QPC_COUNT		(64 * 1024)
 #define BNXT_RE_MAX_MRW_COUNT		(64 * 1024)
 #define BNXT_RE_MAX_SRQC_COUNT		(64 * 1024)
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 0cd8372989ce4..4e3e5b91d8556 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -145,10 +145,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	ib_attr->fw_ver = (u64)(unsigned long)(dev_attr->fw_ver);
 	bnxt_qplib_get_guid(rdev->netdev->dev_addr,
 			    (u8 *)&ib_attr->sys_image_guid);
-	ib_attr->max_mr_size = ~0ull;
-	ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_8K |
-				 BNXT_RE_PAGE_SIZE_64K | BNXT_RE_PAGE_SIZE_2M |
-				 BNXT_RE_PAGE_SIZE_8M | BNXT_RE_PAGE_SIZE_1G;
+	ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE;
+	ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K;
 
 	ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
 	ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
@@ -3229,6 +3227,12 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 	struct scatterlist *sg;
 	int entry;
 
+	if (length > BNXT_RE_MAX_MR_SIZE) {
+		dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n",
+			length, BNXT_RE_MAX_MR_SIZE);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index fde18cf0e406b..5827573875d48 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -81,6 +81,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 
 	/* Extract the context from the side buffer */
 	attr->max_qp = le32_to_cpu(sb->max_qp);
+	/* max_qp value reported by FW for PF doesn't include the QP1 for PF */
+	attr->max_qp += 1;
 	attr->max_qp_rd_atom =
 		sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
 		BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
-- 
GitLab


From a25d112fe9c8e8817cde1df17a82aee472c55993 Mon Sep 17 00:00:00 2001
From: Eddie Wai <eddie.wai@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:13 -0700
Subject: [PATCH 1908/1958] RDMA/bnxt_re: Fixed the max_rd_atomic support for
 initiator and destination QP

There's a couple of bugs in the support of max_rd_atomic and
max_dest_rd_atomic. In the modify_qp, if the requested max_rd_atomic,
which is the ORRQ size, is greater than what the chip can support,
then we have to cap the request to chip max as we can't have the HW
overflow the ORRQ. Capping the max_rd_atomic support internally is okay
to do as the remaining read/atomic WRs will still be sitting in the SQ.
However, for the max_dest_rd_atomic, the driver has to error out as
this dictates the IRRQ size and we can't control what the remote
side sends.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 4e3e5b91d8556..4d3cdca03c022 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -172,7 +172,7 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	ib_attr->max_mr = dev_attr->max_mr;
 	ib_attr->max_pd = dev_attr->max_pd;
 	ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
-	ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_rd_atom;
+	ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
 	ib_attr->atomic_cap = IB_ATOMIC_HCA;
 	ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
 
@@ -1512,13 +1512,24 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	if (qp_attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
 		qp->qplib_qp.modify_flags |=
 				CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC;
-		qp->qplib_qp.max_rd_atomic = qp_attr->max_rd_atomic;
+		/* Cap the max_rd_atomic to device max */
+		qp->qplib_qp.max_rd_atomic = min_t(u32, qp_attr->max_rd_atomic,
+						   dev_attr->max_qp_rd_atom);
 	}
 	if (qp_attr_mask & IB_QP_SQ_PSN) {
 		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
 		qp->qplib_qp.sq.psn = qp_attr->sq_psn;
 	}
 	if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+		if (qp_attr->max_dest_rd_atomic >
+		    dev_attr->max_qp_init_rd_atom) {
+			dev_err(rdev_to_dev(rdev),
+				"max_dest_rd_atomic requested%d is > dev_max%d",
+				qp_attr->max_dest_rd_atomic,
+				dev_attr->max_qp_init_rd_atom);
+			return -EINVAL;
+		}
+
 		qp->qplib_qp.modify_flags |=
 				CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC;
 		qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic;
-- 
GitLab


From 536f092805cecc5c2c0ba7f051a7552619bd4491 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:14 -0700
Subject: [PATCH 1909/1958] RDMA/bnxt_re: Specify RDMA component when
 allocating stats context

Starting FW version 20.6.47, firmware is keeping separate statistics
for L2 and RDMA. However, driver needs to specify RDMA or not when
allocating stat_ctx.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 1fce5e73216be..ceae2d92fb08b 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -333,6 +333,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
 	req.update_period_ms = cpu_to_le32(1000);
 	req.stats_dma_addr = cpu_to_le64(dma_map);
+	req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
-- 
GitLab


From 254cd2590d408f2c0375fbf73ef8250d47529e7b Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:16 -0700
Subject: [PATCH 1910/1958] RDMA/bnxt_re: Enable atomics only if host bios
 supports

Driver shall check if the host system bios has enabled
Atomic operations capability in PCI Device Control 2
register of the pci-device. Expose the ATOMIC_HCA
flag only if the Atomic operations capability is set.

Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c |  6 ++++--
 drivers/infiniband/hw/bnxt_re/qplib_sp.c | 14 ++++++++++++++
 drivers/infiniband/hw/bnxt_re/qplib_sp.h |  3 +++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 4d3cdca03c022..7b17030d2696c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -173,8 +173,10 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	ib_attr->max_pd = dev_attr->max_pd;
 	ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
 	ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
-	ib_attr->atomic_cap = IB_ATOMIC_HCA;
-	ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
+	if (dev_attr->is_atomic) {
+		ib_attr->atomic_cap = IB_ATOMIC_HCA;
+		ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
+	}
 
 	ib_attr->max_ee_rd_atom = 0;
 	ib_attr->max_res_rd_atom = 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 5827573875d48..ef91ab786dd4c 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -51,6 +51,19 @@ const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
 						     0, 0, 0, 0, 0, 0, 0, 0 } };
 
 /* Device */
+
+static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
+{
+	int rc;
+	u16 pcie_ctl2;
+
+	rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2,
+				       &pcie_ctl2);
+	if (rc)
+		return false;
+	return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
+
 int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 			    struct bnxt_qplib_dev_attr *attr)
 {
@@ -131,6 +144,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 		attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
 	}
 
+	attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
 bail:
 	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
 	return rc;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index a543f959098bd..2ce7e2a32cf0d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -42,6 +42,8 @@
 
 #define BNXT_QPLIB_RESERVED_QP_WRS	128
 
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ      0x0040
+
 struct bnxt_qplib_dev_attr {
 	char				fw_ver[32];
 	u16				max_sgid;
@@ -70,6 +72,7 @@ struct bnxt_qplib_dev_attr {
 	u32				max_inline_data;
 	u32				l2_db_size;
 	u8				tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
+	bool				is_atomic;
 };
 
 struct bnxt_qplib_pd {
-- 
GitLab


From 10d1dedf9b428ed776f244eb8b43f28974da3fb0 Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:17 -0700
Subject: [PATCH 1911/1958] RDMA/bnxt_re: Fix return value of poll routine

Fix the incorrect reporting of number of polled
entries by taking into account the max CQ depth
in the driver.

Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 7b17030d2696c..b92a06d17186b 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -2900,6 +2900,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 
 	spin_lock_irqsave(&cq->cq_lock, flags);
 	budget = min_t(u32, num_entries, cq->max_cql);
+	num_entries = budget;
 	if (!cq->cql) {
 		dev_err(rdev_to_dev(cq->rdev), "POLL CQ : no CQL to use");
 		goto exit;
-- 
GitLab


From 499e456981d376ef614e257d5dcb280581db9ac6 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:18 -0700
Subject: [PATCH 1912/1958] RDMA/bnxt_re: Report MISSED_EVENTS in req_notify_cq

While invoking the req_notify_cq hook, ULPs can request
whether the CQs have any CQEs pending. If CQEs are pending,
drivers can indicate  it by returning 1 for req_notify_cq.
The stack will poll CQ again till CQ is empty.

This patch peeks the CQ for any valid entries and return accordingly.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c |  5 +++++
 drivers/infiniband/hw/bnxt_re/qplib_fp.c | 19 +++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/qplib_fp.h |  1 +
 3 files changed, 25 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index b92a06d17186b..d5aa5a1a7e00a 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3027,6 +3027,11 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
 	else if (ib_cqn_flags & IB_CQ_SOLICITED)
 		type = DBR_DBR_TYPE_CQ_ARMSE;
 
+	/* Poll to see if there are missed events */
+	if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+	    !(bnxt_qplib_is_cq_empty(&cq->qplib_cq)))
+		return 1;
+
 	bnxt_qplib_req_notify_cq(&cq->qplib_cq, type);
 
 	return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 8ef39df2f44fa..9af1514e59448 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1895,6 +1895,25 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 	return rc;
 }
 
+bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
+{
+	struct cq_base *hw_cqe, **hw_cqe_ptr;
+	unsigned long flags;
+	u32 sw_cons, raw_cons;
+	bool rc = true;
+
+	spin_lock_irqsave(&cq->hwq.lock, flags);
+	raw_cons = cq->hwq.cons;
+	sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
+	hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+	hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
+
+	 /* Check for Valid bit. If the CQE is valid, return false */
+	rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
+	spin_unlock_irqrestore(&cq->hwq.lock, flags);
+	return rc;
+}
+
 static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 						struct cq_res_raweth_qp1 *hwcqe,
 						struct bnxt_qplib_cqe **pcqe,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 36b7b7db0e3f9..19176e06c98a7 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -449,6 +449,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		       int num, struct bnxt_qplib_qp **qp);
+bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq);
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
 int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
-- 
GitLab


From 601577b7d14cba5848b4c44ef719881f8538f702 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Thu, 29 Jun 2017 12:28:19 -0700
Subject: [PATCH 1913/1958] RDMA/bnxt_re: Fix the value reported for local ack
 delay

Local ack delay exposed by the driver is 0 which means infinite QP
timeout. Reporting the default value to 16 (approx 260ms)

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h  | 7 +++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 4cb257dc61be6..85527532c49db 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -62,6 +62,13 @@
 
 #define BNXT_RE_RQ_WQE_THRESHOLD	32
 
+/*
+ * Setting the default ack delay value to 16, which means
+ * the default timeout is approx. 260ms(4 usec * 2 ^(timeout))
+ */
+
+#define BNXT_RE_DEFAULT_ACK_DELAY	16
+
 struct bnxt_re_work {
 	struct work_struct	work;
 	unsigned long		event;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index d5aa5a1a7e00a..f0e01b3ac7116 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -201,7 +201,7 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	ib_attr->max_fast_reg_page_list_len = MAX_PBL_LVL_1_PGS;
 
 	ib_attr->max_pkeys = 1;
-	ib_attr->local_ca_ack_delay = 0;
+	ib_attr->local_ca_ack_delay = BNXT_RE_DEFAULT_ACK_DELAY;
 	return 0;
 }
 
-- 
GitLab


From 266098b841d48f7f0db40424bdbc072e4db14e9b Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 8 Jun 2017 17:30:47 +0300
Subject: [PATCH 1914/1958] IB/core: Fix sparse warnings

Delete unused variables to prevent sparse warnings.

Fixes: db1b5ddd5336 ("IB/core: Rename uverbs event file structure")
Fixes: fd3c7904db6e ("IB/core: Change idr objects to use the new schema")

Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3f55d18a37914..71451eae42de4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1296,7 +1296,6 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	struct ib_uobject		*uobj;
 	struct ib_cq               	*cq;
 	struct ib_ucq_object        	*obj;
-	struct ib_uverbs_event_queue	*ev_queue;
 	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1313,7 +1312,6 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	 */
 	uverbs_uobject_get(uobj);
 	cq      = uobj->object;
-	ev_queue = cq->cq_context;
 	obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
 
 	memset(&resp, 0, sizeof(resp));
@@ -2088,7 +2086,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_qp      cmd;
 	struct ib_uverbs_destroy_qp_resp resp;
 	struct ib_uobject		*uobj;
-	struct ib_qp               	*qp;
 	struct ib_uqp_object        	*obj;
 	int                        	 ret = -EINVAL;
 
@@ -2102,7 +2099,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	qp  = uobj->object;
 	obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
 	/*
 	 * Make sure we don't free the memory in remove_commit as we still
@@ -3004,7 +3000,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_ex_destroy_wq	cmd = {};
 	struct ib_uverbs_ex_destroy_wq_resp	resp = {};
-	struct ib_wq			*wq;
 	struct ib_uobject		*uobj;
 	struct ib_uwq_object		*obj;
 	size_t required_cmd_sz;
@@ -3038,7 +3033,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	wq = uobj->object;
 	obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
 	/*
 	 * Make sure we don't free the memory in remove_commit as we still
@@ -3728,10 +3722,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_srq      cmd;
 	struct ib_uverbs_destroy_srq_resp resp;
 	struct ib_uobject		 *uobj;
-	struct ib_srq               	 *srq;
 	struct ib_uevent_object        	 *obj;
 	int                         	  ret = -EINVAL;
-	enum ib_srq_type		  srq_type;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
@@ -3741,9 +3733,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	srq = uobj->object;
 	obj = container_of(uobj, struct ib_uevent_object, uobject);
-	srq_type = srq->srq_type;
 	/*
 	 * Make sure we don't free the memory in remove_commit as we still
 	 * needs the uobject memory to create the response.
-- 
GitLab


From a25ce4270bfdd522207b02f81a594c7d1746b697 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Sat, 17 Jun 2017 10:37:26 -0700
Subject: [PATCH 1915/1958] IB/rdmavt: Setting of QP timeout can overflow
 jiffies computation

Current computation of qp->timeout_jiffies in rvt_modify_qp() will cause
overflow due to the fact that the input to the function usecs_to_jiffies
is only 32-bit ( unsigned int). Overflow will occur when attr->timeout is
equal to or greater than 30. The consequence is unnecessarily excessive
retry and thus degradation of the system performance.

This patch fixes the problem by limiting the input to 5-bit and calling
usecs_to_jiffies() before multiplying the scaling factor.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rdmavt/qp.c |  4 +---
 include/rdma/rdmavt_qp.h          | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 459865439a0bf..8876ee7bc326c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1258,9 +1258,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	if (attr_mask & IB_QP_TIMEOUT) {
 		qp->timeout = attr->timeout;
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
+		qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout);
 	}
 
 	if (attr_mask & IB_QP_QKEY)
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index be6472e5b06bd..d664d2e762808 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -647,6 +647,20 @@ static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
 	return len >> qp->log_pmtu;
 }
 
+/**
+ * rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies
+ * @timeout - timeout input(0 - 31).
+ *
+ * Return a timeout value in jiffies.
+ */
+static inline unsigned long rvt_timeout_to_jiffies(u8 timeout)
+{
+	if (timeout > 31)
+		timeout = 31;
+
+	return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL;
+}
+
 extern const int  ib_rvt_state_ops[];
 
 struct rvt_dev_info;
-- 
GitLab


From 4542e3c79a2c5a167cbeb4f4190d5f705d272002 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20Bugge?= <haakon.bugge@oracle.com>
Date: Tue, 20 Jun 2017 14:07:50 +0200
Subject: [PATCH 1916/1958] IB/mlx4: Fix CM REQ retries in paravirt mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CM REQs cannot be successfully retried, because a new pv_cm_id is
created for each request, without checking if one already exists.

By checking if an id exists before creating one, the bug is fixed.

This bug can be provoked by running an RDMA CM user-land application,
but inserting a five seconds delay before the rdma_accept() call on
the passive side. This delay is larger than the default CMA timeout,
and triggers a retry from the active side. The retried REQ will use
another pv_cm_id (the cm_id on the wire). This confuses the CM
protocol and two REJs are sent from the passive side.

Here is an excerpt from ibdump running without the patch:

3.285092       LID: 4 -> LID: 4       SDP 290 CM: ConnectRequest(SDP Hello)
7.382711       LID: 4 -> LID: 4       SDP 290 CM: ConnectRequest(SDP Hello)
7.382861       LID: 4 -> LID: 4       InfiniBand 290 CM: ConnectReject
7.387644       LID: 4 -> LID: 4       InfiniBand 290 CM: ConnectReject

and here is the same with bug fix applied:

3.251010       LID: 4 -> LID: 4       SDP 290 CM: ConnectRequest(SDP Hello)
7.349387       LID: 4 -> LID: 4       SDP 290 CM: ConnectRequest(SDP Hello)
8.258443       LID: 4 -> LID: 4       SDP 290 CM: ConnectReply(SDP Hello)
8.259890       LID: 4 -> LID: 4       InfiniBand 290 CM: ReadyToUse

Suggested-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Reported-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Tested-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Acked-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx4/cm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 1e6c526450d9c..fedaf82601054 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -323,6 +323,9 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
 			mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
 			mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
 		sl_cm_id = get_local_comm_id(mad);
+		id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
+		if (id)
+			goto cont;
 		id = id_map_alloc(ibdev, slave_id, sl_cm_id);
 		if (IS_ERR(id)) {
 			mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
@@ -343,6 +346,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
 		return -EINVAL;
 	}
 
+cont:
 	set_local_comm_id(mad, id->pv_cm_id);
 
 	if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
-- 
GitLab


From 720336c42e41a917002fcae3aa14e30f5022bbb7 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Wed, 21 Jun 2017 19:55:43 +0530
Subject: [PATCH 1917/1958] iw_cxgb4: don't use WR keys/addrs for 0 byte reads

Only use the read sge lkey/addr and the remote rkey/addr if the
length of the read is not zero. Otherwise the read response might
be treated as the RTR read response and not delivered to the
application. Or worse Terminator hardware will fail a 0B read
if the STAG is 0 even if the read length is 0.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/qp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index bfc77596acbe1..cb7fc0d35d1d1 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -569,7 +569,7 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 {
 	if (wr->num_sge > 1)
 		return -EINVAL;
-	if (wr->num_sge) {
+	if (wr->num_sge && wr->sg_list[0].length) {
 		wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
 		wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
 							>> 32));
-- 
GitLab


From c75d3ec8c0ee469de79ae83c1a827d753603e49f Mon Sep 17 00:00:00 2001
From: "Amrani, Ram" <Ram.Amrani@cavium.com>
Date: Mon, 26 Jun 2017 19:05:04 +0300
Subject: [PATCH 1918/1958] RDMA/qedr: Prevent memory overrun in verbs' user
 responses

Wrap ib_copy_to_udata with a function that ensures that the data
being copied over to user space isn't longer than the allowed.

Fixes: cecbcddf6461 ("qedr: Add support for QP verbs")
Fixes: a7efd7773e31 ("qedr: Add support for PD,PKEY and CQ verbs")
Fixes: ac1b36e55a51 ("qedr: Add support for user context verbs")
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/qedr/verbs.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 548e4d1e998f1..2ae71b8f1ba8a 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -53,6 +53,14 @@
 
 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
 
+static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
+					size_t len)
+{
+	size_t min_len = min_t(size_t, len, udata->outlen);
+
+	return ib_copy_to_udata(udata, src, min_len);
+}
+
 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
 {
 	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
@@ -378,7 +386,7 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
 	uresp.max_cqes = QEDR_MAX_CQES;
 
-	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (rc)
 		goto err;
 
@@ -499,7 +507,7 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
 
 		uresp.pd_id = pd_id;
 
-		rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 		if (rc) {
 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
@@ -729,7 +737,7 @@ static int qedr_copy_cq_uresp(struct qedr_dev *dev,
 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
 	uresp.icid = cq->icid;
 
-	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (rc)
 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
 
@@ -1238,7 +1246,7 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
 	uresp.qp_id = qp->qp_id;
 
-	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (rc)
 		DP_ERR(dev,
 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
-- 
GitLab


From 1217197142d1681a8b8aaa88cdf4b245b76974cd Mon Sep 17 00:00:00 2001
From: Vijay Immanuel <vijayi@attalasystems.com>
Date: Tue, 27 Jun 2017 12:19:38 +0300
Subject: [PATCH 1919/1958] rxe: fix broken receive queue draining

If we modified the qp to ERROR state, and
drained the recieve queue, post_recv must
trigger the responder task to complete
the drain work request.

Cc: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Vijay Immanuel <vijayi@attalasystems.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>--
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c  | 3 +++
 drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index be944d5aa9afc..a958ee918a49f 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -1219,6 +1219,9 @@ void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
 		kfree_skb(skb);
 	}
 
+	if (notify)
+		return;
+
 	while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue))
 		advance_consumer(qp->rq.queue);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 07511718d98d8..af90a7d42b96a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -914,6 +914,9 @@ static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 	spin_unlock_irqrestore(&rq->producer_lock, flags);
 
+	if (qp->resp.state == QP_STATE_ERROR)
+		rxe_run_task(&qp->resp.task, 1);
+
 err1:
 	return err;
 }
-- 
GitLab


From e6e52aec494900912fedd7b595b8827ba70a670d Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 6 Jul 2017 10:21:36 +0300
Subject: [PATCH 1920/1958] RDMA/iser: don't send an rkey if all data is
 written as immadiate-data

We might get some bogus error completions in case the target will
remotely invalidate the rkey and the HCA will need to retransmit
from this buffer.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/iser/iser_initiator.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 12ed62ce9ff7e..2a07692007bdd 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -137,8 +137,10 @@ iser_prepare_write_cmd(struct iscsi_task *task,
 
 	if (unsol_sz < edtl) {
 		hdr->flags     |= ISER_WSV;
-		hdr->write_stag = cpu_to_be32(mem_reg->rkey);
-		hdr->write_va   = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
+		if (buf_out->data_len > imm_sz) {
+			hdr->write_stag = cpu_to_be32(mem_reg->rkey);
+			hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
+		}
 
 		iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
 			 "VA:%#llX + unsol:%d\n",
-- 
GitLab


From 963916fdb3e5ad4af57ac959b5a03bf23f7568ca Mon Sep 17 00:00:00 2001
From: "Kalderon, Michal" <Michal.Kalderon@cavium.com>
Date: Thu, 6 Jul 2017 23:22:11 +0300
Subject: [PATCH 1921/1958] IB/cma: Fix reference count leak when no ipv4
 addresses are set

Once in_dev_get is called to receive in_device pointer, the
in_device reference counter is increased, but if there are
no ipv4 addresses configured on the net-device the ifa_list
will be null, resulting in a flow that doesn't call in_dev_put
to decrease the ref_cnt.
This was exposed when running RoCE over ipv6 without any ipv4
addresses configured

Fixes: commit 8e3867310c90 ("IB/cma: Fix a race condition in iboe_addr_get_sgid()")

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib_addr.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 4b34c51f859e8..b73a14edc85e3 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -205,11 +205,13 @@ static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
 	dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
 	if (dev) {
 		ip4 = in_dev_get(dev);
-		if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) {
+		if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address)
 			ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address,
 					       (struct in6_addr *)gid);
+
+		if (ip4)
 			in_dev_put(ip4);
-		}
+
 		dev_put(dev);
 	}
 }
-- 
GitLab


From 5a7a88f1b488e4ee49eb3d5b82612d4d9ffdf2c3 Mon Sep 17 00:00:00 2001
From: "Ismail, Mustafa" <mustafa.ismail@intel.com>
Date: Fri, 14 Jul 2017 09:41:30 -0500
Subject: [PATCH 1922/1958] RDMA/uverbs: Fix the check for port number

The port number is only valid if IB_QP_PORT is set in the mask.
So only check port number if it is valid to prevent modify_qp from
failing due to an invalid port number.

Fixes: 5ecce4c9b17b("Check port number supplied by user verbs cmds")
Cc: <stable@vger.kernel.org> # v2.6.14+
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Tested-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 71451eae42de4..2c98533a0203b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1933,7 +1933,8 @@ static int modify_qp(struct ib_uverbs_file *file,
 		goto out;
 	}
 
-	if (!rdma_is_port_valid(qp->device, cmd->base.port_num)) {
+	if ((cmd->base.attr_mask & IB_QP_PORT) &&
+	    !rdma_is_port_valid(qp->device, cmd->base.port_num)) {
 		ret = -EINVAL;
 		goto release_qp;
 	}
-- 
GitLab


From a62ab66b13a0f9bcb17b7b761f6670941ed5cd62 Mon Sep 17 00:00:00 2001
From: "Ismail, Mustafa" <mustafa.ismail@intel.com>
Date: Fri, 14 Jul 2017 09:41:31 -0500
Subject: [PATCH 1923/1958] RDMA/core: Initialize port_num in qp_attr

Initialize the port_num for iWARP in rdma_init_qp_attr.

Fixes: 5ecce4c9b17b("Check port number supplied by user verbs cmds")
Cc: <stable@vger.kernel.org> # v2.6.14+
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Tested-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/cma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 11aff923b633f..0eb393237ba2f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1033,6 +1033,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 		} else
 			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
 						 qp_attr_mask);
+		qp_attr->port_num = id_priv->id.port_num;
+		*qp_attr_mask |= IB_QP_PORT;
 	} else
 		ret = -ENOSYS;
 
-- 
GitLab


From 54a7d50b9205b5064628c1d10de6531d2d9fbc90 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 20 Jul 2017 11:34:47 -0700
Subject: [PATCH 1924/1958] x86: mark kprobe templates as character arrays, not
 single characters

They really are, and the "take the address of a single character" makes
the string fortification code unhappy (it believes that you can now only
acccess one byte, rather than a byte range, and then raises errors for
the memory copies going on in there).

We could now remove a few 'addressof' operators (since arrays naturally
degrade to pointers), but this is the minimal patch that just changes
the C prototypes of those template arrays (the templates themselves are
defined in inline asm).

Reported-by: kernel test robot <xiaolong.ye@intel.com>
Acked-and-tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/kprobes.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 34b984c607903..6cf65437b5e50 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -52,10 +52,10 @@ typedef u8 kprobe_opcode_t;
 #define flush_insn_slot(p)	do { } while (0)
 
 /* optinsn template addresses */
-extern __visible kprobe_opcode_t optprobe_template_entry;
-extern __visible kprobe_opcode_t optprobe_template_val;
-extern __visible kprobe_opcode_t optprobe_template_call;
-extern __visible kprobe_opcode_t optprobe_template_end;
+extern __visible kprobe_opcode_t optprobe_template_entry[];
+extern __visible kprobe_opcode_t optprobe_template_val[];
+extern __visible kprobe_opcode_t optprobe_template_call[];
+extern __visible kprobe_opcode_t optprobe_template_end[];
 #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
 #define MAX_OPTINSN_SIZE 				\
 	(((unsigned long)&optprobe_template_end -	\
-- 
GitLab


From 4cabc5b186b5427b9ee5a7495172542af105f02b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 Jul 2017 00:00:21 +0200
Subject: [PATCH 1925/1958] bpf: fix mixed signed/unsigned derived min/max
 value bounds

Edward reported that there's an issue in min/max value bounds
tracking when signed and unsigned compares both provide hints
on limits when having unknown variables. E.g. a program such
as the following should have been rejected:

   0: (7a) *(u64 *)(r10 -8) = 0
   1: (bf) r2 = r10
   2: (07) r2 += -8
   3: (18) r1 = 0xffff8a94cda93400
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+7
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
   7: (7a) *(u64 *)(r10 -16) = -8
   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = -1
  10: (2d) if r1 > r2 goto pc+3
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0
  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
  11: (65) if r1 s> 0x1 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1
  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
  12: (0f) r0 += r1
  13: (72) *(u8 *)(r0 +0) = 0
  R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1
  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
  14: (b7) r0 = 0
  15: (95) exit

What happens is that in the first part ...

   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = -1
  10: (2d) if r1 > r2 goto pc+3

... r1 carries an unsigned value, and is compared as unsigned
against a register carrying an immediate. Verifier deduces in
reg_set_min_max() that since the compare is unsigned and operation
is greater than (>), that in the fall-through/false case, r1's
minimum bound must be 0 and maximum bound must be r2. Latter is
larger than the bound and thus max value is reset back to being
'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now
'R1=inv,min_value=0'. The subsequent test ...

  11: (65) if r1 s> 0x1 goto pc+2

... is a signed compare of r1 with immediate value 1. Here,
verifier deduces in reg_set_min_max() that since the compare
is signed this time and operation is greater than (>), that
in the fall-through/false case, we can deduce that r1's maximum
bound must be 1, meaning with prior test, we result in r1 having
the following state: R1=inv,min_value=0,max_value=1. Given that
the actual value this holds is -8, the bounds are wrongly deduced.
When this is being added to r0 which holds the map_value(_adj)
type, then subsequent store access in above case will go through
check_mem_access() which invokes check_map_access_adj(), that
will then probe whether the map memory is in bounds based
on the min_value and max_value as well as access size since
the actual unknown value is min_value <= x <= max_value; commit
fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{,
_adj} register types") provides some more explanation on the
semantics.

It's worth to note in this context that in the current code,
min_value and max_value tracking are used for two things, i)
dynamic map value access via check_map_access_adj() and since
commit 06c1c049721a ("bpf: allow helpers access to variable memory")
ii) also enforced at check_helper_mem_access() when passing a
memory address (pointer to packet, map value, stack) and length
pair to a helper and the length in this case is an unknown value
defining an access range through min_value/max_value in that
case. The min_value/max_value tracking is /not/ used in the
direct packet access case to track ranges. However, the issue
also affects case ii), for example, the following crafted program
based on the same principle must be rejected as well:

   0: (b7) r2 = 0
   1: (bf) r3 = r10
   2: (07) r3 += -512
   3: (7a) *(u64 *)(r10 -16) = -8
   4: (79) r4 = *(u64 *)(r10 -16)
   5: (b7) r6 = -1
   6: (2d) if r4 > r6 goto pc+5
  R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
  R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
   7: (65) if r4 s> 0x1 goto pc+4
  R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
  R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1
  R10=fp
   8: (07) r4 += 1
   9: (b7) r5 = 0
  10: (6a) *(u16 *)(r10 -512) = 0
  11: (85) call bpf_skb_load_bytes#26
  12: (b7) r0 = 0
  13: (95) exit

Meaning, while we initialize the max_value stack slot that the
verifier thinks we access in the [1,2] range, in reality we
pass -7 as length which is interpreted as u32 in the helper.
Thus, this issue is relevant also for the case of helper ranges.
Resetting both bounds in check_reg_overflow() in case only one
of them exceeds limits is also not enough as similar test can be
created that uses values which are within range, thus also here
learned min value in r1 is incorrect when mixed with later signed
test to create a range:

   0: (7a) *(u64 *)(r10 -8) = 0
   1: (bf) r2 = r10
   2: (07) r2 += -8
   3: (18) r1 = 0xffff880ad081fa00
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+7
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
   7: (7a) *(u64 *)(r10 -16) = -8
   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = 2
  10: (3d) if r2 >= r1 goto pc+3
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  11: (65) if r1 s> 0x4 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
  R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  12: (0f) r0 += r1
  13: (72) *(u8 *)(r0 +0) = 0
  R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4
  R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  14: (b7) r0 = 0
  15: (95) exit

This leaves us with two options for fixing this: i) to invalidate
all prior learned information once we switch signed context, ii)
to track min/max signed and unsigned boundaries separately as
done in [0]. (Given latter introduces major changes throughout
the whole verifier, it's rather net-next material, thus this
patch follows option i), meaning we can derive bounds either
from only signed tests or only unsigned tests.) There is still the
case of adjust_reg_min_max_vals(), where we adjust bounds on ALU
operations, meaning programs like the following where boundaries
on the reg get mixed in context later on when bounds are merged
on the dst reg must get rejected, too:

   0: (7a) *(u64 *)(r10 -8) = 0
   1: (bf) r2 = r10
   2: (07) r2 += -8
   3: (18) r1 = 0xffff89b2bf87ce00
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+6
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
   7: (7a) *(u64 *)(r10 -16) = -8
   8: (79) r1 = *(u64 *)(r10 -16)
   9: (b7) r2 = 2
  10: (3d) if r2 >= r1 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
  11: (b7) r7 = 1
  12: (65) if r7 s> 0x0 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp
  13: (b7) r0 = 0
  14: (95) exit

  from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
  R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp
  15: (0f) r7 += r1
  16: (65) if r7 s> 0x4 goto pc+2
  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
  17: (0f) r0 += r7
  18: (72) *(u8 *)(r0 +0) = 0
  R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3
  R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
  19: (b7) r0 = 0
  20: (95) exit

Meaning, in adjust_reg_min_max_vals() we must also reset range
values on the dst when src/dst registers have mixed signed/
unsigned derived min/max value bounds with one unbounded value
as otherwise they can be added together deducing false boundaries.
Once both boundaries are established from either ALU ops or
compare operations w/o mixing signed/unsigned insns, then they
can safely be added to other regs also having both boundaries
established. Adding regs with one unbounded side to a map value
where the bounded side has been learned w/o mixing ops is
possible, but the resulting map value won't recover from that,
meaning such op is considered invalid on the time of actual
access. Invalid bounds are set on the dst reg in case i) src reg,
or ii) in case dst reg already had them. The only way to recover
would be to perform i) ALU ops but only 'add' is allowed on map
value types or ii) comparisons, but these are disallowed on
pointers in case they span a range. This is fine as only BPF_JEQ
and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers
which potentially turn them into PTR_TO_MAP_VALUE type depending
on the branch, so only here min/max value cannot be invalidated
for them.

In terms of state pruning, value_from_signed is considered
as well in states_equal() when dealing with adjusted map values.
With regards to breaking existing programs, there is a small
risk, but use-cases are rather quite narrow where this could
occur and mixing compares probably unlikely.

Joint work with Josef and Edward.

  [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html

Fixes: 484611357c19 ("bpf: allow access into map value arrays")
Reported-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf_verifier.h |   1 +
 kernel/bpf/verifier.c        | 108 ++++++++++++++++++++++++++++++-----
 2 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 621076f56251d..8e5d31f6faefd 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -43,6 +43,7 @@ struct bpf_reg_state {
 	u32 min_align;
 	u32 aux_off;
 	u32 aux_off_align;
+	bool value_from_signed;
 };
 
 enum bpf_stack_slot_type {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6a86723c5b64b..af9e84a4944e6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -504,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 {
 	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
 	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+	regs[regno].value_from_signed = false;
 	regs[regno].min_align = 0;
 }
 
@@ -777,12 +778,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	return -EACCES;
 }
 
-static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+static bool __is_pointer_value(bool allow_ptr_leaks,
+			       const struct bpf_reg_state *reg)
 {
-	if (env->allow_ptr_leaks)
+	if (allow_ptr_leaks)
 		return false;
 
-	switch (env->cur_state.regs[regno].type) {
+	switch (reg->type) {
 	case UNKNOWN_VALUE:
 	case CONST_IMM:
 		return false;
@@ -791,6 +793,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 	}
 }
 
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+{
+	return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
+}
+
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
 {
@@ -1832,10 +1839,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	dst_align = dst_reg->min_align;
 
 	/* We don't know anything about what was done to this register, mark it
-	 * as unknown.
+	 * as unknown. Also, if both derived bounds came from signed/unsigned
+	 * mixed compares and one side is unbounded, we cannot really do anything
+	 * with them as boundaries cannot be trusted. Thus, arithmetic of two
+	 * regs of such kind will get invalidated bounds on the dst side.
 	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE &&
-	    max_val == BPF_REGISTER_MAX_RANGE) {
+	if ((min_val == BPF_REGISTER_MIN_RANGE &&
+	     max_val == BPF_REGISTER_MAX_RANGE) ||
+	    (BPF_SRC(insn->code) == BPF_X &&
+	     ((min_val != BPF_REGISTER_MIN_RANGE &&
+	       max_val == BPF_REGISTER_MAX_RANGE) ||
+	      (min_val == BPF_REGISTER_MIN_RANGE &&
+	       max_val != BPF_REGISTER_MAX_RANGE) ||
+	      (dst_reg->min_value != BPF_REGISTER_MIN_RANGE &&
+	       dst_reg->max_value == BPF_REGISTER_MAX_RANGE) ||
+	      (dst_reg->min_value == BPF_REGISTER_MIN_RANGE &&
+	       dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) &&
+	     regs[insn->dst_reg].value_from_signed !=
+	     regs[insn->src_reg].value_from_signed)) {
 		reset_reg_range_values(regs, insn->dst_reg);
 		return;
 	}
@@ -2023,6 +2044,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			regs[insn->dst_reg].max_value = insn->imm;
 			regs[insn->dst_reg].min_value = insn->imm;
 			regs[insn->dst_reg].min_align = calc_align(insn->imm);
+			regs[insn->dst_reg].value_from_signed = false;
 		}
 
 	} else if (opcode > BPF_END) {
@@ -2198,40 +2220,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
 			    u8 opcode)
 {
+	bool value_from_signed = true;
+	bool is_range = true;
+
 	switch (opcode) {
 	case BPF_JEQ:
 		/* If this is false then we know nothing Jon Snow, but if it is
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGT:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGT) {
+			/* Unsigned comparison, the minimum value is 0. */
+			false_reg->min_value = 0;
+		}
 		/* If this is false then we know the maximum val is val,
 		 * otherwise we know the min val is val+1.
 		 */
 		false_reg->max_value = val;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->min_value = val + 1;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGE:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGE) {
+			/* Unsigned comparison, the minimum value is 0. */
+			false_reg->min_value = 0;
+		}
 		/* If this is false then we know the maximum value is val - 1,
 		 * otherwise we know the mimimum value is val.
 		 */
 		false_reg->max_value = val - 1;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->min_value = val;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	default:
 		break;
@@ -2239,6 +2284,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 
 	check_reg_overflow(false_reg);
 	check_reg_overflow(true_reg);
+	if (is_range) {
+		if (__is_pointer_value(false, false_reg))
+			reset_reg_range_values(false_reg, 0);
+		if (__is_pointer_value(false, true_reg))
+			reset_reg_range_values(true_reg, 0);
+	}
 }
 
 /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
@@ -2248,41 +2299,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
 				u8 opcode)
 {
+	bool value_from_signed = true;
+	bool is_range = true;
+
 	switch (opcode) {
 	case BPF_JEQ:
 		/* If this is false then we know nothing Jon Snow, but if it is
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGT:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGT) {
+			/* Unsigned comparison, the minimum value is 0. */
+			true_reg->min_value = 0;
+		}
 		/*
 		 * If this is false, then the val is <= the register, if it is
 		 * true the register <= to the val.
 		 */
 		false_reg->min_value = val;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->max_value = val - 1;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGE:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGE) {
+			/* Unsigned comparison, the minimum value is 0. */
+			true_reg->min_value = 0;
+		}
 		/* If this is false then constant < register, if it is true then
 		 * the register < constant.
 		 */
 		false_reg->min_value = val + 1;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->max_value = val;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	default:
 		break;
@@ -2290,6 +2364,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 
 	check_reg_overflow(false_reg);
 	check_reg_overflow(true_reg);
+	if (is_range) {
+		if (__is_pointer_value(false, false_reg))
+			reset_reg_range_values(false_reg, 0);
+		if (__is_pointer_value(false, true_reg))
+			reset_reg_range_values(true_reg, 0);
+	}
 }
 
 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
-- 
GitLab


From d655490417ee22da3267fe6592a0ec2023c3c0db Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 Jul 2017 00:00:22 +0200
Subject: [PATCH 1926/1958] bpf: allow to specify log level and reduce it for
 test_verifier

For the test_verifier case, it's quite hard to parse log level 2 to
figure out what's causing an issue when used to log level 1. We do
want to use bpf_verify_program() in order to simulate some of the
tests with strict alignment. So just add an argument to pass the level
and put it to 1 for test_verifier.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                         | 4 ++--
 tools/lib/bpf/bpf.h                         | 2 +-
 tools/testing/selftests/bpf/test_align.c    | 2 +-
 tools/testing/selftests/bpf/test_verifier.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 7e0405e1651d2..412a7c82995ae 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -120,7 +120,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		       size_t insns_cnt, int strict_alignment,
 		       const char *license, __u32 kern_version,
-		       char *log_buf, size_t log_buf_sz)
+		       char *log_buf, size_t log_buf_sz, int log_level)
 {
 	union bpf_attr attr;
 
@@ -131,7 +131,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	attr.license = ptr_to_u64(license);
 	attr.log_buf = ptr_to_u64(log_buf);
 	attr.log_size = log_buf_sz;
-	attr.log_level = 2;
+	attr.log_level = log_level;
 	log_buf[0] = 0;
 	attr.kern_version = kern_version;
 	attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 16de44a14b487..418c86e69bcbf 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -38,7 +38,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		       size_t insns_cnt, int strict_alignment,
 		       const char *license, __u32 kern_version,
-		       char *log_buf, size_t log_buf_sz);
+		       char *log_buf, size_t log_buf_sz, int log_level);
 
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags);
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index bccebd935907a..29793694cbc79 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -380,7 +380,7 @@ static int do_test_single(struct bpf_align_test *test)
 	prog_len = probe_filter_length(prog);
 	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
 				     prog, prog_len, 1, "GPL", 0,
-				     bpf_vlog, sizeof(bpf_vlog));
+				     bpf_vlog, sizeof(bpf_vlog), 2);
 	if (fd_prog < 0) {
 		printf("Failed to load program.\n");
 		printf("%s", bpf_vlog);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 404aec5208128..f4d0a1de39253 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5633,7 +5633,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
 				     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
-				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog));
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
-- 
GitLab


From a1502132866fd2d2705eef4041dd6d7d849f48a2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 Jul 2017 00:00:23 +0200
Subject: [PATCH 1927/1958] bpf: fix up test cases with mixed signed/unsigned
 bounds

Fix the few existing test cases that used mixed signed/unsigned
bounds and switch them only to one flavor. Reason why we need this
is that proper boundaries cannot be derived from mixed tests.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index f4d0a1de39253..64b39d37d91d8 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4969,7 +4969,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
 				sizeof(struct test_val), 4),
 			BPF_MOV64_IMM(BPF_REG_4, 0),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 			BPF_MOV64_IMM(BPF_REG_3, 0),
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -4995,7 +4995,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
 				sizeof(struct test_val) + 1, 4),
 			BPF_MOV64_IMM(BPF_REG_4, 0),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 			BPF_MOV64_IMM(BPF_REG_3, 0),
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -5023,7 +5023,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
 				sizeof(struct test_val) - 20, 4),
 			BPF_MOV64_IMM(BPF_REG_4, 0),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 			BPF_MOV64_IMM(BPF_REG_3, 0),
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -5050,7 +5050,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
 				sizeof(struct test_val) - 19, 4),
 			BPF_MOV64_IMM(BPF_REG_4, 0),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 			BPF_MOV64_IMM(BPF_REG_3, 0),
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-- 
GitLab


From b712296a41ce0a114895fdff68fc22aada165b07 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Fri, 21 Jul 2017 00:00:24 +0200
Subject: [PATCH 1928/1958] bpf: add test for mixed signed and unsigned bounds
 checks

These failed due to a bug in verifier bounds handling.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 52 +++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 64b39d37d91d8..48b7997c0ae7c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5510,6 +5510,58 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid bpf_context access",
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
 	},
+	{
+		"bounds checks mixing signed and unsigned, positive bounds",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
GitLab


From 8641250251bfcd93479c71783c6792ae3325d7e4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 Jul 2017 00:00:25 +0200
Subject: [PATCH 1929/1958] bpf: more tests for mixed signed and unsigned
 bounds checks

Add a couple of more test cases to BPF selftests that are related
to mixed signed and unsigned checks.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 418 ++++++++++++++++++++
 1 file changed, 418 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 48b7997c0ae7c..af7d173910f4b 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5562,6 +5562,424 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.result_unpriv = REJECT,
 	},
+	{
+		"bounds checks mixing signed and unsigned, variant 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+			BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R8 invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 3",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+			BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R8 invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 4",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 5",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 invalid mem access",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_6, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R4 min value is negative, either use unsigned",
+		.errstr = "R4 min value is negative, either use unsigned",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 7",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 8",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024 + 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 9",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 10",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 11",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 12",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			/* Dead branch. */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 13",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -6),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 14",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 15",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_MOV64_IMM(BPF_REG_8, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
+		},
+		.fixup_map1 = { 4 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 16",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -6),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
GitLab


From be35e8c516c1915a3035d266a2015b41f73ba3f9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 20 Jul 2017 12:25:22 -0700
Subject: [PATCH 1930/1958] net: dsa: b53: Add missing ARL entries for BCM53125

The BCM53125 entry was missing an arl_entries member which would
basically prevent the ARL search from terminating properly. This switch
has 4 ARL entries, so add that.

Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index e68d368e20ac8..7f36d3e3c98bc 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1665,6 +1665,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53125",
 		.vlans = 4096,
 		.enabled_ports = 0xff,
+		.arl_entries = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
-- 
GitLab


From 153711f9421be5dbc973dc57a4109dc9d54c89b1 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 20 Jul 2017 11:27:57 -0700
Subject: [PATCH 1931/1958] rtnetlink: allocate more memory for
 dev_set_mac_address()

virtnet_set_mac_address() interprets mac address as struct
sockaddr, but upper layer only allocates dev->addr_len
which is ETH_ALEN + sizeof(sa_family_t) in this case.

We lack a unified definition for mac address, so just fix
the upper layer, this also allows drivers to interpret it
to struct sockaddr freely.

Reported-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 11b25fbf3dd29..9201e36213511 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2031,7 +2031,8 @@ static int do_setlink(const struct sk_buff *skb,
 		struct sockaddr *sa;
 		int len;
 
-		len = sizeof(sa_family_t) + dev->addr_len;
+		len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
+						  sizeof(*sa));
 		sa = kmalloc(len, GFP_KERNEL);
 		if (!sa) {
 			err = -ENOMEM;
-- 
GitLab


From 8799a221f5944a7d74516ecf46d58c28ec1d1f75 Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Wed, 19 Jul 2017 15:41:33 -0700
Subject: [PATCH 1932/1958] ipv4: initialize fib_trie prior to
 register_netdev_notifier call.

Net stack initialization currently initializes fib-trie after the
first call to netdevice_notifier() call. In fact fib_trie initialization
needs to happen before first rtnl_register(). It does not cause any problem
since there are no devices UP at this moment, but trying to bring 'lo'
UP at initialization would make this assumption wrong and exposes the issue.

Fixes following crash

 Call Trace:
  ? alternate_node_alloc+0x76/0xa0
  fib_table_insert+0x1b7/0x4b0
  fib_magic.isra.17+0xea/0x120
  fib_add_ifaddr+0x7b/0x190
  fib_netdev_event+0xc0/0x130
  register_netdevice_notifier+0x1c1/0x1d0
  ip_fib_init+0x72/0x85
  ip_rt_init+0x187/0x1e9
  ip_init+0xe/0x1a
  inet_init+0x171/0x26c
  ? ipv4_offload_init+0x66/0x66
  do_one_initcall+0x43/0x160
  kernel_init_freeable+0x191/0x219
  ? rest_init+0x80/0x80
  kernel_init+0xe/0x150
  ret_from_fork+0x22/0x30
 Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08
 RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28
 CR2: 0000000000000014

Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.")
Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization")

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e678fa892ddc..044d2a159a3c5 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = {
 
 void __init ip_fib_init(void)
 {
-	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+	fib_trie_init();
 
 	register_pernet_subsys(&fib_net_ops);
+
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 
-	fib_trie_init();
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
 }
-- 
GitLab


From 070f9c658a59f9a736b1c040001d37b0952e778e Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 20 Jul 2017 16:59:52 +0530
Subject: [PATCH 1933/1958] net: ethernet: ti: cpsw: Push the request_irq
 function to the end of probe

Push the request_irq function to the end of probe so as
to ensure all the required fields are populated in the event
of an ISR getting executed right after requesting the irq.

Currently while loading the crash kernel a crash was seen as
soon as devm_request_threaded_irq was called. This was due to
n->poll being NULL which is called as part of net_rx_action
function.

Suggested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c | 49 +++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 1850e348f5555..badd0a8caeb9e 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -3089,6 +3089,31 @@ static int cpsw_probe(struct platform_device *pdev)
 			cpsw->quirk_irq = true;
 	}
 
+	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	ndev->netdev_ops = &cpsw_netdev_ops;
+	ndev->ethtool_ops = &cpsw_ethtool_ops;
+	netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
+	netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+	cpsw_split_res(ndev);
+
+	/* register the network device */
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+	ret = register_netdev(ndev);
+	if (ret) {
+		dev_err(priv->dev, "error registering net device\n");
+		ret = -ENODEV;
+		goto clean_ale_ret;
+	}
+
+	if (cpsw->data.dual_emac) {
+		ret = cpsw_probe_dual_emac(priv);
+		if (ret) {
+			cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
+			goto clean_unregister_netdev_ret;
+		}
+	}
+
 	/* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and
 	 * MISC IRQs which are always kept disabled with this driver so
 	 * we will not request them.
@@ -3127,33 +3152,9 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_ale_ret;
 	}
 
-	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-
-	ndev->netdev_ops = &cpsw_netdev_ops;
-	ndev->ethtool_ops = &cpsw_ethtool_ops;
-	netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
-	netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
-	cpsw_split_res(ndev);
-
-	/* register the network device */
-	SET_NETDEV_DEV(ndev, &pdev->dev);
-	ret = register_netdev(ndev);
-	if (ret) {
-		dev_err(priv->dev, "error registering net device\n");
-		ret = -ENODEV;
-		goto clean_ale_ret;
-	}
-
 	cpsw_notice(priv, probe,
 		    "initialized device (regs %pa, irq %d, pool size %d)\n",
 		    &ss_res->start, ndev->irq, dma_params.descs_pool_size);
-	if (cpsw->data.dual_emac) {
-		ret = cpsw_probe_dual_emac(priv);
-		if (ret) {
-			cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
-			goto clean_unregister_netdev_ret;
-		}
-	}
 
 	pm_runtime_put(&pdev->dev);
 
-- 
GitLab


From e623a48ee433985f6ca0fb238f0002cc2eccdf53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20Bugge?= <Haakon.Bugge@oracle.com>
Date: Thu, 20 Jul 2017 12:28:55 +0200
Subject: [PATCH 1934/1958] rds: Make sure updates to cp_send_gen can be
 observed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cp->cp_send_gen is treated as a normal variable, although it may be
used by different threads.

This is fixed by using {READ,WRITE}_ONCE when it is incremented and
READ_ONCE when it is read outside the {acquire,release}_in_xmit
protection.

Normative reference from the Linux-Kernel Memory Model:

    Loads from and stores to shared (but non-atomic) variables should
    be protected with the READ_ONCE(), WRITE_ONCE(), and
    ACCESS_ONCE().

Clause 5.1.2.4/25 in the C standard is also relevant.

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Knut Omang <knut.omang@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index e81aa176f4e2c..41b9f0f5bb9c7 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -170,8 +170,8 @@ int rds_send_xmit(struct rds_conn_path *cp)
 	 * The acquire_in_xmit() check above ensures that only one
 	 * caller can increment c_send_gen at any time.
 	 */
-	cp->cp_send_gen++;
-	send_gen = cp->cp_send_gen;
+	send_gen = READ_ONCE(cp->cp_send_gen) + 1;
+	WRITE_ONCE(cp->cp_send_gen, send_gen);
 
 	/*
 	 * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
@@ -431,7 +431,7 @@ int rds_send_xmit(struct rds_conn_path *cp)
 		smp_mb();
 		if ((test_bit(0, &conn->c_map_queued) ||
 		     !list_empty(&cp->cp_send_queue)) &&
-		    send_gen == cp->cp_send_gen) {
+			send_gen == READ_ONCE(cp->cp_send_gen)) {
 			rds_stats_inc(s_send_lock_queue_raced);
 			if (batch_count < send_batch_count)
 				goto restart;
-- 
GitLab


From cbf5ecb305601d063dc94a57680dfbc3f96c188d Mon Sep 17 00:00:00 2001
From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
Date: Thu, 20 Jul 2017 05:20:40 +0000
Subject: [PATCH 1935/1958] net: bonding: Fix transmit load balancing in
 balance-alb mode

balance-alb mode used to have transmit dynamic load balancing feature
enabled by default.  However, transmit dynamic load balancing no longer
works in balance-alb after commit 8b426dc54cf4 ("bonding: remove
hardcoded value").

Both balance-tlb and balance-alb use the function bond_do_alb_xmit() to
send packets.  This function uses the parameter tlb_dynamic_lb.
tlb_dynamic_lb used to have the default value of 1 for balance-alb, but
now the value is set to 0 except in balance-tlb.

Re-enable transmit dyanmic load balancing by initializing tlb_dynamic_lb
for balance-alb similar to balance-tlb.

Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value")
Signed-off-by: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
Acked-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 14ff622190a5b..181839d6fbea4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4596,7 +4596,7 @@ static int bond_check_params(struct bond_params *params)
 	}
 	ad_user_port_key = valptr->value;
 
-	if (bond_mode == BOND_MODE_TLB) {
+	if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) {
 		bond_opt_initstr(&newval, "default");
 		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB),
 					&newval);
-- 
GitLab


From 921edf312a6a20be16cf2b60e0dec3dce35e5cb9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 11:25:13 +0200
Subject: [PATCH 1936/1958] ide: avoid warning for timings calculation

gcc-7 warns about the result of a constant multiplication used as
a boolean:

drivers/ide/ide-timings.c: In function 'ide_timing_quantize':
drivers/ide/ide-timings.c:112:24: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context]
  q->setup   = EZ(t->setup   * 1000,  T);

This slightly rearranges the macro to simplify the code and avoid
the warning at the same time.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/ide-timings.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c
index 0e05f75934c98..1858e3ce3993a 100644
--- a/drivers/ide/ide-timings.c
+++ b/drivers/ide/ide-timings.c
@@ -104,19 +104,19 @@ u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio)
 EXPORT_SYMBOL_GPL(ide_pio_cycle_time);
 
 #define ENOUGH(v, unit)		(((v) - 1) / (unit) + 1)
-#define EZ(v, unit)		((v) ? ENOUGH(v, unit) : 0)
+#define EZ(v, unit)		((v) ? ENOUGH((v) * 1000, unit) : 0)
 
 static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q,
 				int T, int UT)
 {
-	q->setup   = EZ(t->setup   * 1000,  T);
-	q->act8b   = EZ(t->act8b   * 1000,  T);
-	q->rec8b   = EZ(t->rec8b   * 1000,  T);
-	q->cyc8b   = EZ(t->cyc8b   * 1000,  T);
-	q->active  = EZ(t->active  * 1000,  T);
-	q->recover = EZ(t->recover * 1000,  T);
-	q->cycle   = EZ(t->cycle   * 1000,  T);
-	q->udma    = EZ(t->udma    * 1000, UT);
+	q->setup   = EZ(t->setup,   T);
+	q->act8b   = EZ(t->act8b,   T);
+	q->rec8b   = EZ(t->rec8b,   T);
+	q->cyc8b   = EZ(t->cyc8b,   T);
+	q->active  = EZ(t->active,  T);
+	q->recover = EZ(t->recover, T);
+	q->cycle   = EZ(t->cycle,   T);
+	q->udma    = EZ(t->udma,    UT);
 }
 
 void ide_timing_merge(struct ide_timing *a, struct ide_timing *b,
-- 
GitLab


From 2aeb1883547626d82c597cce2c99f0b9c62e2425 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 20 Jul 2017 16:14:55 +0200
Subject: [PATCH 1937/1958] perf/core: Fix locking for children siblings group
 read

We're missing ctx lock when iterating children siblings
within the perf_read path for group reading. Following
race and crash can happen:

User space doing read syscall on event group leader:

T1:
  perf_read
    lock event->ctx->mutex
    perf_read_group
      lock leader->child_mutex
      __perf_read_group_add(child)
        list_for_each_entry(sub, &leader->sibling_list, group_entry)

---->   sub might be invalid at this point, because it could
        get removed via perf_event_exit_task_context in T2

Child exiting and cleaning up its events:

T2:
  perf_event_exit_task_context
    lock ctx->mutex
    list_for_each_entry_safe(child_event, next, &child_ctx->event_list,...
      perf_event_exit_event(child)
        lock ctx->lock
        perf_group_detach(child)
        unlock ctx->lock

---->   child is removed from sibling_list without any sync
        with T1 path above

        ...
        free_event(child)

Before the child is removed from the leader's child_list,
(and thus is omitted from perf_read_group processing), we
need to ensure that perf_read_group touches child's
siblings under its ctx->lock.

Peter further notes:

| One additional note; this bug got exposed by commit:
|
|   ba5213ae6b88 ("perf/core: Correct event creation with PERF_FORMAT_GROUP")
|
| which made it possible to actually trigger this code-path.

Tested-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: ba5213ae6b88 ("perf/core: Correct event creation with PERF_FORMAT_GROUP")
Link: http://lkml.kernel.org/r/20170720141455.2106-1-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index c9cdbd3967709..c17c0881fd36a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4372,7 +4372,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value);
 static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
+	struct perf_event_context *ctx = leader->ctx;
 	struct perf_event *sub;
+	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
 
@@ -4402,12 +4404,15 @@ static int __perf_read_group_add(struct perf_event *leader,
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
 
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		values[n++] += perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 	}
 
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	return 0;
 }
 
-- 
GitLab


From 2fe9a5c6ade4dfb53ff1c137cca3828d9d1d0948 Mon Sep 17 00:00:00 2001
From: Andrew Banman <abanman@hpe.com>
Date: Thu, 20 Jul 2017 17:05:51 -0500
Subject: [PATCH 1938/1958] x86/platform/uv/BAU: Disable BAU on single hub
 configurations

The BAU confers no benefit to a UV system running with only one hub/socket.
Permanently disable the BAU driver if there are less than two hubs online
to avoid BAU overhead. We have observed failed boots on single-socket UV4
systems caused by BAU that are avoided with this patch.

Also, while at it, consolidate initialization error blocks and fix a
memory leak.

Signed-off-by: Andrew Banman <abanman@hpe.com>
Acked-by: Russ Anderson <rja@hpe.com>
Acked-by: Mike Travis <mike.travis@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: tony.ernst@hpe.com
Link: http://lkml.kernel.org/r/1500588351-78016-1-git-send-email-abanman@hpe.com
[ Minor cleanups. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/uv/tlb_uv.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index fd8759111b65b..3e4bdb442fbcf 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2221,13 +2221,17 @@ static int __init uv_bau_init(void)
 	else if (is_uv1_hub())
 		ops = uv1_bau_ops;
 
+	nuvhubs = uv_num_possible_blades();
+	if (nuvhubs < 2) {
+		pr_crit("UV: BAU disabled - insufficient hub count\n");
+		goto err_bau_disable;
+	}
+
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 	}
 
-	nuvhubs = uv_num_possible_blades();
-
 	uv_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		cpus = uv_blade_nr_possible_cpus(uvhub);
@@ -2240,9 +2244,8 @@ static int __init uv_bau_init(void)
 		enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		set_bau_off();
-		nobau_perm = 1;
-		return 0;
+		pr_crit("UV: BAU disabled - per CPU init failed\n");
+		goto err_bau_disable;
 	}
 
 	vector = UV_BAU_MESSAGE;
@@ -2268,6 +2271,16 @@ static int __init uv_bau_init(void)
 	}
 
 	return 0;
+
+err_bau_disable:
+
+	for_each_possible_cpu(cur_cpu)
+		free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));
+
+	set_bau_off();
+	nobau_perm = 1;
+
+	return -EINVAL;
 }
 core_initcall(uv_bau_init);
 fs_initcall(uv_ptc_init);
-- 
GitLab


From df6c3db8d30fb1699ccbc403196b86324f4257af Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 19 Jul 2017 09:52:47 +0200
Subject: [PATCH 1939/1958] perf/x86/intel: Add proper condition to run
 sched_task callbacks

We have 2 functions using the same sched_task callback:

  - PEBS drain for free running counters
  - LBR save/store

Both of them are called from intel_pmu_sched_task() and
either of them can be unwillingly triggered when the
other one is configured to run.

Let's say there's PEBS drain configured in sched_task
callback for the event, but in the callback itself
(intel_pmu_sched_task()) we will also run the code for
LBR save/restore, which we did not ask for, but the
code in intel_pmu_sched_task() does not check for that.

This can lead to extra cycles in some perf monitoring,
like when we monitor PEBS event without LBR data.

  # perf record --no-timestamp -c 10000 -e cycles:p ./perf bench sched pipe -l 1000000

  (We need PEBS, non freq/non timestamp event to enable
   the sched_task callback)

The perf stat of cycles and msr:write_msr for above
command before the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,519,557,441      cycles:k
        91,195,527      msr:write_msr

      29.334476406 seconds time elapsed

And after the change:
  ...
  Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
                                 ./perf bench sched pipe -l 1000000' (5 runs):

    18,704,973,540      cycles:k
        27,184,720      msr:write_msr

      16.977875900 seconds time elapsed

There's no affect on cycles:k because the sched_task happens
with events switched off, however the msr:write_msr tracepoint
counter together with almost 50% of time speedup show the
improvement.

Monitoring LBR event and having extra PEBS drain processing
in sched_task callback showed just a little speedup, because
the drain function does not do much extra work in case there
is no PEBS data.

Adding conditions to recognize the configured work that needs
to be done in the x86_pmu's sched_task callback.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20170719075247.GA27506@krava
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c |  6 ++----
 arch/x86/events/intel/ds.c   | 14 ++++++++------
 arch/x86/events/intel/lbr.c  |  4 ++++
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ede97710c2f4f..98b0f07295273 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3397,10 +3397,8 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
-		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(ctx, sched_in);
+	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 6dc8a59e1bfb8..a322fed5f8edc 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
 	x86_pmu.drain_pebs(&regs);
 }
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-	if (!sched_in)
-		intel_pmu_drain_pebs_buffer();
-}
-
 /*
  * PEBS
  */
@@ -822,6 +816,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!sched_in && pebs_needs_sched_cb(cpuc))
+		intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index eb261656a320d..955457a30197e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
+	if (!cpuc->lbr_users)
+		return;
+
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush
-- 
GitLab


From db15e7f27369b81b6605a546d54eb844f87370a5 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jul 2017 16:42:47 -0500
Subject: [PATCH 1940/1958] x86/devicetree: Convert to using %pOF instead of
 ->full_name

Now that we have a custom printf format specifier, convert users of
full_name to use %pOF instead. This is preparation to remove storing
of the full path string for each device node.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: devicetree@vger.kernel.org
Link: http://lkml.kernel.org/r/20170718214339.7774-7-robh@kernel.org
[ Clarify the error message while at it, as 'node' is ambiguous. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/devicetree.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3fe45f84ced44..cbf1f6ba39a83 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -235,8 +235,7 @@ static void __init dtb_add_ioapic(struct device_node *dn)
 
 	ret = of_address_to_resource(dn, 0, &r);
 	if (ret) {
-		printk(KERN_ERR "Can't obtain address from node %s.\n",
-				dn->full_name);
+		printk(KERN_ERR "Can't obtain address from device node %pOF.\n", dn);
 		return;
 	}
 	mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
-- 
GitLab


From ecc7b435d2beb025d7506af74cf749af2cef5734 Mon Sep 17 00:00:00 2001
From: Eryu Guan <eguan@redhat.com>
Date: Tue, 18 Jul 2017 13:32:32 +0800
Subject: [PATCH 1941/1958] nfs: count correct array for mnt3_counts array size

Array size of mnt3_counts should be the size of array
mnt3_procedures, not mnt_procedures, though they're same in size
right now. Found this by code inspection.

Fixes: 1c5876ddbdb4 ("sunrpc: move p_count out of struct rpc_procinfo")
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/mount_clnt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 3efe946672beb..60bad882c1235 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -512,7 +512,7 @@ static const struct rpc_version mnt_version1 = {
 	.counts		= mnt_counts,
 };
 
-static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)];
+static unsigned int mnt3_counts[ARRAY_SIZE(mnt3_procedures)];
 static const struct rpc_version mnt_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
-- 
GitLab


From 3ffbc1d65583394be12801655781dd2b079ce169 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 19 Jul 2017 14:05:01 +1000
Subject: [PATCH 1942/1958] net/sunrpc/xprt_sock: fix regression in connection
 error reporting.

Commit 3d4762639dd3 ("tcp: remove poll() flakes when receiving
RST") in v4.12 changed the order in which ->sk_state_change()
and ->sk_error_report() are called when a socket is shut
down - sk_state_change() is now called first.

This causes xs_tcp_state_change() -> xs_sock_mark_closed() ->
xprt_disconnect_done() to wake all pending tasked with -EAGAIN.
When the ->sk_error_report() callback arrives, it is too late to
pass the error on, and it is lost.

As easy way to demonstrate the problem caused is to try to start
rpc.nfsd while rcpbind isn't running.
nfsd will attempt a tcp connection to rpcbind.  A ECONNREFUSED
error is returned, but sunrpc code loses the error and keeps
retrying.  If it saw the ECONNREFUSED, it would abort.

To fix this, handle the sk->sk_err in the TCP_CLOSE branch of
xs_tcp_state_change().

Fixes: 3d4762639dd3 ("tcp: remove poll() flakes when receiving RST")
Cc: stable@vger.kernel.org (v4.12)
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtsock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d5b54c020decd..4f154d3887483 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1624,6 +1624,8 @@ static void xs_tcp_state_change(struct sock *sk)
 		if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
 					&transport->sock_state))
 			xprt_clear_connecting(xprt);
+		if (sk->sk_err)
+			xprt_wake_pending_tasks(xprt, -sk->sk_err);
 		xs_sock_mark_closed(xprt);
 	}
  out:
-- 
GitLab


From 15d4b73ac2232d6f2beb61d8b2400ea66e4da606 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:54:32 -0400
Subject: [PATCH 1943/1958] NFS: Refactor NFS access to kernel access mask
 calculation

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1255891e56955..24b3a6748062b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2375,16 +2375,31 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 }
 EXPORT_SYMBOL_GPL(nfs_access_add_cache);
 
+#define NFS_MAY_READ (NFS4_ACCESS_READ)
+#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \
+		NFS4_ACCESS_EXTEND | \
+		NFS4_ACCESS_DELETE)
+#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP)
+#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE)
+static int
+nfs_access_calc_mask(u32 access_result)
+{
+	int mask = 0;
+
+	if (access_result & NFS_MAY_READ)
+		mask |= MAY_READ;
+	if (access_result & NFS_MAY_WRITE)
+		mask |= MAY_WRITE;
+	if (access_result & NFS_MAY_LOOKUP)
+		mask |= MAY_EXEC;
+	if (access_result & NFS_MAY_EXECUTE)
+		mask |= MAY_EXEC;
+	return mask;
+}
+
 void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
 {
-	entry->mask = 0;
-	if (access_result & NFS4_ACCESS_READ)
-		entry->mask |= MAY_READ;
-	if (access_result &
-	    (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE))
-		entry->mask |= MAY_WRITE;
-	if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
-		entry->mask |= MAY_EXEC;
+	entry->mask = nfs_access_calc_mask(access_result);
 }
 EXPORT_SYMBOL_GPL(nfs_access_set_mask);
 
-- 
GitLab


From eda3e20847788c453aa7ab478aeaceb56ed29cb6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:54:33 -0400
Subject: [PATCH 1944/1958] NFSv3: Convert nfs3_proc_access() to use
 nfs_access_set_mask()

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs3proc.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index df4a7d3ab9157..d1e87ec0df848 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -220,15 +220,8 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, res.fattr);
-	if (status == 0) {
-		entry->mask = 0;
-		if (res.access & NFS3_ACCESS_READ)
-			entry->mask |= MAY_READ;
-		if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE))
-			entry->mask |= MAY_WRITE;
-		if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
-			entry->mask |= MAY_EXEC;
-	}
+	if (status == 0)
+		nfs_access_set_mask(entry, res.access);
 	nfs_free_fattr(res.fattr);
 out:
 	dprintk("NFS reply access: %d\n", status);
-- 
GitLab


From bd8b2441742b49c76bec707757bd9c028ea9838e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:54:34 -0400
Subject: [PATCH 1945/1958] NFS: Store the raw NFS access mask in the inode's
 access cache

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c           | 9 ++++++---
 include/linux/nfs_fs.h | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 24b3a6748062b..8fae8b00b8f5f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2399,7 +2399,7 @@ nfs_access_calc_mask(u32 access_result)
 
 void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
 {
-	entry->mask = nfs_access_calc_mask(access_result);
+	entry->mask = access_result;
 }
 EXPORT_SYMBOL_GPL(nfs_access_set_mask);
 
@@ -2407,6 +2407,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 {
 	struct nfs_access_entry cache;
 	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
+	int cache_mask;
 	int status;
 
 	trace_nfs_access_enter(inode);
@@ -2422,7 +2423,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 		goto out;
 
 	/* Be clever: ask server to check for all possible rights */
-	cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
+	cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE
+		     | NFS_MAY_WRITE | NFS_MAY_READ;
 	cache.cred = cred;
 	cache.jiffies = jiffies;
 	status = NFS_PROTO(inode)->access(inode, &cache);
@@ -2436,7 +2438,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 	}
 	nfs_access_add_cache(inode, &cache);
 out_cached:
-	if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
+	cache_mask = nfs_access_calc_mask(cache.mask);
+	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
 		status = -EACCES;
 out:
 	trace_nfs_access_exit(inode, status);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index e52cc55ac300f..5cc91d6381a35 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -51,7 +51,7 @@ struct nfs_access_entry {
 	struct list_head	lru;
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
-	int			mask;
+	__u32			mask;
 	struct rcu_head		rcu_head;
 };
 
-- 
GitLab


From ecbb903c56745d59c301db26dd7d8b74b520eb84 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 11 Jul 2017 17:54:35 -0400
Subject: [PATCH 1946/1958] NFS: Be more careful about mapping file permissions

When mapping a directory, we want the MAY_WRITE permissions to reflect
whether or not we have permission to modify, add and delete the directory
entries. MAY_EXEC must map to lookup permissions.

On the other hand, for files, we want MAY_WRITE to reflect a permission
to modify and extend the file.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8fae8b00b8f5f..37a6180ee2e8d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2379,21 +2379,30 @@ EXPORT_SYMBOL_GPL(nfs_access_add_cache);
 #define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \
 		NFS4_ACCESS_EXTEND | \
 		NFS4_ACCESS_DELETE)
+#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \
+		NFS4_ACCESS_EXTEND)
+#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
 #define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP)
 #define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE)
 static int
-nfs_access_calc_mask(u32 access_result)
+nfs_access_calc_mask(u32 access_result, umode_t umode)
 {
 	int mask = 0;
 
 	if (access_result & NFS_MAY_READ)
 		mask |= MAY_READ;
-	if (access_result & NFS_MAY_WRITE)
-		mask |= MAY_WRITE;
-	if (access_result & NFS_MAY_LOOKUP)
-		mask |= MAY_EXEC;
-	if (access_result & NFS_MAY_EXECUTE)
-		mask |= MAY_EXEC;
+	if (S_ISDIR(umode)) {
+		if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
+			mask |= MAY_WRITE;
+		if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
+			mask |= MAY_EXEC;
+	} else if (S_ISREG(umode)) {
+		if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
+			mask |= MAY_WRITE;
+		if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
+			mask |= MAY_EXEC;
+	} else if (access_result & NFS_MAY_WRITE)
+			mask |= MAY_WRITE;
 	return mask;
 }
 
@@ -2438,7 +2447,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 	}
 	nfs_access_add_cache(inode, &cache);
 out_cached:
-	cache_mask = nfs_access_calc_mask(cache.mask);
+	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
 	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
 		status = -EACCES;
 out:
-- 
GitLab


From 1ebf980127924c639e2b85c08468311ba1c95b70 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 20 Jul 2017 17:00:02 -0400
Subject: [PATCH 1947/1958] NFS/filelayout: Fix racy setting of fl->dsaddr in
 filelayout_check_deviceid()

We must set fl->dsaddr once, and once only, even if there are multiple
processes calling filelayout_check_deviceid() for the same layout
segment.

Reported-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/filelayout/filelayout.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 080fc6b278bd5..44c638b7876cf 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -542,6 +542,10 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
 	struct nfs4_file_layout_dsaddr *dsaddr;
 	int status = -EINVAL;
 
+	/* Is the deviceid already set? If so, we're good. */
+	if (fl->dsaddr != NULL)
+		return 0;
+
 	/* find and reference the deviceid */
 	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid,
 			lo->plh_lc_cred, gfp_flags);
@@ -553,8 +557,6 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
 	if (filelayout_test_devid_unavailable(&dsaddr->id_node))
 		goto out_put;
 
-	fl->dsaddr = dsaddr;
-
 	if (fl->first_stripe_index >= dsaddr->stripe_count) {
 		dprintk("%s Bad first_stripe_index %u\n",
 				__func__, fl->first_stripe_index);
@@ -570,6 +572,13 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
 		goto out_put;
 	}
 	status = 0;
+
+	/*
+	 * Atomic compare and xchange to ensure we don't scribble
+	 * over a non-NULL pointer.
+	 */
+	if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL)
+		goto out_put;
 out:
 	return status;
 out_put:
-- 
GitLab


From 82abbea734d659b4218ad06734b4927b43261985 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 21 Jul 2017 13:32:27 -0700
Subject: [PATCH 1948/1958] MAINTAINERS: fix alphabetical ordering

Fix major alphabetic errors.  No attempt to fix items that all begin
with the same word (like ARM, BROADCOM, DRM, EDAC, FREESCALE, INTEL,
OMAP, PCI, SAMSUNG, TI, USB, etc.).

(diffstat +/- is different by one line because TI KEYSTONE MULTICORE
had 2 blank lines after it.)

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1683 +++++++++++++++++++++++++--------------------------
 1 file changed, 841 insertions(+), 842 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2784ac59801d1..2bc3664721697 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -492,13 +492,6 @@ S:	Maintained
 F:	Documentation/hwmon/adt7475
 F:	drivers/hwmon/adt7475.c
 
-ADXL34X THREE-AXIS DIGITAL ACCELEROMETER DRIVER (ADXL345/ADXL346)
-M:	Michael Hennerich <michael.hennerich@analog.com>
-W:	http://wiki.analog.com/ADXL345
-W:	http://ez.analog.com/community/linux-device-drivers
-S:	Supported
-F:	drivers/input/misc/adxl34x.c
-
 ADVANSYS SCSI DRIVER
 M:	Matthew Wilcox <matthew@wil.cx>
 M:	Hannes Reinecke <hare@suse.com>
@@ -507,6 +500,13 @@ S:	Maintained
 F:	Documentation/scsi/advansys.txt
 F:	drivers/scsi/advansys.c
 
+ADXL34X THREE-AXIS DIGITAL ACCELEROMETER DRIVER (ADXL345/ADXL346)
+M:	Michael Hennerich <michael.hennerich@analog.com>
+W:	http://wiki.analog.com/ADXL345
+W:	http://ez.analog.com/community/linux-device-drivers
+S:	Supported
+F:	drivers/input/misc/adxl34x.c
+
 AEDSP16 DRIVER
 M:	Riccardo Facchetti <fizban@tin.it>
 S:	Maintained
@@ -872,6 +872,15 @@ F:	include/linux/apm_bios.h
 F:	include/uapi/linux/apm_bios.h
 F:	drivers/char/apm-emulation.c
 
+APPARMOR SECURITY MODULE
+M:	John Johansen <john.johansen@canonical.com>
+L:	apparmor@lists.ubuntu.com (subscribers-only, general discussion)
+W:	apparmor.wiki.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
+S:	Supported
+F:	security/apparmor/
+F:	Documentation/admin-guide/LSM/apparmor.rst
+
 APPLE BCM5974 MULTITOUCH DRIVER
 M:	Henrik Rydberg <rydberg@bitmath.org>
 L:	linux-input@vger.kernel.org
@@ -930,6 +939,12 @@ S:	Maintained
 F:	drivers/video/fbdev/arcfb.c
 F:	drivers/video/fbdev/core/fb_defio.c
 
+ARC PGU DRM DRIVER
+M:	Alexey Brodkin <abrodkin@synopsys.com>
+S:	Supported
+F:	drivers/gpu/drm/arc/
+F:	Documentation/devicetree/bindings/display/snps,arcpgu.txt
+
 ARCNET NETWORK LAYER
 M:	Michael Grzeschik <m.grzeschik@pengutronix.de>
 L:	netdev@vger.kernel.org
@@ -937,12 +952,6 @@ S:	Maintained
 F:	drivers/net/arcnet/
 F:	include/uapi/linux/if_arcnet.h
 
-ARC PGU DRM DRIVER
-M:	Alexey Brodkin <abrodkin@synopsys.com>
-S:	Supported
-F:	drivers/gpu/drm/arc/
-F:	Documentation/devicetree/bindings/display/snps,arcpgu.txt
-
 ARM ARCHITECTED TIMER DRIVER
 M:	Mark Rutland <mark.rutland@arm.com>
 M:	Marc Zyngier <marc.zyngier@arm.com>
@@ -2207,21 +2216,10 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
 S:	Supported
 F:	drivers/net/wireless/ath/ath6kl/
 
-WILOCITY WIL6210 WIRELESS DRIVER
-M:	Maya Erez <qca_merez@qca.qualcomm.com>
-L:	linux-wireless@vger.kernel.org
-L:	wil6210@qca.qualcomm.com
-S:	Supported
-W:	http://wireless.kernel.org/en/users/Drivers/wil6210
-F:	drivers/net/wireless/ath/wil6210/
-F:	include/uapi/linux/wil6210_uapi.h
-
-CARL9170 LINUX COMMUNITY WIRELESS DRIVER
-M:	Christian Lamparter <chunkeey@googlemail.com>
-L:	linux-wireless@vger.kernel.org
-W:	http://wireless.kernel.org/en/users/Drivers/carl9170
+ATI_REMOTE2 DRIVER
+M:	Ville Syrjala <syrjala@sci.fi>
 S:	Maintained
-F:	drivers/net/wireless/ath/carl9170/
+F:	drivers/input/misc/ati_remote2.c
 
 ATK0110 HWMON DRIVER
 M:	Luca Tettamanti <kronos.it@gmail.com>
@@ -2229,11 +2227,6 @@ L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/asus_atk0110.c
 
-ATI_REMOTE2 DRIVER
-M:	Ville Syrjala <syrjala@sci.fi>
-S:	Maintained
-F:	drivers/input/misc/ati_remote2.c
-
 ATLX ETHERNET DRIVERS
 M:	Jay Cliburn <jcliburn@gmail.com>
 M:	Chris Snook <chris.snook@gmail.com>
@@ -2507,13 +2500,11 @@ W:	https://linuxtv.org
 S:	Supported
 F:	drivers/media/platform/sti/bdisp
 
-DELTA ST MEDIA DRIVER
-M:	Hugues Fruchet <hugues.fruchet@st.com>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-W:	https://linuxtv.org
-S:	Supported
-F:	drivers/media/platform/sti/delta
+BECKHOFF CX5020 ETHERCAT MASTER DRIVER
+M:	Dariusz Marcinkiewicz <reksio@newterm.pl>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/ec_bhf.c
 
 BEFS FILE SYSTEM
 M:	Luis de Bethencourt <luisbg@kernel.org>
@@ -2523,11 +2514,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/luisbg/linux-befs.git
 F:	Documentation/filesystems/befs.txt
 F:	fs/befs/
 
-BECKHOFF CX5020 ETHERCAT MASTER DRIVER
-M:	Dariusz Marcinkiewicz <reksio@newterm.pl>
-L:	netdev@vger.kernel.org
+BFQ I/O SCHEDULER
+M:	Paolo Valente <paolo.valente@linaro.org>
+M:	Jens Axboe <axboe@kernel.dk>
+L:	linux-block@vger.kernel.org
 S:	Maintained
-F:	drivers/net/ethernet/ec_bhf.c
+F:	block/bfq-*
+F:	Documentation/block/bfq-iosched.txt
 
 BFS FILE SYSTEM
 M:	"Tigran A. Aivazian" <aivazian.tigran@gmail.com>
@@ -2606,14 +2599,6 @@ F:	block/
 F:	kernel/trace/blktrace.c
 F:	lib/sbitmap.c
 
-BFQ I/O SCHEDULER
-M:	Paolo Valente <paolo.valente@linaro.org>
-M:	Jens Axboe <axboe@kernel.dk>
-L:	linux-block@vger.kernel.org
-S:	Maintained
-F:	block/bfq-*
-F:	Documentation/block/bfq-iosched.txt
-
 BLOCK2MTD DRIVER
 M:	Joern Engel <joern@lazybastard.org>
 L:	linux-mtd@lists.infradead.org
@@ -3013,6 +2998,15 @@ S:	Odd fixes
 F:	Documentation/media/v4l-drivers/bttv*
 F:	drivers/media/pci/bt8xx/bttv*
 
+BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS
+M:	Chanwoo Choi <cw00.choi@samsung.com>
+L:	linux-pm@vger.kernel.org
+L:	linux-samsung-soc@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+S:	Maintained
+F:	drivers/devfreq/exynos-bus.c
+F:	Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+
 BUSLOGIC SCSI DRIVER
 M:	Khalid Aziz <khalid@gonehiking.org>
 L:	linux-scsi@vger.kernel.org
@@ -3132,6 +3126,13 @@ M:	Kevin Tsai <ktsai@capellamicro.com>
 S:	Maintained
 F:	drivers/iio/light/cm*
 
+CARL9170 LINUX COMMUNITY WIRELESS DRIVER
+M:	Christian Lamparter <chunkeey@googlemail.com>
+L:	linux-wireless@vger.kernel.org
+W:	http://wireless.kernel.org/en/users/Drivers/carl9170
+S:	Maintained
+F:	drivers/net/wireless/ath/carl9170/
+
 CAVIUM THUNDERX2 ARM64 SOC
 M:	Jayachandran C <jnair@caviumnetworks.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -3260,12 +3261,6 @@ F:	drivers/usb/host/whci/
 F:	drivers/usb/wusbcore/
 F:	include/linux/usb/wusb*
 
-HT16K33 LED CONTROLLER DRIVER
-M:	Robin van der Gracht <robin@protonic.nl>
-S:	Maintained
-F:	drivers/auxdisplay/ht16k33.c
-F:	Documentation/devicetree/bindings/display/ht16k33.txt
-
 CFAG12864B LCD DRIVER
 M:	Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
 W:	http://miguelojeda.es/auxdisplay.htm
@@ -3337,19 +3332,6 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bleung/chrome-platform.git
 F:	drivers/platform/chrome/
 
-CISCO VIC ETHERNET NIC DRIVER
-M:	Christian Benvenuti <benve@cisco.com>
-M:	Govindarajulu Varadarajan <_govind@gmx.com>
-M:	Neel Patel <neepatel@cisco.com>
-S:	Supported
-F:	drivers/net/ethernet/cisco/enic/
-
-CISCO VIC LOW LATENCY NIC DRIVER
-M:	Christian Benvenuti <benve@cisco.com>
-M:	Dave Goodell <dgoodell@cisco.com>
-S:	Supported
-F:	drivers/infiniband/hw/usnic/
-
 CIRRUS LOGIC EP93XX ETHERNET DRIVER
 M:	Hartley Sweeten <hsweeten@visionengravers.com>
 L:	netdev@vger.kernel.org
@@ -3363,6 +3345,34 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Maintained
 F:	sound/soc/codecs/cs*
 
+CISCO FCOE HBA DRIVER
+M:	Satish Kharat <satishkh@cisco.com>
+M:	Sesidhar Baddela <sebaddel@cisco.com>
+M:	Karan Tilak Kumar <kartilak@cisco.com>
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/fnic/
+
+CISCO SCSI HBA DRIVER
+M:	Karan Tilak Kumar <kartilak@cisco.com>
+M:	Sesidhar Baddela <sebaddel@cisco.com>
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/snic/
+
+CISCO VIC ETHERNET NIC DRIVER
+M:	Christian Benvenuti <benve@cisco.com>
+M:	Govindarajulu Varadarajan <_govind@gmx.com>
+M:	Neel Patel <neepatel@cisco.com>
+S:	Supported
+F:	drivers/net/ethernet/cisco/enic/
+
+CISCO VIC LOW LATENCY NIC DRIVER
+M:	Christian Benvenuti <benve@cisco.com>
+M:	Dave Goodell <dgoodell@cisco.com>
+S:	Supported
+F:	drivers/infiniband/hw/usnic/
+
 CLEANCACHE API
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 L:	linux-kernel@vger.kernel.org
@@ -3384,21 +3394,6 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:	Supported
 F:	drivers/clocksource
 
-CISCO FCOE HBA DRIVER
-M:	Satish Kharat <satishkh@cisco.com>
-M:	Sesidhar Baddela <sebaddel@cisco.com>
-M:	Karan Tilak Kumar <kartilak@cisco.com>
-L:	linux-scsi@vger.kernel.org
-S:	Supported
-F:	drivers/scsi/fnic/
-
-CISCO SCSI HBA DRIVER
-M:	Karan Tilak Kumar <kartilak@cisco.com>
-M:	Sesidhar Baddela <sebaddel@cisco.com>
-L:	linux-scsi@vger.kernel.org
-S:	Supported
-F:	drivers/scsi/snic/
-
 CMPC ACPI DRIVER
 M:	Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
 M:	Daniel Oliveira Nascimento <don@syst.com.br>
@@ -3587,6 +3582,18 @@ F:	drivers/cpufreq/arm_big_little.h
 F:	drivers/cpufreq/arm_big_little.c
 F:	drivers/cpufreq/arm_big_little_dt.c
 
+CPU POWER MONITORING SUBSYSTEM
+M:	Thomas Renninger <trenn@suse.com>
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	tools/power/cpupower/
+
+CPUID/MSR DRIVER
+M:	"H. Peter Anvin" <hpa@zytor.com>
+S:	Maintained
+F:	arch/x86/kernel/cpuid.c
+F:	arch/x86/kernel/msr.c
+
 CPUIDLE DRIVER - ARM BIG LITTLE
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 M:	Daniel Lezcano <daniel.lezcano@linaro.org>
@@ -3616,18 +3623,6 @@ B:	https://bugzilla.kernel.org
 F:	drivers/cpuidle/*
 F:	include/linux/cpuidle.h
 
-CPUID/MSR DRIVER
-M:	"H. Peter Anvin" <hpa@zytor.com>
-S:	Maintained
-F:	arch/x86/kernel/cpuid.c
-F:	arch/x86/kernel/msr.c
-
-CPU POWER MONITORING SUBSYSTEM
-M:	Thomas Renninger <trenn@suse.com>
-L:	linux-pm@vger.kernel.org
-S:	Maintained
-F:	tools/power/cpupower/
-
 CRAMFS FILESYSTEM
 W:	http://sourceforge.net/projects/cramfs/
 S:	Orphan / Obsolete
@@ -3815,14 +3810,6 @@ F:	drivers/scsi/cxlflash/
 F:	include/uapi/scsi/cxlflash_ioctls.h
 F:	Documentation/powerpc/cxlflash.txt
 
-STMMAC ETHERNET DRIVER
-M:	Giuseppe Cavallaro <peppe.cavallaro@st.com>
-M:	Alexandre Torgue <alexandre.torgue@st.com>
-L:	netdev@vger.kernel.org
-W:	http://www.stlinux.com
-S:	Supported
-F:	drivers/net/ethernet/stmicro/stmmac/
-
 CYBERPRO FB DRIVER
 M:	Russell King <linux@armlinux.org.uk>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -3980,6 +3967,14 @@ L:	linux-mtd@lists.infradead.org
 S:	Supported
 F:	drivers/mtd/nand/denali*
 
+DELTA ST MEDIA DRIVER
+M:	Hugues Fruchet <hugues.fruchet@st.com>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+W:	https://linuxtv.org
+S:	Supported
+F:	drivers/media/platform/sti/delta
+
 DESIGNWARE USB2 DRD IP DRIVER
 M:	John Youn <johnyoun@synopsys.com>
 L:	linux-usb@vger.kernel.org
@@ -4028,15 +4023,6 @@ F:	drivers/devfreq/devfreq-event.c
 F:	include/linux/devfreq-event.h
 F:	Documentation/devicetree/bindings/devfreq/event/
 
-BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS
-M:	Chanwoo Choi <cw00.choi@samsung.com>
-L:	linux-pm@vger.kernel.org
-L:	linux-samsung-soc@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
-S:	Maintained
-F:	drivers/devfreq/exynos-bus.c
-F:	Documentation/devicetree/bindings/devfreq/exynos-bus.txt
-
 DEVICE NUMBER REGISTRY
 M:	Torben Mathiasen <device@lanana.org>
 W:	http://lanana.org/docs/device-list/index.html
@@ -4186,20 +4172,6 @@ F:	include/linux/*fence.h
 F:	Documentation/driver-api/dma-buf.rst
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
-SYNC FILE FRAMEWORK
-M:	Sumit Semwal <sumit.semwal@linaro.org>
-R:	Gustavo Padovan <gustavo@padovan.org>
-S:	Maintained
-L:	linux-media@vger.kernel.org
-L:	dri-devel@lists.freedesktop.org
-F:	drivers/dma-buf/sync_*
-F:	drivers/dma-buf/dma-fence*
-F:	drivers/dma-buf/sw_sync.c
-F:	include/linux/sync_file.h
-F:	include/uapi/linux/sync_file.h
-F:	Documentation/sync_file.txt
-T:	git git://anongit.freedesktop.org/drm/drm-misc
-
 DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
 M:	Vinod Koul <vinod.koul@intel.com>
 L:	dmaengine@vger.kernel.org
@@ -4292,6 +4264,14 @@ F:	include/linux/debugfs.h
 F:	include/linux/kobj*
 F:	lib/kobj*
 
+DRIVERS FOR ADAPTIVE VOLTAGE SCALING (AVS)
+M:	Kevin Hilman <khilman@kernel.org>
+M:	Nishanth Menon <nm@ti.com>
+S:	Maintained
+F:	drivers/power/avs/
+F:	include/linux/power/smartreflex.h
+L:	linux-pm@vger.kernel.org
+
 DRM DRIVERS
 M:	David Airlie <airlied@linux.ie>
 L:	dri-devel@lists.freedesktop.org
@@ -5086,25 +5066,53 @@ M:	David Woodhouse <dwmw2@infradead.org>
 L:	linux-embedded@vger.kernel.org
 S:	Maintained
 
-EMULEX/BROADCOM LPFC FC/FCOE SCSI DRIVER
-M:	James Smart <james.smart@broadcom.com>
-M:	Dick Kennedy <dick.kennedy@broadcom.com>
+Emulex 10Gbps iSCSI - OneConnect DRIVER
+M:	Subbu Seetharaman <subbu.seetharaman@broadcom.com>
+M:	Ketan Mukadam <ketan.mukadam@broadcom.com>
+M:	Jitendra Bhivare <jitendra.bhivare@broadcom.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.broadcom.com
 S:	Supported
-F:	drivers/scsi/lpfc/
-
-ENE CB710 FLASH CARD READER DRIVER
-M:	Michał Mirosław <mirq-linux@rere.qmqm.pl>
-S:	Maintained
-F:	drivers/misc/cb710/
-F:	drivers/mmc/host/cb710-mmc.*
-F:	include/linux/cb710.h
+F:	drivers/scsi/be2iscsi/
 
-ENE KB2426 (ENE0100/ENE020XX) INFRARED RECEIVER
-M:	Maxim Levitsky <maximlevitsky@gmail.com>
-S:	Maintained
-F:	drivers/media/rc/ene_ir.*
+Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER (be2net)
+M:	Sathya Perla <sathya.perla@broadcom.com>
+M:	Ajit Khaparde <ajit.khaparde@broadcom.com>
+M:	Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
+M:	Somnath Kotur <somnath.kotur@broadcom.com>
+L:	netdev@vger.kernel.org
+W:	http://www.emulex.com
+S:	Supported
+F:	drivers/net/ethernet/emulex/benet/
+
+EMULEX ONECONNECT ROCE DRIVER
+M:	Selvin Xavier <selvin.xavier@broadcom.com>
+M:	Devesh Sharma <devesh.sharma@broadcom.com>
+L:	linux-rdma@vger.kernel.org
+W:	http://www.broadcom.com
+S:	Odd Fixes
+F:	drivers/infiniband/hw/ocrdma/
+F:	include/uapi/rdma/ocrdma-abi.h
+
+EMULEX/BROADCOM LPFC FC/FCOE SCSI DRIVER
+M:	James Smart <james.smart@broadcom.com>
+M:	Dick Kennedy <dick.kennedy@broadcom.com>
+L:	linux-scsi@vger.kernel.org
+W:	http://www.broadcom.com
+S:	Supported
+F:	drivers/scsi/lpfc/
+
+ENE CB710 FLASH CARD READER DRIVER
+M:	Michał Mirosław <mirq-linux@rere.qmqm.pl>
+S:	Maintained
+F:	drivers/misc/cb710/
+F:	drivers/mmc/host/cb710-mmc.*
+F:	include/linux/cb710.h
+
+ENE KB2426 (ENE0100/ENE020XX) INFRARED RECEIVER
+M:	Maxim Levitsky <maximlevitsky@gmail.com>
+S:	Maintained
+F:	drivers/media/rc/ene_ir.*
 
 EPSON S1D13XXX FRAMEBUFFER DRIVER
 M:	Kristoffer Ericson <kristoffer.ericson@gmail.com>
@@ -5201,6 +5209,19 @@ S:	Supported
 F:	arch/arc/plat-eznps
 F:	arch/arc/boot/dts/eznps.dts
 
+F2FS FILE SYSTEM
+M:	Jaegeuk Kim <jaegeuk@kernel.org>
+M:	Chao Yu <yuchao0@huawei.com>
+L:	linux-f2fs-devel@lists.sourceforge.net
+W:	https://f2fs.wiki.kernel.org/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
+S:	Maintained
+F:	Documentation/filesystems/f2fs.txt
+F:	Documentation/ABI/testing/sysfs-fs-f2fs
+F:	fs/f2fs/
+F:	include/linux/f2fs_fs.h
+F:	include/trace/events/f2fs.h
+
 F71805F HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-hwmon@vger.kernel.org
@@ -5208,23 +5229,6 @@ S:	Maintained
 F:	Documentation/hwmon/f71805f
 F:	drivers/hwmon/f71805f.c
 
-FC0011 TUNER DRIVER
-M:	Michael Buesch <m@bues.ch>
-L:	linux-media@vger.kernel.org
-S:	Maintained
-F:	drivers/media/tuners/fc0011.h
-F:	drivers/media/tuners/fc0011.c
-
-FC2580 MEDIA DRIVER
-M:	Antti Palosaari <crope@iki.fi>
-L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-W:	http://palosaari.fi/linux/
-Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/anttip/media_tree.git
-S:	Maintained
-F:	drivers/media/tuners/fc2580*
-
 FANOTIFY
 M:	Eric Paris <eparis@redhat.com>
 S:	Maintained
@@ -5249,6 +5253,23 @@ M:	Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
 S:	Maintained
 F:	drivers/staging/fbtft/
 
+FC0011 TUNER DRIVER
+M:	Michael Buesch <m@bues.ch>
+L:	linux-media@vger.kernel.org
+S:	Maintained
+F:	drivers/media/tuners/fc0011.h
+F:	drivers/media/tuners/fc0011.c
+
+FC2580 MEDIA DRIVER
+M:	Antti Palosaari <crope@iki.fi>
+L:	linux-media@vger.kernel.org
+W:	https://linuxtv.org
+W:	http://palosaari.fi/linux/
+Q:	http://patchwork.linuxtv.org/project/linux-media/list/
+T:	git git://linuxtv.org/anttip/media_tree.git
+S:	Maintained
+F:	drivers/media/tuners/fc2580*
+
 FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
 M:	Johannes Thumshirn <jth@kernel.org>
 L:	fcoe-devel@open-fcoe.org
@@ -5570,19 +5591,6 @@ S:	Supported
 F:	fs/crypto/
 F:	include/linux/fscrypt*.h
 
-F2FS FILE SYSTEM
-M:	Jaegeuk Kim <jaegeuk@kernel.org>
-M:	Chao Yu <yuchao0@huawei.com>
-L:	linux-f2fs-devel@lists.sourceforge.net
-W:	https://f2fs.wiki.kernel.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
-S:	Maintained
-F:	Documentation/filesystems/f2fs.txt
-F:	Documentation/ABI/testing/sysfs-fs-f2fs
-F:	fs/f2fs/
-F:	include/linux/f2fs_fs.h
-F:	include/trace/events/f2fs.h
-
 FUJITSU FR-V (FRV) PORT
 S:	Orphan
 F:	arch/frv/
@@ -5656,6 +5664,12 @@ S:	Maintained
 F:	kernel/gcov/
 F:	Documentation/dev-tools/gcov.rst
 
+GDB KERNEL DEBUGGING HELPER SCRIPTS
+M:	Jan Kiszka <jan.kiszka@siemens.com>
+M:	Kieran Bingham <kieran@bingham.xyz>
+S:	Supported
+F:	scripts/gdb/
+
 GDT SCSI DISK ARRAY CONTROLLER DRIVER
 M:	Achim Leubner <achim_leubner@adaptec.com>
 L:	linux-scsi@vger.kernel.org
@@ -5663,12 +5677,6 @@ W:	http://www.icp-vortex.com/
 S:	Supported
 F:	drivers/scsi/gdt*
 
-GDB KERNEL DEBUGGING HELPER SCRIPTS
-M:	Jan Kiszka <jan.kiszka@siemens.com>
-M:	Kieran Bingham <kieran@bingham.xyz>
-S:	Supported
-F:	scripts/gdb/
-
 GEMTEK FM RADIO RECEIVER DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
@@ -5735,17 +5743,17 @@ L:	kvm@vger.kernel.org
 S:	Supported
 F:	drivers/uio/uio_pci_generic.c
 
-GET_MAINTAINER SCRIPT
-M:	Joe Perches <joe@perches.com>
-S:	Maintained
-F:	scripts/get_maintainer.pl
-
 GENWQE (IBM Generic Workqueue Card)
 M:	Frank Haverkamp <haver@linux.vnet.ibm.com>
 M:	Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
 S:	Supported
 F:	drivers/misc/genwqe/
 
+GET_MAINTAINER SCRIPT
+M:	Joe Perches <joe@perches.com>
+S:	Maintained
+F:	scripts/get_maintainer.pl
+
 GFS2 FILE SYSTEM
 M:	Steven Whitehouse <swhiteho@redhat.com>
 M:	Bob Peterson <rpeterso@redhat.com>
@@ -5966,13 +5974,6 @@ L:	linux-efi@vger.kernel.org
 S:	Maintained
 F:	block/partitions/efi.*
 
-STK1160 USB VIDEO CAPTURE DRIVER
-M:	Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Maintained
-F:	drivers/media/usb/stk1160/
-
 H8/300 ARCHITECTURE
 M:	Yoshinori Sato <ysato@users.sourceforge.jp>
 L:	uclinux-h8-devel@lists.sourceforge.jp (moderated for non-subscribers)
@@ -5984,33 +5985,6 @@ F:	drivers/clocksource/h8300_*.c
 F:	drivers/clk/h8300/
 F:	drivers/irqchip/irq-renesas-h8*.c
 
-HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER
-M:	Frank Seidel <frank@f-seidel.de>
-L:	platform-driver-x86@vger.kernel.org
-W:	http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
-S:	Maintained
-F:	drivers/platform/x86/hdaps.c
-
-HDPVR USB VIDEO ENCODER DRIVER
-M:	Hans Verkuil <hverkuil@xs4all.nl>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-W:	https://linuxtv.org
-S:	Odd Fixes
-F:	drivers/media/usb/hdpvr/
-
-HWPOISON MEMORY FAILURE HANDLING
-M:	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
-L:	linux-mm@kvack.org
-S:	Maintained
-F:	mm/memory-failure.c
-F:	mm/hwpoison-inject.c
-
-HYPERVISOR VIRTUAL CONSOLE DRIVER
-L:	linuxppc-dev@lists.ozlabs.org
-S:	Odd Fixes
-F:	drivers/tty/hvc/
-
 HACKRF MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
@@ -6021,6 +5995,13 @@ T:	git git://linuxtv.org/anttip/media_tree.git
 S:	Maintained
 F:	drivers/media/usb/hackrf/
 
+HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER
+M:	Frank Seidel <frank@f-seidel.de>
+L:	platform-driver-x86@vger.kernel.org
+W:	http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
+S:	Maintained
+F:	drivers/platform/x86/hdaps.c
+
 HARDWARE MONITORING
 M:	Jean Delvare <jdelvare@suse.com>
 M:	Guenter Roeck <linux@roeck-us.net>
@@ -6059,6 +6040,14 @@ L:	linux-parisc@vger.kernel.org
 S:	Maintained
 F:	sound/parisc/harmony.*
 
+HDPVR USB VIDEO ENCODER DRIVER
+M:	Hans Verkuil <hverkuil@xs4all.nl>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+W:	https://linuxtv.org
+S:	Odd Fixes
+F:	drivers/media/usb/hdpvr/
+
 HEWLETT PACKARD ENTERPRISE ILO NMI WATCHDOG DRIVER
 M:	Jimmy Vance <jimmy.vance@hpe.com>
 S:	Supported
@@ -6085,13 +6074,6 @@ F:	drivers/block/cciss*
 F:	include/linux/cciss_ioctl.h
 F:	include/uapi/linux/cciss_ioctl.h
 
-OPA-VNIC DRIVER
-M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
-M:	Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
-L:	linux-rdma@vger.kernel.org
-S:	Supported
-F:	drivers/infiniband/ulp/opa_vnic
-
 HFI1 DRIVER
 M:	Mike Marciniszyn <mike.marciniszyn@intel.com>
 M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
@@ -6269,6 +6251,12 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	net/hsr/
 
+HT16K33 LED CONTROLLER DRIVER
+M:	Robin van der Gracht <robin@protonic.nl>
+S:	Maintained
+F:	drivers/auxdisplay/ht16k33.c
+F:	Documentation/devicetree/bindings/display/ht16k33.txt
+
 HTCPEN TOUCHSCREEN DRIVER
 M:	Pau Oliva Fora <pof@eslack.org>
 L:	linux-input@vger.kernel.org
@@ -6288,6 +6276,13 @@ W:	https://linuxtv.org
 S:	Supported
 F:	drivers/media/platform/sti/hva
 
+HWPOISON MEMORY FAILURE HANDLING
+M:	Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	mm/memory-failure.c
+F:	mm/hwpoison-inject.c
+
 Hyper-V CORE AND DRIVERS
 M:	"K. Y. Srinivasan" <kys@microsoft.com>
 M:	Haiyang Zhang <haiyangz@microsoft.com>
@@ -6310,6 +6305,11 @@ F:	include/linux/hyperv.h
 F:	tools/hv/
 F:	Documentation/ABI/stable/sysfs-bus-vmbus
 
+HYPERVISOR VIRTUAL CONSOLE DRIVER
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Odd Fixes
+F:	drivers/tty/hvc/
+
 I2C MUXES
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-i2c@vger.kernel.org
@@ -6518,11 +6518,6 @@ S:	Maintained
 F:	drivers/mfd/lpc_ich.c
 F:	drivers/gpio/gpio-ich.c
 
-IDT VersaClock 5 CLOCK DRIVER
-M:	Marek Vasut <marek.vasut@gmail.com>
-S:	Maintained
-F:	drivers/clk/clk-versaclock5.c
-
 IDE SUBSYSTEM
 M:	"David S. Miller" <davem@davemloft.net>
 L:	linux-ide@vger.kernel.org
@@ -6533,6 +6528,13 @@ F:	Documentation/ide/
 F:	drivers/ide/
 F:	include/linux/ide.h
 
+IDE/ATAPI DRIVERS
+M:	Borislav Petkov <bp@alien8.de>
+L:	linux-ide@vger.kernel.org
+S:	Maintained
+F:	Documentation/cdrom/ide-cd
+F:	drivers/ide/ide-cd*
+
 IDEAPAD LAPTOP EXTRAS DRIVER
 M:	Ike Panhc <ike.pan@canonical.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -6547,12 +6549,10 @@ W:	https://github.com/o2genum/ideapad-slidebar
 S:	Maintained
 F:	drivers/input/misc/ideapad_slidebar.c
 
-IDE/ATAPI DRIVERS
-M:	Borislav Petkov <bp@alien8.de>
-L:	linux-ide@vger.kernel.org
+IDT VersaClock 5 CLOCK DRIVER
+M:	Marek Vasut <marek.vasut@gmail.com>
 S:	Maintained
-F:	Documentation/cdrom/ide-cd
-F:	drivers/ide/ide-cd*
+F:	drivers/clk/clk-versaclock5.c
 
 IEEE 802.15.4 SUBSYSTEM
 M:	Alexander Aring <alex.aring@gmail.com>
@@ -6642,6 +6642,16 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
 F:	drivers/auxdisplay/img-ascii-lcd.c
 
+IMGTEC IR DECODER DRIVER
+M:	James Hogan <james.hogan@imgtec.com>
+S:	Maintained
+F:	drivers/media/rc/img-ir/
+
+IMS TWINTURBO FRAMEBUFFER DRIVER
+L:	linux-fbdev@vger.kernel.org
+S:	Orphan
+F:	drivers/video/fbdev/imsttfb.c
+
 INA209 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
 L:	linux-hwmon@vger.kernel.org
@@ -6667,37 +6677,6 @@ W:	http://industrypack.sourceforge.net
 S:	Maintained
 F:	drivers/ipack/
 
-INGENIC JZ4780 DMA Driver
-M:	Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
-S:	Maintained
-F:	drivers/dma/dma-jz4780.c
-
-INGENIC JZ4780 NAND DRIVER
-M:	Harvey Hunt <harveyhuntnexus@gmail.com>
-L:	linux-mtd@lists.infradead.org
-S:	Maintained
-F:	drivers/mtd/nand/jz4780_*
-
-INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
-M:	Mimi Zohar <zohar@linux.vnet.ibm.com>
-M:	Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
-L:	linux-ima-devel@lists.sourceforge.net
-L:	linux-ima-user@lists.sourceforge.net
-L:	linux-security-module@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
-S:	Supported
-F:	security/integrity/ima/
-
-IMGTEC IR DECODER DRIVER
-M:	James Hogan <james.hogan@imgtec.com>
-S:	Maintained
-F:	drivers/media/rc/img-ir/
-
-IMS TWINTURBO FRAMEBUFFER DRIVER
-L:	linux-fbdev@vger.kernel.org
-S:	Orphan
-F:	drivers/video/fbdev/imsttfb.c
-
 INFINIBAND SUBSYSTEM
 M:	Doug Ledford <dledford@redhat.com>
 M:	Sean Hefty <sean.hefty@intel.com>
@@ -6714,7 +6693,18 @@ F:	include/uapi/linux/if_infiniband.h
 F:	include/uapi/rdma/
 F:	include/rdma/
 
-INOTIFY
+INGENIC JZ4780 DMA Driver
+M:	Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
+S:	Maintained
+F:	drivers/dma/dma-jz4780.c
+
+INGENIC JZ4780 NAND DRIVER
+M:	Harvey Hunt <harveyhuntnexus@gmail.com>
+L:	linux-mtd@lists.infradead.org
+S:	Maintained
+F:	drivers/mtd/nand/jz4780_*
+
+INOTIFY
 M:	John McCutchan <john@johnmccutchan.com>
 M:	Robert Love <rlove@rlove.org>
 M:	Eric Paris <eparis@parisplace.org>
@@ -6752,6 +6742,16 @@ F:	drivers/crypto/inside-secure/
 S:	Maintained
 L:	linux-crypto@vger.kernel.org
 
+INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
+M:	Mimi Zohar <zohar@linux.vnet.ibm.com>
+M:	Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
+L:	linux-ima-devel@lists.sourceforge.net
+L:	linux-ima-user@lists.sourceforge.net
+L:	linux-security-module@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
+S:	Supported
+F:	security/integrity/ima/
+
 INTEL ASoC BDW/HSW DRIVERS
 M:	Jie Yang <yang.jie@linux.intel.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -7048,13 +7048,6 @@ F:	drivers/char/ipmi/
 F:	include/linux/ipmi*
 F:	include/uapi/linux/ipmi*
 
-QCOM AUDIO (ASoC) DRIVERS
-M:	Patrick Lai <plai@codeaurora.org>
-M:	Banajit Goswami <bgoswami@codeaurora.org>
-L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
-S:	Supported
-F:	sound/soc/qcom/
-
 IPS SCSI RAID DRIVER
 M:	Adaptec OEM Raid Solutions <aacraid@adaptec.com>
 L:	linux-scsi@vger.kernel.org
@@ -7108,6 +7101,15 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
 F:	kernel/irq/
 
+IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
+M:	Marc Zyngier <marc.zyngier@arm.com>
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
+F:	Documentation/IRQ-domain.txt
+F:	include/linux/irqdomain.h
+F:	kernel/irq/irqdomain.c
+F:	kernel/irq/msi.c
+
 IRQCHIP DRIVERS
 M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Jason Cooper <jason@lakedaemon.net>
@@ -7119,15 +7121,6 @@ T:	git git://git.infradead.org/users/jcooper/linux.git irqchip/core
 F:	Documentation/devicetree/bindings/interrupt-controller/
 F:	drivers/irqchip/
 
-IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
-M:	Marc Zyngier <marc.zyngier@arm.com>
-S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
-F:	Documentation/IRQ-domain.txt
-F:	include/linux/irqdomain.h
-F:	kernel/irq/irqdomain.c
-F:	kernel/irq/msi.c
-
 ISA
 M:	William Breathitt Gray <vilhelm.gray@gmail.com>
 S:	Maintained
@@ -7135,13 +7128,6 @@ F:	Documentation/isa.txt
 F:	drivers/base/isa.c
 F:	include/linux/isa.h
 
-ISAPNP
-M:	Jaroslav Kysela <perex@perex.cz>
-S:	Maintained
-F:	Documentation/isapnp.txt
-F:	drivers/pnp/isapnp/
-F:	include/linux/isapnp.h
-
 ISA RADIO MODULE
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
@@ -7150,11 +7136,12 @@ W:	https://linuxtv.org
 S:	Maintained
 F:	drivers/media/radio/radio-isa*
 
-iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER
-M:	Peter Jones <pjones@redhat.com>
-M:	Konrad Rzeszutek Wilk <konrad@kernel.org>
+ISAPNP
+M:	Jaroslav Kysela <perex@perex.cz>
 S:	Maintained
-F:	drivers/firmware/iscsi_ibft*
+F:	Documentation/isapnp.txt
+F:	drivers/pnp/isapnp/
+F:	include/linux/isapnp.h
 
 ISCSI
 M:	Lee Duncan <lduncan@suse.com>
@@ -7165,6 +7152,12 @@ S:	Maintained
 F:	drivers/scsi/*iscsi*
 F:	include/scsi/*iscsi*
 
+iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER
+M:	Peter Jones <pjones@redhat.com>
+M:	Konrad Rzeszutek Wilk <konrad@kernel.org>
+S:	Maintained
+F:	drivers/firmware/iscsi_ibft*
+
 ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
 M:	Or Gerlitz <ogerlitz@mellanox.com>
 M:	Sagi Grimberg <sagi@grimberg.me>
@@ -7860,6 +7853,11 @@ M:	Chris Wright <chrisw@sous-sol.org>
 L:	linux-security-module@vger.kernel.org
 S:	Supported
 
+LINUX KERNEL DUMP TEST MODULE (LKDTM)
+M:	Kees Cook <keescook@chromium.org>
+S:	Maintained
+F:	drivers/misc/lkdtm*
+
 LIS3LV02D ACCELEROMETER DRIVER
 M:	Eric Piel <eric.piel@tremplin-utc.net>
 S:	Maintained
@@ -7884,11 +7882,6 @@ F:	samples/livepatch/
 L:	live-patching@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/livepatching.git
 
-LINUX KERNEL DUMP TEST MODULE (LKDTM)
-M:	Kees Cook <keescook@chromium.org>
-S:	Maintained
-F:	drivers/misc/lkdtm*
-
 LLC (802.2)
 L:	netdev@vger.kernel.org
 S:	Odd fixes
@@ -7941,6 +7934,13 @@ Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 S:	Maintained
 F:	drivers/media/usb/dvb-usb-v2/lmedm04*
 
+LOADPIN SECURITY MODULE
+M:	Kees Cook <keescook@chromium.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git lsm/loadpin
+S:	Supported
+F:	security/loadpin/
+F:	Documentation/admin-guide/LSM/LoadPin.rst
+
 LOCKING PRIMITIVES
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
@@ -8492,6 +8492,13 @@ F:	Documentation/scsi/megaraid.txt
 F:	drivers/scsi/megaraid.*
 F:	drivers/scsi/megaraid/
 
+MELEXIS MLX90614 DRIVER
+M:	Crt Mori <cmo@melexis.com>
+L:	linux-iio@vger.kernel.org
+W:	http://www.melexis.com
+S:	Supported
+F:	drivers/iio/temperature/mlx90614.c
+
 MELFAS MIP4 TOUCHSCREEN DRIVER
 M:	Sangwon Jee <jeesw@melfas.com>
 W:	http://www.melfas.com
@@ -8581,14 +8588,48 @@ S:	Supported
 F:	drivers/platform/x86/mlxcpld-hotplug.c
 F:	include/linux/platform_data/mlxcpld-hotplug.h
 
-SOFT-ROCE DRIVER (rxe)
-M:	Moni Shoua <monis@mellanox.com>
+MELLANOX MLX4 core VPI driver
+M:	Tariq Toukan <tariqt@mellanox.com>
+L:	netdev@vger.kernel.org
 L:	linux-rdma@vger.kernel.org
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
 S:	Supported
-W:	https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+F:	drivers/net/ethernet/mellanox/mlx4/
+F:	include/linux/mlx4/
+
+MELLANOX MLX4 IB driver
+M:	Yishai Hadas <yishaih@mellanox.com>
+L:	linux-rdma@vger.kernel.org
+W:	http://www.mellanox.com
 Q:	http://patchwork.kernel.org/project/linux-rdma/list/
-F:	drivers/infiniband/sw/rxe/
-F:	include/uapi/rdma/rdma_user_rxe.h
+S:	Supported
+F:	drivers/infiniband/hw/mlx4/
+F:	include/linux/mlx4/
+F:	include/uapi/rdma/mlx4-abi.h
+
+MELLANOX MLX5 core VPI driver
+M:	Saeed Mahameed <saeedm@mellanox.com>
+M:	Matan Barak <matanb@mellanox.com>
+M:	Leon Romanovsky <leonro@mellanox.com>
+L:	netdev@vger.kernel.org
+L:	linux-rdma@vger.kernel.org
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+S:	Supported
+F:	drivers/net/ethernet/mellanox/mlx5/core/
+F:	include/linux/mlx5/
+
+MELLANOX MLX5 IB driver
+M:	Matan Barak <matanb@mellanox.com>
+M:	Leon Romanovsky <leonro@mellanox.com>
+L:	linux-rdma@vger.kernel.org
+W:	http://www.mellanox.com
+Q:	http://patchwork.kernel.org/project/linux-rdma/list/
+S:	Supported
+F:	drivers/infiniband/hw/mlx5/
+F:	include/linux/mlx5/
+F:	include/uapi/rdma/mlx5-abi.h
 
 MEMBARRIER SUPPORT
 M:	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
@@ -8710,6 +8751,18 @@ S:	Maintained
 F:	drivers/usb/misc/usb251xb.c
 F:	Documentation/devicetree/bindings/usb/usb251xb.txt
 
+MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
+M:	Don Brace <don.brace@microsemi.com>
+L:	esc.storagedev@microsemi.com
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/smartpqi/smartpqi*.[ch]
+F:	drivers/scsi/smartpqi/Kconfig
+F:	drivers/scsi/smartpqi/Makefile
+F:	include/linux/cciss*.h
+F:	include/uapi/linux/cciss*.h
+F:	Documentation/scsi/smartpqi.txt
+
 MICROSOFT SURFACE PRO 3 BUTTON DRIVER
 M:	Chen Yu <yu.c.chen@intel.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -8765,67 +8818,15 @@ W:	https://linuxtv.org
 S:	Odd Fixes
 F:	drivers/media/radio/radio-miropcm20*
 
-MELLANOX MLX4 core VPI driver
-M:	Tariq Toukan <tariqt@mellanox.com>
-L:	netdev@vger.kernel.org
-L:	linux-rdma@vger.kernel.org
-W:	http://www.mellanox.com
-Q:	http://patchwork.ozlabs.org/project/netdev/list/
-S:	Supported
-F:	drivers/net/ethernet/mellanox/mlx4/
-F:	include/linux/mlx4/
-
-MELLANOX MLX4 IB driver
-M:	Yishai Hadas <yishaih@mellanox.com>
-L:	linux-rdma@vger.kernel.org
-W:	http://www.mellanox.com
-Q:	http://patchwork.kernel.org/project/linux-rdma/list/
-S:	Supported
-F:	drivers/infiniband/hw/mlx4/
-F:	include/linux/mlx4/
-F:	include/uapi/rdma/mlx4-abi.h
-
-MELLANOX MLX5 core VPI driver
-M:	Saeed Mahameed <saeedm@mellanox.com>
-M:	Matan Barak <matanb@mellanox.com>
-M:	Leon Romanovsky <leonro@mellanox.com>
-L:	netdev@vger.kernel.org
-L:	linux-rdma@vger.kernel.org
-W:	http://www.mellanox.com
-Q:	http://patchwork.ozlabs.org/project/netdev/list/
-S:	Supported
-F:	drivers/net/ethernet/mellanox/mlx5/core/
-F:	include/linux/mlx5/
-
-MELLANOX MLX5 IB driver
-M:	Matan Barak <matanb@mellanox.com>
-M:	Leon Romanovsky <leonro@mellanox.com>
-L:	linux-rdma@vger.kernel.org
-W:	http://www.mellanox.com
-Q:	http://patchwork.kernel.org/project/linux-rdma/list/
-S:	Supported
-F:	drivers/infiniband/hw/mlx5/
-F:	include/linux/mlx5/
-F:	include/uapi/rdma/mlx5-abi.h
-
-MELEXIS MLX90614 DRIVER
-M:	Crt Mori <cmo@melexis.com>
-L:	linux-iio@vger.kernel.org
-W:	http://www.melexis.com
-S:	Supported
-F:	drivers/iio/temperature/mlx90614.c
-
-MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
-M:	Don Brace <don.brace@microsemi.com>
-L:	esc.storagedev@microsemi.com
-L:	linux-scsi@vger.kernel.org
-S:	Supported
-F:	drivers/scsi/smartpqi/smartpqi*.[ch]
-F:	drivers/scsi/smartpqi/Kconfig
-F:	drivers/scsi/smartpqi/Makefile
-F:	include/linux/cciss*.h
-F:	include/uapi/linux/cciss*.h
-F:	Documentation/scsi/smartpqi.txt
+MMP SUPPORT
+M:	Eric Miao <eric.y.miao@gmail.com>
+M:	Haojian Zhuang <haojian.zhuang@gmail.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+T:	git git://github.com/hzhuang1/linux.git
+T:	git git://git.linaro.org/people/ycmiao/pxa-linux.git
+S:	Maintained
+F:	arch/arm/boot/dts/mmp*
+F:	arch/arm/mach-mmp/
 
 MN88472 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
@@ -9036,10 +9037,6 @@ S:	Maintained
 F:	drivers/mtd/nand/
 F:	include/linux/mtd/nand*.h
 
-NATSEMI ETHERNET DRIVER (DP8381x)
-S:	Orphan
-F:	drivers/net/ethernet/natsemi/natsemi.c
-
 NATIVE INSTRUMENTS USB SOUND INTERFACE DRIVER
 M:	Daniel Mack <zonque@gmail.com>
 S:	Maintained
@@ -9047,6 +9044,10 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 W:	http://www.native-instruments.com
 F:	sound/usb/caiaq/
 
+NATSEMI ETHERNET DRIVER (DP8381x)
+S:	Orphan
+F:	drivers/net/ethernet/natsemi/natsemi.c
+
 NCP FILESYSTEM
 M:	Petr Vandrovec <petr@vandrovec.name>
 S:	Odd Fixes
@@ -9754,12 +9755,17 @@ F:	drivers/scsi/osst.*
 F:	drivers/scsi/osst_*.h
 F:	drivers/scsi/st.h
 
-OPENCORES I2C BUS DRIVER
-M:	Peter Korsgaard <jacmet@sunsite.dk>
-L:	linux-i2c@vger.kernel.org
+OP-TEE DRIVER
+M:	Jens Wiklander <jens.wiklander@linaro.org>
 S:	Maintained
-F:	Documentation/i2c/busses/i2c-ocores
-F:	drivers/i2c/busses/i2c-ocores.c
+F:	drivers/tee/optee/
+
+OPA-VNIC DRIVER
+M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
+M:	Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/ulp/opa_vnic
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:	Rob Herring <robh+dt@kernel.org>
@@ -9793,6 +9799,13 @@ F:	Documentation/devicetree/overlay-notes.txt
 F:	drivers/of/overlay.c
 F:	drivers/of/resolver.c
 
+OPENCORES I2C BUS DRIVER
+M:	Peter Korsgaard <jacmet@sunsite.dk>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	Documentation/i2c/busses/i2c-ocores
+F:	drivers/i2c/busses/i2c-ocores.c
+
 OPENRISC ARCHITECTURE
 M:	Jonas Bonn <jonas@southpole.se>
 M:	Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
@@ -9840,11 +9853,6 @@ F:	arch/*/oprofile/
 F:	drivers/oprofile/
 F:	include/linux/oprofile.h
 
-OP-TEE DRIVER
-M:	Jens Wiklander <jens.wiklander@linaro.org>
-S:	Maintained
-F:	drivers/tee/optee/
-
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
 M:	Mark Fasheh <mfasheh@versity.com>
 M:	Joel Becker <jlbec@evilplan.org>
@@ -9855,8 +9863,16 @@ F:	Documentation/filesystems/ocfs2.txt
 F:	Documentation/filesystems/dlmfs.txt
 F:	fs/ocfs2/
 
-ORINOCO DRIVER
-L:	linux-wireless@vger.kernel.org
+ORANGEFS FILESYSTEM
+M:	Mike Marshall <hubcap@omnibond.com>
+L:	pvfs2-developers@beowulf-underground.org (subscribers-only)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git
+S:	Supported
+F:	fs/orangefs/
+F:	Documentation/filesystems/orangefs.txt
+
+ORINOCO DRIVER
+L:	linux-wireless@vger.kernel.org
 W:	http://wireless.kernel.org/en/users/Drivers/orinoco
 W:	http://www.nongnu.org/orinoco/
 S:	Orphan
@@ -9869,6 +9885,16 @@ F:	drivers/scsi/osd/
 F:	include/scsi/osd_*
 F:	fs/exofs/
 
+OV2659 OMNIVISION SENSOR DRIVER
+M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
+L:	linux-media@vger.kernel.org
+W:	https://linuxtv.org
+Q:	http://patchwork.linuxtv.org/project/linux-media/list/
+T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
+S:	Maintained
+F:	drivers/media/i2c/ov2659.c
+F:	include/media/i2c/ov2659.h
+
 OVERLAY FILESYSTEM
 M:	Miklos Szeredi <miklos@szeredi.hu>
 L:	linux-unionfs@vger.kernel.org
@@ -9877,14 +9903,6 @@ S:	Supported
 F:	fs/overlayfs/
 F:	Documentation/filesystems/overlayfs.txt
 
-ORANGEFS FILESYSTEM
-M:	Mike Marshall <hubcap@omnibond.com>
-L:	pvfs2-developers@beowulf-underground.org (subscribers-only)
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git
-S:	Supported
-F:	fs/orangefs/
-F:	Documentation/filesystems/orangefs.txt
-
 P54 WIRELESS DRIVER
 M:	Christian Lamparter <chunkeey@googlemail.com>
 L:	linux-wireless@vger.kernel.org
@@ -10496,6 +10514,11 @@ L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/pm8001/
 
+PNP SUPPORT
+M:	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+S:	Maintained
+F:	drivers/pnp/
+
 POSIX CLOCKS and TIMERS
 M:	Thomas Gleixner <tglx@linutronix.de>
 L:	linux-kernel@vger.kernel.org
@@ -10541,11 +10564,6 @@ L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	drivers/char/powernv-op-panel.c
 
-PNP SUPPORT
-M:	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
-S:	Maintained
-F:	drivers/pnp/
-
 PPP PROTOCOL DRIVERS AND COMPRESSORS
 M:	Paul Mackerras <paulus@samba.org>
 L:	linux-ppp@vger.kernel.org
@@ -10776,16 +10794,6 @@ L:	linux-mtd@lists.infradead.org
 S:	Maintained
 F:	drivers/mtd/nand/pxa3xx_nand.c
 
-MMP SUPPORT
-M:	Eric Miao <eric.y.miao@gmail.com>
-M:	Haojian Zhuang <haojian.zhuang@gmail.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-T:	git git://github.com/hzhuang1/linux.git
-T:	git git://git.linaro.org/people/ycmiao/pxa-linux.git
-S:	Maintained
-F:	arch/arm/boot/dts/mmp*
-F:	arch/arm/mach-mmp/
-
 PXA MMCI DRIVER
 S:	Orphan
 
@@ -10801,6 +10809,20 @@ L:	qat-linux@intel.com
 S:	Supported
 F:	drivers/crypto/qat/
 
+QCOM AUDIO (ASoC) DRIVERS
+M:	Patrick Lai <plai@codeaurora.org>
+M:	Banajit Goswami <bgoswami@codeaurora.org>
+L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
+S:	Supported
+F:	sound/soc/qcom/
+
+QEMU MACHINE EMULATOR AND VIRTUALIZER SUPPORT
+M:	Gabriel Somlo <somlo@cmu.edu>
+M:	"Michael S. Tsirkin" <mst@redhat.com>
+L:	qemu-devel@nongnu.org
+S:	Maintained
+F:	drivers/firmware/qemu_fw_cfg.c
+
 QIB DRIVER
 M:	Mike Marciniszyn <infinipath@intel.com>
 L:	linux-rdma@vger.kernel.org
@@ -10950,13 +10972,6 @@ T:	git git://github.com/KrasnikovEugene/wcn36xx.git
 S:	Supported
 F:	drivers/net/wireless/ath/wcn36xx/
 
-QEMU MACHINE EMULATOR AND VIRTUALIZER SUPPORT
-M:	Gabriel Somlo <somlo@cmu.edu>
-M:	"Michael S. Tsirkin" <mst@redhat.com>
-L:	qemu-devel@nongnu.org
-S:	Maintained
-F:	drivers/firmware/qemu_fw_cfg.c
-
 QUANTENNA QTNFMAC WIRELESS DRIVER
 M:   Igor Mitsyanko <imitsyanko@quantenna.com>
 M:   Avinash Patil <avinashp@quantenna.com>
@@ -10965,19 +10980,6 @@ L:   linux-wireless@vger.kernel.org
 S:   Maintained
 F:   drivers/net/wireless/quantenna
 
-RADOS BLOCK DEVICE (RBD)
-M:	Ilya Dryomov <idryomov@gmail.com>
-M:	Sage Weil <sage@redhat.com>
-M:	Alex Elder <elder@kernel.org>
-L:	ceph-devel@vger.kernel.org
-W:	http://ceph.com/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
-T:	git git://github.com/ceph/ceph-client.git
-S:	Supported
-F:	Documentation/ABI/testing/sysfs-bus-rbd
-F:	drivers/block/rbd.c
-F:	drivers/block/rbd_types.h
-
 RADEON FRAMEBUFFER DISPLAY DRIVER
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 L:	linux-fbdev@vger.kernel.org
@@ -11000,6 +11002,19 @@ S:	Maintained
 F:	drivers/media/radio/radio-shark2.c
 F:	drivers/media/radio/radio-tea5777.c
 
+RADOS BLOCK DEVICE (RBD)
+M:	Ilya Dryomov <idryomov@gmail.com>
+M:	Sage Weil <sage@redhat.com>
+M:	Alex Elder <elder@kernel.org>
+L:	ceph-devel@vger.kernel.org
+W:	http://ceph.com/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+T:	git git://github.com/ceph/ceph-client.git
+S:	Supported
+F:	Documentation/ABI/testing/sysfs-bus-rbd
+F:	drivers/block/rbd.c
+F:	drivers/block/rbd_types.h
+
 RAGE128 FRAMEBUFFER DISPLAY DRIVER
 M:	Paul Mackerras <paulus@samba.org>
 L:	linux-fbdev@vger.kernel.org
@@ -11079,6 +11094,12 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/rdc/r6040.c
 
+RDMAVT - RDMA verbs software
+M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/sw/rdmavt
+
 RDS - RELIABLE DATAGRAM SOCKETS
 M:	Santosh Shilimkar <santosh.shilimkar@oracle.com>
 L:	netdev@vger.kernel.org
@@ -11089,12 +11110,6 @@ S:	Supported
 F:	net/rds/
 F:	Documentation/networking/rds.txt
 
-RDMAVT - RDMA verbs software
-M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
-L:	linux-rdma@vger.kernel.org
-S:	Supported
-F:	drivers/infiniband/sw/rdmavt
-
 RDT - RESOURCE ALLOCATION
 M:	Fenghua Yu <fenghua.yu@intel.com>
 L:	linux-kernel@vger.kernel.org
@@ -11143,11 +11158,6 @@ S:	Maintained
 F:	sound/soc/codecs/rt*
 F:	include/sound/rt*.h
 
-REISERFS FILE SYSTEM
-L:	reiserfs-devel@vger.kernel.org
-S:	Supported
-F:	fs/reiserfs/
-
 REGISTER MAP ABSTRACTION
 M:	Mark Brown <broonie@kernel.org>
 L:	linux-kernel@vger.kernel.org
@@ -11157,6 +11167,11 @@ F:	Documentation/devicetree/bindings/regmap/
 F:	drivers/base/regmap/
 F:	include/linux/regmap.h
 
+REISERFS FILE SYSTEM
+L:	reiserfs-devel@vger.kernel.org
+S:	Supported
+F:	fs/reiserfs/
+
 REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
 M:	Ohad Ben-Cohen <ohad@wizery.com>
 M:	Bjorn Andersson <bjorn.andersson@linaro.org>
@@ -11611,126 +11626,6 @@ F:	drivers/phy/samsung/phy-s5pv210-usb2.c
 F:	drivers/phy/samsung/phy-samsung-usb2.c
 F:	drivers/phy/samsung/phy-samsung-usb2.h
 
-SERIAL DRIVERS
-M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:	linux-serial@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/serial/
-F:	drivers/tty/serial/
-
-SERIAL DEVICE BUS
-M:	Rob Herring <robh@kernel.org>
-L:	linux-serial@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/serial/slave-device.txt
-F:	drivers/tty/serdev/
-F:	include/linux/serdev.h
-
-SERIAL IR RECEIVER
-M:	Sean Young <sean@mess.org>
-L:	linux-media@vger.kernel.org
-S:	Maintained
-F:	drivers/media/rc/serial_ir.c
-
-STI CEC DRIVER
-M:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
-S:	Maintained
-F:	drivers/staging/media/st-cec/
-F:	Documentation/devicetree/bindings/media/stih-cec.txt
-
-SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
-M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
-L:	linux-s390@vger.kernel.org
-W:	http://www.ibm.com/developerworks/linux/linux390/
-S:	Supported
-F:	net/smc/
-
-SYNOPSYS DESIGNWARE DMAC DRIVER
-M:	Viresh Kumar <vireshk@kernel.org>
-M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
-S:	Maintained
-F:	include/linux/dma/dw.h
-F:	include/linux/platform_data/dma-dw.h
-F:	drivers/dma/dw/
-
-SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
-M:	Jie Deng <jiedeng@synopsys.com>
-L:	netdev@vger.kernel.org
-S:	Supported
-F:	drivers/net/ethernet/synopsys/
-
-SYNOPSYS DESIGNWARE I2C DRIVER
-M:	Jarkko Nikula <jarkko.nikula@linux.intel.com>
-R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
-R:	Mika Westerberg <mika.westerberg@linux.intel.com>
-L:	linux-i2c@vger.kernel.org
-S:	Maintained
-F:	drivers/i2c/busses/i2c-designware-*
-F:	include/linux/platform_data/i2c-designware.h
-
-SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
-M:	Jaehoon Chung <jh80.chung@samsung.com>
-L:	linux-mmc@vger.kernel.org
-S:	Maintained
-F:	drivers/mmc/host/dw_mmc*
-
-SYSTEM TRACE MODULE CLASS
-M:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
-S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ash/stm.git
-F:	Documentation/trace/stm.txt
-F:	drivers/hwtracing/stm/
-F:	include/linux/stm.h
-F:	include/uapi/linux/stm.h
-
-TEE SUBSYSTEM
-M:	Jens Wiklander <jens.wiklander@linaro.org>
-S:	Maintained
-F:	include/linux/tee_drv.h
-F:	include/uapi/linux/tee.h
-F:	drivers/tee/
-F:	Documentation/tee.txt
-
-THUNDERBOLT DRIVER
-M:	Andreas Noever <andreas.noever@gmail.com>
-M:	Michael Jamet <michael.jamet@intel.com>
-M:	Mika Westerberg <mika.westerberg@linux.intel.com>
-M:	Yehezkel Bernat <yehezkel.bernat@intel.com>
-S:	Maintained
-F:	drivers/thunderbolt/
-
-TI BQ27XXX POWER SUPPLY DRIVER
-R:	Andrew F. Davis <afd@ti.com>
-F:	include/linux/power/bq27xxx_battery.h
-F:	drivers/power/supply/bq27xxx_battery.c
-F:	drivers/power/supply/bq27xxx_battery_i2c.c
-
-TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
-M:	John Stultz <john.stultz@linaro.org>
-M:	Thomas Gleixner <tglx@linutronix.de>
-R:	Stephen Boyd <sboyd@codeaurora.org>
-L:	linux-kernel@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
-S:	Supported
-F:	include/linux/clocksource.h
-F:	include/linux/time.h
-F:	include/linux/timex.h
-F:	include/uapi/linux/time.h
-F:	include/uapi/linux/timex.h
-F:	kernel/time/clocksource.c
-F:	kernel/time/time*.c
-F:	kernel/time/alarmtimer.c
-F:	kernel/time/ntp.c
-F:	tools/testing/selftests/timers/
-
-TI TRF7970A NFC DRIVER
-M:	Mark Greer <mgreer@animalcreek.com>
-L:	linux-wireless@vger.kernel.org
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
-S:	Supported
-F:	drivers/nfc/trf7970a.c
-F:	Documentation/devicetree/bindings/net/nfc/trf7970a.txt
-
 SC1200 WDT DRIVER
 M:	Zwane Mwaikambo <zwanem@gmail.com>
 S:	Maintained
@@ -11759,16 +11654,6 @@ M:	Lubomir Rintel <lkundrak@v3.sk>
 S:	Supported
 F:	drivers/char/pcmcia/scr24x_cs.c
 
-SYSTEM CONTROL & POWER INTERFACE (SCPI) Message Protocol drivers
-M:	Sudeep Holla <sudeep.holla@arm.com>
-L:	linux-arm-kernel@lists.infradead.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/arm/arm,scpi.txt
-F:	drivers/clk/clk-scpi.c
-F:	drivers/cpufreq/scpi-cpufreq.c
-F:	drivers/firmware/arm_scpi.c
-F:	include/linux/scpi_protocol.h
-
 SCSI CDROM DRIVER
 M:	Jens Axboe <axboe@kernel.dk>
 L:	linux-scsi@vger.kernel.org
@@ -11853,14 +11738,6 @@ L:	sdricohcs-devel@lists.sourceforge.net (subscribers-only)
 S:	Maintained
 F:	drivers/mmc/host/sdricoh_cs.c
 
-SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
-M:	Adrian Hunter <adrian.hunter@intel.com>
-L:	linux-mmc@vger.kernel.org
-T:	git git://git.infradead.org/users/ahunter/linux-sdhci.git
-S:	Maintained
-F:	drivers/mmc/host/sdhci*
-F:	include/linux/mmc/sdhci*
-
 SECURE COMPUTING
 M:	Kees Cook <keescook@chromium.org>
 R:	Andy Lutomirski <luto@amacapital.net>
@@ -11876,7 +11753,15 @@ F:	Documentation/userspace-api/seccomp_filter.rst
 K:	\bsecure_computing
 K:	\bTIF_SECCOMP\b
 
-SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER
+SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
+M:	Adrian Hunter <adrian.hunter@intel.com>
+L:	linux-mmc@vger.kernel.org
+T:	git git://git.infradead.org/users/ahunter/linux-sdhci.git
+S:	Maintained
+F:	drivers/mmc/host/sdhci*
+F:	include/linux/mmc/sdhci*
+
+SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER
 M:	Al Cooper <alcooperx@gmail.com>
 L:	linux-mmc@vger.kernel.org
 L:	bcm-kernel-feedback-list@broadcom.com
@@ -11933,62 +11818,32 @@ F:	security/selinux/
 F:	scripts/selinux/
 F:	Documentation/admin-guide/LSM/SELinux.rst
 
-APPARMOR SECURITY MODULE
-M:	John Johansen <john.johansen@canonical.com>
-L:	apparmor@lists.ubuntu.com (subscribers-only, general discussion)
-W:	apparmor.wiki.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
-S:	Supported
-F:	security/apparmor/
-F:	Documentation/admin-guide/LSM/apparmor.rst
-
-LOADPIN SECURITY MODULE
-M:	Kees Cook <keescook@chromium.org>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git lsm/loadpin
-S:	Supported
-F:	security/loadpin/
-F:	Documentation/admin-guide/LSM/LoadPin.rst
-
-YAMA SECURITY MODULE
-M:	Kees Cook <keescook@chromium.org>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip
-S:	Supported
-F:	security/yama/
-F:	Documentation/admin-guide/LSM/Yama.rst
-
 SENSABLE PHANTOM
 M:	Jiri Slaby <jirislaby@gmail.com>
 S:	Maintained
 F:	drivers/misc/phantom.c
 F:	include/uapi/linux/phantom.h
 
-Emulex 10Gbps iSCSI - OneConnect DRIVER
-M:	Subbu Seetharaman <subbu.seetharaman@broadcom.com>
-M:	Ketan Mukadam <ketan.mukadam@broadcom.com>
-M:	Jitendra Bhivare <jitendra.bhivare@broadcom.com>
-L:	linux-scsi@vger.kernel.org
-W:	http://www.broadcom.com
-S:	Supported
-F:	drivers/scsi/be2iscsi/
+SERIAL DRIVERS
+M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+L:	linux-serial@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/serial/
+F:	drivers/tty/serial/
 
-Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER (be2net)
-M:	Sathya Perla <sathya.perla@broadcom.com>
-M:	Ajit Khaparde <ajit.khaparde@broadcom.com>
-M:	Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
-M:	Somnath Kotur <somnath.kotur@broadcom.com>
-L:	netdev@vger.kernel.org
-W:	http://www.emulex.com
-S:	Supported
-F:	drivers/net/ethernet/emulex/benet/
+SERIAL DEVICE BUS
+M:	Rob Herring <robh@kernel.org>
+L:	linux-serial@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/serial/slave-device.txt
+F:	drivers/tty/serdev/
+F:	include/linux/serdev.h
 
-EMULEX ONECONNECT ROCE DRIVER
-M:	Selvin Xavier <selvin.xavier@broadcom.com>
-M:	Devesh Sharma <devesh.sharma@broadcom.com>
-L:	linux-rdma@vger.kernel.org
-W:	http://www.broadcom.com
-S:	Odd Fixes
-F:	drivers/infiniband/hw/ocrdma/
-F:	include/uapi/rdma/ocrdma-abi.h
+SERIAL IR RECEIVER
+M:	Sean Young <sean@mess.org>
+L:	linux-media@vger.kernel.org
+S:	Maintained
+F:	drivers/media/rc/serial_ir.c
 
 SFC NETWORK DRIVER
 M:	Solarflare linux maintainers <linux-net-drivers@solarflare.com>
@@ -12017,6 +11872,24 @@ M:	Robin Holt <robinmholt@gmail.com>
 S:	Maintained
 F:	drivers/misc/sgi-xp/
 
+SH_VEU V4L2 MEM2MEM DRIVER
+L:	linux-media@vger.kernel.org
+S:	Orphan
+F:	drivers/media/platform/sh_veu.c
+
+SH_VOU V4L2 OUTPUT DRIVER
+L:	linux-media@vger.kernel.org
+S:	Orphan
+F:	drivers/media/platform/sh_vou.c
+F:	include/media/drv-intf/sh_vou.h
+
+SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
+M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
+L:	linux-s390@vger.kernel.org
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+F:	net/smc/
+
 SI2157 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
@@ -12099,24 +11972,14 @@ S:	Maintained
 F:	drivers/input/touchscreen/silead.c
 F:	drivers/platform/x86/silead_dmi.c
 
-SIMPLEFB FB DRIVER
-M:	Hans de Goede <hdegoede@redhat.com>
+SILICON MOTION SM712 FRAME BUFFER DRIVER
+M:	Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+M:	Teddy Wang <teddy.wang@siliconmotion.com>
+M:	Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
 L:	linux-fbdev@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/display/simple-framebuffer.txt
-F:	drivers/video/fbdev/simplefb.c
-F:	include/linux/platform_data/simplefb.h
-
-SH_VEU V4L2 MEM2MEM DRIVER
-L:	linux-media@vger.kernel.org
-S:	Orphan
-F:	drivers/media/platform/sh_veu.c
-
-SH_VOU V4L2 OUTPUT DRIVER
-L:	linux-media@vger.kernel.org
-S:	Orphan
-F:	drivers/media/platform/sh_vou.c
-F:	include/media/drv-intf/sh_vou.h
+F:	drivers/video/fbdev/sm712*
+F:	Documentation/fb/sm712fb.txt
 
 SIMPLE FIRMWARE INTERFACE (SFI)
 M:	Len Brown <lenb@kernel.org>
@@ -12128,6 +11991,14 @@ F:	arch/x86/platform/sfi/
 F:	drivers/sfi/
 F:	include/linux/sfi*.h
 
+SIMPLEFB FB DRIVER
+M:	Hans de Goede <hdegoede@redhat.com>
+L:	linux-fbdev@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/display/simple-framebuffer.txt
+F:	drivers/video/fbdev/simplefb.c
+F:	include/linux/platform_data/simplefb.h
+
 SIMTEC EB110ATX (Chalice CATS)
 P:	Ben Dooks
 P:	Vincent Sanders <vince@simtec.co.uk>
@@ -12152,61 +12023,6 @@ F:	lib/siphash.c
 F:	lib/test_siphash.c
 F:	include/linux/siphash.h
 
-TI DAVINCI MACHINE SUPPORT
-M:	Sekhar Nori <nsekhar@ti.com>
-M:	Kevin Hilman <khilman@kernel.org>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
-S:	Supported
-F:	arch/arm/mach-davinci/
-F:	drivers/i2c/busses/i2c-davinci.c
-F:	arch/arm/boot/dts/da850*
-
-TI DAVINCI SERIES MEDIA DRIVER
-M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
-L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
-S:	Maintained
-F:	drivers/media/platform/davinci/
-F:	include/media/davinci/
-
-TI DAVINCI SERIES GPIO DRIVER
-M:	Keerthy <j-keerthy@ti.com>
-L:	linux-gpio@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/gpio/gpio-davinci.txt
-F:	drivers/gpio/gpio-davinci.c
-
-TI AM437X VPFE DRIVER
-M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
-L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
-S:	Maintained
-F:	drivers/media/platform/am437x/
-
-OV2659 OMNIVISION SENSOR DRIVER
-M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
-L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
-S:	Maintained
-F:	drivers/media/i2c/ov2659.c
-F:	include/media/i2c/ov2659.h
-
-SILICON MOTION SM712 FRAME BUFFER DRIVER
-M:	Sudip Mukherjee <sudipm.mukherjee@gmail.com>
-M:	Teddy Wang <teddy.wang@siliconmotion.com>
-M:	Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
-L:	linux-fbdev@vger.kernel.org
-S:	Maintained
-F:	drivers/video/fbdev/sm712*
-F:	Documentation/fb/sm712fb.txt
-
 SIS 190 ETHERNET DRIVER
 M:	Francois Romieu <romieu@fr.zoreil.com>
 L:	netdev@vger.kernel.org
@@ -12267,14 +12083,6 @@ S:	Maintained
 F:	Documentation/admin-guide/LSM/Smack.rst
 F:	security/smack/
 
-DRIVERS FOR ADAPTIVE VOLTAGE SCALING (AVS)
-M:	Kevin Hilman <khilman@kernel.org>
-M:	Nishanth Menon <nm@ti.com>
-S:	Maintained
-F:	drivers/power/avs/
-F:	include/linux/power/smartreflex.h
-L:	linux-pm@vger.kernel.org
-
 SMC91x ETHERNET DRIVER
 M:	Nicolas Pitre <nico@fluxnic.net>
 S:	Odd Fixes
@@ -12352,6 +12160,15 @@ M:	Chris Boot <bootc@bootc.net>
 S:	Maintained
 F:	drivers/leds/leds-net48xx.c
 
+SOFT-ROCE DRIVER (rxe)
+M:	Moni Shoua <monis@mellanox.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+W:	https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
+Q:	http://patchwork.kernel.org/project/linux-rdma/list/
+F:	drivers/infiniband/sw/rxe/
+F:	include/uapi/rdma/rdma_user_rxe.h
+
 SOFTLOGIC 6x10 MPEG CODEC
 M:	Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 M:	Anton Sviridenko <anton@corp.bluecherry.net>
@@ -12668,6 +12485,27 @@ M:	Ion Badulescu <ionut@badula.org>
 S:	Odd Fixes
 F:	drivers/net/ethernet/adaptec/starfire*
 
+STI CEC DRIVER
+M:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
+S:	Maintained
+F:	drivers/staging/media/st-cec/
+F:	Documentation/devicetree/bindings/media/stih-cec.txt
+
+STK1160 USB VIDEO CAPTURE DRIVER
+M:	Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Maintained
+F:	drivers/media/usb/stk1160/
+
+STMMAC ETHERNET DRIVER
+M:	Giuseppe Cavallaro <peppe.cavallaro@st.com>
+M:	Alexandre Torgue <alexandre.torgue@st.com>
+L:	netdev@vger.kernel.org
+W:	http://www.stlinux.com
+S:	Supported
+F:	drivers/net/ethernet/stmicro/stmmac/
+
 SUN3/3X
 M:	Sam Creasey <sammy@sammy.net>
 W:	http://sammy.net/sun3/
@@ -12739,6 +12577,20 @@ S:	Supported
 F:	net/switchdev/
 F:	include/net/switchdev.h
 
+SYNC FILE FRAMEWORK
+M:	Sumit Semwal <sumit.semwal@linaro.org>
+R:	Gustavo Padovan <gustavo@padovan.org>
+S:	Maintained
+L:	linux-media@vger.kernel.org
+L:	dri-devel@lists.freedesktop.org
+F:	drivers/dma-buf/sync_*
+F:	drivers/dma-buf/dma-fence*
+F:	drivers/dma-buf/sw_sync.c
+F:	include/linux/sync_file.h
+F:	include/uapi/linux/sync_file.h
+F:	Documentation/sync_file.txt
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
 SYNOPSYS ARC ARCHITECTURE
 M:	Vineet Gupta <vgupta@synopsys.com>
 L:	linux-snps-arc@lists.infradead.org
@@ -12757,6 +12609,35 @@ F:	arch/arc/plat-axs10x
 F:	arch/arc/boot/dts/ax*
 F:	Documentation/devicetree/bindings/arc/axs10*
 
+SYNOPSYS DESIGNWARE DMAC DRIVER
+M:	Viresh Kumar <vireshk@kernel.org>
+M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+S:	Maintained
+F:	include/linux/dma/dw.h
+F:	include/linux/platform_data/dma-dw.h
+F:	drivers/dma/dw/
+
+SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
+M:	Jie Deng <jiedeng@synopsys.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/synopsys/
+
+SYNOPSYS DESIGNWARE I2C DRIVER
+M:	Jarkko Nikula <jarkko.nikula@linux.intel.com>
+R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+R:	Mika Westerberg <mika.westerberg@linux.intel.com>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	drivers/i2c/busses/i2c-designware-*
+F:	include/linux/platform_data/i2c-designware.h
+
+SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
+M:	Jaehoon Chung <jh80.chung@samsung.com>
+L:	linux-mmc@vger.kernel.org
+S:	Maintained
+F:	drivers/mmc/host/dw_mmc*
+
 SYSTEM CONFIGURATION (SYSCON)
 M:	Lee Jones <lee.jones@linaro.org>
 M:	Arnd Bergmann <arnd@arndb.de>
@@ -12764,6 +12645,16 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git
 S:	Supported
 F:	drivers/mfd/syscon.c
 
+SYSTEM CONTROL & POWER INTERFACE (SCPI) Message Protocol drivers
+M:	Sudeep Holla <sudeep.holla@arm.com>
+L:	linux-arm-kernel@lists.infradead.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/arm/arm,scpi.txt
+F:	drivers/clk/clk-scpi.c
+F:	drivers/cpufreq/scpi-cpufreq.c
+F:	drivers/firmware/arm_scpi.c
+F:	include/linux/scpi_protocol.h
+
 SYSTEM RESET/SHUTDOWN DRIVERS
 M:	Sebastian Reichel <sre@kernel.org>
 L:	linux-pm@vger.kernel.org
@@ -12772,6 +12663,15 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/power/reset/
 F:	drivers/power/reset/
 
+SYSTEM TRACE MODULE CLASS
+M:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ash/stm.git
+F:	Documentation/trace/stm.txt
+F:	drivers/hwtracing/stm/
+F:	include/linux/stm.h
+F:	include/uapi/linux/stm.h
+
 SYSV FILESYSTEM
 M:	Christoph Hellwig <hch@infradead.org>
 S:	Maintained
@@ -12941,6 +12841,14 @@ L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/rc/ttusbir.c
 
+TEE SUBSYSTEM
+M:	Jens Wiklander <jens.wiklander@linaro.org>
+S:	Maintained
+F:	include/linux/tee_drv.h
+F:	include/uapi/linux/tee.h
+F:	drivers/tee/
+F:	Documentation/tee.txt
+
 TEGRA ARCHITECTURE SUPPORT
 M:	Thierry Reding <thierry.reding@gmail.com>
 M:	Jonathan Hunter <jonathanh@nvidia.com>
@@ -13072,6 +12980,56 @@ T:	git git://repo.or.cz/linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git
 S:	Maintained
 F:	drivers/platform/x86/thinkpad_acpi.c
 
+THUNDERBOLT DRIVER
+M:	Andreas Noever <andreas.noever@gmail.com>
+M:	Michael Jamet <michael.jamet@intel.com>
+M:	Mika Westerberg <mika.westerberg@linux.intel.com>
+M:	Yehezkel Bernat <yehezkel.bernat@intel.com>
+S:	Maintained
+F:	drivers/thunderbolt/
+
+TI BQ27XXX POWER SUPPLY DRIVER
+R:	Andrew F. Davis <afd@ti.com>
+F:	include/linux/power/bq27xxx_battery.h
+F:	drivers/power/supply/bq27xxx_battery.c
+F:	drivers/power/supply/bq27xxx_battery_i2c.c
+
+TI DAVINCI MACHINE SUPPORT
+M:	Sekhar Nori <nsekhar@ti.com>
+M:	Kevin Hilman <khilman@kernel.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
+S:	Supported
+F:	arch/arm/mach-davinci/
+F:	drivers/i2c/busses/i2c-davinci.c
+F:	arch/arm/boot/dts/da850*
+
+TI DAVINCI SERIES MEDIA DRIVER
+M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
+L:	linux-media@vger.kernel.org
+W:	https://linuxtv.org
+Q:	http://patchwork.linuxtv.org/project/linux-media/list/
+T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
+S:	Maintained
+F:	drivers/media/platform/davinci/
+F:	include/media/davinci/
+
+TI DAVINCI SERIES GPIO DRIVER
+M:	Keerthy <j-keerthy@ti.com>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/gpio/gpio-davinci.txt
+F:	drivers/gpio/gpio-davinci.c
+
+TI AM437X VPFE DRIVER
+M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
+L:	linux-media@vger.kernel.org
+W:	https://linuxtv.org
+Q:	http://patchwork.linuxtv.org/project/linux-media/list/
+T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
+S:	Maintained
+F:	drivers/media/platform/am437x/
+
 TI BANDGAP AND THERMAL DRIVER
 M:	Eduardo Valentin <edubezval@gmail.com>
 M:	Keerthy <j-keerthy@ti.com>
@@ -13123,7 +13081,6 @@ S:	Maintained
 F:	drivers/soc/ti/*
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
 
-
 TI LM49xxx FAMILY ASoC CODEC DRIVERS
 M:	M R Swami Reddy <mr.swami.reddy@ti.com>
 M:	Vishwas A Deshpande <vishwas.a.deshpande@ti.com>
@@ -13168,6 +13125,14 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Odd Fixes
 F:	sound/soc/codecs/tas571x*
 
+TI TRF7970A NFC DRIVER
+M:	Mark Greer <mgreer@animalcreek.com>
+L:	linux-wireless@vger.kernel.org
+L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+S:	Supported
+F:	drivers/nfc/trf7970a.c
+F:	Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+
 TI TWL4030 SERIES SOC CODEC DRIVER
 M:	Peter Ujfalusi <peter.ujfalusi@ti.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -13183,16 +13148,6 @@ S:	Orphan
 F:	drivers/net/wireless/ti/
 F:	include/linux/wl12xx.h
 
-TIPC NETWORK LAYER
-M:	Jon Maloy <jon.maloy@ericsson.com>
-M:	Ying Xue <ying.xue@windriver.com>
-L:	netdev@vger.kernel.org (core kernel code)
-L:	tipc-discussion@lists.sourceforge.net (user apps, general discussion)
-W:	http://tipc.sourceforge.net/
-S:	Maintained
-F:	include/uapi/linux/tipc*.h
-F:	net/tipc/
-
 TILE ARCHITECTURE
 M:	Chris Metcalf <cmetcalf@mellanox.com>
 W:	http://www.mellanox.com/repository/solutions/tile-scm/
@@ -13208,13 +13163,73 @@ F:	drivers/tty/serial/tilegx.c
 F:	drivers/usb/host/*-tilegx.c
 F:	include/linux/usb/tilegx.h
 
-TLAN NETWORK DRIVER
-M:	Samuel Chessman <chessman@tux.org>
-L:	tlan-devel@lists.sourceforge.net (subscribers-only)
-W:	http://sourceforge.net/projects/tlan/
+TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
+M:	John Stultz <john.stultz@linaro.org>
+M:	Thomas Gleixner <tglx@linutronix.de>
+R:	Stephen Boyd <sboyd@codeaurora.org>
+L:	linux-kernel@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
+S:	Supported
+F:	include/linux/clocksource.h
+F:	include/linux/time.h
+F:	include/linux/timex.h
+F:	include/uapi/linux/time.h
+F:	include/uapi/linux/timex.h
+F:	kernel/time/clocksource.c
+F:	kernel/time/time*.c
+F:	kernel/time/alarmtimer.c
+F:	kernel/time/ntp.c
+F:	tools/testing/selftests/timers/
+
+TIPC NETWORK LAYER
+M:	Jon Maloy <jon.maloy@ericsson.com>
+M:	Ying Xue <ying.xue@windriver.com>
+L:	netdev@vger.kernel.org (core kernel code)
+L:	tipc-discussion@lists.sourceforge.net (user apps, general discussion)
+W:	http://tipc.sourceforge.net/
+S:	Maintained
+F:	include/uapi/linux/tipc*.h
+F:	net/tipc/
+
+TLAN NETWORK DRIVER
+M:	Samuel Chessman <chessman@tux.org>
+L:	tlan-devel@lists.sourceforge.net (subscribers-only)
+W:	http://sourceforge.net/projects/tlan/
+S:	Maintained
+F:	Documentation/networking/tlan.txt
+F:	drivers/net/ethernet/ti/tlan.*
+
+TM6000 VIDEO4LINUX DRIVER
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
+L:	linux-media@vger.kernel.org
+W:	https://linuxtv.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Odd fixes
+F:	drivers/media/usb/tm6000/
+F:	Documentation/media/v4l-drivers/tm6000*
+
+TMIO/SDHI MMC DRIVER
+M:	Wolfram Sang <wsa+renesas@sang-engineering.com>
+L:	linux-mmc@vger.kernel.org
+S:	Supported
+F:	drivers/mmc/host/tmio_mmc*
+F:	drivers/mmc/host/renesas_sdhi*
+F:	include/linux/mfd/tmio.h
+
+TMP401 HARDWARE MONITOR DRIVER
+M:	Guenter Roeck <linux@roeck-us.net>
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
-F:	Documentation/networking/tlan.txt
-F:	drivers/net/ethernet/ti/tlan.*
+F:	Documentation/hwmon/tmp401
+F:	drivers/hwmon/tmp401.c
+
+TMPFS (SHMEM FILESYSTEM)
+M:	Hugh Dickins <hughd@google.com>
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	include/linux/shmem_fs.h
+F:	mm/shmem.c
 
 TOMOYO SECURITY MODULE
 M:	Kentaro Takeda <takedakn@nttdata.co.jp>
@@ -13273,63 +13288,6 @@ S:	Maintained
 F:	drivers/media/i2c/tc358743*
 F:	include/media/i2c/tc358743.h
 
-TMIO/SDHI MMC DRIVER
-M:	Wolfram Sang <wsa+renesas@sang-engineering.com>
-L:	linux-mmc@vger.kernel.org
-S:	Supported
-F:	drivers/mmc/host/tmio_mmc*
-F:	drivers/mmc/host/renesas_sdhi*
-F:	include/linux/mfd/tmio.h
-
-TMP401 HARDWARE MONITOR DRIVER
-M:	Guenter Roeck <linux@roeck-us.net>
-L:	linux-hwmon@vger.kernel.org
-S:	Maintained
-F:	Documentation/hwmon/tmp401
-F:	drivers/hwmon/tmp401.c
-
-TMPFS (SHMEM FILESYSTEM)
-M:	Hugh Dickins <hughd@google.com>
-L:	linux-mm@kvack.org
-S:	Maintained
-F:	include/linux/shmem_fs.h
-F:	mm/shmem.c
-
-TM6000 VIDEO4LINUX DRIVER
-M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
-M:	Mauro Carvalho Chehab <mchehab@kernel.org>
-L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Odd fixes
-F:	drivers/media/usb/tm6000/
-F:	Documentation/media/v4l-drivers/tm6000*
-
-TW5864 VIDEO4LINUX DRIVER
-M:	Bluecherry Maintainers <maintainers@bluecherrydvr.com>
-M:	Anton Sviridenko <anton@corp.bluecherry.net>
-M:	Andrey Utkin <andrey.utkin@corp.bluecherry.net>
-M:	Andrey Utkin <andrey_utkin@fastmail.com>
-L:	linux-media@vger.kernel.org
-S:	Supported
-F:	drivers/media/pci/tw5864/
-
-TW68 VIDEO4LINUX DRIVER
-M:	Hans Verkuil <hverkuil@xs4all.nl>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-W:	https://linuxtv.org
-S:	Odd Fixes
-F:	drivers/media/pci/tw68/
-
-TW686X VIDEO4LINUX DRIVER
-M:	Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-W:	http://linuxtv.org
-S:	Maintained
-F:	drivers/media/pci/tw686x/
-
 TPM DEVICE DRIVER
 M:	Peter Huewe <peterhuewe@gmx.de>
 M:	Marcel Selhorst <tpmdd@selhorst.net>
@@ -13430,6 +13388,31 @@ S:	Maintained
 F:	drivers/tc/
 F:	include/linux/tc.h
 
+TW5864 VIDEO4LINUX DRIVER
+M:	Bluecherry Maintainers <maintainers@bluecherrydvr.com>
+M:	Anton Sviridenko <anton@corp.bluecherry.net>
+M:	Andrey Utkin <andrey.utkin@corp.bluecherry.net>
+M:	Andrey Utkin <andrey_utkin@fastmail.com>
+L:	linux-media@vger.kernel.org
+S:	Supported
+F:	drivers/media/pci/tw5864/
+
+TW68 VIDEO4LINUX DRIVER
+M:	Hans Verkuil <hverkuil@xs4all.nl>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+W:	https://linuxtv.org
+S:	Odd Fixes
+F:	drivers/media/pci/tw68/
+
+TW686X VIDEO4LINUX DRIVER
+M:	Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+W:	http://linuxtv.org
+S:	Maintained
+F:	drivers/media/pci/tw686x/
+
 UBI FILE SYSTEM (UBIFS)
 M:	Richard Weinberger <richard@nod.at>
 M:	Artem Bityutskiy <dedekind1@gmail.com>
@@ -13479,6 +13462,13 @@ S:	Maintained
 F:	drivers/hid/uhid.c
 F:	include/uapi/linux/uhid.h
 
+ULPI BUS
+M:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+L:	linux-usb@vger.kernel.org
+S:	Maintained
+F:	drivers/usb/common/ulpi.c
+F:	include/linux/ulpi/
+
 ULTRA-WIDEBAND (UWB) SUBSYSTEM:
 L:	linux-usb@vger.kernel.org
 S:	Orphan
@@ -13824,13 +13814,6 @@ S:	Maintained
 F:	Documentation/media/v4l-drivers/zr364xx*
 F:	drivers/media/usb/zr364xx/
 
-ULPI BUS
-M:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-L:	linux-usb@vger.kernel.org
-S:	Maintained
-F:	drivers/usb/common/ulpi.c
-F:	include/linux/ulpi/
-
 USER-MODE LINUX (UML)
 M:	Jeff Dike <jdike@addtoit.com>
 M:	Richard Weinberger <richard@nod.at>
@@ -13924,6 +13907,31 @@ F:	drivers/gpu/vga/vga_switcheroo.c
 F:	include/linux/vga_switcheroo.h
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
+VIA RHINE NETWORK DRIVER
+S:	Orphan
+F:	drivers/net/ethernet/via/via-rhine.c
+
+VIA SD/MMC CARD CONTROLLER DRIVER
+M:	Bruce Chang <brucechang@via.com.tw>
+M:	Harald Welte <HaraldWelte@viatech.com>
+S:	Maintained
+F:	drivers/mmc/host/via-sdmmc.c
+
+VIA UNICHROME(PRO)/CHROME9 FRAMEBUFFER DRIVER
+M:	Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
+L:	linux-fbdev@vger.kernel.org
+S:	Maintained
+F:	include/linux/via-core.h
+F:	include/linux/via-gpio.h
+F:	include/linux/via_i2c.h
+F:	drivers/video/fbdev/via/
+
+VIA VELOCITY NETWORK DRIVER
+M:	Francois Romieu <romieu@fr.zoreil.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/via/via-velocity.*
+
 VIDEOBUF2 FRAMEWORK
 M:	Pawel Osciak <pawel@osciak.com>
 M:	Marek Szyprowski <m.szyprowski@samsung.com>
@@ -13939,6 +13947,21 @@ L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/platform/video-mux.c
 
+VIMC VIRTUAL MEDIA CONTROLLER DRIVER
+M:	Helen Koike <helen.koike@collabora.com>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+W:	https://linuxtv.org
+S:	Maintained
+F:	drivers/media/platform/vimc/*
+
+VIRT LIB
+M:	Alex Williamson <alex.williamson@redhat.com>
+M:	Paolo Bonzini <pbonzini@redhat.com>
+L:	kvm@vger.kernel.org
+S:	Supported
+F:	virt/lib/
+
 VIRTIO AND VHOST VSOCK DRIVER
 M:	Stefan Hajnoczi <stefanha@redhat.com>
 L:	kvm@vger.kernel.org
@@ -13955,12 +13978,6 @@ F:	drivers/net/vsockmon.c
 F:	drivers/vhost/vsock.c
 F:	drivers/vhost/vsock.h
 
-VIRTUAL SERIO DEVICE DRIVER
-M:	Stephen Chandler Paul <thatslyude@gmail.com>
-S:	Maintained
-F:	drivers/input/serio/userio.c
-F:	include/uapi/linux/userio.h
-
 VIRTIO CONSOLE DRIVER
 M:	Amit Shah <amit@kernel.org>
 L:	virtualization@lists.linux-foundation.org
@@ -14027,37 +14044,11 @@ S:  Maintained
 F:  drivers/crypto/virtio/
 F:  include/uapi/linux/virtio_crypto.h
 
-VIA RHINE NETWORK DRIVER
-S:	Orphan
-F:	drivers/net/ethernet/via/via-rhine.c
-
-VIA SD/MMC CARD CONTROLLER DRIVER
-M:	Bruce Chang <brucechang@via.com.tw>
-M:	Harald Welte <HaraldWelte@viatech.com>
-S:	Maintained
-F:	drivers/mmc/host/via-sdmmc.c
-
-VIA UNICHROME(PRO)/CHROME9 FRAMEBUFFER DRIVER
-M:	Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
-L:	linux-fbdev@vger.kernel.org
-S:	Maintained
-F:	include/linux/via-core.h
-F:	include/linux/via-gpio.h
-F:	include/linux/via_i2c.h
-F:	drivers/video/fbdev/via/
-
-VIA VELOCITY NETWORK DRIVER
-M:	Francois Romieu <romieu@fr.zoreil.com>
-L:	netdev@vger.kernel.org
+VIRTUAL SERIO DEVICE DRIVER
+M:	Stephen Chandler Paul <thatslyude@gmail.com>
 S:	Maintained
-F:	drivers/net/ethernet/via/via-velocity.*
-
-VIRT LIB
-M:	Alex Williamson <alex.williamson@redhat.com>
-M:	Paolo Bonzini <pbonzini@redhat.com>
-L:	kvm@vger.kernel.org
-S:	Supported
-F:	virt/lib/
+F:	drivers/input/serio/userio.c
+F:	include/uapi/linux/userio.h
 
 VIVID VIRTUAL VIDEO DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
@@ -14067,14 +14058,6 @@ W:	https://linuxtv.org
 S:	Maintained
 F:	drivers/media/platform/vivid/*
 
-VIMC VIRTUAL MEDIA CONTROLLER DRIVER
-M:	Helen Koike <helen.koike@collabora.com>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-W:	https://linuxtv.org
-S:	Maintained
-F:	drivers/media/platform/vimc/*
-
 VLYNQ BUS
 M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	openwrt-devel@lists.openwrt.org (subscribers-only)
@@ -14236,6 +14219,27 @@ L:	linux-input@vger.kernel.org
 S:	Maintained
 F:	drivers/hid/hid-wiimote*
 
+WILOCITY WIL6210 WIRELESS DRIVER
+M:	Maya Erez <qca_merez@qca.qualcomm.com>
+L:	linux-wireless@vger.kernel.org
+L:	wil6210@qca.qualcomm.com
+S:	Supported
+W:	http://wireless.kernel.org/en/users/Drivers/wil6210
+F:	drivers/net/wireless/ath/wil6210/
+F:	include/uapi/linux/wil6210_uapi.h
+
+WIMAX STACK
+M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+M:	linux-wimax@intel.com
+L:	wimax@linuxwimax.org (subscribers-only)
+S:	Supported
+W:	http://linuxwimax.org
+F:	Documentation/wimax/README.wimax
+F:	include/linux/wimax/debug.h
+F:	include/net/wimax.h
+F:	include/uapi/linux/wimax.h
+F:	net/wimax/
+
 WINBOND CIR DRIVER
 M:	David Härdeman <david@hardeman.nu>
 S:	Maintained
@@ -14253,18 +14257,6 @@ L:	linux-gpio@vger.kernel.org
 S:	Maintained
 F:	drivers/gpio/gpio-ws16c48.c
 
-WIMAX STACK
-M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
-M:	linux-wimax@intel.com
-L:	wimax@linuxwimax.org (subscribers-only)
-S:	Supported
-W:	http://linuxwimax.org
-F:	Documentation/wimax/README.wimax
-F:	include/linux/wimax/debug.h
-F:	include/net/wimax.h
-F:	include/uapi/linux/wimax.h
-F:	net/wimax/
-
 WISTRON LAPTOP BUTTON DRIVER
 M:	Miloslav Trmac <mitr@volny.cz>
 S:	Maintained
@@ -14514,6 +14506,13 @@ S:	Maintained
 F:	drivers/net/hamradio/yam*
 F:	include/linux/yam.h
 
+YAMA SECURITY MODULE
+M:	Kees Cook <keescook@chromium.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip
+S:	Supported
+F:	security/yama/
+F:	Documentation/admin-guide/LSM/Yama.rst
+
 YEALINK PHONE DRIVER
 M:	Henk Vergonet <Henk.Vergonet@gmail.com>
 L:	usbb2k-api-dev@nongnu.org
@@ -14548,23 +14547,23 @@ L:	zd1211-devs@lists.sourceforge.net (subscribers-only)
 S:	Maintained
 F:	drivers/net/wireless/zydas/zd1211rw/
 
-ZD1301_DEMOD MEDIA DRIVER
+ZD1301 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org/
 W:	http://palosaari.fi/linux/
 Q:	https://patchwork.linuxtv.org/project/linux-media/list/
 S:	Maintained
-F:	drivers/media/dvb-frontends/zd1301_demod*
+F:	drivers/media/usb/dvb-usb-v2/zd1301*
 
-ZD1301 MEDIA DRIVER
+ZD1301_DEMOD MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
 W:	https://linuxtv.org/
 W:	http://palosaari.fi/linux/
 Q:	https://patchwork.linuxtv.org/project/linux-media/list/
 S:	Maintained
-F:	drivers/media/usb/dvb-usb-v2/zd1301*
+F:	drivers/media/dvb-frontends/zd1301_demod*
 
 ZPOOL COMPRESSED PAGE STORAGE API
 M:	Dan Streetman <ddstreet@ieee.org>
-- 
GitLab


From 0e4d394fe50308f69eb061eb89de78279df1e9dc Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 26 Jun 2017 18:39:30 +0200
Subject: [PATCH 1949/1958] xen/x86: Don't BUG on CPU0 offlining

CONFIG_BOOTPARAM_HOTPLUG_CPU0 allows to offline CPU0 but Xen HVM guests
BUG() in xen_teardown_timer(). Remove the BUG_ON(), this is probably a
leftover from ancient times when CPU0 hotplug was impossible, it works
just fine for HVM.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/time.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1895a8e85c15..1ecb05db36320 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -309,7 +309,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
 void xen_teardown_timer(int cpu)
 {
 	struct clock_event_device *evt;
-	BUG_ON(cpu == 0);
 	evt = &per_cpu(xen_clock_events, cpu).evt;
 
 	if (evt->irq >= 0) {
-- 
GitLab


From 29d11cfd8698038b87458ba4d1329b9da81150a5 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Tue, 18 Jul 2017 09:40:35 +0200
Subject: [PATCH 1950/1958] xen/grant-table: log the lack of grants

log a message when we enter this situation:
1) we already allocated the max number of available grants from hypervisor
and
2) we still need more (but the request fails because of 1)).

Sometimes the lack of grants causes IO hangs in xen_blkfront devices.
Adding this log would help debuging.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/grant-table.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index d6786b87e13b2..2c6a9114d332c 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -42,6 +42,7 @@
 #include <linux/delay.h>
 #include <linux/hardirq.h>
 #include <linux/workqueue.h>
+#include <linux/ratelimit.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -1072,8 +1073,14 @@ static int gnttab_expand(unsigned int req_entries)
 	cur = nr_grant_frames;
 	extra = ((req_entries + (grefs_per_grant_frame-1)) /
 		 grefs_per_grant_frame);
-	if (cur + extra > gnttab_max_grant_frames())
+	if (cur + extra > gnttab_max_grant_frames()) {
+		pr_warn_ratelimited("xen/grant-table: max_grant_frames reached"
+				    " cur=%u extra=%u limit=%u"
+				    " gnttab_free_count=%u req_entries=%u\n",
+				    cur, extra, gnttab_max_grant_frames(),
+				    gnttab_free_count, req_entries);
 		return -ENOSPC;
+	}
 
 	rc = gnttab_map(cur, cur + extra - 1);
 	if (rc == 0)
-- 
GitLab


From c185ddec54657c145a0c2055e4b87918da24974f Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 5 Jul 2017 16:05:20 +0200
Subject: [PATCH 1951/1958] xen/x86: fix cpu hotplug

Commit dc6416f1d711eb4c1726e845d653235dcaae12e1 ("xen/x86: Call
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE) from xen_play_dead()")
introduced an error leading to a stack overflow of the idle task when
a cpu was brought offline/online many times: by calling
cpu_startup_entry() instead of returning at the end of xen_play_dead()
do_idle() would be entered again and again.

Don't use cpu_startup_entry(), but cpuhp_online_idle() instead allowing
to return from xen_play_dead().

Cc: <stable@vger.kernel.org> # 4.12
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/smp_pv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 1ea598e5f0302..51471408fdd13 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -19,6 +19,7 @@
 #include <linux/irq_work.h>
 #include <linux/tick.h>
 #include <linux/nmi.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/paravirt.h>
 #include <asm/desc.h>
@@ -413,7 +414,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
 	 */
 	tick_nohz_idle_enter();
 
-	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+	cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
 }
 
 #else /* !CONFIG_HOTPLUG_CPU */
-- 
GitLab


From 96edd61dcf44362d3ef0bed1a5361e0ac7886a63 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 10 Jul 2017 10:10:45 +0200
Subject: [PATCH 1952/1958] xen/balloon: don't online new memory initially

When setting up the Xenstore watch for the memory target size the new
watch will fire at once. Don't try to reach the configured target size
by onlining new memory in this case, as the current memory size will
be smaller in almost all cases due to e.g. BIOS reserved pages.

Onlining new memory will lead to more problems e.g. undesired conflicts
with NVMe devices meant to be operated as block devices.

Instead remember the difference between target size and current size
when the watch fires for the first time and apply it to any further
size changes, too.

In order to avoid races between balloon.c and xen-balloon.c init calls
do the xen-balloon.c initialization from balloon.c.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/balloon.c     |  3 +++
 drivers/xen/xen-balloon.c | 22 ++++++++++++----------
 include/xen/balloon.h     |  8 ++++++++
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 50dcb68d80707..ab609255a0f35 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -780,6 +780,9 @@ static int __init balloon_init(void)
 	}
 #endif
 
+	/* Init the xen-balloon driver. */
+	xen_balloon_init();
+
 	return 0;
 }
 subsys_initcall(balloon_init);
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index e7715cb62eefc..e89136ab851e3 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -59,6 +59,8 @@ static void watch_target(struct xenbus_watch *watch,
 {
 	unsigned long long new_target;
 	int err;
+	static bool watch_fired;
+	static long target_diff;
 
 	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
 	if (err != 1) {
@@ -69,7 +71,14 @@ static void watch_target(struct xenbus_watch *watch,
 	/* The given memory/target value is in KiB, so it needs converting to
 	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
 	 */
-	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+	new_target >>= PAGE_SHIFT - 10;
+	if (watch_fired) {
+		balloon_set_new_target(new_target - target_diff);
+		return;
+	}
+
+	watch_fired = true;
+	target_diff = new_target - balloon_stats.target_pages;
 }
 static struct xenbus_watch target_watch = {
 	.node = "memory/target",
@@ -94,22 +103,15 @@ static struct notifier_block xenstore_notifier = {
 	.notifier_call = balloon_init_watcher,
 };
 
-static int __init balloon_init(void)
+void xen_balloon_init(void)
 {
-	if (!xen_domain())
-		return -ENODEV;
-
-	pr_info("Initialising balloon driver\n");
-
 	register_balloon(&balloon_dev);
 
 	register_xen_selfballooning(&balloon_dev);
 
 	register_xenstore_notifier(&xenstore_notifier);
-
-	return 0;
 }
-subsys_initcall(balloon_init);
+EXPORT_SYMBOL_GPL(xen_balloon_init);
 
 #define BALLOON_SHOW(name, format, args...)				\
 	static ssize_t show_##name(struct device *dev,			\
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index d1767dfb0d95d..8906361bb50ca 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -35,3 +35,11 @@ static inline int register_xen_selfballooning(struct device *dev)
 	return -ENOSYS;
 }
 #endif
+
+#ifdef CONFIG_XEN_BALLOON
+void xen_balloon_init(void);
+#else
+static inline void xen_balloon_init(void)
+{
+}
+#endif
-- 
GitLab


From f47e07bc5f1a5c48ed60a8ee55352cb4b2bf4d51 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 23 Jul 2017 15:08:05 -0700
Subject: [PATCH 1953/1958] Fix up MAINTAINERS file problems

Prepping for scripting the MAINTAINERS file cleanup (and possible split)
showed a couple of cases where the headers for a couple of entries were
bogus.

There's a few different kinds of bogosities:

 - the X-GENE SOC EDAC case was confused and split over two lines

 - there were four entries for "GREYBUS PROTOCOLS DRIVERS" that were all
   different things.

 - the NOKIA N900 CAMERA SUPPORT" was duplicated

all of which were more obvious when you started doing associative arrays
in perl to track these things by the header (so that we can alphabetize
this thing properly, and so that we might split it up by the data too).

Cc: Joe Perches <joe@perches.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8a266f41e1118..64e4c3545a6d6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4986,7 +4986,6 @@ L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/skx_edac.c
 
-EDAC-XGENE
 APPLIED MICRO (APM) X-GENE SOC EDAC
 M:     Loc Ho <lho@apm.com>
 S:     Supported
@@ -5857,7 +5856,7 @@ F:	drivers/staging/greybus/audio_manager_sysfs.c
 F:	drivers/staging/greybus/audio_module.c
 F:	drivers/staging/greybus/audio_topology.c
 
-GREYBUS PROTOCOLS DRIVERS
+GREYBUS SDIO/GPIO/SPI PROTOCOLS DRIVERS
 M:	Rui Miguel Silva <rmfrfs@gmail.com>
 S:	Maintained
 F:	drivers/staging/greybus/sdio.c
@@ -5867,14 +5866,14 @@ F:	drivers/staging/greybus/power_supply.c
 F:	drivers/staging/greybus/spi.c
 F:	drivers/staging/greybus/spilib.c
 
-GREYBUS PROTOCOLS DRIVERS
+GREYBUS LOOBACK/TIME PROTOCOLS DRIVERS
 M:	Bryan O'Donoghue <pure.logic@nexus-software.ie>
 S:	Maintained
 F:	drivers/staging/greybus/loopback.c
 F:	drivers/staging/greybus/timesync.c
 F:	drivers/staging/greybus/timesync_platform.c
 
-GREYBUS PROTOCOLS DRIVERS
+GREYBUS FW/HID/SPI PROTOCOLS DRIVERS
 M:	Viresh Kumar <vireshk@kernel.org>
 S:	Maintained
 F:	drivers/staging/greybus/authentication.c
@@ -5891,7 +5890,7 @@ F:	drivers/staging/greybus/spi.c
 F:	drivers/staging/greybus/spilib.c
 F:	drivers/staging/greybus/spilib.h
 
-GREYBUS PROTOCOLS DRIVERS
+GREYBUS UART PROTOCOLS DRIVERS
 M:	David Lin <dtwlin@gmail.com>
 S:	Maintained
 F:	drivers/staging/greybus/uart.c
@@ -9374,14 +9373,6 @@ S:	Maintained
 F:	drivers/media/i2c/et8ek8
 F:	drivers/media/i2c/ad5820.c
 
-NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS)
-M:	Pavel Machek <pavel@ucw.cz>
-M:	Sakari Ailus <sakari.ailus@iki.fi>
-L:	linux-media@vger.kernel.org
-S:	Maintained
-F:	drivers/media/i2c/et8ek8
-F:	drivers/media/i2c/ad5820.c
-
 NOKIA N900 POWER SUPPLY DRIVERS
 R:	Pali Rohár <pali.rohar@gmail.com>
 F:	include/linux/power/bq2415x_charger.h
-- 
GitLab


From 7683e9e529258d01ce99216ad3be21f59eff83ec Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 23 Jul 2017 16:06:21 -0700
Subject: [PATCH 1954/1958] Properly alphabetize MAINTAINERS file

This adds a perl script to actually parse the MAINTAINERS file, clean up
some whitespace in it, warn about errors in it, and then properly sort
the end result.

My perl-fu is atrocious, so the script has basically been created by
randomly putting various characters in a pile, mixing them around, and
then looking it the end result does anything interesting when used as a
perl script.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS                  | 3585 +++++++++++++++++-----------------
 scripts/parse-maintainers.pl |   77 +
 2 files changed, 1868 insertions(+), 1794 deletions(-)
 create mode 100644 scripts/parse-maintainers.pl

diff --git a/MAINTAINERS b/MAINTAINERS
index 64e4c3545a6d6..f66488dfdbc9c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -205,7 +205,6 @@ F:	include/net/9p/
 F:	include/uapi/linux/virtio_9p.h
 F:	include/trace/events/9p.h
 
-
 A8293 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
@@ -808,6 +807,12 @@ W:	http://blackfin.uclinux.org/
 S:	Supported
 F:	sound/soc/blackfin/*
 
+ANALOG DEVICES INC DMA DRIVERS
+M:	Lars-Peter Clausen <lars@metafoo.de>
+W:	http://ez.analog.com/community/linux-device-drivers
+S:	Supported
+F:	drivers/dma/dma-axi-dmac.c
+
 ANALOG DEVICES INC IIO DRIVERS
 M:	Lars-Peter Clausen <lars@metafoo.de>
 M:	Michael Hennerich <Michael.Hennerich@analog.com>
@@ -820,12 +825,6 @@ X:	drivers/iio/*/adjd*
 F:	drivers/staging/iio/*/ad*
 F:	drivers/staging/iio/trigger/iio-trig-bfin-timer.c
 
-ANALOG DEVICES INC DMA DRIVERS
-M:	Lars-Peter Clausen <lars@metafoo.de>
-W:	http://ez.analog.com/community/linux-device-drivers
-S:	Supported
-F:	drivers/dma/dma-axi-dmac.c
-
 ANDROID CONFIG FRAGMENTS
 M:	Rob Herring <robh@kernel.org>
 S:	Supported
@@ -904,6 +903,18 @@ M:	Duc Dang <dhdang@apm.com>
 S:	Supported
 F:	arch/arm64/boot/dts/apm/
 
+APPLIED MICRO (APM) X-GENE SOC EDAC
+M:	Loc Ho <lho@apm.com>
+S:	Supported
+F:	drivers/edac/xgene_edac.c
+F:	Documentation/devicetree/bindings/edac/apm-xgene-edac.txt
+
+APPLIED MICRO (APM) X-GENE SOC ETHERNET (V2) DRIVER
+M:	Iyappan Subramanian <isubramanian@apm.com>
+M:	Keyur Chudgar <kchudgar@apm.com>
+S:	Supported
+F:	drivers/net/ethernet/apm/xgene-v2/
+
 APPLIED MICRO (APM) X-GENE SOC ETHERNET DRIVER
 M:	Iyappan Subramanian <isubramanian@apm.com>
 M:	Keyur Chudgar <kchudgar@apm.com>
@@ -914,12 +925,6 @@ F:	drivers/net/phy/mdio-xgene.c
 F:	Documentation/devicetree/bindings/net/apm-xgene-enet.txt
 F:	Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
 
-APPLIED MICRO (APM) X-GENE SOC ETHERNET (V2) DRIVER
-M:	Iyappan Subramanian <isubramanian@apm.com>
-M:	Keyur Chudgar <kchudgar@apm.com>
-S:	Supported
-F:	drivers/net/ethernet/apm/xgene-v2/
-
 APPLIED MICRO (APM) X-GENE SOC PMU
 M:	Tai Nguyen <ttnguyen@apm.com>
 S:	Supported
@@ -1004,18 +1009,17 @@ S:	Maintained
 T:	git git://git.armlinux.org.uk/~rmk/linux-arm.git
 F:	arch/arm/
 
-ARM SUB-ARCHITECTURES
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	arch/arm/mach-*/
-F:	arch/arm/plat-*/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git
-
 ARM PRIMECELL AACI PL041 DRIVER
 M:	Russell King <linux@armlinux.org.uk>
 S:	Maintained
 F:	sound/arm/aaci.*
 
+ARM PRIMECELL BUS SUPPORT
+M:	Russell King <linux@armlinux.org.uk>
+S:	Maintained
+F:	drivers/amba/
+F:	include/linux/amba/bus.h
+
 ARM PRIMECELL CLCD PL110 DRIVER
 M:	Russell King <linux@armlinux.org.uk>
 S:	Maintained
@@ -1039,11 +1043,22 @@ S:	Maintained
 F:	drivers/tty/serial/amba-pl01*.c
 F:	include/linux/amba/serial.h
 
-ARM PRIMECELL BUS SUPPORT
-M:	Russell King <linux@armlinux.org.uk>
+ARM SMMU DRIVERS
+M:	Will Deacon <will.deacon@arm.com>
+R:	Robin Murphy <robin.murphy@arm.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	drivers/amba/
-F:	include/linux/amba/bus.h
+F:	drivers/iommu/arm-smmu.c
+F:	drivers/iommu/arm-smmu-v3.c
+F:	drivers/iommu/io-pgtable-arm.c
+F:	drivers/iommu/io-pgtable-arm-v7s.c
+
+ARM SUB-ARCHITECTURES
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/mach-*/
+F:	arch/arm/plat-*/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git
 
 ARM/ACTIONS SEMI ARCHITECTURE
 M:	Andreas Färber <afaerber@suse.de>
@@ -1076,6 +1091,11 @@ M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
+ARM/Allwinner SoC Clock Support
+M:	Emilio López <emilio@elopez.com.ar>
+S:	Maintained
+F:	drivers/clk/sunxi/
+
 ARM/Allwinner sunXi SoC support
 M:	Maxime Ripard <maxime.ripard@free-electrons.com>
 M:	Chen-Yu Tsai <wens@csie.org>
@@ -1090,10 +1110,15 @@ F:	drivers/pinctrl/sunxi/
 F:	drivers/soc/sunxi/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git
 
-ARM/Allwinner SoC Clock Support
-M:	Emilio López <emilio@elopez.com.ar>
+ARM/Amlogic Meson SoC CLOCK FRAMEWORK
+M:	Neil Armstrong <narmstrong@baylibre.com>
+M:	Jerome Brunet <jbrunet@baylibre.com>
+L:	linux-amlogic@lists.infradead.org
 S:	Maintained
-F:	drivers/clk/sunxi/
+F:	drivers/clk/meson/
+F:	include/dt-bindings/clock/meson*
+F:	include/dt-bindings/clock/gxbb*
+F:	Documentation/devicetree/bindings/clock/amlogic*
 
 ARM/Amlogic Meson SoC support
 M:	Carlo Caione <carlo@caione.org>
@@ -1105,20 +1130,10 @@ S:	Maintained
 F:	arch/arm/mach-meson/
 F:	arch/arm/boot/dts/meson*
 F:	arch/arm64/boot/dts/amlogic/
-F: 	drivers/pinctrl/meson/
+F:	drivers/pinctrl/meson/
 F:	drivers/mmc/host/meson*
 N:	meson
 
-ARM/Amlogic Meson SoC CLOCK FRAMEWORK
-M:	Neil Armstrong <narmstrong@baylibre.com>
-M:	Jerome Brunet <jbrunet@baylibre.com>
-L:	linux-amlogic@lists.infradead.org
-S:	Maintained
-F:	drivers/clk/meson/
-F:	include/dt-bindings/clock/meson*
-F:	include/dt-bindings/clock/gxbb*
-F:	Documentation/devicetree/bindings/clock/amlogic*
-
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:	Tsahee Zidenberg <tsahee@annapurnalabs.com>
 M:	Antoine Tenart <antoine.tenart@free-electrons.com>
@@ -1141,13 +1156,6 @@ F:	drivers/clk/axis
 F:	drivers/pinctrl/pinctrl-artpec*
 F:	Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt
 
-ARM/ASPEED MACHINE SUPPORT
-M:	Joel Stanley <joel@jms.id.au>
-S:	Maintained
-F:	arch/arm/mach-aspeed/
-F:	arch/arm/boot/dts/aspeed-*
-F:	drivers/*/*aspeed*
-
 ARM/ASPEED I2C DRIVER
 M:	Brendan Higgins <brendanhiggins@google.com>
 R:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
@@ -1160,6 +1168,18 @@ F:	drivers/i2c/busses/i2c-aspeed.c
 F:	Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.txt
 F:	Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
 
+ARM/ASPEED MACHINE SUPPORT
+M:	Joel Stanley <joel@jms.id.au>
+S:	Maintained
+F:	arch/arm/mach-aspeed/
+F:	arch/arm/boot/dts/aspeed-*
+F:	drivers/*/*aspeed*
+
+ARM/ATMEL AT91 Clock Support
+M:	Boris Brezillon <boris.brezillon@free-electrons.com>
+S:	Maintained
+F:	drivers/clk/at91
+
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
@@ -1176,11 +1196,6 @@ F:	arch/arm/boot/dts/sama*.dtsi
 F:	arch/arm/include/debug/at91.S
 F:	drivers/memory/atmel*
 
-ARM/ATMEL AT91 Clock Support
-M:	Boris Brezillon <boris.brezillon@free-electrons.com>
-S:	Maintained
-F:	drivers/clk/at91
-
 ARM/CALXEDA HIGHBANK ARCHITECTURE
 M:	Rob Herring <robh@kernel.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1207,6 +1222,11 @@ L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Odd Fixes
 N:	clps711x
 
+ARM/CIRRUS LOGIC EDB9315A MACHINE SUPPORT
+M:	Lennert Buytenhek <kernel@wantstofly.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+
 ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
 M:	Hartley Sweeten <hsweeten@visionengravers.com>
 M:	Alexander Sverdlin <alexander.sverdlin@gmail.com>
@@ -1215,11 +1235,6 @@ S:	Maintained
 F:	arch/arm/mach-ep93xx/
 F:	arch/arm/mach-ep93xx/include/mach/
 
-ARM/CIRRUS LOGIC EDB9315A MACHINE SUPPORT
-M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-
 ARM/CLKDEV SUPPORT
 M:	Russell King <linux@armlinux.org.uk>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1233,6 +1248,13 @@ M:	Mike Rapoport <mike@compulab.co.il>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
+ARM/CONEXANT DIGICOLOR MACHINE SUPPORT
+M:	Baruch Siach <baruch@tkos.co.il>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/boot/dts/cx92755*
+N:	digicolor
+
 ARM/CONTEC MICRO9 MACHINE SUPPORT
 M:	Hubert Feurstein <hubert.feurstein@contec.at>
 S:	Maintained
@@ -1278,13 +1300,6 @@ F:	drivers/clocksource/timer-prima2.c
 F:	drivers/clocksource/timer-atlas7.c
 N:	[^a-z]sirf
 
-ARM/CONEXANT DIGICOLOR MACHINE SUPPORT
-M:	Baruch Siach <baruch@tkos.co.il>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	arch/arm/boot/dts/cx92755*
-N:	digicolor
-
 ARM/EBSA110 MACHINE SUPPORT
 M:	Russell King <linux@armlinux.org.uk>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1398,6 +1413,11 @@ L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-pxa/colibri-pxa270-income.c
 
+ARM/INTEL IOP13XX ARM ARCHITECTURE
+M:	Lennert Buytenhek <kernel@wantstofly.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+
 ARM/INTEL IOP32X ARM ARCHITECTURE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1407,11 +1427,6 @@ ARM/INTEL IOP33X ARM ARCHITECTURE
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Orphan
 
-ARM/INTEL IOP13XX ARM ARCHITECTURE
-M:	Lennert Buytenhek <kernel@wantstofly.org>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-
 ARM/INTEL IQ81342EX MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1446,39 +1461,6 @@ M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
-ARM/TEXAS INSTRUMENT KEYSTONE ARCHITECTURE
-M:	Santosh Shilimkar <ssantosh@kernel.org>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	arch/arm/mach-keystone/
-F:	arch/arm/boot/dts/keystone-*
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
-
-ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
-M:	Santosh Shilimkar <ssantosh@kernel.org>
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-F:	drivers/clk/keystone/
-
-ARM/TEXAS INSTRUMENT KEYSTONE ClOCKSOURCE
-M:	Santosh Shilimkar <ssantosh@kernel.org>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-F:	drivers/clocksource/timer-keystone.c
-
-ARM/TEXAS INSTRUMENT KEYSTONE RESET DRIVER
-M:	Santosh Shilimkar <ssantosh@kernel.org>
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-F:	drivers/power/reset/keystone-reset.c
-
-ARM/TEXAS INSTRUMENT AEMIF/EMIF DRIVERS
-M:	Santosh Shilimkar <ssantosh@kernel.org>
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-F:	drivers/memory/*emif*
-
 ARM/LG1K ARCHITECTURE
 M:	Chanho Min <chanho.min@lge.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1521,24 +1503,6 @@ ARM/MAGICIAN MACHINE SUPPORT
 M:	Philipp Zabel <philipp.zabel@gmail.com>
 S:	Maintained
 
-ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
-M:	Jason Cooper <jason@lakedaemon.net>
-M:	Andrew Lunn <andrew@lunn.ch>
-M:	Gregory Clement <gregory.clement@free-electrons.com>
-M:	Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	arch/arm/boot/dts/armada*
-F:	arch/arm/boot/dts/kirkwood*
-F:	arch/arm/configs/mvebu_*_defconfig
-F:	arch/arm/mach-mvebu/
-F:	arch/arm64/boot/dts/marvell/armada*
-F:	drivers/cpufreq/mvebu-cpufreq.c
-F:	drivers/irqchip/irq-armada-370-xp.c
-F:	drivers/irqchip/irq-mvebu-*
-F:	drivers/pinctrl/mvebu/
-F:	drivers/rtc/rtc-armada38x.c
-
 ARM/Marvell Berlin SoC support
 M:	Jisheng Zhang <jszhang@marvell.com>
 M:	Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
@@ -1548,7 +1512,6 @@ F:	arch/arm/mach-berlin/
 F:	arch/arm/boot/dts/berlin*
 F:	arch/arm64/boot/dts/marvell/berlin*
 
-
 ARM/Marvell Dove/MV78xx0/Orion SOC support
 M:	Jason Cooper <jason@lakedaemon.net>
 M:	Andrew Lunn <andrew@lunn.ch>
@@ -1564,24 +1527,23 @@ F:	arch/arm/plat-orion/
 F:	arch/arm/boot/dts/dove*
 F:	arch/arm/boot/dts/orion5x*
 
-
-ARM/Orion SoC/Technologic Systems TS-78xx platform support
-M:	Alexander Clouter <alex@digriz.org.uk>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-W:	http://www.digriz.org.uk/ts78xx/kernel
-S:	Maintained
-F:	arch/arm/mach-orion5x/ts78xx-*
-
-ARM/OXNAS platform support
-M:	Neil Armstrong <narmstrong@baylibre.com>
+ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
+M:	Jason Cooper <jason@lakedaemon.net>
+M:	Andrew Lunn <andrew@lunn.ch>
+M:	Gregory Clement <gregory.clement@free-electrons.com>
+M:	Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers)
 S:	Maintained
-F:	arch/arm/mach-oxnas/
-F:	arch/arm/boot/dts/ox8*.dtsi
-F:	arch/arm/boot/dts/wd-mbwe.dts
-F:	arch/arm/boot/dts/cloudengines-pogoplug-series-3.dts
-N:	oxnas
+F:	arch/arm/boot/dts/armada*
+F:	arch/arm/boot/dts/kirkwood*
+F:	arch/arm/configs/mvebu_*_defconfig
+F:	arch/arm/mach-mvebu/
+F:	arch/arm64/boot/dts/marvell/armada*
+F:	drivers/cpufreq/mvebu-cpufreq.c
+F:	drivers/irqchip/irq-armada-370-xp.c
+F:	drivers/irqchip/irq-mvebu-*
+F:	drivers/pinctrl/mvebu/
+F:	drivers/rtc/rtc-armada38x.c
 
 ARM/Mediatek RTC DRIVER
 M:	Eddie Huang <eddie.huang@mediatek.com>
@@ -1636,22 +1598,59 @@ F:	drivers/pinctrl/nomadik/
 F:	drivers/i2c/busses/i2c-nomadik.c
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
 
+ARM/NUVOTON W90X900 ARM ARCHITECTURE
+M:	Wan ZongShun <mcuos.com@gmail.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+W:	http://www.mcuos.com
+S:	Maintained
+F:	arch/arm/mach-w90x900/
+F:	drivers/input/keyboard/w90p910_keypad.c
+F:	drivers/input/touchscreen/w90p910_ts.c
+F:	drivers/watchdog/nuc900_wdt.c
+F:	drivers/net/ethernet/nuvoton/w90p910_ether.c
+F:	drivers/mtd/nand/nuc900_nand.c
+F:	drivers/rtc/rtc-nuc900.c
+F:	drivers/spi/spi-nuc900.c
+F:	drivers/usb/host/ehci-w90x900.c
+F:	drivers/video/fbdev/nuc900fb.c
+
 ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT
 M:	Nelson Castillo <arhuaco@freaks-unidos.net>
 L:	openmoko-kernel@lists.openmoko.org (subscribers-only)
 W:	http://wiki.openmoko.org/wiki/Neo_FreeRunner
 S:	Supported
 
-ARM/TOSA MACHINE SUPPORT
-M:	Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
-M:	Dirk Opfer <dirk@opfer-online.de>
+ARM/Orion SoC/Technologic Systems TS-78xx platform support
+M:	Alexander Clouter <alex@digriz.org.uk>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+W:	http://www.digriz.org.uk/ts78xx/kernel
 S:	Maintained
+F:	arch/arm/mach-orion5x/ts78xx-*
 
-ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
-M:	Marek Vasut <marek.vasut@gmail.com>
-L:	linux-arm-kernel@lists.infradead.org
-W:	http://hackndev.com
-S:	Maintained
+ARM/OXNAS platform support
+M:	Neil Armstrong <narmstrong@baylibre.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/mach-oxnas/
+F:	arch/arm/boot/dts/ox8*.dtsi
+F:	arch/arm/boot/dts/wd-mbwe.dts
+F:	arch/arm/boot/dts/cloudengines-pogoplug-series-3.dts
+N:	oxnas
+
+ARM/PALM TREO SUPPORT
+M:	Tomas Cech <sleep_walker@suse.com>
+L:	linux-arm-kernel@lists.infradead.org
+W:	http://hackndev.com
+S:	Maintained
+F:	arch/arm/mach-pxa/include/mach/palmtreo.h
+F:	arch/arm/mach-pxa/palmtreo.c
+
+ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
+M:	Marek Vasut <marek.vasut@gmail.com>
+L:	linux-arm-kernel@lists.infradead.org
+W:	http://hackndev.com
+S:	Maintained
 F:	arch/arm/mach-pxa/include/mach/palmtx.h
 F:	arch/arm/mach-pxa/palmtx.c
 F:	arch/arm/mach-pxa/include/mach/palmt5.h
@@ -1663,14 +1662,6 @@ F:	arch/arm/mach-pxa/palmte2.c
 F:	arch/arm/mach-pxa/include/mach/palmtc.h
 F:	arch/arm/mach-pxa/palmtc.c
 
-ARM/PALM TREO SUPPORT
-M:	Tomas Cech <sleep_walker@suse.com>
-L:	linux-arm-kernel@lists.infradead.org
-W:	http://hackndev.com
-S:	Maintained
-F:	arch/arm/mach-pxa/include/mach/palmtreo.h
-F:	arch/arm/mach-pxa/palmtreo.c
-
 ARM/PALMZ72 SUPPORT
 M:	Sergey Lapin <slapin@ossfans.org>
 L:	linux-arm-kernel@lists.infradead.org
@@ -1811,17 +1802,6 @@ L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/platform/s5p-g2d/
 
-ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
-M:	Kyungmin Park <kyungmin.park@samsung.com>
-M:	Kamil Debski <kamil@wypas.org>
-M:	Jeongtae Park <jtp.park@samsung.com>
-M:	Andrzej Hajda <a.hajda@samsung.com>
-L:	linux-arm-kernel@lists.infradead.org
-L:	linux-media@vger.kernel.org
-S:	Maintained
-F:	arch/arm/plat-samsung/s5p-dev-mfc.c
-F:	drivers/media/platform/s5p-mfc/
-
 ARM/SAMSUNG S5P SERIES HDMI CEC SUBSYSTEM SUPPORT
 M:	Marek Szyprowski <m.szyprowski@samsung.com>
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
@@ -1838,6 +1818,17 @@ L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/platform/s5p-jpeg/
 
+ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
+M:	Kyungmin Park <kyungmin.park@samsung.com>
+M:	Kamil Debski <kamil@wypas.org>
+M:	Jeongtae Park <jtp.park@samsung.com>
+M:	Andrzej Hajda <a.hajda@samsung.com>
+L:	linux-arm-kernel@lists.infradead.org
+L:	linux-media@vger.kernel.org
+S:	Maintained
+F:	arch/arm/plat-samsung/s5p-dev-mfc.c
+F:	drivers/media/platform/s5p-mfc/
+
 ARM/SHMOBILE ARM ARCHITECTURE
 M:	Simon Horman <horms@verge.net.au>
 M:	Magnus Damm <magnus.damm@gmail.com>
@@ -1931,26 +1922,48 @@ M:	"Mark F. Brown" <mark.brown314@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
+ARM/TEXAS INSTRUMENT AEMIF/EMIF DRIVERS
+M:	Santosh Shilimkar <ssantosh@kernel.org>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/memory/*emif*
+
+ARM/TEXAS INSTRUMENT KEYSTONE ARCHITECTURE
+M:	Santosh Shilimkar <ssantosh@kernel.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/mach-keystone/
+F:	arch/arm/boot/dts/keystone-*
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+
+ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
+M:	Santosh Shilimkar <ssantosh@kernel.org>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/clk/keystone/
+
+ARM/TEXAS INSTRUMENT KEYSTONE ClOCKSOURCE
+M:	Santosh Shilimkar <ssantosh@kernel.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/clocksource/timer-keystone.c
+
+ARM/TEXAS INSTRUMENT KEYSTONE RESET DRIVER
+M:	Santosh Shilimkar <ssantosh@kernel.org>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/power/reset/keystone-reset.c
+
 ARM/THECUS N2100 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
-ARM/NUVOTON W90X900 ARM ARCHITECTURE
-M:	Wan ZongShun <mcuos.com@gmail.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-W:	http://www.mcuos.com
+ARM/TOSA MACHINE SUPPORT
+M:	Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+M:	Dirk Opfer <dirk@opfer-online.de>
 S:	Maintained
-F:	arch/arm/mach-w90x900/
-F:	drivers/input/keyboard/w90p910_keypad.c
-F:	drivers/input/touchscreen/w90p910_ts.c
-F:	drivers/watchdog/nuc900_wdt.c
-F:	drivers/net/ethernet/nuvoton/w90p910_ether.c
-F:	drivers/mtd/nand/nuc900_nand.c
-F:	drivers/rtc/rtc-nuc900.c
-F:	drivers/spi/spi-nuc900.c
-F:	drivers/usb/host/ehci-w90x900.c
-F:	drivers/video/fbdev/nuc900fb.c
 
 ARM/U300 MACHINE SUPPORT
 M:	Linus Walleij <linus.walleij@linaro.org>
@@ -2095,16 +2108,6 @@ F:	drivers/i2c/busses/i2c-cadence.c
 F:	drivers/mmc/host/sdhci-of-arasan.c
 F:	drivers/edac/synopsys_edac.c
 
-ARM SMMU DRIVERS
-M:	Will Deacon <will.deacon@arm.com>
-R:	Robin Murphy <robin.murphy@arm.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	drivers/iommu/arm-smmu.c
-F:	drivers/iommu/arm-smmu-v3.c
-F:	drivers/iommu/io-pgtable-arm.c
-F:	drivers/iommu/io-pgtable-arm-v7s.c
-
 ARM64 PORT (AARCH64 ARCHITECTURE)
 M:	Catalin Marinas <catalin.marinas@arm.com>
 M:	Will Deacon <will.deacon@arm.com>
@@ -2256,25 +2259,12 @@ M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 S:	Supported
 F:	drivers/power/reset/at91-sama5d2_shdwc.c
 
-ATMEL SAMA5D2 ADC DRIVER
-M:	Ludovic Desroches <ludovic.desroches@microchip.com>
-L:	linux-iio@vger.kernel.org
-S:	Supported
-F:	drivers/iio/adc/at91-sama5d2_adc.c
-
 ATMEL Audio ALSA driver
 M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
 F:	sound/soc/atmel
 
-ATMEL XDMA DRIVER
-M:	Ludovic Desroches <ludovic.desroches@microchip.com>
-L:	linux-arm-kernel@lists.infradead.org
-L:	dmaengine@vger.kernel.org
-S:	Supported
-F:	drivers/dma/at_xdmac.c
-
 ATMEL I2C DRIVER
 M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-i2c@vger.kernel.org
@@ -2300,6 +2290,14 @@ M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 S:	Supported
 F:	drivers/net/ethernet/cadence/
 
+ATMEL MAXTOUCH DRIVER
+M:	Nick Dyer <nick@shmanahar.org>
+T:	git git://github.com/ndyer/linux.git
+S:	Maintained
+F:	Documentation/devicetree/bindings/input/atmel,maxtouch.txt
+F:	drivers/input/touchscreen/atmel_mxt_ts.c
+F:	include/linux/platform_data/atmel_mxt_ts.h
+
 ATMEL NAND DRIVER
 M:	Wenyou Yang <wenyou.yang@atmel.com>
 M:	Josh Wu <rainyfeeling@outlook.com>
@@ -2307,6 +2305,12 @@ L:	linux-mtd@lists.infradead.org
 S:	Supported
 F:	drivers/mtd/nand/atmel/*
 
+ATMEL SAMA5D2 ADC DRIVER
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
+L:	linux-iio@vger.kernel.org
+S:	Supported
+F:	drivers/iio/adc/at91-sama5d2_adc.c
+
 ATMEL SDMMC DRIVER
 M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-mmc@vger.kernel.org
@@ -2346,13 +2350,12 @@ W:	http://atmelwlandriver.sourceforge.net/
 S:	Maintained
 F:	drivers/net/wireless/atmel/atmel*
 
-ATMEL MAXTOUCH DRIVER
-M:	Nick Dyer <nick@shmanahar.org>
-T:	git git://github.com/ndyer/linux.git
-S:	Maintained
-F:	Documentation/devicetree/bindings/input/atmel,maxtouch.txt
-F:	drivers/input/touchscreen/atmel_mxt_ts.c
-F:	include/linux/platform_data/atmel_mxt_ts.h
+ATMEL XDMA DRIVER
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
+L:	linux-arm-kernel@lists.infradead.org
+L:	dmaengine@vger.kernel.org
+S:	Supported
+F:	drivers/dma/at_xdmac.c
 
 ATOMIC INFRASTRUCTURE
 M:	Will Deacon <will.deacon@arm.com>
@@ -2406,13 +2409,6 @@ F:	include/uapi/linux/ax25.h
 F:	include/net/ax25.h
 F:	net/ax25/
 
-AXENTIA ASOC DRIVERS
-M:	Peter Rosin <peda@axentia.se>
-L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
-S:	Maintained
-F:	Documentation/devicetree/bindings/sound/axentia,*
-F:	sound/soc/atmel/tse850-pcm5142.c
-
 AXENTIA ARM DEVICES
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -2421,6 +2417,13 @@ F:	Documentation/devicetree/bindings/arm/axentia.txt
 F:	arch/arm/boot/dts/at91-linea.dtsi
 F:	arch/arm/boot/dts/at91-tse850-3.dts
 
+AXENTIA ASOC DRIVERS
+M:	Peter Rosin <peda@axentia.se>
+L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/sound/axentia,*
+F:	sound/soc/atmel/tse850-pcm5142.c
+
 AZ6007 DVB DRIVER
 M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:	Mauro Carvalho Chehab <mchehab@kernel.org>
@@ -2543,6 +2546,22 @@ W:	http://blackfin.uclinux.org
 S:	Supported
 F:	drivers/net/ethernet/adi/
 
+BLACKFIN I2C TWI DRIVER
+M:	Sonic Zhang <sonic.zhang@analog.com>
+L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
+W:	http://blackfin.uclinux.org/
+S:	Supported
+F:	drivers/i2c/busses/i2c-bfin-twi.c
+
+BLACKFIN MEDIA DRIVER
+M:	Scott Jiang <scott.jiang.linux@gmail.com>
+L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
+W:	http://blackfin.uclinux.org/
+S:	Supported
+F:	drivers/media/platform/blackfin/
+F:	drivers/media/i2c/adv7183*
+F:	drivers/media/i2c/vs6624*
+
 BLACKFIN RTC DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
@@ -2569,22 +2588,6 @@ W:	http://blackfin.uclinux.org
 S:	Supported
 F:	drivers/watchdog/bfin_wdt.c
 
-BLACKFIN I2C TWI DRIVER
-M:	Sonic Zhang <sonic.zhang@analog.com>
-L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
-W:	http://blackfin.uclinux.org/
-S:	Supported
-F:	drivers/i2c/busses/i2c-bfin-twi.c
-
-BLACKFIN MEDIA DRIVER
-M:	Scott Jiang <scott.jiang.linux@gmail.com>
-L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
-W:	http://blackfin.uclinux.org/
-S:	Supported
-F:	drivers/media/platform/blackfin/
-F:	drivers/media/i2c/adv7183*
-F:	drivers/media/i2c/vs6624*
-
 BLINKM RGB LED DRIVER
 M:	Jan-Simon Moeller <jansimon.moeller@gmx.de>
 S:	Maintained
@@ -2628,21 +2631,6 @@ S:	Maintained
 F:	net/bluetooth/
 F:	include/net/bluetooth/
 
-DMA MAPPING HELPERS
-M:	Christoph Hellwig <hch@lst.de>
-M:	Marek Szyprowski <m.szyprowski@samsung.com>
-R:	Robin Murphy <robin.murphy@arm.com>
-L:	linux-kernel@vger.kernel.org
-T:	git git://git.infradead.org/users/hch/dma-mapping.git
-W:	http://git.infradead.org/users/hch/dma-mapping.git
-S:	Supported
-F:	lib/dma-debug.c
-F:	lib/dma-noop.c
-F:	lib/dma-virt.c
-F:	drivers/base/dma-mapping.c
-F:	drivers/base/dma-coherent.c
-F:	include/linux/dma-mapping.h
-
 BONDING DRIVER
 M:	Jay Vosburgh <j.vosburgh@gmail.com>
 M:	Veaceslav Falico <vfalico@gmail.com>
@@ -2690,35 +2678,6 @@ S:	Supported
 F:	drivers/net/dsa/b53/*
 F:	include/linux/platform_data/b53.h
 
-BROADCOM GENET ETHERNET DRIVER
-M:	Florian Fainelli <f.fainelli@gmail.com>
-L:	netdev@vger.kernel.org
-S:	Supported
-F:	drivers/net/ethernet/broadcom/genet/
-
-BROADCOM BNX2 GIGABIT ETHERNET DRIVER
-M:	Rasesh Mody <rasesh.mody@cavium.com>
-M:	Harish Patil <harish.patil@cavium.com>
-M:	Dept-GELinuxNICDev@cavium.com
-L:	netdev@vger.kernel.org
-S:	Supported
-F:	drivers/net/ethernet/broadcom/bnx2.*
-F:	drivers/net/ethernet/broadcom/bnx2_*
-
-BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@cavium.com>
-M:	Ariel Elior <ariel.elior@cavium.com>
-M:	everest-linux-l2@cavium.com
-L:	netdev@vger.kernel.org
-S:	Supported
-F:	drivers/net/ethernet/broadcom/bnx2x/
-
-BROADCOM BNXT_EN 50 GIGABIT ETHERNET DRIVER
-M:	Michael Chan <michael.chan@broadcom.com>
-L:	netdev@vger.kernel.org
-S:	Supported
-F:	drivers/net/ethernet/broadcom/bnxt/
-
 BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE
 M:	Florian Fainelli <f.fainelli@gmail.com>
 M:	Ray Jui <rjui@broadcom.com>
@@ -2797,6 +2756,13 @@ F:	arch/arm/boot/dts/bcm7*.dts*
 F:	drivers/bus/brcmstb_gisb.c
 N:	brcmstb
 
+BROADCOM BMIPS CPUFREQ DRIVER
+M:	Markus Mayer <mmayer@broadcom.com>
+M:	bcm-kernel-feedback-list@broadcom.com
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	drivers/cpufreq/bmips-cpufreq.c
+
 BROADCOM BMIPS MIPS ARCHITECTURE
 M:	Kevin Cernekee <cernekee@gmail.com>
 M:	Florian Fainelli <f.fainelli@gmail.com>
@@ -2813,20 +2779,40 @@ F:	drivers/irqchip/irq-brcmstb*
 F:	include/linux/bcm963xx_nvram.h
 F:	include/linux/bcm963xx_tag.h
 
-BROADCOM BMIPS CPUFREQ DRIVER
-M:	Markus Mayer <mmayer@broadcom.com>
-M:	bcm-kernel-feedback-list@broadcom.com
-L:	linux-pm@vger.kernel.org
-S:	Maintained
-F:	drivers/cpufreq/bmips-cpufreq.c
+BROADCOM BNX2 GIGABIT ETHERNET DRIVER
+M:	Rasesh Mody <rasesh.mody@cavium.com>
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/broadcom/bnx2.*
+F:	drivers/net/ethernet/broadcom/bnx2_*
 
-BROADCOM TG3 GIGABIT ETHERNET DRIVER
-M:	Siva Reddy Kallam <siva.kallam@broadcom.com>
-M:	Prashant Sreedharan <prashant@broadcom.com>
-M:	Michael Chan <mchan@broadcom.com>
+BROADCOM BNX2FC 10 GIGABIT FCOE DRIVER
+M:	QLogic-Storage-Upstream@qlogic.com
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/bnx2fc/
+
+BROADCOM BNX2I 1/10 GIGABIT iSCSI DRIVER
+M:	QLogic-Storage-Upstream@qlogic.com
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/bnx2i/
+
+BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
+M:	Yuval Mintz <Yuval.Mintz@cavium.com>
+M:	Ariel Elior <ariel.elior@cavium.com>
+M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	drivers/net/ethernet/broadcom/tg3.*
+F:	drivers/net/ethernet/broadcom/bnx2x/
+
+BROADCOM BNXT_EN 50 GIGABIT ETHERNET DRIVER
+M:	Michael Chan <michael.chan@broadcom.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/broadcom/bnxt/
 
 BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER
 M:	Arend van Spriel <arend.vanspriel@broadcom.com>
@@ -2840,17 +2826,18 @@ L:	brcm80211-dev-list@cypress.com
 S:	Supported
 F:	drivers/net/wireless/broadcom/brcm80211/
 
-BROADCOM BNX2FC 10 GIGABIT FCOE DRIVER
-M:	QLogic-Storage-Upstream@qlogic.com
-L:	linux-scsi@vger.kernel.org
+BROADCOM BRCMSTB GPIO DRIVER
+M:	Gregory Fong <gregory.0xf0@gmail.com>
+L:	bcm-kernel-feedback-list@broadcom.com
 S:	Supported
-F:	drivers/scsi/bnx2fc/
+F:	drivers/gpio/gpio-brcmstb.c
+F:	Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
 
-BROADCOM BNX2I 1/10 GIGABIT iSCSI DRIVER
-M:	QLogic-Storage-Upstream@qlogic.com
-L:	linux-scsi@vger.kernel.org
+BROADCOM GENET ETHERNET DRIVER
+M:	Florian Fainelli <f.fainelli@gmail.com>
+L:	netdev@vger.kernel.org
 S:	Supported
-F:	drivers/scsi/bnx2i/
+F:	drivers/net/ethernet/broadcom/genet/
 
 BROADCOM IPROC ARM ARCHITECTURE
 M:	Ray Jui <rjui@broadcom.com>
@@ -2877,13 +2864,6 @@ F:	arch/arm64/boot/dts/broadcom/ns2*
 F:	drivers/clk/bcm/clk-ns*
 F:	drivers/pinctrl/bcm/pinctrl-ns*
 
-BROADCOM BRCMSTB GPIO DRIVER
-M:	Gregory Fong <gregory.0xf0@gmail.com>
-L:	bcm-kernel-feedback-list@broadcom.com
-S:	Supported
-F:	drivers/gpio/gpio-brcmstb.c
-F:	Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
-
 BROADCOM KONA GPIO DRIVER
 M:	Ray Jui <rjui@broadcom.com>
 L:	bcm-kernel-feedback-list@broadcom.com
@@ -2891,19 +2871,29 @@ S:	Supported
 F:	drivers/gpio/gpio-bcm-kona.c
 F:	Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
 
+BROADCOM NETXTREME-E ROCE DRIVER
+M:	Selvin Xavier <selvin.xavier@broadcom.com>
+M:	Devesh Sharma <devesh.sharma@broadcom.com>
+M:	Somnath Kotur <somnath.kotur@broadcom.com>
+M:	Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
+L:	linux-rdma@vger.kernel.org
+W:	http://www.broadcom.com
+S:	Supported
+F:	drivers/infiniband/hw/bnxt_re/
+F:	include/uapi/rdma/bnxt_re-abi.h
+
 BROADCOM NVRAM DRIVER
 M:	Rafał Miłecki <zajec5@gmail.com>
 L:	linux-mips@linux-mips.org
 S:	Maintained
 F:	drivers/firmware/broadcom/*
 
-BROADCOM STB NAND FLASH DRIVER
-M:	Brian Norris <computersforpeace@gmail.com>
-M:	Kamal Dasu <kdasu.kdev@gmail.com>
-L:	linux-mtd@lists.infradead.org
-L:	bcm-kernel-feedback-list@broadcom.com
+BROADCOM SPECIFIC AMBA DRIVER (BCMA)
+M:	Rafał Miłecki <zajec5@gmail.com>
+L:	linux-wireless@vger.kernel.org
 S:	Maintained
-F:	drivers/mtd/nand/brcmnand/
+F:	drivers/bcma/
+F:	include/linux/bcma/
 
 BROADCOM STB AVS CPUFREQ DRIVER
 M:	Markus Mayer <mmayer@broadcom.com>
@@ -2913,12 +2903,13 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt
 F:	drivers/cpufreq/brcmstb*
 
-BROADCOM SPECIFIC AMBA DRIVER (BCMA)
-M:	Rafał Miłecki <zajec5@gmail.com>
-L:	linux-wireless@vger.kernel.org
+BROADCOM STB NAND FLASH DRIVER
+M:	Brian Norris <computersforpeace@gmail.com>
+M:	Kamal Dasu <kdasu.kdev@gmail.com>
+L:	linux-mtd@lists.infradead.org
+L:	bcm-kernel-feedback-list@broadcom.com
 S:	Maintained
-F:	drivers/bcma/
-F:	include/linux/bcma/
+F:	drivers/mtd/nand/brcmnand/
 
 BROADCOM SYSTEMPORT ETHERNET DRIVER
 M:	Florian Fainelli <f.fainelli@gmail.com>
@@ -2926,16 +2917,13 @@ L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/bcmsysport.*
 
-BROADCOM NETXTREME-E ROCE DRIVER
-M:	Selvin Xavier <selvin.xavier@broadcom.com>
-M:	Devesh Sharma <devesh.sharma@broadcom.com>
-M:	Somnath Kotur <somnath.kotur@broadcom.com>
-M:	Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
-L:	linux-rdma@vger.kernel.org
-W:	http://www.broadcom.com
+BROADCOM TG3 GIGABIT ETHERNET DRIVER
+M:	Siva Reddy Kallam <siva.kallam@broadcom.com>
+M:	Prashant Sreedharan <prashant@broadcom.com>
+M:	Michael Chan <mchan@broadcom.com>
+L:	netdev@vger.kernel.org
 S:	Supported
-F:	drivers/infiniband/hw/bnxt_re/
-F:	include/uapi/rdma/bnxt_re-abi.h
+F:	drivers/net/ethernet/broadcom/tg3.*
 
 BROCADE BFA FC SCSI DRIVER
 M:	Anil Gurumurthy <anil.gurumurthy@qlogic.com>
@@ -3081,6 +3069,21 @@ F:	arch/x86/kernel/tce_64.c
 F:	arch/x86/include/asm/calgary.h
 F:	arch/x86/include/asm/tce.h
 
+CAN NETWORK DRIVERS
+M:	Wolfgang Grandegger <wg@grandegger.com>
+M:	Marc Kleine-Budde <mkl@pengutronix.de>
+L:	linux-can@vger.kernel.org
+W:	https://github.com/linux-can
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
+S:	Maintained
+F:	Documentation/devicetree/bindings/net/can/
+F:	drivers/net/can/
+F:	include/linux/can/dev.h
+F:	include/linux/can/platform/
+F:	include/uapi/linux/can/error.h
+F:	include/uapi/linux/can/netlink.h
+
 CAN NETWORK LAYER
 M:	Oliver Hartkopp <socketcan@hartkopp.net>
 M:	Marc Kleine-Budde <mkl@pengutronix.de>
@@ -3097,21 +3100,6 @@ F:	include/uapi/linux/can/bcm.h
 F:	include/uapi/linux/can/raw.h
 F:	include/uapi/linux/can/gw.h
 
-CAN NETWORK DRIVERS
-M:	Wolfgang Grandegger <wg@grandegger.com>
-M:	Marc Kleine-Budde <mkl@pengutronix.de>
-L:	linux-can@vger.kernel.org
-W:	https://github.com/linux-can
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
-S:	Maintained
-F:	Documentation/devicetree/bindings/net/can/
-F:	drivers/net/can/
-F:	include/linux/can/dev.h
-F:	include/linux/can/platform/
-F:	include/uapi/linux/can/error.h
-F:	include/uapi/linux/can/netlink.h
-
 CAPABILITIES
 M:	Serge Hallyn <serge@hallyn.com>
 L:	linux-security-module@vger.kernel.org
@@ -3133,13 +3121,6 @@ W:	http://wireless.kernel.org/en/users/Drivers/carl9170
 S:	Maintained
 F:	drivers/net/wireless/ath/carl9170/
 
-CAVIUM THUNDERX2 ARM64 SOC
-M:	Jayachandran C <jnair@caviumnetworks.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	arch/arm64/boot/dts/cavium/thunder2-99xx*
-F:	Documentation/devicetree/bindings/arm/cavium-thunder2.txt
-
 CAVIUM I2C DRIVER
 M:	Jan Glauber <jglauber@cavium.com>
 M:	David Daney <david.daney@cavium.com>
@@ -3148,6 +3129,16 @@ S:	Supported
 F:	drivers/i2c/busses/i2c-octeon*
 F:	drivers/i2c/busses/i2c-thunderx*
 
+CAVIUM LIQUIDIO NETWORK DRIVER
+M:	Derek Chickles <derek.chickles@caviumnetworks.com>
+M:	Satanand Burla <satananda.burla@caviumnetworks.com>
+M:	Felix Manlunas <felix.manlunas@caviumnetworks.com>
+M:	Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
+L:	netdev@vger.kernel.org
+W:	http://www.cavium.com
+S:	Supported
+F:	drivers/net/ethernet/cavium/liquidio/
+
 CAVIUM MMC DRIVER
 M:	Jan Glauber <jglauber@cavium.com>
 M:	David Daney <david.daney@cavium.com>
@@ -3156,16 +3147,6 @@ W:	http://www.cavium.com
 S:	Supported
 F:	drivers/mmc/host/cavium*
 
-CAVIUM LIQUIDIO NETWORK DRIVER
-M:     Derek Chickles <derek.chickles@caviumnetworks.com>
-M:     Satanand Burla <satananda.burla@caviumnetworks.com>
-M:     Felix Manlunas <felix.manlunas@caviumnetworks.com>
-M:     Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
-L:     netdev@vger.kernel.org
-W:     http://www.cavium.com
-S:     Supported
-F:     drivers/net/ethernet/cavium/liquidio/
-
 CAVIUM OCTEON-TX CRYPTO DRIVER
 M:	George Cherian <george.cherian@cavium.com>
 L:	linux-crypto@vger.kernel.org
@@ -3173,6 +3154,13 @@ W:	http://www.cavium.com
 S:	Supported
 F:	drivers/crypto/cavium/cpt/
 
+CAVIUM THUNDERX2 ARM64 SOC
+M:	Jayachandran C <jnair@caviumnetworks.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm64/boot/dts/cavium/thunder2-99xx*
+F:	Documentation/devicetree/bindings/arm/cavium-thunder2.txt
+
 CC2520 IEEE-802.15.4 RADIO DRIVER
 M:	Varka Bhadram <varkabhadram@gmail.com>
 L:	linux-wpan@vger.kernel.org
@@ -3332,12 +3320,6 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bleung/chrome-platform.git
 F:	drivers/platform/chrome/
 
-CIRRUS LOGIC EP93XX ETHERNET DRIVER
-M:	Hartley Sweeten <hsweeten@visionengravers.com>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/ethernet/cirrus/ep93xx_eth.c
-
 CIRRUS LOGIC AUDIO CODEC DRIVERS
 M:	Brian Austin <brian.austin@cirrus.com>
 M:	Paul Handrigan <Paul.Handrigan@cirrus.com>
@@ -3345,6 +3327,12 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Maintained
 F:	sound/soc/codecs/cs*
 
+CIRRUS LOGIC EP93XX ETHERNET DRIVER
+M:	Hartley Sweeten <hsweeten@visionengravers.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/cirrus/ep93xx_eth.c
+
 CISCO FCOE HBA DRIVER
 M:	Satish Kharat <satishkh@cisco.com>
 M:	Sesidhar Baddela <sebaddel@cisco.com>
@@ -3469,17 +3457,17 @@ L:	linux-pci@vger.kernel.org
 S:	Maintained
 F:	drivers/pci/hotplug/cpci_hotplug*
 
-COMPACTPCI HOTPLUG ZIATECH ZT5550 DRIVER
+COMPACTPCI HOTPLUG GENERIC DRIVER
 M:	Scott Murray <scott@spiteful.org>
 L:	linux-pci@vger.kernel.org
 S:	Maintained
-F:	drivers/pci/hotplug/cpcihp_zt5550.*
+F:	drivers/pci/hotplug/cpcihp_generic.c
 
-COMPACTPCI HOTPLUG GENERIC DRIVER
+COMPACTPCI HOTPLUG ZIATECH ZT5550 DRIVER
 M:	Scott Murray <scott@spiteful.org>
 L:	linux-pci@vger.kernel.org
 S:	Maintained
-F:	drivers/pci/hotplug/cpcihp_generic.c
+F:	drivers/pci/hotplug/cpcihp_zt5550.*
 
 COMPAL LAPTOP SUPPORT
 M:	Cezary Jackiewicz <cezary.jackiewicz@gmail.com>
@@ -3752,6 +3740,13 @@ S:	Supported
 F:	drivers/infiniband/hw/cxgb3/
 F:	include/uapi/rdma/cxgb3-abi.h
 
+CXGB4 CRYPTO DRIVER (chcr)
+M:	Harsh Jain <harsh@chelsio.com>
+L:	linux-crypto@vger.kernel.org
+W:	http://www.chelsio.com
+S:	Supported
+F:	drivers/crypto/chelsio
+
 CXGB4 ETHERNET DRIVER (CXGB4)
 M:	Ganesh Goudar <ganeshgr@chelsio.com>
 L:	netdev@vger.kernel.org
@@ -3774,13 +3769,6 @@ S:	Supported
 F:	drivers/infiniband/hw/cxgb4/
 F:	include/uapi/rdma/cxgb4-abi.h
 
-CXGB4 CRYPTO DRIVER (chcr)
-M:	Harsh Jain <harsh@chelsio.com>
-L:	linux-crypto@vger.kernel.org
-W:	http://www.chelsio.com
-S:	Supported
-F:	drivers/crypto/chelsio
-
 CXGB4VF ETHERNET DRIVER (CXGB4VF)
 M:	Casey Leedom <leedom@chelsio.com>
 L:	netdev@vger.kernel.org
@@ -3933,15 +3921,15 @@ L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	drivers/platform/x86/dell-laptop.c
 
-DELL LAPTOP RBTN DRIVER
+DELL LAPTOP FREEFALL DRIVER
 M:	Pali Rohár <pali.rohar@gmail.com>
 S:	Maintained
-F:	drivers/platform/x86/dell-rbtn.*
+F:	drivers/platform/x86/dell-smo8800.c
 
-DELL LAPTOP FREEFALL DRIVER
+DELL LAPTOP RBTN DRIVER
 M:	Pali Rohár <pali.rohar@gmail.com>
 S:	Maintained
-F:	drivers/platform/x86/dell-smo8800.c
+F:	drivers/platform/x86/dell-rbtn.*
 
 DELL LAPTOP SMM DRIVER
 M:	Pali Rohár <pali.rohar@gmail.com>
@@ -3961,12 +3949,6 @@ M:	Pali Rohár <pali.rohar@gmail.com>
 S:	Maintained
 F:	drivers/platform/x86/dell-wmi.c
 
-DENALI NAND DRIVER
-M:	Masahiro Yamada <yamada.masahiro@socionext.com>
-L:	linux-mtd@lists.infradead.org
-S:	Supported
-F:	drivers/mtd/nand/denali*
-
 DELTA ST MEDIA DRIVER
 M:	Hugues Fruchet <hugues.fruchet@st.com>
 L:	linux-media@vger.kernel.org
@@ -3975,6 +3957,12 @@ W:	https://linuxtv.org
 S:	Supported
 F:	drivers/media/platform/sti/delta
 
+DENALI NAND DRIVER
+M:	Masahiro Yamada <yamada.masahiro@socionext.com>
+L:	linux-mtd@lists.infradead.org
+S:	Supported
+F:	drivers/mtd/nand/denali*
+
 DESIGNWARE USB2 DRD IP DRIVER
 M:	John Youn <johnyoun@synopsys.com>
 L:	linux-usb@vger.kernel.org
@@ -4183,6 +4171,21 @@ F:	Documentation/devicetree/bindings/dma/
 F:	Documentation/dmaengine/
 T:	git git://git.infradead.org/users/vkoul/slave-dma.git
 
+DMA MAPPING HELPERS
+M:	Christoph Hellwig <hch@lst.de>
+M:	Marek Szyprowski <m.szyprowski@samsung.com>
+R:	Robin Murphy <robin.murphy@arm.com>
+L:	linux-kernel@vger.kernel.org
+T:	git git://git.infradead.org/users/hch/dma-mapping.git
+W:	http://git.infradead.org/users/hch/dma-mapping.git
+S:	Supported
+F:	lib/dma-debug.c
+F:	lib/dma-noop.c
+F:	lib/dma-virt.c
+F:	drivers/base/dma-mapping.c
+F:	drivers/base/dma-coherent.c
+F:	include/linux/dma-mapping.h
+
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
 L:	linux-hwmon@vger.kernel.org
@@ -4213,6 +4216,13 @@ X:	Documentation/spi
 X:	Documentation/media
 T:	git git://git.lwn.net/linux.git docs-next
 
+DONGWOON DW9714 LENS VOICE COIL DRIVER
+M:	Sakari Ailus <sakari.ailus@linux.intel.com>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Maintained
+F:	drivers/media/i2c/dw9714.c
+
 DOUBLETALK DRIVER
 M:	"James R. Van Zandt" <jrv@vanzandt.mv.com>
 L:	blinux-list@redhat.com
@@ -4272,37 +4282,6 @@ F:	drivers/power/avs/
 F:	include/linux/power/smartreflex.h
 L:	linux-pm@vger.kernel.org
 
-DRM DRIVERS
-M:	David Airlie <airlied@linux.ie>
-L:	dri-devel@lists.freedesktop.org
-T:	git git://people.freedesktop.org/~airlied/linux
-B:	https://bugs.freedesktop.org/
-C:	irc://chat.freenode.net/dri-devel
-S:	Maintained
-F:	drivers/gpu/drm/
-F:	drivers/gpu/vga/
-F:	Documentation/devicetree/bindings/display/
-F:	Documentation/devicetree/bindings/gpu/
-F:	Documentation/devicetree/bindings/video/
-F:	Documentation/gpu/
-F:	include/drm/
-F:	include/uapi/drm/
-F:	include/linux/vga*
-
-DRM DRIVERS AND MISC GPU PATCHES
-M:	Daniel Vetter <daniel.vetter@intel.com>
-M:	Jani Nikula <jani.nikula@linux.intel.com>
-M:	Sean Paul <seanpaul@chromium.org>
-W:	https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
-S:	Maintained
-T:	git git://anongit.freedesktop.org/drm/drm-misc
-F:	Documentation/gpu/
-F:	drivers/gpu/vga/
-F:	drivers/gpu/drm/*
-F:	include/drm/drm*
-F:	include/uapi/drm/drm*
-F:	include/linux/vga*
-
 DRM DRIVER FOR ARM PL111 CLCD
 M:	Eric Anholt <eric@anholt.net>
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -4314,14 +4293,6 @@ M:	Dave Airlie <airlied@redhat.com>
 S:	Odd Fixes
 F:	drivers/gpu/drm/ast/
 
-DRM DRIVERS FOR BRIDGE CHIPS
-M:	Archit Taneja <architt@codeaurora.org>
-M:	Andrzej Hajda <a.hajda@samsung.com>
-R:	Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
-S:	Maintained
-T:	git git://anongit.freedesktop.org/drm/drm-misc
-F:	drivers/gpu/drm/bridge/
-
 DRM DRIVER FOR BOCHS VIRTUAL GPU
 M:	Gerd Hoffmann <kraxel@redhat.com>
 L:	virtualization@lists.linux-foundation.org
@@ -4329,6 +4300,47 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 S:	Maintained
 F:	drivers/gpu/drm/bochs/
 
+DRM DRIVER FOR INTEL I810 VIDEO CARDS
+S:	Orphan / Obsolete
+F:	drivers/gpu/drm/i810/
+F:	include/uapi/drm/i810_drm.h
+
+DRM DRIVER FOR MATROX G200/G400 GRAPHICS CARDS
+S:	Orphan / Obsolete
+F:	drivers/gpu/drm/mga/
+F:	include/uapi/drm/mga_drm.h
+
+DRM DRIVER FOR MGA G200 SERVER GRAPHICS CHIPS
+M:	Dave Airlie <airlied@redhat.com>
+S:	Odd Fixes
+F:	drivers/gpu/drm/mgag200/
+
+DRM DRIVER FOR MI0283QT
+M:	Noralf Trønnes <noralf@tronnes.org>
+S:	Maintained
+F:	drivers/gpu/drm/tinydrm/mi0283qt.c
+F:	Documentation/devicetree/bindings/display/multi-inno,mi0283qt.txt
+
+DRM DRIVER FOR MSM ADRENO GPU
+M:	Rob Clark <robdclark@gmail.com>
+L:	linux-arm-msm@vger.kernel.org
+L:	dri-devel@lists.freedesktop.org
+L:	freedreno@lists.freedesktop.org
+T:	git git://people.freedesktop.org/~robclark/linux
+S:	Maintained
+F:	drivers/gpu/drm/msm/
+F:	include/uapi/drm/msm_drm.h
+F:	Documentation/devicetree/bindings/display/msm/
+
+DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
+M:	Ben Skeggs <bskeggs@redhat.com>
+L:	dri-devel@lists.freedesktop.org
+L:	nouveau@lists.freedesktop.org
+T:	git git://github.com/skeggsb/linux
+S:	Supported
+F:	drivers/gpu/drm/nouveau/
+F:	include/uapi/drm/nouveau_drm.h
+
 DRM DRIVER FOR QEMU'S CIRRUS DEVICE
 M:	Dave Airlie <airlied@redhat.com>
 M:	Gerd Hoffmann <kraxel@redhat.com>
@@ -4338,59 +4350,80 @@ S:	Obsolete
 W:	https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
 F:	drivers/gpu/drm/cirrus/
 
-RADEON and AMDGPU DRM DRIVERS
-M:	Alex Deucher <alexander.deucher@amd.com>
-M:	Christian König <christian.koenig@amd.com>
-L:	amd-gfx@lists.freedesktop.org
-T:	git git://people.freedesktop.org/~agd5f/linux
+DRM DRIVER FOR QXL VIRTUAL GPU
+M:	Dave Airlie <airlied@redhat.com>
+M:	Gerd Hoffmann <kraxel@redhat.com>
+L:	virtualization@lists.linux-foundation.org
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+S:	Maintained
+F:	drivers/gpu/drm/qxl/
+F:	include/uapi/drm/qxl_drm.h
+
+DRM DRIVER FOR RAGE 128 VIDEO CARDS
+S:	Orphan / Obsolete
+F:	drivers/gpu/drm/r128/
+F:	include/uapi/drm/r128_drm.h
+
+DRM DRIVER FOR SAVAGE VIDEO CARDS
+S:	Orphan / Obsolete
+F:	drivers/gpu/drm/savage/
+F:	include/uapi/drm/savage_drm.h
+
+DRM DRIVER FOR SIS VIDEO CARDS
+S:	Orphan / Obsolete
+F:	drivers/gpu/drm/sis/
+F:	include/uapi/drm/sis_drm.h
+
+DRM DRIVER FOR TDFX VIDEO CARDS
+S:	Orphan / Obsolete
+F:	drivers/gpu/drm/tdfx/
+
+DRM DRIVER FOR USB DISPLAYLINK VIDEO ADAPTERS
+M:	Dave Airlie <airlied@redhat.com>
+S:	Odd Fixes
+F:	drivers/gpu/drm/udl/
+
+DRM DRIVER FOR VMWARE VIRTUAL GPU
+M:	"VMware Graphics" <linux-graphics-maintainer@vmware.com>
+M:	Sinclair Yeh <syeh@vmware.com>
+M:	Thomas Hellstrom <thellstrom@vmware.com>
+L:	dri-devel@lists.freedesktop.org
+T:	git git://people.freedesktop.org/~syeh/repos_linux
+T:	git git://people.freedesktop.org/~thomash/linux
 S:	Supported
-F:	drivers/gpu/drm/radeon/
-F:	include/uapi/drm/radeon_drm.h
-F:	drivers/gpu/drm/amd/
-F:	include/uapi/drm/amdgpu_drm.h
+F:	drivers/gpu/drm/vmwgfx/
+F:	include/uapi/drm/vmwgfx_drm.h
 
-DRM PANEL DRIVERS
-M:	Thierry Reding <thierry.reding@gmail.com>
+DRM DRIVERS
+M:	David Airlie <airlied@linux.ie>
 L:	dri-devel@lists.freedesktop.org
-T:	git git://anongit.freedesktop.org/tegra/linux.git
+T:	git git://people.freedesktop.org/~airlied/linux
+B:	https://bugs.freedesktop.org/
+C:	irc://chat.freenode.net/dri-devel
 S:	Maintained
-F:	drivers/gpu/drm/drm_panel.c
-F:	drivers/gpu/drm/panel/
-F:	include/drm/drm_panel.h
-F:	Documentation/devicetree/bindings/display/panel/
+F:	drivers/gpu/drm/
+F:	drivers/gpu/vga/
+F:	Documentation/devicetree/bindings/display/
+F:	Documentation/devicetree/bindings/gpu/
+F:	Documentation/devicetree/bindings/video/
+F:	Documentation/gpu/
+F:	include/drm/
+F:	include/uapi/drm/
+F:	include/linux/vga*
 
-INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
+DRM DRIVERS AND MISC GPU PATCHES
 M:	Daniel Vetter <daniel.vetter@intel.com>
 M:	Jani Nikula <jani.nikula@linux.intel.com>
-L:	intel-gfx@lists.freedesktop.org
-W:	https://01.org/linuxgraphics/
-B:	https://01.org/linuxgraphics/documentation/how-report-bugs
-C:	irc://chat.freenode.net/intel-gfx
-Q:	http://patchwork.freedesktop.org/project/intel-gfx/
-T:	git git://anongit.freedesktop.org/drm-intel
-S:	Supported
-F:	drivers/gpu/drm/i915/
-F:	include/drm/i915*
-F:	include/uapi/drm/i915_drm.h
-F:	Documentation/gpu/i915.rst
-
-INTEL GVT-g DRIVERS (Intel GPU Virtualization)
-M:      Zhenyu Wang <zhenyuw@linux.intel.com>
-M:      Zhi Wang <zhi.a.wang@intel.com>
-L:      intel-gvt-dev@lists.freedesktop.org
-L:      intel-gfx@lists.freedesktop.org
-W:      https://01.org/igvt-g
-T:      git https://github.com/01org/gvt-linux.git
-S:      Supported
-F:      drivers/gpu/drm/i915/gvt/
-
-DRM DRIVERS FOR ATMEL HLCDC
-M:	Boris Brezillon <boris.brezillon@free-electrons.com>
-L:	dri-devel@lists.freedesktop.org
-S:	Supported
-F:	drivers/gpu/drm/atmel-hlcdc/
-F:	Documentation/devicetree/bindings/drm/atmel/
+M:	Sean Paul <seanpaul@chromium.org>
+W:	https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
+S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
+F:	Documentation/gpu/
+F:	drivers/gpu/vga/
+F:	drivers/gpu/drm/*
+F:	include/drm/drm*
+F:	include/uapi/drm/drm*
+F:	include/linux/vga*
 
 DRM DRIVERS FOR ALLWINNER A10
 M:	Maxime Ripard  <maxime.ripard@free-electrons.com>
@@ -4412,6 +4445,22 @@ F:	Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt
 F:	Documentation/gpu/meson.rst
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
+DRM DRIVERS FOR ATMEL HLCDC
+M:	Boris Brezillon <boris.brezillon@free-electrons.com>
+L:	dri-devel@lists.freedesktop.org
+S:	Supported
+F:	drivers/gpu/drm/atmel-hlcdc/
+F:	Documentation/devicetree/bindings/drm/atmel/
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
+DRM DRIVERS FOR BRIDGE CHIPS
+M:	Archit Taneja <architt@codeaurora.org>
+M:	Andrzej Hajda <a.hajda@samsung.com>
+R:	Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
+S:	Maintained
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+F:	drivers/gpu/drm/bridge/
+
 DRM DRIVERS FOR EXYNOS
 M:	Inki Dae <inki.dae@samsung.com>
 M:	Joonyoung Shim <jy0922.shim@samsung.com>
@@ -4460,11 +4509,6 @@ S:	Maintained
 F:	drivers/gpu/drm/hisilicon/
 F:	Documentation/devicetree/bindings/display/hisilicon/
 
-DRM DRIVER FOR INTEL I810 VIDEO CARDS
-S:	Orphan / Obsolete
-F:	drivers/gpu/drm/i810/
-F:	include/uapi/drm/i810_drm.h
-
 DRM DRIVERS FOR MEDIATEK
 M:	CK Hu <ck.hu@mediatek.com>
 M:	Philipp Zabel <p.zabel@pengutronix.de>
@@ -4473,32 +4517,6 @@ S:	Supported
 F:	drivers/gpu/drm/mediatek/
 F:	Documentation/devicetree/bindings/display/mediatek/
 
-DRM DRIVER FOR MI0283QT
-M:	Noralf Trønnes <noralf@tronnes.org>
-S:	Maintained
-F:	drivers/gpu/drm/tinydrm/mi0283qt.c
-F:	Documentation/devicetree/bindings/display/multi-inno,mi0283qt.txt
-
-DRM DRIVER FOR MSM ADRENO GPU
-M:	Rob Clark <robdclark@gmail.com>
-L:	linux-arm-msm@vger.kernel.org
-L:	dri-devel@lists.freedesktop.org
-L:	freedreno@lists.freedesktop.org
-T:	git git://people.freedesktop.org/~robclark/linux
-S:	Maintained
-F:	drivers/gpu/drm/msm/
-F:	include/uapi/drm/msm_drm.h
-F:	Documentation/devicetree/bindings/display/msm/
-
-DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
-M:	Ben Skeggs <bskeggs@redhat.com>
-L:	dri-devel@lists.freedesktop.org
-L:	nouveau@lists.freedesktop.org
-T:	git git://github.com/skeggsb/linux
-S:	Supported
-F:	drivers/gpu/drm/nouveau/
-F:	include/uapi/drm/nouveau_drm.h
-
 DRM DRIVERS FOR NVIDIA TEGRA
 M:	Thierry Reding <thierry.reding@gmail.com>
 L:	dri-devel@lists.freedesktop.org
@@ -4511,21 +4529,6 @@ F:	include/linux/host1x.h
 F:	include/uapi/drm/tegra_drm.h
 F:	Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
 
-DRM DRIVER FOR MATROX G200/G400 GRAPHICS CARDS
-S:	Orphan / Obsolete
-F:	drivers/gpu/drm/mga/
-F:	include/uapi/drm/mga_drm.h
-
-DRM DRIVER FOR MGA G200 SERVER GRAPHICS CHIPS
-M:	Dave Airlie <airlied@redhat.com>
-S:	Odd Fixes
-F:	drivers/gpu/drm/mgag200/
-
-DRM DRIVER FOR RAGE 128 VIDEO CARDS
-S:	Orphan / Obsolete
-F:	drivers/gpu/drm/r128/
-F:	include/uapi/drm/r128_drm.h
-
 DRM DRIVERS FOR RENESAS
 M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:	dri-devel@lists.freedesktop.org
@@ -4538,15 +4541,6 @@ F:	include/linux/platform_data/shmob_drm.h
 F:	Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
 F:	Documentation/devicetree/bindings/display/renesas,du.txt
 
-DRM DRIVER FOR QXL VIRTUAL GPU
-M:	Dave Airlie <airlied@redhat.com>
-M:	Gerd Hoffmann <kraxel@redhat.com>
-L:	virtualization@lists.linux-foundation.org
-T:	git git://anongit.freedesktop.org/drm/drm-misc
-S:	Maintained
-F:	drivers/gpu/drm/qxl/
-F:	include/uapi/drm/qxl_drm.h
-
 DRM DRIVERS FOR ROCKCHIP
 M:	Mark Yao <mark.yao@rock-chips.com>
 L:	dri-devel@lists.freedesktop.org
@@ -4555,16 +4549,6 @@ F:	drivers/gpu/drm/rockchip/
 F:	Documentation/devicetree/bindings/display/rockchip/
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
-DRM DRIVER FOR SAVAGE VIDEO CARDS
-S:	Orphan / Obsolete
-F:	drivers/gpu/drm/savage/
-F:	include/uapi/drm/savage_drm.h
-
-DRM DRIVER FOR SIS VIDEO CARDS
-S:	Orphan / Obsolete
-F:	drivers/gpu/drm/sis/
-F:	include/uapi/drm/sis_drm.h
-
 DRM DRIVERS FOR STI
 M:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
 M:	Vincent Abriou <vincent.abriou@st.com>
@@ -4585,36 +4569,20 @@ S:	Maintained
 F:	drivers/gpu/drm/stm
 F:	Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
 
-DRM DRIVER FOR TDFX VIDEO CARDS
-S:	Orphan / Obsolete
-F:	drivers/gpu/drm/tdfx/
-
-DRM DRIVER FOR USB DISPLAYLINK VIDEO ADAPTERS
-M:	Dave Airlie <airlied@redhat.com>
-S:	Odd Fixes
-F:	drivers/gpu/drm/udl/
-
-DRM DRIVERS FOR VIVANTE GPU IP
-M:	Lucas Stach <l.stach@pengutronix.de>
-R:	Russell King <linux+etnaviv@armlinux.org.uk>
-R:	Christian Gmeiner <christian.gmeiner@gmail.com>
-L:	etnaviv@lists.freedesktop.org
+DRM DRIVERS FOR TI LCDC
+M:	Jyri Sarha <jsarha@ti.com>
+R:	Tomi Valkeinen <tomi.valkeinen@ti.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Maintained
-F:	drivers/gpu/drm/etnaviv/
-F:	include/uapi/drm/etnaviv_drm.h
-F:	Documentation/devicetree/bindings/display/etnaviv/
+F:	drivers/gpu/drm/tilcdc/
+F:	Documentation/devicetree/bindings/display/tilcdc/
 
-DRM DRIVER FOR VMWARE VIRTUAL GPU
-M:	"VMware Graphics" <linux-graphics-maintainer@vmware.com>
-M:	Sinclair Yeh <syeh@vmware.com>
-M:	Thomas Hellstrom <thellstrom@vmware.com>
+DRM DRIVERS FOR TI OMAP
+M:	Tomi Valkeinen <tomi.valkeinen@ti.com>
 L:	dri-devel@lists.freedesktop.org
-T:	git git://people.freedesktop.org/~syeh/repos_linux
-T:	git git://people.freedesktop.org/~thomash/linux
-S:	Supported
-F:	drivers/gpu/drm/vmwgfx/
-F:	include/uapi/drm/vmwgfx_drm.h
+S:	Maintained
+F:	drivers/gpu/drm/omapdrm/
+F:	Documentation/devicetree/bindings/display/ti/
 
 DRM DRIVERS FOR VC4
 M:	Eric Anholt <eric@anholt.net>
@@ -4625,20 +4593,16 @@ F:	include/uapi/drm/vc4_drm.h
 F:	Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
-DRM DRIVERS FOR TI OMAP
-M:	Tomi Valkeinen <tomi.valkeinen@ti.com>
-L:	dri-devel@lists.freedesktop.org
-S:	Maintained
-F:	drivers/gpu/drm/omapdrm/
-F:	Documentation/devicetree/bindings/display/ti/
-
-DRM DRIVERS FOR TI LCDC
-M:	Jyri Sarha <jsarha@ti.com>
-R:	Tomi Valkeinen <tomi.valkeinen@ti.com>
+DRM DRIVERS FOR VIVANTE GPU IP
+M:	Lucas Stach <l.stach@pengutronix.de>
+R:	Russell King <linux+etnaviv@armlinux.org.uk>
+R:	Christian Gmeiner <christian.gmeiner@gmail.com>
+L:	etnaviv@lists.freedesktop.org
 L:	dri-devel@lists.freedesktop.org
 S:	Maintained
-F:	drivers/gpu/drm/tilcdc/
-F:	Documentation/devicetree/bindings/display/tilcdc/
+F:	drivers/gpu/drm/etnaviv/
+F:	include/uapi/drm/etnaviv_drm.h
+F:	Documentation/devicetree/bindings/display/etnaviv/
 
 DRM DRIVERS FOR ZTE ZX
 M:	Shawn Guo <shawnguo@kernel.org>
@@ -4648,6 +4612,16 @@ F:	drivers/gpu/drm/zte/
 F:	Documentation/devicetree/bindings/display/zte,vou.txt
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 
+DRM PANEL DRIVERS
+M:	Thierry Reding <thierry.reding@gmail.com>
+L:	dri-devel@lists.freedesktop.org
+T:	git git://anongit.freedesktop.org/tegra/linux.git
+S:	Maintained
+F:	drivers/gpu/drm/drm_panel.c
+F:	drivers/gpu/drm/panel/
+F:	include/drm/drm_panel.h
+F:	Documentation/devicetree/bindings/display/panel/
+
 DSBR100 USB FM RADIO DRIVER
 M:	Alexey Klimov <klimov.linux@gmail.com>
 L:	linux-media@vger.kernel.org
@@ -4779,13 +4753,6 @@ S:	Maintained
 F:	drivers/media/usb/dvb-usb-v2/dvb_usb*
 F:	drivers/media/usb/dvb-usb-v2/usb_urb.c
 
-DONGWOON DW9714 LENS VOICE COIL DRIVER
-M:	Sakari Ailus <sakari.ailus@linux.intel.com>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Maintained
-F:	drivers/media/i2c/dw9714.c
-
 DYNAMIC DEBUG
 M:	Jason Baron <jbaron@akamai.com>
 S:	Maintained
@@ -4841,19 +4808,6 @@ S:	Supported
 F:	Documentation/filesystems/ecryptfs.txt
 F:	fs/ecryptfs/
 
-EDAC-CORE
-M:	Borislav Petkov <bp@alien8.de>
-M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
-M:	Mauro Carvalho Chehab <mchehab@kernel.org>
-L:	linux-edac@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next
-S:	Supported
-F:	Documentation/admin-guide/ras.rst
-F:	Documentation/driver-api/edac.rst
-F:	drivers/edac/
-F:	include/linux/edac.h
-
 EDAC-AMD64
 M:	Borislav Petkov <bp@alien8.de>
 L:	linux-edac@vger.kernel.org
@@ -4875,6 +4829,19 @@ S:	Supported
 F:	drivers/edac/octeon_edac*
 F:	drivers/edac/thunderx_edac*
 
+EDAC-CORE
+M:	Borislav Petkov <bp@alien8.de>
+M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
+M:	Mauro Carvalho Chehab <mchehab@kernel.org>
+L:	linux-edac@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next
+S:	Supported
+F:	Documentation/admin-guide/ras.rst
+F:	Documentation/driver-api/edac.rst
+F:	drivers/edac/
+F:	include/linux/edac.h
+
 EDAC-E752X
 M:	Mark Gross <mark.gross@intel.com>
 L:	linux-edac@vger.kernel.org
@@ -4899,12 +4866,6 @@ L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/ghes_edac.c
 
-EDAC-I82443BXGX
-M:	Tim Small <tim@buttersideup.com>
-L:	linux-edac@vger.kernel.org
-S:	Maintained
-F:	drivers/edac/i82443bxgx_edac.c
-
 EDAC-I3000
 L:	linux-edac@vger.kernel.org
 S:	Orphan
@@ -4936,6 +4897,12 @@ L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/i7core_edac.c
 
+EDAC-I82443BXGX
+M:	Tim Small <tim@buttersideup.com>
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/i82443bxgx_edac.c
+
 EDAC-I82975X
 M:	Ranganathan Desikan <ravi@jetztechnologies.com>
 M:	"Arvind R." <arvino55@gmail.com>
@@ -4955,18 +4922,18 @@ L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/mpc85xx_edac.[ch]
 
-EDAC-PND2
-M:	Tony Luck <tony.luck@intel.com>
-L:	linux-edac@vger.kernel.org
-S:	Maintained
-F:	drivers/edac/pnd2_edac.[ch]
-
 EDAC-PASEMI
 M:	Egor Martovetsky <egor@pasemi.com>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/pasemi_edac.c
 
+EDAC-PND2
+M:	Tony Luck <tony.luck@intel.com>
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/pnd2_edac.[ch]
+
 EDAC-R82600
 M:	Tim Small <tim@buttersideup.com>
 L:	linux-edac@vger.kernel.org
@@ -4986,12 +4953,6 @@ L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/skx_edac.c
 
-APPLIED MICRO (APM) X-GENE SOC EDAC
-M:     Loc Ho <lho@apm.com>
-S:     Supported
-F:     drivers/edac/xgene_edac.c
-F:     Documentation/devicetree/bindings/edac/apm-xgene-edac.txt
-
 EDIROL UA-101/UA-1000 DRIVER
 M:	Clemens Ladisch <clemens@ladisch.de>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -4999,21 +4960,12 @@ T:	git git://git.alsa-project.org/alsa-kernel.git
 S:	Maintained
 F:	sound/usb/misc/ua101.c
 
-EXTENSIBLE FIRMWARE INTERFACE (EFI)
-M:	Matt Fleming <matt@codeblueprint.co.uk>
-M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+EFI TEST DRIVER
 L:	linux-efi@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
+M:	Ivan Hu <ivan.hu@canonical.com>
+M:	Matt Fleming <matt@codeblueprint.co.uk>
 S:	Maintained
-F:	Documentation/efi-stub.txt
-F:	arch/*/kernel/efi.c
-F:	arch/x86/boot/compressed/eboot.[ch]
-F:	arch/*/include/asm/efi.h
-F:	arch/x86/platform/efi/
-F:	drivers/firmware/efi/
-F:	include/linux/efi*.h
-F:	arch/arm/boot/compressed/efi-header.S
-F:	arch/arm64/kernel/efi-entry.S
+F:	drivers/firmware/efi/test/
 
 EFI VARIABLE FILESYSTEM
 M:	Matthew Garrett <matthew.garrett@nebula.com>
@@ -5030,13 +4982,6 @@ M:	Peter Jones <pjones@redhat.com>
 S:	Maintained
 F:	drivers/video/fbdev/efifb.c
 
-EFI TEST DRIVER
-L:	linux-efi@vger.kernel.org
-M:	Ivan Hu <ivan.hu@canonical.com>
-M:	Matt Fleming <matt@codeblueprint.co.uk>
-S:	Maintained
-F:	drivers/firmware/efi/test/
-
 EFS FILESYSTEM
 W:	http://aeschi.ch.eu.org/efs/
 S:	Orphan
@@ -5178,6 +5123,22 @@ L:	linux-security-module@vger.kernel.org
 S:	Supported
 F:	security/integrity/evm/
 
+EXTENSIBLE FIRMWARE INTERFACE (EFI)
+M:	Matt Fleming <matt@codeblueprint.co.uk>
+M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+L:	linux-efi@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
+S:	Maintained
+F:	Documentation/efi-stub.txt
+F:	arch/*/kernel/efi.c
+F:	arch/x86/boot/compressed/eboot.[ch]
+F:	arch/*/include/asm/efi.h
+F:	arch/x86/platform/efi/
+F:	drivers/firmware/efi/
+F:	include/linux/efi*.h
+F:	arch/arm/boot/compressed/efi-header.S
+F:	arch/arm64/kernel/efi-entry.S
+
 EXTERNAL CONNECTOR SUBSYSTEM (EXTCON)
 M:	MyungJoo Ham <myungjoo.ham@samsung.com>
 M:	Chanwoo Choi <cw00.choi@samsung.com>
@@ -5428,6 +5389,14 @@ L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	drivers/dma/fsldma.*
 
+FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
+M:	Claudiu Manoil <claudiu.manoil@freescale.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/freescale/gianfar*
+X:	drivers/net/ethernet/freescale/gianfar_ptp.c
+F:	Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
+
 FREESCALE GPMI NAND DRIVER
 M:	Han Xu <han.xu@nxp.com>
 L:	linux-mtd@lists.infradead.org
@@ -5441,6 +5410,15 @@ L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-cpm.c
 
+FREESCALE IMX / MXC FEC DRIVER
+M:	Fugang Duan <fugang.duan@nxp.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/freescale/fec_main.c
+F:	drivers/net/ethernet/freescale/fec_ptp.c
+F:	drivers/net/ethernet/freescale/fec.h
+F:	Documentation/devicetree/bindings/net/fsl-fec.txt
+
 FREESCALE IMX / MXC FRAMEBUFFER DRIVER
 M:	Sascha Hauer <kernel@pengutronix.de>
 L:	linux-fbdev@vger.kernel.org
@@ -5449,29 +5427,11 @@ S:	Maintained
 F:	include/linux/platform_data/video-imxfb.h
 F:	drivers/video/fbdev/imxfb.c
 
-FREESCALE QUAD SPI DRIVER
-M:	Han Xu <han.xu@nxp.com>
-L:	linux-mtd@lists.infradead.org
-S:	Maintained
-F:	drivers/mtd/spi-nor/fsl-quadspi.c
-
-FREESCALE SOC FS_ENET DRIVER
-M:	Pantelis Antoniou <pantelis.antoniou@gmail.com>
-M:	Vitaly Bordug <vbordug@ru.mvista.com>
-L:	linuxppc-dev@lists.ozlabs.org
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/ethernet/freescale/fs_enet/
-F:	include/linux/fs_enet_pd.h
-
-FREESCALE IMX / MXC FEC DRIVER
-M:	Fugang Duan <fugang.duan@nxp.com>
+FREESCALE QORIQ DPAA ETHERNET DRIVER
+M:	Madalin Bucur <madalin.bucur@nxp.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	drivers/net/ethernet/freescale/fec_main.c
-F:	drivers/net/ethernet/freescale/fec_ptp.c
-F:	drivers/net/ethernet/freescale/fec.h
-F:	Documentation/devicetree/bindings/net/fsl-fec.txt
+F:	drivers/net/ethernet/freescale/dpaa
 
 FREESCALE QORIQ DPAA FMAN DRIVER
 M:	Madalin Bucur <madalin.bucur@nxp.com>
@@ -5480,20 +5440,11 @@ S:	Maintained
 F:	drivers/net/ethernet/freescale/fman
 F:	Documentation/devicetree/bindings/powerpc/fsl/fman.txt
 
-FREESCALE QORIQ DPAA ETHERNET DRIVER
-M:	Madalin Bucur <madalin.bucur@nxp.com>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/ethernet/freescale/dpaa
-
-FREESCALE SOC DRIVERS
-M:	Li Yang <leoyang.li@nxp.com>
-L:	linuxppc-dev@lists.ozlabs.org
-L:	linux-arm-kernel@lists.infradead.org
+FREESCALE QUAD SPI DRIVER
+M:	Han Xu <han.xu@nxp.com>
+L:	linux-mtd@lists.infradead.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/soc/fsl/
-F:	drivers/soc/fsl/
-F:	include/linux/fsl/
+F:	drivers/mtd/spi-nor/fsl-quadspi.c
 
 FREESCALE QUICC ENGINE LIBRARY
 M:	Qiang Zhao <qiang.zhao@nxp.com>
@@ -5503,13 +5454,6 @@ F:	drivers/soc/fsl/qe/
 F:	include/soc/fsl/*qe*.h
 F:	include/soc/fsl/*ucc*.h
 
-FREESCALE USB PERIPHERAL DRIVERS
-M:	Li Yang <leoyang.li@nxp.com>
-L:	linux-usb@vger.kernel.org
-L:	linuxppc-dev@lists.ozlabs.org
-S:	Maintained
-F:	drivers/usb/gadget/udc/fsl*
-
 FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
 M:	Li Yang <leoyang.li@nxp.com>
 L:	netdev@vger.kernel.org
@@ -5517,14 +5461,6 @@ L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	drivers/net/ethernet/freescale/ucc_geth*
 
-FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
-M:	Claudiu Manoil <claudiu.manoil@freescale.com>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/ethernet/freescale/gianfar*
-X:	drivers/net/ethernet/freescale/gianfar_ptp.c
-F:	Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
-
 FREESCALE QUICC ENGINE UCC HDLC DRIVER
 M:	Zhao Qiang <qiang.zhao@nxp.com>
 L:	netdev@vger.kernel.org
@@ -5538,6 +5474,24 @@ L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	drivers/tty/serial/ucc_uart.c
 
+FREESCALE SOC DRIVERS
+M:	Li Yang <leoyang.li@nxp.com>
+L:	linuxppc-dev@lists.ozlabs.org
+L:	linux-arm-kernel@lists.infradead.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/soc/fsl/
+F:	drivers/soc/fsl/
+F:	include/linux/fsl/
+
+FREESCALE SOC FS_ENET DRIVER
+M:	Pantelis Antoniou <pantelis.antoniou@gmail.com>
+M:	Vitaly Bordug <vbordug@ru.mvista.com>
+L:	linuxppc-dev@lists.ozlabs.org
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/freescale/fs_enet/
+F:	include/linux/fs_enet_pd.h
+
 FREESCALE SOC SOUND DRIVERS
 M:	Timur Tabi <timur@tabi.org>
 M:	Nicolin Chen <nicoleotsuka@gmail.com>
@@ -5550,6 +5504,13 @@ F:	sound/soc/fsl/fsl*
 F:	sound/soc/fsl/imx*
 F:	sound/soc/fsl/mpc8610_hpcd.c
 
+FREESCALE USB PERIPHERAL DRIVERS
+M:	Li Yang <leoyang.li@nxp.com>
+L:	linux-usb@vger.kernel.org
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Maintained
+F:	drivers/usb/gadget/udc/fsl*
+
 FREEVXFS FILESYSTEM
 M:	Christoph Hellwig <hch@infradead.org>
 W:	ftp://ftp.openlinux.org/pub/people/hch/vxfs
@@ -5785,6 +5746,15 @@ L:	linux-input@vger.kernel.org
 S:	Maintained
 F:	drivers/input/touchscreen/goodix.c
 
+GPIO ACPI SUPPORT
+M:	Mika Westerberg <mika.westerberg@linux.intel.com>
+M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+L:	linux-gpio@vger.kernel.org
+L:	linux-acpi@vger.kernel.org
+S:	Maintained
+F:	Documentation/acpi/gpio-properties.txt
+F:	drivers/gpio/gpiolib-acpi.c
+
 GPIO MOCKUP DRIVER
 M:	Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
 L:	linux-gpio@vger.kernel.org
@@ -5808,15 +5778,6 @@ F:	include/asm-generic/gpio.h
 F:	include/uapi/linux/gpio.h
 F:	tools/gpio/
 
-GPIO ACPI SUPPORT
-M:	Mika Westerberg <mika.westerberg@linux.intel.com>
-M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
-L:	linux-gpio@vger.kernel.org
-L:	linux-acpi@vger.kernel.org
-S:	Maintained
-F:	Documentation/acpi/gpio-properties.txt
-F:	drivers/gpio/gpiolib-acpi.c
-
 GRE DEMULTIPLEXER DRIVER
 M:	Dmitry Kozlov <xeb@mail.ru>
 L:	netdev@vger.kernel.org
@@ -5831,14 +5792,6 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/aeroflex/
 
-GREYBUS SUBSYSTEM
-M:	Johan Hovold <johan@kernel.org>
-M:	Alex Elder <elder@kernel.org>
-M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-S:	Maintained
-F:	drivers/staging/greybus/
-L:	greybus-dev@lists.linaro.org (moderated for non-subscribers)
-
 GREYBUS AUDIO PROTOCOLS DRIVERS
 M:	Vaibhav Agarwal <vaibhav.sr@gmail.com>
 M:	Mark Greer <mgreer@animalcreek.com>
@@ -5856,23 +5809,6 @@ F:	drivers/staging/greybus/audio_manager_sysfs.c
 F:	drivers/staging/greybus/audio_module.c
 F:	drivers/staging/greybus/audio_topology.c
 
-GREYBUS SDIO/GPIO/SPI PROTOCOLS DRIVERS
-M:	Rui Miguel Silva <rmfrfs@gmail.com>
-S:	Maintained
-F:	drivers/staging/greybus/sdio.c
-F:	drivers/staging/greybus/light.c
-F:	drivers/staging/greybus/gpio.c
-F:	drivers/staging/greybus/power_supply.c
-F:	drivers/staging/greybus/spi.c
-F:	drivers/staging/greybus/spilib.c
-
-GREYBUS LOOBACK/TIME PROTOCOLS DRIVERS
-M:	Bryan O'Donoghue <pure.logic@nexus-software.ie>
-S:	Maintained
-F:	drivers/staging/greybus/loopback.c
-F:	drivers/staging/greybus/timesync.c
-F:	drivers/staging/greybus/timesync_platform.c
-
 GREYBUS FW/HID/SPI PROTOCOLS DRIVERS
 M:	Viresh Kumar <vireshk@kernel.org>
 S:	Maintained
@@ -5890,11 +5826,12 @@ F:	drivers/staging/greybus/spi.c
 F:	drivers/staging/greybus/spilib.c
 F:	drivers/staging/greybus/spilib.h
 
-GREYBUS UART PROTOCOLS DRIVERS
-M:	David Lin <dtwlin@gmail.com>
+GREYBUS LOOBACK/TIME PROTOCOLS DRIVERS
+M:	Bryan O'Donoghue <pure.logic@nexus-software.ie>
 S:	Maintained
-F:	drivers/staging/greybus/uart.c
-F:	drivers/staging/greybus/log.c
+F:	drivers/staging/greybus/loopback.c
+F:	drivers/staging/greybus/timesync.c
+F:	drivers/staging/greybus/timesync_platform.c
 
 GREYBUS PLATFORM DRIVERS
 M:	Vaibhav Hiremath <hvaibhav.linux@gmail.com>
@@ -5903,6 +5840,30 @@ F:	drivers/staging/greybus/arche-platform.c
 F:	drivers/staging/greybus/arche-apb-ctrl.c
 F:	drivers/staging/greybus/arche_platform.h
 
+GREYBUS SDIO/GPIO/SPI PROTOCOLS DRIVERS
+M:	Rui Miguel Silva <rmfrfs@gmail.com>
+S:	Maintained
+F:	drivers/staging/greybus/sdio.c
+F:	drivers/staging/greybus/light.c
+F:	drivers/staging/greybus/gpio.c
+F:	drivers/staging/greybus/power_supply.c
+F:	drivers/staging/greybus/spi.c
+F:	drivers/staging/greybus/spilib.c
+
+GREYBUS SUBSYSTEM
+M:	Johan Hovold <johan@kernel.org>
+M:	Alex Elder <elder@kernel.org>
+M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+S:	Maintained
+F:	drivers/staging/greybus/
+L:	greybus-dev@lists.linaro.org (moderated for non-subscribers)
+
+GREYBUS UART PROTOCOLS DRIVERS
+M:	David Lin <dtwlin@gmail.com>
+S:	Maintained
+F:	drivers/staging/greybus/uart.c
+F:	drivers/staging/greybus/log.c
+
 GS1662 VIDEO SERIALIZER
 M:	Charles-Antoine Couret <charles-antoine.couret@nexvision.fr>
 L:	linux-media@vger.kernel.org
@@ -6309,6 +6270,13 @@ L:	linuxppc-dev@lists.ozlabs.org
 S:	Odd Fixes
 F:	drivers/tty/hvc/
 
+I2C ACPI SUPPORT
+M:	Mika Westerberg <mika.westerberg@linux.intel.com>
+L:	linux-i2c@vger.kernel.org
+L:	linux-acpi@vger.kernel.org
+S:	Maintained
+F:	drivers/i2c/i2c-core-acpi.c
+
 I2C MUXES
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-i2c@vger.kernel.org
@@ -6331,6 +6299,36 @@ F:	Documentation/i2c/busses/i2c-parport-light
 F:	drivers/i2c/busses/i2c-parport.c
 F:	drivers/i2c/busses/i2c-parport-light.c
 
+I2C SUBSYSTEM
+M:	Wolfram Sang <wsa@the-dreams.de>
+L:	linux-i2c@vger.kernel.org
+W:	https://i2c.wiki.kernel.org/
+Q:	https://patchwork.ozlabs.org/project/linux-i2c/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
+S:	Maintained
+F:	Documentation/devicetree/bindings/i2c/
+F:	Documentation/i2c/
+F:	drivers/i2c/
+F:	drivers/i2c/*/
+F:	include/linux/i2c.h
+F:	include/linux/i2c-*.h
+F:	include/uapi/linux/i2c.h
+F:	include/uapi/linux/i2c-*.h
+
+I2C-TAOS-EVM DRIVER
+M:	Jean Delvare <jdelvare@suse.com>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	Documentation/i2c/busses/i2c-taos-evm
+F:	drivers/i2c/busses/i2c-taos-evm.c
+
+I2C-TINY-USB DRIVER
+M:	Till Harbaum <till@harbaum.org>
+L:	linux-i2c@vger.kernel.org
+W:	http://www.harbaum.org/till/i2c_tiny_usb
+S:	Maintained
+F:	drivers/i2c/busses/i2c-tiny-usb.c
+
 I2C/SMBUS CONTROLLER DRIVERS FOR PC
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-i2c@vger.kernel.org
@@ -6372,48 +6370,11 @@ L:	linux-i2c@vger.kernel.org
 F:	drivers/i2c/busses/i2c-ismt.c
 F:	Documentation/i2c/busses/i2c-ismt
 
-I2C/SMBUS STUB DRIVER
-M:	Jean Delvare <jdelvare@suse.com>
-L:	linux-i2c@vger.kernel.org
-S:	Maintained
-F:	drivers/i2c/i2c-stub.c
-
-I2C SUBSYSTEM
-M:	Wolfram Sang <wsa@the-dreams.de>
-L:	linux-i2c@vger.kernel.org
-W:	https://i2c.wiki.kernel.org/
-Q:	https://patchwork.ozlabs.org/project/linux-i2c/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
-S:	Maintained
-F:	Documentation/devicetree/bindings/i2c/
-F:	Documentation/i2c/
-F:	drivers/i2c/
-F:	drivers/i2c/*/
-F:	include/linux/i2c.h
-F:	include/linux/i2c-*.h
-F:	include/uapi/linux/i2c.h
-F:	include/uapi/linux/i2c-*.h
-
-I2C ACPI SUPPORT
-M:	Mika Westerberg <mika.westerberg@linux.intel.com>
-L:	linux-i2c@vger.kernel.org
-L:	linux-acpi@vger.kernel.org
-S:	Maintained
-F:	drivers/i2c/i2c-core-acpi.c
-
-I2C-TAOS-EVM DRIVER
-M:	Jean Delvare <jdelvare@suse.com>
-L:	linux-i2c@vger.kernel.org
-S:	Maintained
-F:	Documentation/i2c/busses/i2c-taos-evm
-F:	drivers/i2c/busses/i2c-taos-evm.c
-
-I2C-TINY-USB DRIVER
-M:	Till Harbaum <till@harbaum.org>
+I2C/SMBUS STUB DRIVER
+M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-i2c@vger.kernel.org
-W:	http://www.harbaum.org/till/i2c_tiny_usb
 S:	Maintained
-F:	drivers/i2c/busses/i2c-tiny-usb.c
+F:	drivers/i2c/i2c-stub.c
 
 i386 BOOT CODE
 M:	"H. Peter Anvin" <hpa@zytor.com>
@@ -6433,17 +6394,15 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git
 S:	Maintained
 F:	arch/ia64/
 
-IBM Power VMX Cryptographic instructions
-M:	Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
-M:	Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com>
-L:	linux-crypto@vger.kernel.org
+IBM Power 842 compression accelerator
+M:	Haren Myneni <haren@us.ibm.com>
 S:	Supported
-F:	drivers/crypto/vmx/Makefile
-F:	drivers/crypto/vmx/Kconfig
-F:	drivers/crypto/vmx/vmx.c
-F:	drivers/crypto/vmx/aes*
-F:	drivers/crypto/vmx/ghash*
-F:	drivers/crypto/vmx/ppc-xlate.pl
+F:	drivers/crypto/nx/Makefile
+F:	drivers/crypto/nx/Kconfig
+F:	drivers/crypto/nx/nx-842*
+F:	include/linux/sw842.h
+F:	crypto/842.c
+F:	lib/842/
 
 IBM Power in-Nest Crypto Acceleration
 M:	Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
@@ -6458,33 +6417,29 @@ F:	drivers/crypto/nx/nx.*
 F:	drivers/crypto/nx/nx_csbcpb.h
 F:	drivers/crypto/nx/nx_debugfs.h
 
-IBM Power 842 compression accelerator
-M:	Haren Myneni <haren@us.ibm.com>
-S:	Supported
-F:	drivers/crypto/nx/Makefile
-F:	drivers/crypto/nx/Kconfig
-F:	drivers/crypto/nx/nx-842*
-F:	include/linux/sw842.h
-F:	crypto/842.c
-F:	lib/842/
-
 IBM Power Linux RAID adapter
 M:	Brian King <brking@us.ibm.com>
 S:	Supported
 F:	drivers/scsi/ipr.*
 
-IBM Power Virtual Ethernet Device Driver
+IBM Power SRIOV Virtual NIC Device Driver
 M:	Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
+M:	John Allen <jallen@linux.vnet.ibm.com>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	drivers/net/ethernet/ibm/ibmveth.*
+F:	drivers/net/ethernet/ibm/ibmvnic.*
 
-IBM Power SRIOV Virtual NIC Device Driver
+IBM Power Virtual Ethernet Device Driver
 M:	Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
-M:	John Allen <jallen@linux.vnet.ibm.com>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	drivers/net/ethernet/ibm/ibmvnic.*
+F:	drivers/net/ethernet/ibm/ibmveth.*
+
+IBM Power Virtual FC Device Drivers
+M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/ibmvscsi/ibmvfc*
 
 IBM Power Virtual SCSI Device Drivers
 M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
@@ -6501,11 +6456,17 @@ L:	target-devel@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/ibmvscsi_tgt/
 
-IBM Power Virtual FC Device Drivers
-M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
-L:	linux-scsi@vger.kernel.org
+IBM Power VMX Cryptographic instructions
+M:	Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
+M:	Paulo Flabiano Smorigo <pfsmorigo@linux.vnet.ibm.com>
+L:	linux-crypto@vger.kernel.org
 S:	Supported
-F:	drivers/scsi/ibmvscsi/ibmvfc*
+F:	drivers/crypto/vmx/Makefile
+F:	drivers/crypto/vmx/Kconfig
+F:	drivers/crypto/vmx/vmx.c
+F:	drivers/crypto/vmx/aes*
+F:	drivers/crypto/vmx/ghash*
+F:	drivers/crypto/vmx/ppc-xlate.pl
 
 IBM ServeRAID RAID DRIVER
 S:	Orphan
@@ -6751,6 +6712,12 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
 S:	Supported
 F:	security/integrity/ima/
 
+INTEL 810/815 FRAMEBUFFER DRIVER
+M:	Antonino Daplas <adaplas@gmail.com>
+L:	linux-fbdev@vger.kernel.org
+S:	Maintained
+F:	drivers/video/fbdev/i810/
+
 INTEL ASoC BDW/HSW DRIVERS
 M:	Jie Yang <yang.jie@linux.intel.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -6768,96 +6735,20 @@ T:	git git://git.code.sf.net/p/intel-sas/isci
 S:	Supported
 F:	drivers/scsi/isci/
 
-INTEL HID EVENT DRIVER
-M:	Alex Hung <alex.hung@canonical.com>
-L:	platform-driver-x86@vger.kernel.org
-S:	Maintained
-F:	drivers/platform/x86/intel-hid.c
-
-INTEL VIRTUAL BUTTON DRIVER
-M:	AceLan Kao <acelan.kao@canonical.com>
-L:	platform-driver-x86@vger.kernel.org
-S:	Maintained
-F:	drivers/platform/x86/intel-vbtn.c
-
-INTEL IDLE DRIVER
-M:	Jacob Pan <jacob.jun.pan@linux.intel.com>
-M:	Len Brown <lenb@kernel.org>
-L:	linux-pm@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
-B:	https://bugzilla.kernel.org
-S:	Supported
-F:	drivers/idle/intel_idle.c
-
-INTEL INTEGRATED SENSOR HUB DRIVER
-M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
-M:	Jiri Kosina <jikos@kernel.org>
-L:	linux-input@vger.kernel.org
-S:	Maintained
-F:	drivers/hid/intel-ish-hid/
-
-INTEL PSTATE DRIVER
-M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
-M:	Len Brown <lenb@kernel.org>
-L:	linux-pm@vger.kernel.org
-S:	Supported
-F:	drivers/cpufreq/intel_pstate.c
-
-INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
-M:	Maik Broemme <mbroemme@libmpq.org>
-L:	linux-fbdev@vger.kernel.org
-S:	Maintained
-F:	Documentation/fb/intelfb.txt
-F:	drivers/video/fbdev/intelfb/
-
-INTEL 810/815 FRAMEBUFFER DRIVER
-M:	Antonino Daplas <adaplas@gmail.com>
-L:	linux-fbdev@vger.kernel.org
-S:	Maintained
-F:	drivers/video/fbdev/i810/
-
-INTEL MENLOW THERMAL DRIVER
-M:	Sujith Thomas <sujith.thomas@intel.com>
-L:	platform-driver-x86@vger.kernel.org
-W:	https://01.org/linux-acpi
-S:	Supported
-F:	drivers/platform/x86/intel_menlow.c
-
-INTEL I/OAT DMA DRIVER
-M:	Dave Jiang <dave.jiang@intel.com>
-R:	Dan Williams <dan.j.williams@intel.com>
-L:	dmaengine@vger.kernel.org
-Q:	https://patchwork.kernel.org/project/linux-dmaengine/list/
-S:	Supported
-F:	drivers/dma/ioat*
-
-INTEL IOMMU (VT-d)
-M:	David Woodhouse <dwmw2@infradead.org>
-L:	iommu@lists.linux-foundation.org
-T:	git git://git.infradead.org/iommu-2.6.git
+INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
+M:	Daniel Vetter <daniel.vetter@intel.com>
+M:	Jani Nikula <jani.nikula@linux.intel.com>
+L:	intel-gfx@lists.freedesktop.org
+W:	https://01.org/linuxgraphics/
+B:	https://01.org/linuxgraphics/documentation/how-report-bugs
+C:	irc://chat.freenode.net/intel-gfx
+Q:	http://patchwork.freedesktop.org/project/intel-gfx/
+T:	git git://anongit.freedesktop.org/drm-intel
 S:	Supported
-F:	drivers/iommu/intel-iommu.c
-F:	include/linux/intel-iommu.h
-
-INTEL IOP-ADMA DMA DRIVER
-R:	Dan Williams <dan.j.williams@intel.com>
-S:	Odd fixes
-F:	drivers/dma/iop-adma.c
-
-INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
-M:	Krzysztof Halasa <khalasa@piap.pl>
-S:	Maintained
-F:	arch/arm/mach-ixp4xx/include/mach/qmgr.h
-F:	arch/arm/mach-ixp4xx/include/mach/npe.h
-F:	arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
-F:	arch/arm/mach-ixp4xx/ixp4xx_npe.c
-F:	drivers/net/ethernet/xscale/ixp4xx_eth.c
-F:	drivers/net/wan/ixp4xx_hss.c
-
-INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT
-M:	Deepak Saxena <dsaxena@plexity.net>
-S:	Maintained
-F:	drivers/char/hw_random/ixp4xx-rng.c
+F:	drivers/gpu/drm/i915/
+F:	include/drm/i915*
+F:	include/uapi/drm/i915_drm.h
+F:	Documentation/gpu/i915.rst
 
 INTEL ETHERNET DRIVERS
 M:	Jeff Kirsher <jeffrey.t.kirsher@intel.com>
@@ -6882,75 +6773,80 @@ F:	drivers/net/ethernet/intel/
 F:	drivers/net/ethernet/intel/*/
 F:	include/linux/avf/virtchnl.h
 
-INTEL RDMA RNIC DRIVER
-M:     Faisal Latif <faisal.latif@intel.com>
-M:     Shiraz Saleem <shiraz.saleem@intel.com>
-L:     linux-rdma@vger.kernel.org
-S:     Supported
-F:     drivers/infiniband/hw/i40iw/
-
-INTEL MERRIFIELD GPIO DRIVER
-M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
-L:	linux-gpio@vger.kernel.org
+INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
+M:	Maik Broemme <mbroemme@libmpq.org>
+L:	linux-fbdev@vger.kernel.org
 S:	Maintained
-F:	drivers/gpio/gpio-merrifield.c
+F:	Documentation/fb/intelfb.txt
+F:	drivers/video/fbdev/intelfb/
 
-INTEL-MID GPIO DRIVER
-M:	David Cohen <david.a.cohen@linux.intel.com>
-L:	linux-gpio@vger.kernel.org
-S:	Maintained
-F:	drivers/gpio/gpio-intel-mid.c
+INTEL GVT-g DRIVERS (Intel GPU Virtualization)
+M:	Zhenyu Wang <zhenyuw@linux.intel.com>
+M:	Zhi Wang <zhi.a.wang@intel.com>
+L:	intel-gvt-dev@lists.freedesktop.org
+L:	intel-gfx@lists.freedesktop.org
+W:	https://01.org/igvt-g
+T:	git https://github.com/01org/gvt-linux.git
+S:	Supported
+F:	drivers/gpu/drm/i915/gvt/
 
-INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
-M:	Stanislav Yakovlev <stas.yakovlev@gmail.com>
-L:	linux-wireless@vger.kernel.org
+INTEL HID EVENT DRIVER
+M:	Alex Hung <alex.hung@canonical.com>
+L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	Documentation/networking/README.ipw2100
-F:	Documentation/networking/README.ipw2200
-F:	drivers/net/wireless/intel/ipw2x00/
-
-INTEL(R) TRACE HUB
-M:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
-S:	Supported
-F:	Documentation/trace/intel_th.txt
-F:	drivers/hwtracing/intel_th/
+F:	drivers/platform/x86/intel-hid.c
 
-INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT)
-M:	Ning Sun <ning.sun@intel.com>
-L:	tboot-devel@lists.sourceforge.net
-W:	http://tboot.sourceforge.net
-T:	hg http://tboot.hg.sourceforge.net:8000/hgroot/tboot/tboot
+INTEL I/OAT DMA DRIVER
+M:	Dave Jiang <dave.jiang@intel.com>
+R:	Dan Williams <dan.j.williams@intel.com>
+L:	dmaengine@vger.kernel.org
+Q:	https://patchwork.kernel.org/project/linux-dmaengine/list/
 S:	Supported
-F:	Documentation/intel_txt.txt
-F:	include/linux/tboot.h
-F:	arch/x86/kernel/tboot.c
+F:	drivers/dma/ioat*
 
-INTEL WIRELESS WIMAX CONNECTION 2400
-M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
-M:	linux-wimax@intel.com
-L:	wimax@linuxwimax.org (subscribers-only)
+INTEL IDLE DRIVER
+M:	Jacob Pan <jacob.jun.pan@linux.intel.com>
+M:	Len Brown <lenb@kernel.org>
+L:	linux-pm@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
+B:	https://bugzilla.kernel.org
 S:	Supported
-W:	http://linuxwimax.org
-F:	Documentation/wimax/README.i2400m
-F:	drivers/net/wimax/i2400m/
-F:	include/uapi/linux/wimax/i2400m.h
+F:	drivers/idle/intel_idle.c
 
-INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy)
-M:	Stanislaw Gruszka <sgruszka@redhat.com>
-L:	linux-wireless@vger.kernel.org
-S:	Supported
-F:	drivers/net/wireless/intel/iwlegacy/
+INTEL INTEGRATED SENSOR HUB DRIVER
+M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+M:	Jiri Kosina <jikos@kernel.org>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/hid/intel-ish-hid/
 
-INTEL WIRELESS WIFI LINK (iwlwifi)
-M:	Johannes Berg <johannes.berg@intel.com>
-M:	Emmanuel Grumbach <emmanuel.grumbach@intel.com>
-M:	Luca Coelho <luciano.coelho@intel.com>
-M:	Intel Linux Wireless <linuxwifi@intel.com>
-L:	linux-wireless@vger.kernel.org
-W:	http://intellinuxwireless.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git
+INTEL IOMMU (VT-d)
+M:	David Woodhouse <dwmw2@infradead.org>
+L:	iommu@lists.linux-foundation.org
+T:	git git://git.infradead.org/iommu-2.6.git
 S:	Supported
-F:	drivers/net/wireless/intel/iwlwifi/
+F:	drivers/iommu/intel-iommu.c
+F:	include/linux/intel-iommu.h
+
+INTEL IOP-ADMA DMA DRIVER
+R:	Dan Williams <dan.j.williams@intel.com>
+S:	Odd fixes
+F:	drivers/dma/iop-adma.c
+
+INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
+M:	Krzysztof Halasa <khalasa@piap.pl>
+S:	Maintained
+F:	arch/arm/mach-ixp4xx/include/mach/qmgr.h
+F:	arch/arm/mach-ixp4xx/include/mach/npe.h
+F:	arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
+F:	arch/arm/mach-ixp4xx/ixp4xx_npe.c
+F:	drivers/net/ethernet/xscale/ixp4xx_eth.c
+F:	drivers/net/wan/ixp4xx_hss.c
+
+INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT
+M:	Deepak Saxena <dsaxena@plexity.net>
+S:	Maintained
+F:	drivers/char/hw_random/ixp4xx-rng.c
 
 INTEL MANAGEMENT ENGINE (mei)
 M:	Tomas Winkler <tomas.winkler@intel.com>
@@ -6963,6 +6859,19 @@ F:	drivers/watchdog/mei_wdt.c
 F:	Documentation/misc-devices/mei/*
 F:	samples/mei/*
 
+INTEL MENLOW THERMAL DRIVER
+M:	Sujith Thomas <sujith.thomas@intel.com>
+L:	platform-driver-x86@vger.kernel.org
+W:	https://01.org/linux-acpi
+S:	Supported
+F:	drivers/platform/x86/intel_menlow.c
+
+INTEL MERRIFIELD GPIO DRIVER
+M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+F:	drivers/gpio/gpio-merrifield.c
+
 INTEL MIC DRIVERS (mic)
 M:	Sudeep Dutt <sudeep.dutt@intel.com>
 M:	Ashutosh Dixit <ashutosh.dixit@intel.com>
@@ -6972,13 +6881,21 @@ W:	http://software.intel.com/en-us/mic-developer
 F:	include/linux/mic_bus.h
 F:	include/linux/scif.h
 F:	include/uapi/linux/mic_common.h
-F: 	include/uapi/linux/mic_ioctl.h
+F:	include/uapi/linux/mic_ioctl.h
 F:	include/uapi/linux/scif_ioctl.h
 F:	drivers/misc/mic/
 F:	drivers/dma/mic_x100_dma.c
 F:	drivers/dma/mic_x100_dma.h
 F:	Documentation/mic/
 
+INTEL PMC CORE DRIVER
+M:	Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
+M:	Vishwanath Somayaji <vishwanath.somayaji@intel.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	arch/x86/include/asm/pmc_core.h
+F:	drivers/platform/x86/intel_pmc_core*
+
 INTEL PMC/P-Unit IPC DRIVER
 M:	Zha Qipeng<qipeng.zha@intel.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -6988,6 +6905,28 @@ F:	drivers/platform/x86/intel_punit_ipc.c
 F:	arch/x86/include/asm/intel_pmc_ipc.h
 F:	arch/x86/include/asm/intel_punit_ipc.h
 
+INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
+M:	Stanislav Yakovlev <stas.yakovlev@gmail.com>
+L:	linux-wireless@vger.kernel.org
+S:	Maintained
+F:	Documentation/networking/README.ipw2100
+F:	Documentation/networking/README.ipw2200
+F:	drivers/net/wireless/intel/ipw2x00/
+
+INTEL PSTATE DRIVER
+M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+M:	Len Brown <lenb@kernel.org>
+L:	linux-pm@vger.kernel.org
+S:	Supported
+F:	drivers/cpufreq/intel_pstate.c
+
+INTEL RDMA RNIC DRIVER
+M:	Faisal Latif <faisal.latif@intel.com>
+M:	Shiraz Saleem <shiraz.saleem@intel.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/hw/i40iw/
+
 INTEL TELEMETRY DRIVER
 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -6995,13 +6934,60 @@ S:	Maintained
 F:	arch/x86/include/asm/intel_telemetry.h
 F:	drivers/platform/x86/intel_telemetry*
 
-INTEL PMC CORE DRIVER
-M:	Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
-M:	Vishwanath Somayaji <vishwanath.somayaji@intel.com>
+INTEL VIRTUAL BUTTON DRIVER
+M:	AceLan Kao <acelan.kao@canonical.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
-F:	arch/x86/include/asm/pmc_core.h
-F:	drivers/platform/x86/intel_pmc_core*
+F:	drivers/platform/x86/intel-vbtn.c
+
+INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy)
+M:	Stanislaw Gruszka <sgruszka@redhat.com>
+L:	linux-wireless@vger.kernel.org
+S:	Supported
+F:	drivers/net/wireless/intel/iwlegacy/
+
+INTEL WIRELESS WIFI LINK (iwlwifi)
+M:	Johannes Berg <johannes.berg@intel.com>
+M:	Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+M:	Luca Coelho <luciano.coelho@intel.com>
+M:	Intel Linux Wireless <linuxwifi@intel.com>
+L:	linux-wireless@vger.kernel.org
+W:	http://intellinuxwireless.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git
+S:	Supported
+F:	drivers/net/wireless/intel/iwlwifi/
+
+INTEL WIRELESS WIMAX CONNECTION 2400
+M:	Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+M:	linux-wimax@intel.com
+L:	wimax@linuxwimax.org (subscribers-only)
+S:	Supported
+W:	http://linuxwimax.org
+F:	Documentation/wimax/README.i2400m
+F:	drivers/net/wimax/i2400m/
+F:	include/uapi/linux/wimax/i2400m.h
+
+INTEL(R) TRACE HUB
+M:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
+S:	Supported
+F:	Documentation/trace/intel_th.txt
+F:	drivers/hwtracing/intel_th/
+
+INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT)
+M:	Ning Sun <ning.sun@intel.com>
+L:	tboot-devel@lists.sourceforge.net
+W:	http://tboot.sourceforge.net
+T:	hg http://tboot.hg.sourceforge.net:8000/hgroot/tboot/tboot
+S:	Supported
+F:	Documentation/intel_txt.txt
+F:	include/linux/tboot.h
+F:	arch/x86/kernel/tboot.c
+
+INTEL-MID GPIO DRIVER
+M:	David Cohen <david.a.cohen@linux.intel.com>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+F:	drivers/gpio/gpio-intel-mid.c
 
 INVENSENSE MPU-3050 GYROSCOPE DRIVER
 M:	Linus Walleij <linus.walleij@linaro.org>
@@ -7093,13 +7079,6 @@ F:	drivers/net/irda/
 F:	include/net/irda/
 F:	net/irda/
 
-IRQ SUBSYSTEM
-M:	Thomas Gleixner <tglx@linutronix.de>
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
-F:	kernel/irq/
-
 IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
 M:	Marc Zyngier <marc.zyngier@arm.com>
 S:	Maintained
@@ -7109,6 +7088,13 @@ F:	include/linux/irqdomain.h
 F:	kernel/irq/irqdomain.c
 F:	kernel/irq/msi.c
 
+IRQ SUBSYSTEM
+M:	Thomas Gleixner <tglx@linutronix.de>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
+F:	kernel/irq/
+
 IRQCHIP DRIVERS
 M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Jason Cooper <jason@lakedaemon.net>
@@ -7412,27 +7398,6 @@ S:	Maintained
 F:	arch/x86/include/asm/svm.h
 F:	arch/x86/kvm/svm.c
 
-KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
-M:	Alexander Graf <agraf@suse.com>
-L:	kvm-ppc@vger.kernel.org
-W:	http://www.linux-kvm.org/
-T:	git git://github.com/agraf/linux-2.6.git
-S:	Supported
-F:	arch/powerpc/include/asm/kvm*
-F:	arch/powerpc/kvm/
-
-KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
-M:	Christian Borntraeger <borntraeger@de.ibm.com>
-M:	Cornelia Huck <cohuck@redhat.com>
-L:	linux-s390@vger.kernel.org
-W:	http://www.ibm.com/developerworks/linux/linux390/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
-S:	Supported
-F:	Documentation/s390/kvm.txt
-F:	arch/s390/include/asm/kvm*
-F:	arch/s390/kvm/
-F:	arch/s390/mm/gmap.c
-
 KERNEL VIRTUAL MACHINE (KVM) FOR ARM
 M:	Christoffer Dall <christoffer.dall@linaro.org>
 M:	Marc Zyngier <marc.zyngier@arm.com>
@@ -7447,6 +7412,15 @@ F:	arch/arm/kvm/
 F:	virt/kvm/arm/
 F:	include/kvm/arm_*
 
+KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
+M:	Alexander Graf <agraf@suse.com>
+L:	kvm-ppc@vger.kernel.org
+W:	http://www.linux-kvm.org/
+T:	git git://github.com/agraf/linux-2.6.git
+S:	Supported
+F:	arch/powerpc/include/asm/kvm*
+F:	arch/powerpc/kvm/
+
 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
 M:	Christoffer Dall <christoffer.dall@linaro.org>
 M:	Marc Zyngier <marc.zyngier@arm.com>
@@ -7465,6 +7439,18 @@ F:	arch/mips/include/uapi/asm/kvm*
 F:	arch/mips/include/asm/kvm*
 F:	arch/mips/kvm/
 
+KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
+M:	Christian Borntraeger <borntraeger@de.ibm.com>
+M:	Cornelia Huck <cohuck@redhat.com>
+L:	linux-s390@vger.kernel.org
+W:	http://www.ibm.com/developerworks/linux/linux390/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
+S:	Supported
+F:	Documentation/s390/kvm.txt
+F:	arch/s390/include/asm/kvm*
+F:	arch/s390/kvm/
+F:	arch/s390/mm/gmap.c
+
 KERNFS
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 M:	Tejun Heo <tj@kernel.org>
@@ -7482,17 +7468,15 @@ F:	include/linux/kexec.h
 F:	include/uapi/linux/kexec.h
 F:	kernel/kexec*
 
-KEYS/KEYRINGS:
-M:	David Howells <dhowells@redhat.com>
+KEYS-ENCRYPTED
+M:	Mimi Zohar <zohar@linux.vnet.ibm.com>
+M:	David Safford <safford@us.ibm.com>
+L:	linux-security-module@vger.kernel.org
 L:	keyrings@vger.kernel.org
-S:	Maintained
-F:	Documentation/security/keys/core.rst
-F:	include/linux/key.h
-F:	include/linux/key-type.h
-F:	include/linux/keyctl.h
-F:	include/uapi/linux/keyctl.h
-F:	include/keys/
-F:	security/keys/
+S:	Supported
+F:	Documentation/security/keys/trusted-encrypted.rst
+F:	include/keys/encrypted-type.h
+F:	security/keys/encrypted-keys/
 
 KEYS-TRUSTED
 M:	David Safford <safford@us.ibm.com>
@@ -7505,15 +7489,17 @@ F:	include/keys/trusted-type.h
 F:	security/keys/trusted.c
 F:	security/keys/trusted.h
 
-KEYS-ENCRYPTED
-M:	Mimi Zohar <zohar@linux.vnet.ibm.com>
-M:	David Safford <safford@us.ibm.com>
-L:	linux-security-module@vger.kernel.org
+KEYS/KEYRINGS:
+M:	David Howells <dhowells@redhat.com>
 L:	keyrings@vger.kernel.org
-S:	Supported
-F:	Documentation/security/keys/trusted-encrypted.rst
-F:	include/keys/encrypted-type.h
-F:	security/keys/encrypted-keys/
+S:	Maintained
+F:	Documentation/security/keys/core.rst
+F:	include/linux/key.h
+F:	include/linux/key-type.h
+F:	include/linux/keyctl.h
+F:	include/uapi/linux/keyctl.h
+F:	include/keys/
+F:	security/keys/
 
 KGDB / KDB /debug_core
 M:	Jason Wessel <jason.wessel@windriver.com>
@@ -7657,16 +7643,6 @@ F:	drivers/lguest/
 F:	include/linux/lguest*.h
 F:	tools/lguest/
 
-LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
-M:	Tejun Heo <tj@kernel.org>
-L:	linux-ide@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
-S:	Maintained
-F:	drivers/ata/
-F:	include/linux/ata.h
-F:	include/linux/libata.h
-F:	Documentation/devicetree/bindings/ata/
-
 LIBATA PATA ARASAN COMPACT FLASH CONTROLLER
 M:	Viresh Kumar <vireshk@kernel.org>
 L:	linux-ide@vger.kernel.org
@@ -7710,23 +7686,21 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
 S:	Maintained
 F:	drivers/ata/sata_promise.*
 
+LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
+M:	Tejun Heo <tj@kernel.org>
+L:	linux-ide@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+S:	Maintained
+F:	drivers/ata/
+F:	include/linux/ata.h
+F:	include/linux/libata.h
+F:	Documentation/devicetree/bindings/ata/
+
 LIBLOCKDEP
 M:	Sasha Levin <alexander.levin@verizon.com>
 S:	Maintained
 F:	tools/lib/lockdep/
 
-LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
-M:	Dan Williams <dan.j.williams@intel.com>
-L:	linux-nvdimm@lists.01.org
-Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
-S:	Supported
-F:	drivers/nvdimm/*
-F:	drivers/acpi/nfit/*
-F:	include/linux/nd.h
-F:	include/linux/libnvdimm.h
-F:	include/uapi/linux/ndctl.h
-
 LIBNVDIMM BLK: MMIO-APERTURE DRIVER
 M:	Ross Zwisler <ross.zwisler@linux.intel.com>
 L:	linux-nvdimm@lists.01.org
@@ -7747,7 +7721,19 @@ M:	Ross Zwisler <ross.zwisler@linux.intel.com>
 L:	linux-nvdimm@lists.01.org
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
-F:	drivers/nvdimm/pmem*
+F:	drivers/nvdimm/pmem*
+
+LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
+M:	Dan Williams <dan.j.williams@intel.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
+S:	Supported
+F:	drivers/nvdimm/*
+F:	drivers/acpi/nfit/*
+F:	include/linux/nd.h
+F:	include/linux/libnvdimm.h
+F:	include/uapi/linux/ndctl.h
 
 LIGHTNVM PLATFORM SUPPORT
 M:	Matias Bjorling <mb@lightnvm.io>
@@ -7758,6 +7744,14 @@ F:	drivers/lightnvm/
 F:	include/linux/lightnvm.h
 F:	include/uapi/linux/lightnvm.h
 
+LINUX FOR POWER MACINTOSH
+M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
+W:	http://www.penguinppc.org/
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Maintained
+F:	arch/powerpc/platforms/powermac/
+F:	drivers/macintosh/
+
 LINUX FOR POWERPC (32-BIT AND 64-BIT)
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 M:	Paul Mackerras <paulus@samba.org>
@@ -7791,14 +7785,6 @@ N:	powernv
 N:	[^a-z0-9]ps3
 N:	pseries
 
-LINUX FOR POWER MACINTOSH
-M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
-W:	http://www.penguinppc.org/
-L:	linuxppc-dev@lists.ozlabs.org
-S:	Maintained
-F:	arch/powerpc/platforms/powermac/
-F:	drivers/macintosh/
-
 LINUX FOR POWERPC EMBEDDED MPC5XXX
 M:	Anatolij Gustschin <agust@denx.de>
 L:	linuxppc-dev@lists.ozlabs.org
@@ -7816,19 +7802,6 @@ S:	Maintained
 F:	arch/powerpc/platforms/40x/
 F:	arch/powerpc/platforms/44x/
 
-LINUX FOR POWERPC EMBEDDED XILINX VIRTEX
-L:	linuxppc-dev@lists.ozlabs.org
-S:	Orphan
-F:	arch/powerpc/*/*virtex*
-F:	arch/powerpc/*/*/*virtex*
-
-LINUX FOR POWERPC EMBEDDED PPC8XX
-M:	Vitaly Bordug <vitb@kernel.crashing.org>
-W:	http://www.penguinppc.org/
-L:	linuxppc-dev@lists.ozlabs.org
-S:	Maintained
-F:	arch/powerpc/platforms/8xx/
-
 LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX
 M:	Scott Wood <oss@buserror.net>
 M:	Kumar Gala <galak@kernel.crashing.org>
@@ -7840,6 +7813,19 @@ F:	arch/powerpc/platforms/83xx/
 F:	arch/powerpc/platforms/85xx/
 F:	Documentation/devicetree/bindings/powerpc/fsl/
 
+LINUX FOR POWERPC EMBEDDED PPC8XX
+M:	Vitaly Bordug <vitb@kernel.crashing.org>
+W:	http://www.penguinppc.org/
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Maintained
+F:	arch/powerpc/platforms/8xx/
+
+LINUX FOR POWERPC EMBEDDED XILINX VIRTEX
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Orphan
+F:	arch/powerpc/*/*virtex*
+F:	arch/powerpc/*/*/*virtex*
+
 LINUX FOR POWERPC PA SEMI PWRFICIENT
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Orphan
@@ -7847,16 +7833,16 @@ F:	arch/powerpc/platforms/pasemi/
 F:	drivers/*/*pasemi*
 F:	drivers/*/*/*pasemi*
 
-LINUX SECURITY MODULE (LSM) FRAMEWORK
-M:	Chris Wright <chrisw@sous-sol.org>
-L:	linux-security-module@vger.kernel.org
-S:	Supported
-
 LINUX KERNEL DUMP TEST MODULE (LKDTM)
 M:	Kees Cook <keescook@chromium.org>
 S:	Maintained
 F:	drivers/misc/lkdtm*
 
+LINUX SECURITY MODULE (LSM) FRAMEWORK
+M:	Chris Wright <chrisw@sous-sol.org>
+L:	linux-security-module@vger.kernel.org
+S:	Supported
+
 LIS3LV02D ACCELEROMETER DRIVER
 M:	Eric Piel <eric.piel@tremplin-utc.net>
 S:	Maintained
@@ -8230,14 +8216,6 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/sound/max9860.txt
 F:	sound/soc/codecs/max9860.*
 
-MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M:	Krzysztof Kozlowski <krzk@kernel.org>
-M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
-L:	linux-pm@vger.kernel.org
-S:	Supported
-F:	drivers/power/supply/max14577_charger.c
-F:	drivers/power/supply/max77693_charger.c
-
 MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
 M:	Javier Martinez Canillas <javier@dowhile0.org>
 L:	linux-kernel@vger.kernel.org
@@ -8246,6 +8224,14 @@ F:	drivers/regulator/max77802-regulator.c
 F:	Documentation/devicetree/bindings/*/*max77802.txt
 F:	include/dt-bindings/*/*max77802.h
 
+MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
+M:	Krzysztof Kozlowski <krzk@kernel.org>
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+L:	linux-pm@vger.kernel.org
+S:	Supported
+F:	drivers/power/supply/max14577_charger.c
+F:	drivers/power/supply/max77693_charger.c
+
 MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
 M:	Chanwoo Choi <cw00.choi@samsung.com>
 M:	Krzysztof Kozlowski <krzk@kernel.org>
@@ -8288,14 +8274,25 @@ L:	linux-iio@vger.kernel.org
 S:	Maintained
 F:	drivers/iio/dac/cio-dac.c
 
-MEDIA DRIVERS FOR RENESAS - DRIF
-M:	Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
+MEDIA DRIVERS FOR ASCOT2E
+M:	Sergey Kozlov <serjk@netup.ru>
+M:	Abylay Ospan <aospan@netup.ru>
 L:	linux-media@vger.kernel.org
-L:	linux-renesas-soc@vger.kernel.org
+W:	https://linuxtv.org
+W:	http://netup.tv/
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	Documentation/devicetree/bindings/media/renesas,drif.txt
-F:	drivers/media/platform/rcar_drif.c
+F:	drivers/media/dvb-frontends/ascot2e*
+
+MEDIA DRIVERS FOR CXD2841ER
+M:	Sergey Kozlov <serjk@netup.ru>
+M:	Abylay Ospan <aospan@netup.ru>
+L:	linux-media@vger.kernel.org
+W:	https://linuxtv.org
+W:	http://netup.tv/
+T:	git git://linuxtv.org/media_tree.git
+S:	Supported
+F:	drivers/media/dvb-frontends/cxd2841er*
 
 MEDIA DRIVERS FOR FREESCALE IMX
 M:	Steve Longerbeam <slongerbeam@gmail.com>
@@ -8309,43 +8306,6 @@ F:	drivers/staging/media/imx/
 F:	include/linux/imx-media.h
 F:	include/media/imx.h
 
-MEDIA DRIVERS FOR RENESAS - FCP
-M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
-L:	linux-media@vger.kernel.org
-L:	linux-renesas-soc@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Supported
-F:	Documentation/devicetree/bindings/media/renesas,fcp.txt
-F:	drivers/media/platform/rcar-fcp.c
-F:	include/media/rcar-fcp.h
-
-MEDIA DRIVERS FOR RENESAS - FDP1
-M:	Kieran Bingham <kieran@bingham.xyz>
-L:	linux-media@vger.kernel.org
-L:	linux-renesas-soc@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Supported
-F:	Documentation/devicetree/bindings/media/renesas,fdp1.txt
-F:	drivers/media/platform/rcar_fdp1.c
-
-MEDIA DRIVERS FOR RENESAS - VIN
-M:	Niklas Söderlund <niklas.soderlund@ragnatech.se>
-L:	linux-media@vger.kernel.org
-L:	linux-renesas-soc@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Supported
-F:	Documentation/devicetree/bindings/media/rcar_vin.txt
-F:	drivers/media/platform/rcar-vin/
-
-MEDIA DRIVERS FOR RENESAS - VSP1
-M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
-L:	linux-media@vger.kernel.org
-L:	linux-renesas-soc@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Supported
-F:	Documentation/devicetree/bindings/media/renesas,vsp1.txt
-F:	drivers/media/platform/vsp1/
-
 MEDIA DRIVERS FOR HELENE
 M:	Abylay Ospan <aospan@netup.ru>
 L:	linux-media@vger.kernel.org
@@ -8355,7 +8315,7 @@ T:	git git://linuxtv.org/media_tree.git
 S:	Supported
 F:	drivers/media/dvb-frontends/helene*
 
-MEDIA DRIVERS FOR ASCOT2E
+MEDIA DRIVERS FOR HORUS3A
 M:	Sergey Kozlov <serjk@netup.ru>
 M:	Abylay Ospan <aospan@netup.ru>
 L:	linux-media@vger.kernel.org
@@ -8363,9 +8323,9 @@ W:	https://linuxtv.org
 W:	http://netup.tv/
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	drivers/media/dvb-frontends/ascot2e*
+F:	drivers/media/dvb-frontends/horus3a*
 
-MEDIA DRIVERS FOR CXD2841ER
+MEDIA DRIVERS FOR LNBH25
 M:	Sergey Kozlov <serjk@netup.ru>
 M:	Abylay Ospan <aospan@netup.ru>
 L:	linux-media@vger.kernel.org
@@ -8373,9 +8333,9 @@ W:	https://linuxtv.org
 W:	http://netup.tv/
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	drivers/media/dvb-frontends/cxd2841er*
+F:	drivers/media/dvb-frontends/lnbh25*
 
-MEDIA DRIVERS FOR HORUS3A
+MEDIA DRIVERS FOR NETUP PCI UNIVERSAL DVB devices
 M:	Sergey Kozlov <serjk@netup.ru>
 M:	Abylay Ospan <aospan@netup.ru>
 L:	linux-media@vger.kernel.org
@@ -8383,27 +8343,53 @@ W:	https://linuxtv.org
 W:	http://netup.tv/
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	drivers/media/dvb-frontends/horus3a*
+F:	drivers/media/pci/netup_unidvb/*
 
-MEDIA DRIVERS FOR LNBH25
-M:	Sergey Kozlov <serjk@netup.ru>
-M:	Abylay Ospan <aospan@netup.ru>
+MEDIA DRIVERS FOR RENESAS - DRIF
+M:	Ramesh Shanmugasundaram <ramesh.shanmugasundaram@bp.renesas.com>
 L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-W:	http://netup.tv/
+L:	linux-renesas-soc@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	drivers/media/dvb-frontends/lnbh25*
+F:	Documentation/devicetree/bindings/media/renesas,drif.txt
+F:	drivers/media/platform/rcar_drif.c
 
-MEDIA DRIVERS FOR NETUP PCI UNIVERSAL DVB devices
-M:	Sergey Kozlov <serjk@netup.ru>
-M:	Abylay Ospan <aospan@netup.ru>
+MEDIA DRIVERS FOR RENESAS - FCP
+M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-W:	http://netup.tv/
+L:	linux-renesas-soc@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 S:	Supported
-F:	drivers/media/pci/netup_unidvb/*
+F:	Documentation/devicetree/bindings/media/renesas,fcp.txt
+F:	drivers/media/platform/rcar-fcp.c
+F:	include/media/rcar-fcp.h
+
+MEDIA DRIVERS FOR RENESAS - FDP1
+M:	Kieran Bingham <kieran@bingham.xyz>
+L:	linux-media@vger.kernel.org
+L:	linux-renesas-soc@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Supported
+F:	Documentation/devicetree/bindings/media/renesas,fdp1.txt
+F:	drivers/media/platform/rcar_fdp1.c
+
+MEDIA DRIVERS FOR RENESAS - VIN
+M:	Niklas Söderlund <niklas.soderlund@ragnatech.se>
+L:	linux-media@vger.kernel.org
+L:	linux-renesas-soc@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Supported
+F:	Documentation/devicetree/bindings/media/rcar_vin.txt
+F:	drivers/media/platform/rcar-vin/
+
+MEDIA DRIVERS FOR RENESAS - VSP1
+M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+L:	linux-media@vger.kernel.org
+L:	linux-renesas-soc@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Supported
+F:	Documentation/devicetree/bindings/media/renesas,vsp1.txt
+F:	drivers/media/platform/vsp1/
 
 MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
 M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
@@ -8442,15 +8428,6 @@ S:	Supported
 F:	drivers/media/platform/mtk-jpeg/
 F:	Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt
 
-MEDIATEK MEDIA DRIVER
-M:	Tiffany Lin <tiffany.lin@mediatek.com>
-M:	Andrew-CT Chen <andrew-ct.chen@mediatek.com>
-S:	Supported
-F:	drivers/media/platform/mtk-vcodec/
-F:	drivers/media/platform/mtk-vpu/
-F:	Documentation/devicetree/bindings/media/mediatek-vcodec.txt
-F:	Documentation/devicetree/bindings/media/mediatek-vpu.txt
-
 MEDIATEK MDP DRIVER
 M:	Minghsiu Tsai <minghsiu.tsai@mediatek.com>
 M:	Houlong Wei <houlong.wei@mediatek.com>
@@ -8460,6 +8437,15 @@ F:	drivers/media/platform/mtk-mdp/
 F:	drivers/media/platform/mtk-vpu/
 F:	Documentation/devicetree/bindings/media/mediatek-mdp.txt
 
+MEDIATEK MEDIA DRIVER
+M:	Tiffany Lin <tiffany.lin@mediatek.com>
+M:	Andrew-CT Chen <andrew-ct.chen@mediatek.com>
+S:	Supported
+F:	drivers/media/platform/mtk-vcodec/
+F:	drivers/media/platform/mtk-vpu/
+F:	Documentation/devicetree/bindings/media/mediatek-vcodec.txt
+F:	Documentation/devicetree/bindings/media/mediatek-vpu.txt
+
 MEDIATEK MT7601U WIRELESS LAN DRIVER
 M:	Jakub Kicinski <kubakici@wp.pl>
 L:	linux-wireless@vger.kernel.org
@@ -8467,9 +8453,9 @@ S:	Maintained
 F:	drivers/net/wireless/mediatek/mt7601u/
 
 MEDIATEK RANDOM NUMBER GENERATOR SUPPORT
-M:      Sean Wang <sean.wang@mediatek.com>
-S:      Maintained
-F:      drivers/char/hw_random/mtk-rng.c
+M:	Sean Wang <sean.wang@mediatek.com>
+S:	Maintained
+F:	drivers/char/hw_random/mtk-rng.c
 
 MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
 M:	Peter Senna Tschudin <peter.senna@collabora.com>
@@ -8558,28 +8544,6 @@ W:	http://www.mellanox.com
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlxfw/
 
-MELLANOX MLXCPLD I2C AND MUX DRIVER
-M:	Vadim Pasternak <vadimp@mellanox.com>
-M:	Michael Shych <michaelsh@mellanox.com>
-L:	linux-i2c@vger.kernel.org
-S:	Supported
-F:	drivers/i2c/busses/i2c-mlxcpld.c
-F:	drivers/i2c/muxes/i2c-mux-mlxcpld.c
-F:	Documentation/i2c/busses/i2c-mlxcpld
-
-MELLANOX MLXCPLD LED DRIVER
-M:	Vadim Pasternak <vadimp@mellanox.com>
-L:	linux-leds@vger.kernel.org
-S:	Supported
-F:	drivers/leds/leds-mlxcpld.c
-F:	Documentation/leds/leds-mlxcpld.txt
-
-MELLANOX PLATFORM DRIVER
-M:      Vadim Pasternak <vadimp@mellanox.com>
-L:      platform-driver-x86@vger.kernel.org
-S:      Supported
-F:      drivers/platform/x86/mlx-platform.c
-
 MELLANOX MLX CPLD HOTPLUG DRIVER
 M:	Vadim Pasternak <vadimp@mellanox.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -8616,19 +8580,41 @@ L:	linux-rdma@vger.kernel.org
 W:	http://www.mellanox.com
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 S:	Supported
-F:	drivers/net/ethernet/mellanox/mlx5/core/
-F:	include/linux/mlx5/
+F:	drivers/net/ethernet/mellanox/mlx5/core/
+F:	include/linux/mlx5/
+
+MELLANOX MLX5 IB driver
+M:	Matan Barak <matanb@mellanox.com>
+M:	Leon Romanovsky <leonro@mellanox.com>
+L:	linux-rdma@vger.kernel.org
+W:	http://www.mellanox.com
+Q:	http://patchwork.kernel.org/project/linux-rdma/list/
+S:	Supported
+F:	drivers/infiniband/hw/mlx5/
+F:	include/linux/mlx5/
+F:	include/uapi/rdma/mlx5-abi.h
+
+MELLANOX MLXCPLD I2C AND MUX DRIVER
+M:	Vadim Pasternak <vadimp@mellanox.com>
+M:	Michael Shych <michaelsh@mellanox.com>
+L:	linux-i2c@vger.kernel.org
+S:	Supported
+F:	drivers/i2c/busses/i2c-mlxcpld.c
+F:	drivers/i2c/muxes/i2c-mux-mlxcpld.c
+F:	Documentation/i2c/busses/i2c-mlxcpld
+
+MELLANOX MLXCPLD LED DRIVER
+M:	Vadim Pasternak <vadimp@mellanox.com>
+L:	linux-leds@vger.kernel.org
+S:	Supported
+F:	drivers/leds/leds-mlxcpld.c
+F:	Documentation/leds/leds-mlxcpld.txt
 
-MELLANOX MLX5 IB driver
-M:	Matan Barak <matanb@mellanox.com>
-M:	Leon Romanovsky <leonro@mellanox.com>
-L:	linux-rdma@vger.kernel.org
-W:	http://www.mellanox.com
-Q:	http://patchwork.kernel.org/project/linux-rdma/list/
+MELLANOX PLATFORM DRIVER
+M:	Vadim Pasternak <vadimp@mellanox.com>
+L:	platform-driver-x86@vger.kernel.org
 S:	Supported
-F:	drivers/infiniband/hw/mlx5/
-F:	include/linux/mlx5/
-F:	include/uapi/rdma/mlx5-abi.h
+F:	drivers/platform/x86/mlx-platform.c
 
 MEMBARRIER SUPPORT
 M:	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
@@ -8784,6 +8770,16 @@ F:	Documentation/devicetree/bindings/mips/
 F:	Documentation/mips/
 F:	arch/mips/
 
+MIPS BOSTON DEVELOPMENT BOARD
+M:	Paul Burton <paul.burton@imgtec.com>
+L:	linux-mips@linux-mips.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/clock/img,boston-clock.txt
+F:	arch/mips/boot/dts/img/boston.dts
+F:	arch/mips/configs/generic/board-boston.config
+F:	drivers/clk/imgtec/clk-boston.c
+F:	include/dt-bindings/clock/boston-clock.h
+
 MIPS GENERIC PLATFORM
 M:	Paul Burton <paul.burton@imgtec.com>
 L:	linux-mips@linux-mips.org
@@ -8799,16 +8795,6 @@ F:	arch/mips/include/asm/mach-loongson32/
 F:	drivers/*/*loongson1*
 F:	drivers/*/*/*loongson1*
 
-MIPS BOSTON DEVELOPMENT BOARD
-M:	Paul Burton <paul.burton@imgtec.com>
-L:	linux-mips@linux-mips.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/clock/img,boston-clock.txt
-F:	arch/mips/boot/dts/img/boston.dts
-F:	arch/mips/configs/generic/board-boston.config
-F:	drivers/clk/imgtec/clk-boston.c
-F:	include/dt-bindings/clock/boston-clock.h
-
 MIROSOUND PCM20 FM RADIO RECEIVER DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
@@ -8959,6 +8945,11 @@ F:	drivers/mfd/
 F:	include/linux/mfd/
 F:	include/dt-bindings/mfd/
 
+MULTIMEDIA CARD (MMC) ETC. OVER SPI
+S:	Orphan
+F:	drivers/mmc/host/mmc_spi.c
+F:	include/linux/spi/mmc_spi.h
+
 MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
 M:	Ulf Hansson <ulf.hansson@linaro.org>
 L:	linux-mmc@vger.kernel.org
@@ -8969,11 +8960,6 @@ F:	drivers/mmc/
 F:	include/linux/mmc/
 F:	include/uapi/linux/mmc/
 
-MULTIMEDIA CARD (MMC) ETC. OVER SPI
-S:	Orphan
-F:	drivers/mmc/host/mmc_spi.c
-F:	include/linux/spi/mmc_spi.h
-
 MULTIPLEXER SUBSYSTEM
 M:	Peter Rosin <peda@axentia.se>
 S:	Maintained
@@ -9166,6 +9152,35 @@ S:	Maintained
 W:	https://fedorahosted.org/dropwatch/
 F:	net/core/drop_monitor.c
 
+NETWORKING DRIVERS
+L:	netdev@vger.kernel.org
+W:	http://www.linuxfoundation.org/en/Net
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+S:	Odd Fixes
+F:	Documentation/devicetree/bindings/net/
+F:	drivers/net/
+F:	include/linux/if_*
+F:	include/linux/netdevice.h
+F:	include/linux/etherdevice.h
+F:	include/linux/fcdevice.h
+F:	include/linux/fddidevice.h
+F:	include/linux/hippidevice.h
+F:	include/linux/inetdevice.h
+F:	include/uapi/linux/if_*
+F:	include/uapi/linux/netdevice.h
+
+NETWORKING DRIVERS (WIRELESS)
+M:	Kalle Valo <kvalo@codeaurora.org>
+L:	linux-wireless@vger.kernel.org
+Q:	http://patchwork.kernel.org/project/linux-wireless/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git
+S:	Maintained
+F:	Documentation/devicetree/bindings/net/wireless/
+F:	drivers/net/wireless/
+
 NETWORKING [DSA]
 M:	Andrew Lunn <andrew@lunn.ch>
 M:	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
@@ -9197,28 +9212,6 @@ F:	tools/net/
 F:	tools/testing/selftests/net/
 F:	lib/random32.c
 
-NETWORKING [IPv4/IPv6]
-M:	"David S. Miller" <davem@davemloft.net>
-M:	Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
-M:	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
-L:	netdev@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-S:	Maintained
-F:	net/ipv4/
-F:	net/ipv6/
-F:	include/net/ip*
-F:	arch/x86/net/*
-
-NETWORKING [TLS]
-M:	Ilya Lesokhin <ilyal@mellanox.com>
-M:	Aviad Yehezkel <aviadye@mellanox.com>
-M:	Dave Watson <davejwatson@fb.com>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	net/tls/*
-F:	include/uapi/linux/tls.h
-F:	include/net/tls.h
-
 NETWORKING [IPSEC]
 M:	Steffen Klassert <steffen.klassert@secunet.com>
 M:	Herbert Xu <herbert@gondor.apana.org.au>
@@ -9243,43 +9236,36 @@ F:	net/ipv6/ip6_vti.c
 F:	include/uapi/linux/xfrm.h
 F:	include/net/xfrm.h
 
+NETWORKING [IPv4/IPv6]
+M:	"David S. Miller" <davem@davemloft.net>
+M:	Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+M:	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
+L:	netdev@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+S:	Maintained
+F:	net/ipv4/
+F:	net/ipv6/
+F:	include/net/ip*
+F:	arch/x86/net/*
+
 NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK)
 M:	Paul Moore <paul@paul-moore.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 
-NETWORKING [WIRELESS]
-L:	linux-wireless@vger.kernel.org
-Q:	http://patchwork.kernel.org/project/linux-wireless/list/
-
-NETWORKING DRIVERS
+NETWORKING [TLS]
+M:	Ilya Lesokhin <ilyal@mellanox.com>
+M:	Aviad Yehezkel <aviadye@mellanox.com>
+M:	Dave Watson <davejwatson@fb.com>
 L:	netdev@vger.kernel.org
-W:	http://www.linuxfoundation.org/en/Net
-Q:	http://patchwork.ozlabs.org/project/netdev/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
-S:	Odd Fixes
-F:	Documentation/devicetree/bindings/net/
-F:	drivers/net/
-F:	include/linux/if_*
-F:	include/linux/netdevice.h
-F:	include/linux/etherdevice.h
-F:	include/linux/fcdevice.h
-F:	include/linux/fddidevice.h
-F:	include/linux/hippidevice.h
-F:	include/linux/inetdevice.h
-F:	include/uapi/linux/if_*
-F:	include/uapi/linux/netdevice.h
+S:	Maintained
+F:	net/tls/*
+F:	include/uapi/linux/tls.h
+F:	include/net/tls.h
 
-NETWORKING DRIVERS (WIRELESS)
-M:	Kalle Valo <kvalo@codeaurora.org>
+NETWORKING [WIRELESS]
 L:	linux-wireless@vger.kernel.org
 Q:	http://patchwork.kernel.org/project/linux-wireless/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git
-S:	Maintained
-F:	Documentation/devicetree/bindings/net/wireless/
-F:	drivers/net/wireless/
 
 NETXEN (1/10) GbE SUPPORT
 M:	Manish Chopra <manish.chopra@cavium.com>
@@ -9384,6 +9370,12 @@ F:	drivers/power/supply/bq27xxx_battery_i2c.c
 F:	drivers/power/supply/isp1704_charger.c
 F:	drivers/power/supply/rx51_battery.c
 
+NTB AMD DRIVER
+M:	Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+L:	linux-ntb@googlegroups.com
+S:	Supported
+F:	drivers/ntb/hw/amd/
+
 NTB DRIVER CORE
 M:	Jon Mason <jdmason@kudzu.us>
 M:	Dave Jiang <dave.jiang@intel.com>
@@ -9413,12 +9405,6 @@ W:	https://github.com/jonmason/ntb/wiki
 T:	git git://github.com/jonmason/ntb.git
 F:	drivers/ntb/hw/intel/
 
-NTB AMD DRIVER
-M:	Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
-L:	linux-ntb@googlegroups.com
-S:	Supported
-F:	drivers/ntb/hw/amd/
-
 NTFS FILESYSTEM
 M:	Anton Altaparmakov <anton@tuxera.com>
 L:	linux-ntfs-dev@lists.sourceforge.net
@@ -9448,15 +9434,6 @@ F:	drivers/nvme/host/
 F:	include/linux/nvme.h
 F:	include/uapi/linux/nvme_ioctl.h
 
-NVM EXPRESS TARGET DRIVER
-M:	Christoph Hellwig <hch@lst.de>
-M:	Sagi Grimberg <sagi@grimberg.me>
-L:	linux-nvme@lists.infradead.org
-T:	git://git.infradead.org/nvme.git
-W:	http://git.infradead.org/nvme.git
-S:	Supported
-F:	drivers/nvme/target/
-
 NVM EXPRESS FC TRANSPORT DRIVERS
 M:	James Smart <james.smart@broadcom.com>
 L:	linux-nvme@lists.infradead.org
@@ -9467,6 +9444,15 @@ F:	drivers/nvme/host/fc.c
 F:	drivers/nvme/target/fc.c
 F:	drivers/nvme/target/fcloop.c
 
+NVM EXPRESS TARGET DRIVER
+M:	Christoph Hellwig <hch@lst.de>
+M:	Sagi Grimberg <sagi@grimberg.me>
+L:	linux-nvme@lists.infradead.org
+T:	git://git.infradead.org/nvme.git
+W:	http://git.infradead.org/nvme.git
+S:	Supported
+F:	drivers/nvme/target/
+
 NVMEM FRAMEWORK
 M:	Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
 S:	Maintained
@@ -9475,13 +9461,6 @@ F:	Documentation/devicetree/bindings/nvmem/
 F:	include/linux/nvmem-consumer.h
 F:	include/linux/nvmem-provider.h
 
-NXP-NCI NFC DRIVER
-M:	Clément Perrochaud <clement.perrochaud@effinnov.com>
-R:	Charles Gorand <charles.gorand@effinnov.com>
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
-S:	Supported
-F:	drivers/nfc/nxp-nci
-
 NXP TDA998X DRM DRIVER
 M:	Russell King <linux@armlinux.org.uk>
 S:	Supported
@@ -9496,55 +9475,31 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Maintained
 F:	sound/soc/codecs/tfa9879*
 
+NXP-NCI NFC DRIVER
+M:	Clément Perrochaud <clement.perrochaud@effinnov.com>
+R:	Charles Gorand <charles.gorand@effinnov.com>
+L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+S:	Supported
+F:	drivers/nfc/nxp-nci
+
 OBJTOOL
 M:	Josh Poimboeuf <jpoimboe@redhat.com>
 S:	Supported
 F:	tools/objtool/
 
-OMAP1 SUPPORT
-M:	Aaro Koskinen <aaro.koskinen@iki.fi>
-M:	Tony Lindgren <tony@atomide.com>
+OMAP AUDIO SUPPORT
+M:	Peter Ujfalusi <peter.ujfalusi@ti.com>
+M:	Jarkko Nikula <jarkko.nikula@bitmer.com>
+L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 L:	linux-omap@vger.kernel.org
-Q:	http://patchwork.kernel.org/project/linux-omap/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
 S:	Maintained
-F:	arch/arm/mach-omap1/
-F:	arch/arm/plat-omap/
-F:	arch/arm/configs/omap1_defconfig
-F:	drivers/i2c/busses/i2c-omap.c
-F:	include/linux/i2c-omap.h
+F:	sound/soc/omap/
 
-OMAP2+ SUPPORT
-M:	Tony Lindgren <tony@atomide.com>
+OMAP CLOCK FRAMEWORK SUPPORT
+M:	Paul Walmsley <paul@pwsan.com>
 L:	linux-omap@vger.kernel.org
-W:	http://www.muru.com/linux/omap/
-W:	http://linux.omap.com/
-Q:	http://patchwork.kernel.org/project/linux-omap/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
 S:	Maintained
-F:	arch/arm/mach-omap2/
-F:	arch/arm/plat-omap/
-F:	arch/arm/configs/omap2plus_defconfig
-F:	drivers/i2c/busses/i2c-omap.c
-F:	drivers/irqchip/irq-omap-intc.c
-F:	drivers/mfd/*omap*.c
-F:	drivers/mfd/menelaus.c
-F:	drivers/mfd/palmas.c
-F:	drivers/mfd/tps65217.c
-F:	drivers/mfd/tps65218.c
-F:	drivers/mfd/tps65910.c
-F:	drivers/mfd/twl-core.[ch]
-F:	drivers/mfd/twl4030*.c
-F:	drivers/mfd/twl6030*.c
-F:	drivers/mfd/twl6040*.c
-F:	drivers/regulator/palmas-regulator*.c
-F:	drivers/regulator/pbias-regulator.c
-F:	drivers/regulator/tps65217-regulator.c
-F:	drivers/regulator/tps65218-regulator.c
-F:	drivers/regulator/tps65910-regulator.c
-F:	drivers/regulator/twl-regulator.c
-F:	drivers/regulator/twl6030-regulator.c
-F:	include/linux/i2c-omap.h
+F:	arch/arm/*omap*/*clock*
 
 OMAP DEVICE TREE SUPPORT
 M:	Benoît Cousson <bcousson@baylibre.com>
@@ -9552,39 +9507,26 @@ M:	Tony Lindgren <tony@atomide.com>
 L:	linux-omap@vger.kernel.org
 L:	devicetree@vger.kernel.org
 S:	Maintained
-F:	arch/arm/boot/dts/*omap*
-F:	arch/arm/boot/dts/*am3*
-F:	arch/arm/boot/dts/*am4*
-F:	arch/arm/boot/dts/*am5*
-F:	arch/arm/boot/dts/*dra7*
-
-OMAP CLOCK FRAMEWORK SUPPORT
-M:	Paul Walmsley <paul@pwsan.com>
-L:	linux-omap@vger.kernel.org
-S:	Maintained
-F:	arch/arm/*omap*/*clock*
-
-OMAP POWER MANAGEMENT SUPPORT
-M:	Kevin Hilman <khilman@kernel.org>
-L:	linux-omap@vger.kernel.org
-S:	Maintained
-F:	arch/arm/*omap*/*pm*
-F:	drivers/cpufreq/omap-cpufreq.c
+F:	arch/arm/boot/dts/*omap*
+F:	arch/arm/boot/dts/*am3*
+F:	arch/arm/boot/dts/*am4*
+F:	arch/arm/boot/dts/*am5*
+F:	arch/arm/boot/dts/*dra7*
 
-OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT
-M:	Rajendra Nayak <rnayak@codeaurora.org>
-M:	Paul Walmsley <paul@pwsan.com>
+OMAP DISPLAY SUBSYSTEM and FRAMEBUFFER SUPPORT (DSS2)
+M:	Tomi Valkeinen <tomi.valkeinen@ti.com>
 L:	linux-omap@vger.kernel.org
+L:	linux-fbdev@vger.kernel.org
 S:	Maintained
-F:	arch/arm/mach-omap2/prm*
+F:	drivers/video/fbdev/omap2/
+F:	Documentation/arm/OMAP/DSS
 
-OMAP AUDIO SUPPORT
-M:	Peter Ujfalusi <peter.ujfalusi@ti.com>
-M:	Jarkko Nikula <jarkko.nikula@bitmer.com>
-L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
+OMAP FRAMEBUFFER SUPPORT
+M:	Tomi Valkeinen <tomi.valkeinen@ti.com>
+L:	linux-fbdev@vger.kernel.org
 L:	linux-omap@vger.kernel.org
 S:	Maintained
-F:	sound/soc/omap/
+F:	drivers/video/fbdev/omap/
 
 OMAP GENERAL PURPOSE MEMORY CONTROLLER SUPPORT
 M:	Roger Quadros <rogerq@ti.com>
@@ -9594,20 +9536,14 @@ S:	Maintained
 F:	drivers/memory/omap-gpmc.c
 F:	arch/arm/mach-omap2/*gpmc*
 
-OMAP FRAMEBUFFER SUPPORT
-M:	Tomi Valkeinen <tomi.valkeinen@ti.com>
-L:	linux-fbdev@vger.kernel.org
-L:	linux-omap@vger.kernel.org
-S:	Maintained
-F:	drivers/video/fbdev/omap/
-
-OMAP DISPLAY SUBSYSTEM and FRAMEBUFFER SUPPORT (DSS2)
-M:	Tomi Valkeinen <tomi.valkeinen@ti.com>
+OMAP GPIO DRIVER
+M:	Grygorii Strashko <grygorii.strashko@ti.com>
+M:	Santosh Shilimkar <ssantosh@kernel.org>
+M:	Kevin Hilman <khilman@kernel.org>
 L:	linux-omap@vger.kernel.org
-L:	linux-fbdev@vger.kernel.org
 S:	Maintained
-F:	drivers/video/fbdev/omap2/
-F:	Documentation/arm/OMAP/DSS
+F:	Documentation/devicetree/bindings/gpio/gpio-omap.txt
+F:	drivers/gpio/gpio-omap.c
 
 OMAP HARDWARE SPINLOCK SUPPORT
 M:	Ohad Ben-Cohen <ohad@wizery.com>
@@ -9615,30 +9551,12 @@ L:	linux-omap@vger.kernel.org
 S:	Maintained
 F:	drivers/hwspinlock/omap_hwspinlock.c
 
-OMAP MMC SUPPORT
-M:	Jarkko Lavinen <jarkko.lavinen@nokia.com>
-L:	linux-omap@vger.kernel.org
-S:	Maintained
-F:	drivers/mmc/host/omap.c
-
 OMAP HS MMC SUPPORT
 L:	linux-mmc@vger.kernel.org
 L:	linux-omap@vger.kernel.org
 S:	Orphan
 F:	drivers/mmc/host/omap_hsmmc.c
 
-OMAP RANDOM NUMBER GENERATOR SUPPORT
-M:	Deepak Saxena <dsaxena@plexity.net>
-S:	Maintained
-F:	drivers/char/hw_random/omap-rng.c
-
-OMAP HWMOD SUPPORT
-M:	Benoît Cousson <bcousson@baylibre.com>
-M:	Paul Walmsley <paul@pwsan.com>
-L:	linux-omap@vger.kernel.org
-S:	Maintained
-F:	arch/arm/mach-omap2/omap_hwmod.*
-
 OMAP HWMOD DATA
 M:	Paul Walmsley <paul@pwsan.com>
 L:	linux-omap@vger.kernel.org
@@ -9651,6 +9569,13 @@ L:	linux-omap@vger.kernel.org
 S:	Maintained
 F:	arch/arm/mach-omap2/omap_hwmod_44xx_data.c
 
+OMAP HWMOD SUPPORT
+M:	Benoît Cousson <bcousson@baylibre.com>
+M:	Paul Walmsley <paul@pwsan.com>
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	arch/arm/mach-omap2/omap_hwmod.*
+
 OMAP IMAGING SUBSYSTEM (OMAP3 ISP and OMAP4 ISS)
 M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:	linux-media@vger.kernel.org
@@ -9659,6 +9584,31 @@ F:	Documentation/devicetree/bindings/media/ti,omap3isp.txt
 F:	drivers/media/platform/omap3isp/
 F:	drivers/staging/media/omap4iss/
 
+OMAP MMC SUPPORT
+M:	Jarkko Lavinen <jarkko.lavinen@nokia.com>
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	drivers/mmc/host/omap.c
+
+OMAP POWER MANAGEMENT SUPPORT
+M:	Kevin Hilman <khilman@kernel.org>
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	arch/arm/*omap*/*pm*
+F:	drivers/cpufreq/omap-cpufreq.c
+
+OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT
+M:	Rajendra Nayak <rnayak@codeaurora.org>
+M:	Paul Walmsley <paul@pwsan.com>
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	arch/arm/mach-omap2/prm*
+
+OMAP RANDOM NUMBER GENERATOR SUPPORT
+M:	Deepak Saxena <dsaxena@plexity.net>
+S:	Maintained
+F:	drivers/char/hw_random/omap-rng.c
+
 OMAP USB SUPPORT
 L:	linux-usb@vger.kernel.org
 L:	linux-omap@vger.kernel.org
@@ -9666,21 +9616,57 @@ S:	Orphan
 F:	drivers/usb/*/*omap*
 F:	arch/arm/*omap*/usb*
 
-OMAP GPIO DRIVER
-M:	Grygorii Strashko <grygorii.strashko@ti.com>
-M:	Santosh Shilimkar <ssantosh@kernel.org>
-M:	Kevin Hilman <khilman@kernel.org>
-L:	linux-omap@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/gpio/gpio-omap.txt
-F:	drivers/gpio/gpio-omap.c
-
 OMAP/NEWFLOW NANOBONE MACHINE SUPPORT
 M:	Mark Jackson <mpfj@newflow.co.uk>
 L:	linux-omap@vger.kernel.org
 S:	Maintained
 F:	arch/arm/boot/dts/am335x-nano.dts
 
+OMAP1 SUPPORT
+M:	Aaro Koskinen <aaro.koskinen@iki.fi>
+M:	Tony Lindgren <tony@atomide.com>
+L:	linux-omap@vger.kernel.org
+Q:	http://patchwork.kernel.org/project/linux-omap/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
+S:	Maintained
+F:	arch/arm/mach-omap1/
+F:	arch/arm/plat-omap/
+F:	arch/arm/configs/omap1_defconfig
+F:	drivers/i2c/busses/i2c-omap.c
+F:	include/linux/i2c-omap.h
+
+OMAP2+ SUPPORT
+M:	Tony Lindgren <tony@atomide.com>
+L:	linux-omap@vger.kernel.org
+W:	http://www.muru.com/linux/omap/
+W:	http://linux.omap.com/
+Q:	http://patchwork.kernel.org/project/linux-omap/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
+S:	Maintained
+F:	arch/arm/mach-omap2/
+F:	arch/arm/plat-omap/
+F:	arch/arm/configs/omap2plus_defconfig
+F:	drivers/i2c/busses/i2c-omap.c
+F:	drivers/irqchip/irq-omap-intc.c
+F:	drivers/mfd/*omap*.c
+F:	drivers/mfd/menelaus.c
+F:	drivers/mfd/palmas.c
+F:	drivers/mfd/tps65217.c
+F:	drivers/mfd/tps65218.c
+F:	drivers/mfd/tps65910.c
+F:	drivers/mfd/twl-core.[ch]
+F:	drivers/mfd/twl4030*.c
+F:	drivers/mfd/twl6030*.c
+F:	drivers/mfd/twl6040*.c
+F:	drivers/regulator/palmas-regulator*.c
+F:	drivers/regulator/pbias-regulator.c
+F:	drivers/regulator/tps65217-regulator.c
+F:	drivers/regulator/tps65218-regulator.c
+F:	drivers/regulator/tps65910-regulator.c
+F:	drivers/regulator/twl-regulator.c
+F:	drivers/regulator/twl6030-regulator.c
+F:	include/linux/i2c-omap.h
+
 OMFS FILESYSTEM
 M:	Bob Copeland <me@bobcopeland.com>
 L:	linux-karma-devel@lists.sourceforge.net
@@ -9700,6 +9686,13 @@ M:	Harald Welte <laforge@gnumonks.org>
 S:	Maintained
 F:	drivers/char/pcmcia/cm4040_cs.*
 
+OMNIVISION OV13858 SENSOR DRIVER
+M:	Sakari Ailus <sakari.ailus@linux.intel.com>
+L:	linux-media@vger.kernel.org
+T:	git git://linuxtv.org/media_tree.git
+S:	Maintained
+F:	drivers/media/i2c/ov13858.c
+
 OMNIVISION OV5640 SENSOR DRIVER
 M:	Steve Longerbeam <slongerbeam@gmail.com>
 L:	linux-media@vger.kernel.org
@@ -9722,13 +9715,6 @@ S:	Maintained
 F:	drivers/media/i2c/ov7670.c
 F:	Documentation/devicetree/bindings/media/i2c/ov7670.txt
 
-OMNIVISION OV13858 SENSOR DRIVER
-M:	Sakari Ailus <sakari.ailus@linux.intel.com>
-L:	linux-media@vger.kernel.org
-T:	git git://linuxtv.org/media_tree.git
-S:	Maintained
-F:	drivers/media/i2c/ov13858.c
-
 ONENAND FLASH DRIVER
 M:	Kyungmin Park <kyungmin.park@samsung.com>
 L:	linux-mtd@lists.infradead.org
@@ -9758,6 +9744,15 @@ L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/ulp/opa_vnic
 
+OPEN FIRMWARE AND DEVICE TREE OVERLAYS
+M:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+L:	devicetree@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/dynamic-resolution-notes.txt
+F:	Documentation/devicetree/overlay-notes.txt
+F:	drivers/of/overlay.c
+F:	drivers/of/resolver.c
+
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:	Rob Herring <robh+dt@kernel.org>
 M:	Frank Rowand <frowand.list@gmail.com>
@@ -9781,15 +9776,6 @@ F:	Documentation/devicetree/
 F:	arch/*/boot/dts/
 F:	include/dt-bindings/
 
-OPEN FIRMWARE AND DEVICE TREE OVERLAYS
-M:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
-L:	devicetree@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/dynamic-resolution-notes.txt
-F:	Documentation/devicetree/overlay-notes.txt
-F:	drivers/of/overlay.c
-F:	drivers/of/resolver.c
-
 OPENCORES I2C BUS DRIVER
 M:	Peter Korsgaard <jacmet@sunsite.dk>
 L:	linux-i2c@vger.kernel.org
@@ -9934,11 +9920,11 @@ F:	Documentation/mn10300/
 F:	arch/mn10300/
 
 PARALLEL LCD/KEYPAD PANEL DRIVER
-M:      Willy Tarreau <willy@haproxy.com>
-M:      Ksenija Stanojevic <ksenija.stanojevic@gmail.com>
-S:      Odd Fixes
-F:      Documentation/misc-devices/lcd-panel-cgram.txt
-F:      drivers/misc/panel.c
+M:	Willy Tarreau <willy@haproxy.com>
+M:	Ksenija Stanojevic <ksenija.stanojevic@gmail.com>
+S:	Odd Fixes
+F:	Documentation/misc-devices/lcd-panel-cgram.txt
+F:	drivers/misc/panel.c
 
 PARALLEL PORT SUBSYSTEM
 M:	Sudip Mukherjee <sudipm.mukherjee@gmail.com>
@@ -10034,42 +10020,13 @@ M:	Khalid Aziz <khalid@gonehiking.org>
 S:	Maintained
 F:	drivers/firmware/pcdp.*
 
-PCI ERROR RECOVERY
-M:	Linas Vepstas <linasvepstas@gmail.com>
-L:	linux-pci@vger.kernel.org
-S:	Supported
-F:	Documentation/PCI/pci-error-recovery.txt
-
-PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC
-M:	Russell Currey <ruscur@russell.cc>
-L:	linuxppc-dev@lists.ozlabs.org
-S:	Supported
-F:	Documentation/powerpc/eeh-pci-error-recovery.txt
-F:	arch/powerpc/kernel/eeh*.c
-F:	arch/powerpc/platforms/*/eeh*.c
-F:	arch/powerpc/include/*/eeh*.h
-
-PCI SUBSYSTEM
-M:	Bjorn Helgaas <bhelgaas@google.com>
-L:	linux-pci@vger.kernel.org
-Q:	http://patchwork.ozlabs.org/project/linux-pci/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
-S:	Supported
-F:	Documentation/devicetree/bindings/pci/
-F:	Documentation/PCI/
-F:	drivers/pci/
-F:	include/linux/pci*
-F:	arch/x86/pci/
-F:	arch/x86/kernel/quirks.c
-
-PCI ENDPOINT SUBSYSTEM
-M:	Kishon Vijay Abraham I <kishon@ti.com>
+PCI DRIVER FOR AARDVARK (Marvell Armada 3700)
+M:	Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
 L:	linux-pci@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/pci-endpoint.git
-S:	Supported
-F:	drivers/pci/endpoint/
-F:	drivers/misc/pci_endpoint_test.c
-F:	tools/pci/
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/pci/aardvark-pci.txt
+F:	drivers/pci/host/pci-aardvark.c
 
 PCI DRIVER FOR ALTERA PCIE IP
 M:	Ley Foon Tan <lftan@altera.com>
@@ -10079,6 +10036,14 @@ S:	Supported
 F:	Documentation/devicetree/bindings/pci/altera-pcie.txt
 F:	drivers/pci/host/pcie-altera.c
 
+PCI DRIVER FOR APPLIEDMICRO XGENE
+M:	Tanmay Inamdar <tinamdar@apm.com>
+L:	linux-pci@vger.kernel.org
+L:	linux-arm-kernel@lists.infradead.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/pci/xgene-pci.txt
+F:	drivers/pci/host/pci-xgene.c
+
 PCI DRIVER FOR ARM VERSATILE PLATFORM
 M:	Rob Herring <robh@kernel.org>
 L:	linux-pci@vger.kernel.org
@@ -10095,14 +10060,6 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/pci/pci-armada8k.txt
 F:	drivers/pci/dwc/pcie-armada8k.c
 
-PCI DRIVER FOR APPLIEDMICRO XGENE
-M:	Tanmay Inamdar <tinamdar@apm.com>
-L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/pci/xgene-pci.txt
-F:	drivers/pci/host/pci-xgene.c
-
 PCI DRIVER FOR FREESCALE LAYERSCAPE
 M:	Minghuan Lian <minghuan.Lian@freescale.com>
 M:	Mingkai Hu <mingkai.hu@freescale.com>
@@ -10113,37 +10070,29 @@ L:	linux-arm-kernel@lists.infradead.org
 S:	Maintained
 F:	drivers/pci/dwc/*layerscape*
 
-PCI DRIVER FOR IMX6
-M:	Richard Zhu <hongxing.zhu@nxp.com>
-M:	Lucas Stach <l.stach@pengutronix.de>
-L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
-F:	drivers/pci/dwc/*imx6*
-
-PCI DRIVER FOR TI KEYSTONE
-M:	Murali Karicheri <m-karicheri2@ti.com>
-L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
-F:	drivers/pci/dwc/*keystone*
-
-PCI DRIVER FOR MVEBU (Marvell Armada 370 and Armada XP SOC support)
-M:	Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
-M:	Jason Cooper <jason@lakedaemon.net>
+PCI DRIVER FOR GENERIC OF HOSTS
+M:	Will Deacon <will.deacon@arm.com>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	drivers/pci/host/*mvebu*
+F:	Documentation/devicetree/bindings/pci/host-generic-pci.txt
+F:	drivers/pci/host/pci-host-common.c
+F:	drivers/pci/host/pci-host-generic.c
 
-PCI DRIVER FOR AARDVARK (Marvell Armada 3700)
-M:	Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+PCI DRIVER FOR IMX6
+M:	Richard Zhu <hongxing.zhu@nxp.com>
+M:	Lucas Stach <l.stach@pengutronix.de>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	Documentation/devicetree/bindings/pci/aardvark-pci.txt
-F:	drivers/pci/host/pci-aardvark.c
+F:	Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
+F:	drivers/pci/dwc/*imx6*
+
+PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
+M:	Keith Busch <keith.busch@intel.com>
+L:	linux-pci@vger.kernel.org
+S:	Supported
+F:	drivers/pci/host/vmd.c
 
 PCI DRIVER FOR MICROSEMI SWITCHTEC
 M:	Kurt Schwemmer <kurt.schwemmer@microsemi.com>
@@ -10156,6 +10105,14 @@ F:	Documentation/ABI/testing/sysfs-class-switchtec
 F:	drivers/pci/switch/switchtec*
 F:	include/uapi/linux/switchtec_ioctl.h
 
+PCI DRIVER FOR MVEBU (Marvell Armada 370 and Armada XP SOC support)
+M:	Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+M:	Jason Cooper <jason@lakedaemon.net>
+L:	linux-pci@vger.kernel.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/pci/host/*mvebu*
+
 PCI DRIVER FOR NVIDIA TEGRA
 M:	Thierry Reding <thierry.reding@gmail.com>
 L:	linux-tegra@vger.kernel.org
@@ -10164,14 +10121,6 @@ S:	Supported
 F:	Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
 F:	drivers/pci/host/pci-tegra.c
 
-PCI DRIVER FOR TI DRA7XX
-M:	Kishon Vijay Abraham I <kishon@ti.com>
-L:	linux-omap@vger.kernel.org
-L:	linux-pci@vger.kernel.org
-S:	Supported
-F:	Documentation/devicetree/bindings/pci/ti-pci.txt
-F:	drivers/pci/dwc/pci-dra7xx.c
-
 PCI DRIVER FOR RENESAS R-CAR
 M:	Simon Horman <horms@verge.net.au>
 L:	linux-pci@vger.kernel.org
@@ -10195,26 +10144,44 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/pci/designware-pcie.txt
 F:	drivers/pci/dwc/*designware*
 
-PCI DRIVER FOR GENERIC OF HOSTS
-M:	Will Deacon <will.deacon@arm.com>
+PCI DRIVER FOR TI DRA7XX
+M:	Kishon Vijay Abraham I <kishon@ti.com>
+L:	linux-omap@vger.kernel.org
+L:	linux-pci@vger.kernel.org
+S:	Supported
+F:	Documentation/devicetree/bindings/pci/ti-pci.txt
+F:	drivers/pci/dwc/pci-dra7xx.c
+
+PCI DRIVER FOR TI KEYSTONE
+M:	Murali Karicheri <m-karicheri2@ti.com>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	Documentation/devicetree/bindings/pci/host-generic-pci.txt
-F:	drivers/pci/host/pci-host-common.c
-F:	drivers/pci/host/pci-host-generic.c
+F:	drivers/pci/dwc/*keystone*
 
-PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
-M:	Keith Busch <keith.busch@intel.com>
+PCI ENDPOINT SUBSYSTEM
+M:	Kishon Vijay Abraham I <kishon@ti.com>
 L:	linux-pci@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/pci-endpoint.git
 S:	Supported
-F:	drivers/pci/host/vmd.c
+F:	drivers/pci/endpoint/
+F:	drivers/misc/pci_endpoint_test.c
+F:	tools/pci/
 
-PCIE DRIVER FOR ST SPEAR13XX
-M:	Pratyush Anand <pratyush.anand@gmail.com>
+PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC
+M:	Russell Currey <ruscur@russell.cc>
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Supported
+F:	Documentation/powerpc/eeh-pci-error-recovery.txt
+F:	arch/powerpc/kernel/eeh*.c
+F:	arch/powerpc/platforms/*/eeh*.c
+F:	arch/powerpc/include/*/eeh*.h
+
+PCI ERROR RECOVERY
+M:	Linas Vepstas <linasvepstas@gmail.com>
 L:	linux-pci@vger.kernel.org
-S:	Maintained
-F:	drivers/pci/dwc/*spear*
+S:	Supported
+F:	Documentation/PCI/pci-error-recovery.txt
 
 PCI MSI DRIVER FOR ALTERA MSI IP
 M:	Ley Foon Tan <lftan@altera.com>
@@ -10232,6 +10199,19 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
 F:	drivers/pci/host/pci-xgene-msi.c
 
+PCI SUBSYSTEM
+M:	Bjorn Helgaas <bhelgaas@google.com>
+L:	linux-pci@vger.kernel.org
+Q:	http://patchwork.ozlabs.org/project/linux-pci/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
+S:	Supported
+F:	Documentation/devicetree/bindings/pci/
+F:	Documentation/PCI/
+F:	drivers/pci/
+F:	include/linux/pci*
+F:	arch/x86/pci/
+F:	arch/x86/kernel/quirks.c
+
 PCIE DRIVER FOR AXIS ARTPEC
 M:	Niklas Cassel <niklas.cassel@axis.com>
 M:	Jesper Nilsson <jesper.nilsson@axis.com>
@@ -10241,6 +10221,14 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/pci/axis,artpec*
 F:	drivers/pci/dwc/*artpec*
 
+PCIE DRIVER FOR CAVIUM THUNDERX
+M:	David Daney <david.daney@cavium.com>
+L:	linux-pci@vger.kernel.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Supported
+F:	Documentation/devicetree/bindings/pci/pci-thunder-*
+F:	drivers/pci/host/pci-thunder-*
+
 PCIE DRIVER FOR HISILICON
 M:	Zhou Wang <wangzhou1@hisilicon.com>
 M:	Gabriele Paoloni <gabriele.paoloni@huawei.com>
@@ -10257,6 +10245,21 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/pci/pcie-kirin.txt
 F:	drivers/pci/dwc/pcie-kirin.c
 
+PCIE DRIVER FOR MEDIATEK
+M:	Ryder Lee <ryder.lee@mediatek.com>
+L:	linux-pci@vger.kernel.org
+L:	linux-mediatek@lists.infradead.org
+S:	Supported
+F:	Documentation/devicetree/bindings/pci/mediatek*
+F:	drivers/pci/host/*mediatek*
+
+PCIE DRIVER FOR QUALCOMM MSM
+M:	Stanimir Varbanov <svarbanov@mm-sol.com>
+L:	linux-pci@vger.kernel.org
+L:	linux-arm-msm@vger.kernel.org
+S:	Maintained
+F:	drivers/pci/dwc/*qcom*
+
 PCIE DRIVER FOR ROCKCHIP
 M:	Shawn Lin <shawn.lin@rock-chips.com>
 L:	linux-pci@vger.kernel.org
@@ -10265,28 +10268,11 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/pci/rockchip-pcie.txt
 F:	drivers/pci/host/pcie-rockchip.c
 
-PCIE DRIVER FOR QUALCOMM MSM
-M:     Stanimir Varbanov <svarbanov@mm-sol.com>
-L:     linux-pci@vger.kernel.org
-L:     linux-arm-msm@vger.kernel.org
-S:     Maintained
-F:     drivers/pci/dwc/*qcom*
-
-PCIE DRIVER FOR CAVIUM THUNDERX
-M:	David Daney <david.daney@cavium.com>
+PCIE DRIVER FOR ST SPEAR13XX
+M:	Pratyush Anand <pratyush.anand@gmail.com>
 L:	linux-pci@vger.kernel.org
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Supported
-F:	Documentation/devicetree/bindings/pci/pci-thunder-*
-F:	drivers/pci/host/pci-thunder-*
-
-PCIE DRIVER FOR MEDIATEK
-M:      Ryder Lee <ryder.lee@mediatek.com>
-L:      linux-pci@vger.kernel.org
-L:      linux-mediatek@lists.infradead.org
-S:      Supported
-F:      Documentation/devicetree/bindings/pci/mediatek*
-F:      drivers/pci/host/*mediatek*
+S:	Maintained
+F:	drivers/pci/dwc/*spear*
 
 PCMCIA SUBSYSTEM
 P:	Linux PCMCIA Team
@@ -10455,14 +10441,14 @@ S:	Maintained
 F:	drivers/pinctrl/spear/
 
 PISTACHIO SOC SUPPORT
-M:      James Hartley <james.hartley@imgtec.com>
-M:      Ionela Voinescu <ionela.voinescu@imgtec.com>
-L:      linux-mips@linux-mips.org
-S:      Maintained
-F:      arch/mips/pistachio/
-F:      arch/mips/include/asm/mach-pistachio/
-F:      arch/mips/boot/dts/img/pistachio*
-F:      arch/mips/configs/pistachio*_defconfig
+M:	James Hartley <james.hartley@imgtec.com>
+M:	Ionela Voinescu <ionela.voinescu@imgtec.com>
+L:	linux-mips@linux-mips.org
+S:	Maintained
+F:	arch/mips/pistachio/
+F:	arch/mips/include/asm/mach-pistachio/
+F:	arch/mips/boot/dts/img/pistachio*
+F:	arch/mips/configs/pistachio*_defconfig
 
 PKTCDVD DRIVER
 S:	Orphan
@@ -10531,15 +10517,6 @@ F:	include/linux/pm_*
 F:	include/linux/powercap.h
 F:	drivers/powercap/
 
-POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
-M:	Sebastian Reichel <sre@kernel.org>
-L:	linux-pm@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
-S:	Maintained
-F:	Documentation/devicetree/bindings/power/supply/
-F:	include/linux/power_supply.h
-F:	drivers/power/supply/
-
 POWER STATE COORDINATION INTERFACE (PSCI)
 M:	Mark Rutland <mark.rutland@arm.com>
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
@@ -10549,18 +10526,21 @@ F:	drivers/firmware/psci*.c
 F:	include/linux/psci.h
 F:	include/uapi/linux/psci.h
 
+POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
+M:	Sebastian Reichel <sre@kernel.org>
+L:	linux-pm@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
+S:	Maintained
+F:	Documentation/devicetree/bindings/power/supply/
+F:	include/linux/power_supply.h
+F:	drivers/power/supply/
+
 POWERNV OPERATOR PANEL LCD DISPLAY DRIVER
 M:	Suraj Jitindar Singh <sjitindarsingh@gmail.com>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	drivers/char/powernv-op-panel.c
 
-PPP PROTOCOL DRIVERS AND COMPRESSORS
-M:	Paul Mackerras <paulus@samba.org>
-L:	linux-ppp@vger.kernel.org
-S:	Maintained
-F:	drivers/net/ppp/ppp_*
-
 PPP OVER ATM (RFC 2364)
 M:	Mitchell Blank Jr <mitch@sfgoth.com>
 S:	Maintained
@@ -10580,6 +10560,12 @@ F:	net/l2tp/l2tp_ppp.c
 F:	include/linux/if_pppol2tp.h
 F:	include/uapi/linux/if_pppol2tp.h
 
+PPP PROTOCOL DRIVERS AND COMPRESSORS
+M:	Paul Mackerras <paulus@samba.org>
+L:	linux-ppp@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ppp/ppp_*
+
 PPS SUPPORT
 M:	Rodolfo Giometti <giometti@enneenne.com>
 W:	http://wiki.enneenne.com/index.php/LinuxPPS_support
@@ -10754,6 +10740,20 @@ F:	include/linux/pwm_backlight.h
 F:	drivers/gpio/gpio-mvebu.c
 F:	Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
 
+PXA GPIO DRIVER
+M:	Robert Jarzmik <robert.jarzmik@free.fr>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+F:	drivers/gpio/gpio-pxa.c
+
+PXA MMCI DRIVER
+S:	Orphan
+
+PXA RTC DRIVER
+M:	Robert Jarzmik <robert.jarzmik@free.fr>
+L:	linux-rtc@vger.kernel.org
+S:	Maintained
+
 PXA2xx/PXA3xx SUPPORT
 M:	Daniel Mack <daniel@zonque.org>
 M:	Haojian Zhuang <haojian.zhuang@gmail.com>
@@ -10773,26 +10773,12 @@ F:	include/sound/pxa2xx-lib.h
 F:	sound/arm/pxa*
 F:	sound/soc/pxa/
 
-PXA GPIO DRIVER
-M:	Robert Jarzmik <robert.jarzmik@free.fr>
-L:	linux-gpio@vger.kernel.org
-S:	Maintained
-F:	drivers/gpio/gpio-pxa.c
-
 PXA3xx NAND FLASH DRIVER
 M:	Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
 L:	linux-mtd@lists.infradead.org
 S:	Maintained
 F:	drivers/mtd/nand/pxa3xx_nand.c
 
-PXA MMCI DRIVER
-S:	Orphan
-
-PXA RTC DRIVER
-M:	Robert Jarzmik <robert.jarzmik@free.fr>
-L:	linux-rtc@vger.kernel.org
-S:	Maintained
-
 QAT DRIVER
 M:	Giovanni Cabiddu <giovanni.cabiddu@intel.com>
 M:	Salvatore Benedetto <salvatore.benedetto@intel.com>
@@ -10820,6 +10806,36 @@ L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/qib/
 
+QLOGIC QL41xxx FCOE DRIVER
+M:	QLogic-Storage-Upstream@cavium.com
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/qedf/
+
+QLOGIC QL41xxx ISCSI DRIVER
+M:	QLogic-Storage-Upstream@cavium.com
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+F:	drivers/scsi/qedi/
+
+QLOGIC QL4xxx ETHERNET DRIVER
+M:	Yuval Mintz <Yuval.Mintz@cavium.com>
+M:	Ariel Elior <Ariel.Elior@cavium.com>
+M:	everest-linux-l2@cavium.com
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/qlogic/qed/
+F:	include/linux/qed/
+F:	drivers/net/ethernet/qlogic/qede/
+
+QLOGIC QL4xxx RDMA DRIVER
+M:	Ram Amrani <Ram.Amrani@cavium.com>
+M:	Ariel Elior <Ariel.Elior@cavium.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/hw/qedr/
+F:	include/uapi/rdma/qedr-abi.h
+
 QLOGIC QLA1280 SCSI DRIVER
 M:	Michael Reed <mdr@sgi.com>
 L:	linux-scsi@vger.kernel.org
@@ -10833,13 +10849,6 @@ S:	Supported
 F:	Documentation/scsi/LICENSE.qla2xxx
 F:	drivers/scsi/qla2xxx/
 
-QLOGIC QLA4XXX iSCSI DRIVER
-M:	QLogic-Storage-Upstream@qlogic.com
-L:	linux-scsi@vger.kernel.org
-S:	Supported
-F:	Documentation/scsi/LICENSE.qla4xxx
-F:	drivers/scsi/qla4xxx/
-
 QLOGIC QLA3XXX NETWORK DRIVER
 M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
@@ -10847,51 +10856,28 @@ S:	Supported
 F:	Documentation/networking/LICENSE.qla3xxx
 F:	drivers/net/ethernet/qlogic/qla3xxx.*
 
-QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
-M:	Harish Patil <harish.patil@cavium.com>
-M:	Manish Chopra <manish.chopra@cavium.com>
-M:	Dept-GELinuxNICDev@cavium.com
-L:	netdev@vger.kernel.org
+QLOGIC QLA4XXX iSCSI DRIVER
+M:	QLogic-Storage-Upstream@qlogic.com
+L:	linux-scsi@vger.kernel.org
 S:	Supported
-F:	drivers/net/ethernet/qlogic/qlcnic/
+F:	Documentation/scsi/LICENSE.qla4xxx
+F:	drivers/scsi/qla4xxx/
 
-QLOGIC QLGE 10Gb ETHERNET DRIVER
+QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
 M:	Harish Patil <harish.patil@cavium.com>
 M:	Manish Chopra <manish.chopra@cavium.com>
 M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	drivers/net/ethernet/qlogic/qlge/
-
-QLOGIC QL4xxx ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@cavium.com>
-M:	Ariel Elior <Ariel.Elior@cavium.com>
-M:	everest-linux-l2@cavium.com
-L:	netdev@vger.kernel.org
-S:	Supported
-F:	drivers/net/ethernet/qlogic/qed/
-F:	include/linux/qed/
-F:	drivers/net/ethernet/qlogic/qede/
-
-QLOGIC QL41xxx ISCSI DRIVER
-M:	QLogic-Storage-Upstream@cavium.com
-L:	linux-scsi@vger.kernel.org
-S:	Supported
-F:	drivers/scsi/qedi/
-
-QLOGIC QL41xxx FCOE DRIVER
-M:	QLogic-Storage-Upstream@cavium.com
-L:	linux-scsi@vger.kernel.org
-S:	Supported
-F:	drivers/scsi/qedf/
+F:	drivers/net/ethernet/qlogic/qlcnic/
 
-QLOGIC QL4xxx RDMA DRIVER
-M:	Ram Amrani <Ram.Amrani@cavium.com>
-M:	Ariel Elior <Ariel.Elior@cavium.com>
-L:	linux-rdma@vger.kernel.org
+QLOGIC QLGE 10Gb ETHERNET DRIVER
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Manish Chopra <manish.chopra@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
+L:	netdev@vger.kernel.org
 S:	Supported
-F:	drivers/infiniband/hw/qedr/
-F:	include/uapi/rdma/qedr-abi.h
+F:	drivers/net/ethernet/qlogic/qlge/
 
 QNX4 FILESYSTEM
 M:	Anders Larsen <al@alarsen.net>
@@ -10919,13 +10905,6 @@ T:	git git://linuxtv.org/anttip/media_tree.git
 S:	Maintained
 F:	drivers/media/tuners/qt1010*
 
-QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
-M:	QCA ath9k Development <ath9k-devel@qca.qualcomm.com>
-L:	linux-wireless@vger.kernel.org
-W:	http://wireless.kernel.org/en/users/Drivers/ath9k
-S:	Supported
-F:	drivers/net/wireless/ath/ath9k/
-
 QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
 M:	Kalle Valo <kvalo@qca.qualcomm.com>
 L:	ath10k@lists.infradead.org
@@ -10934,6 +10913,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
 S:	Supported
 F:	drivers/net/wireless/ath/ath10k/
 
+QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
+M:	QCA ath9k Development <ath9k-devel@qca.qualcomm.com>
+L:	linux-wireless@vger.kernel.org
+W:	http://wireless.kernel.org/en/users/Drivers/ath9k
+S:	Supported
+F:	drivers/net/wireless/ath/ath9k/
+
 QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 M:	Timur Tabi <timur@codeaurora.org>
 L:	netdev@vger.kernel.org
@@ -10964,12 +10950,23 @@ S:	Supported
 F:	drivers/net/wireless/ath/wcn36xx/
 
 QUANTENNA QTNFMAC WIRELESS DRIVER
-M:   Igor Mitsyanko <imitsyanko@quantenna.com>
-M:   Avinash Patil <avinashp@quantenna.com>
-M:   Sergey Matyukevich <smatyukevich@quantenna.com>
-L:   linux-wireless@vger.kernel.org
-S:   Maintained
-F:   drivers/net/wireless/quantenna
+M:	Igor Mitsyanko <imitsyanko@quantenna.com>
+M:	Avinash Patil <avinashp@quantenna.com>
+M:	Sergey Matyukevich <smatyukevich@quantenna.com>
+L:	linux-wireless@vger.kernel.org
+S:	Maintained
+F:	drivers/net/wireless/quantenna
+
+RADEON and AMDGPU DRM DRIVERS
+M:	Alex Deucher <alexander.deucher@amd.com>
+M:	Christian König <christian.koenig@amd.com>
+L:	amd-gfx@lists.freedesktop.org
+T:	git git://people.freedesktop.org/~agd5f/linux
+S:	Supported
+F:	drivers/gpu/drm/radeon/
+F:	include/uapi/drm/radeon_drm.h
+F:	drivers/gpu/drm/amd/
+F:	include/uapi/drm/amdgpu_drm.h
 
 RADEON FRAMEBUFFER DISPLAY DRIVER
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
@@ -11238,16 +11235,16 @@ S:	Maintained
 F:	lib/rhashtable.c
 F:	include/linux/rhashtable.h
 
-RICOH SMARTMEDIA/XD DRIVER
+RICOH R5C592 MEMORYSTICK DRIVER
 M:	Maxim Levitsky <maximlevitsky@gmail.com>
 S:	Maintained
-F:	drivers/mtd/nand/r852.c
-F:	drivers/mtd/nand/r852.h
+F:	drivers/memstick/host/r592.*
 
-RICOH R5C592 MEMORYSTICK DRIVER
+RICOH SMARTMEDIA/XD DRIVER
 M:	Maxim Levitsky <maximlevitsky@gmail.com>
 S:	Maintained
-F:	drivers/memstick/host/r592.*
+F:	drivers/mtd/nand/r852.c
+F:	drivers/mtd/nand/r852.h
 
 ROCCAT DRIVERS
 M:	Stefan Achatz <erazor_de@users.sourceforge.net>
@@ -11384,6 +11381,23 @@ S:	Supported
 F:	drivers/s390/block/dasd*
 F:	block/partitions/ibm.c
 
+S390 IOMMU (PCI)
+M:	Gerald Schaefer <gerald.schaefer@de.ibm.com>
+L:	linux-s390@vger.kernel.org
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+F:	drivers/iommu/s390-iommu.c
+
+S390 IUCV NETWORK LAYER
+M:	Julian Wiedmann <jwi@linux.vnet.ibm.com>
+M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
+L:	linux-s390@vger.kernel.org
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+F:	drivers/s390/net/*iucv*
+F:	include/net/iucv/
+F:	net/iucv/
+
 S390 NETWORK DRIVERS
 M:	Julian Wiedmann <jwi@linux.vnet.ibm.com>
 M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
@@ -11401,6 +11415,16 @@ S:	Supported
 F:	arch/s390/pci/
 F:	drivers/pci/hotplug/s390_pci_hpc.c
 
+S390 VFIO-CCW DRIVER
+M:	Cornelia Huck <cohuck@redhat.com>
+M:	Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+L:	linux-s390@vger.kernel.org
+L:	kvm@vger.kernel.org
+S:	Supported
+F:	drivers/s390/cio/vfio_ccw*
+F:	Documentation/s390/vfio-ccw.txt
+F:	include/uapi/linux/vfio_ccw.h
+
 S390 ZCRYPT DRIVER
 M:	Harald Freudenberger <freude@de.ibm.com>
 L:	linux-s390@vger.kernel.org
@@ -11415,33 +11439,6 @@ W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 F:	drivers/s390/scsi/zfcp_*
 
-S390 IUCV NETWORK LAYER
-M:	Julian Wiedmann <jwi@linux.vnet.ibm.com>
-M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
-L:	linux-s390@vger.kernel.org
-W:	http://www.ibm.com/developerworks/linux/linux390/
-S:	Supported
-F:	drivers/s390/net/*iucv*
-F:	include/net/iucv/
-F:	net/iucv/
-
-S390 IOMMU (PCI)
-M:	Gerald Schaefer <gerald.schaefer@de.ibm.com>
-L:	linux-s390@vger.kernel.org
-W:	http://www.ibm.com/developerworks/linux/linux390/
-S:	Supported
-F:	drivers/iommu/s390-iommu.c
-
-S390 VFIO-CCW DRIVER
-M:	Cornelia Huck <cohuck@redhat.com>
-M:	Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
-L:	linux-s390@vger.kernel.org
-L:	kvm@vger.kernel.org
-S:	Supported
-F:	drivers/s390/cio/vfio_ccw*
-F:	Documentation/s390/vfio-ccw.txt
-F:	include/uapi/linux/vfio_ccw.h
-
 S3C24XX SD/MMC Driver
 M:	Ben Dooks <ben-linux@fluff.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -11475,12 +11472,6 @@ F:	drivers/media/common/saa7146/
 F:	drivers/media/pci/saa7146/
 F:	include/media/saa7146*
 
-SAMSUNG LAPTOP DRIVER
-M:	Corentin Chary <corentin.chary@gmail.com>
-L:	platform-driver-x86@vger.kernel.org
-S:	Maintained
-F:	drivers/platform/x86/samsung-laptop.c
-
 SAMSUNG AUDIO (ASoC) DRIVERS
 M:	Krzysztof Kozlowski <krzk@kernel.org>
 M:	Sangbeom Kim <sbkim73@samsung.com>
@@ -11503,6 +11494,12 @@ L:	linux-fbdev@vger.kernel.org
 S:	Maintained
 F:	drivers/video/fbdev/s3c-fb.c
 
+SAMSUNG LAPTOP DRIVER
+M:	Corentin Chary <corentin.chary@gmail.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	drivers/platform/x86/samsung-laptop.c
+
 SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
 M:	Sangbeom Kim <sbkim73@samsung.com>
 M:	Krzysztof Kozlowski <krzk@kernel.org>
@@ -11521,22 +11518,6 @@ F:	Documentation/devicetree/bindings/regulator/samsung,s2m*.txt
 F:	Documentation/devicetree/bindings/regulator/samsung,s5m*.txt
 F:	Documentation/devicetree/bindings/clock/samsung,s2mps11.txt
 
-SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M:	Krzysztof Kozlowski <krzk@kernel.org>
-M:	Vladimir Zapolskiy <vz@mleia.com>
-L:	linux-crypto@vger.kernel.org
-L:	linux-samsung-soc@vger.kernel.org
-S:	Maintained
-F:	drivers/crypto/s5p-sss.c
-
-SAMSUNG S5P/EXYNOS4 SOC SERIES CAMERA SUBSYSTEM DRIVERS
-M:	Kyungmin Park <kyungmin.park@samsung.com>
-M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
-L:	linux-media@vger.kernel.org
-Q:	https://patchwork.linuxtv.org/project/linux-media/list/
-S:	Supported
-F:	drivers/media/platform/exynos4-is/
-
 SAMSUNG S3C24XX/S3C64XX SOC SERIES CAMIF DRIVER
 M:	Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
 L:	linux-media@vger.kernel.org
@@ -11545,6 +11526,13 @@ S:	Maintained
 F:	drivers/media/platform/s3c-camif/
 F:	include/media/drv-intf/s3c_camif.h
 
+SAMSUNG S3FWRN5 NFC DRIVER
+M:	Robert Baldyga <r.baldyga@samsung.com>
+M:	Krzysztof Opasiak <k.opasiak@samsung.com>
+L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+S:	Supported
+F:	drivers/nfc/s3fwrn5
+
 SAMSUNG S5C73M3 CAMERA DRIVER
 M:	Kyungmin Park <kyungmin.park@samsung.com>
 M:	Andrzej Hajda <a.hajda@samsung.com>
@@ -11559,12 +11547,21 @@ L:	linux-media@vger.kernel.org
 S:	Supported
 F:	drivers/media/i2c/s5k5baf.c
 
-SAMSUNG S3FWRN5 NFC DRIVER
-M:	Robert Baldyga <r.baldyga@samsung.com>
-M:	Krzysztof Opasiak <k.opasiak@samsung.com>
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+SAMSUNG S5P Security SubSystem (SSS) DRIVER
+M:	Krzysztof Kozlowski <krzk@kernel.org>
+M:	Vladimir Zapolskiy <vz@mleia.com>
+L:	linux-crypto@vger.kernel.org
+L:	linux-samsung-soc@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/s5p-sss.c
+
+SAMSUNG S5P/EXYNOS4 SOC SERIES CAMERA SUBSYSTEM DRIVERS
+M:	Kyungmin Park <kyungmin.park@samsung.com>
+M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
+L:	linux-media@vger.kernel.org
+Q:	https://patchwork.linuxtv.org/project/linux-media/list/
 S:	Supported
-F:	drivers/nfc/s3fwrn5
+F:	drivers/media/platform/exynos4-is/
 
 SAMSUNG SOC CLOCK DRIVERS
 M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
@@ -11744,6 +11741,13 @@ F:	Documentation/userspace-api/seccomp_filter.rst
 K:	\bsecure_computing
 K:	\bTIF_SECCOMP\b
 
+SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER
+M:	Al Cooper <alcooperx@gmail.com>
+L:	linux-mmc@vger.kernel.org
+L:	bcm-kernel-feedback-list@broadcom.com
+S:	Maintained
+F:	drivers/mmc/host/sdhci-brcmstb*
+
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
 M:	Adrian Hunter <adrian.hunter@intel.com>
 L:	linux-mmc@vger.kernel.org
@@ -11752,13 +11756,6 @@ S:	Maintained
 F:	drivers/mmc/host/sdhci*
 F:	include/linux/mmc/sdhci*
 
-SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER
-M:	Al Cooper <alcooperx@gmail.com>
-L:	linux-mmc@vger.kernel.org
-L:	bcm-kernel-feedback-list@broadcom.com
-S:	Maintained
-F:	drivers/mmc/host/sdhci-brcmstb*
-
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) SAMSUNG DRIVER
 M:	Ben Dooks <ben-linux@fluff.org>
 M:	Jaehoon Chung <jh80.chung@samsung.com>
@@ -11783,6 +11780,10 @@ F:	block/opal_proto.h
 F:	include/linux/sed*
 F:	include/uapi/linux/sed*
 
+SECURITY CONTACT
+M:	Security Officers <security@kernel.org>
+S:	Supported
+
 SECURITY SUBSYSTEM
 M:	James Morris <james.l.morris@oracle.com>
 M:	"Serge E. Hallyn" <serge@hallyn.com>
@@ -11792,10 +11793,6 @@ W:	http://kernsec.org/
 S:	Supported
 F:	security/
 
-SECURITY CONTACT
-M:	Security Officers <security@kernel.org>
-S:	Supported
-
 SELINUX SECURITY MODULE
 M:	Paul Moore <paul@paul-moore.com>
 M:	Stephen Smalley <sds@tycho.nsa.gov>
@@ -11815,13 +11812,6 @@ S:	Maintained
 F:	drivers/misc/phantom.c
 F:	include/uapi/linux/phantom.h
 
-SERIAL DRIVERS
-M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:	linux-serial@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/serial/
-F:	drivers/tty/serial/
-
 SERIAL DEVICE BUS
 M:	Rob Herring <robh@kernel.org>
 L:	linux-serial@vger.kernel.org
@@ -11830,6 +11820,13 @@ F:	Documentation/devicetree/bindings/serial/slave-device.txt
 F:	drivers/tty/serdev/
 F:	include/linux/serdev.h
 
+SERIAL DRIVERS
+M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+L:	linux-serial@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/serial/
+F:	drivers/tty/serial/
+
 SERIAL IR RECEIVER
 M:	Sean Young <sean@mess.org>
 L:	linux-media@vger.kernel.org
@@ -11863,6 +11860,13 @@ M:	Robin Holt <robinmholt@gmail.com>
 S:	Maintained
 F:	drivers/misc/sgi-xp/
 
+SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
+M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
+L:	linux-s390@vger.kernel.org
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+F:	net/smc/
+
 SH_VEU V4L2 MEM2MEM DRIVER
 L:	linux-media@vger.kernel.org
 S:	Orphan
@@ -11874,13 +11878,6 @@ S:	Orphan
 F:	drivers/media/platform/sh_vou.c
 F:	include/media/drv-intf/sh_vou.h
 
-SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
-M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
-L:	linux-s390@vger.kernel.org
-W:	http://www.ibm.com/developerworks/linux/linux390/
-S:	Supported
-F:	net/smc/
-
 SI2157 MEDIA DRIVER
 M:	Antti Palosaari <crope@iki.fi>
 L:	linux-media@vger.kernel.org
@@ -12111,6 +12108,12 @@ S:	Supported
 F:	Documentation/hwmon/sch5627
 F:	drivers/hwmon/sch5627.c
 
+SMSC UFX6000 and UFX7000 USB to VGA DRIVER
+M:	Steve Glendinning <steve.glendinning@shawell.net>
+L:	linux-fbdev@vger.kernel.org
+S:	Maintained
+F:	drivers/video/fbdev/smscufx.c
+
 SMSC47B397 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-hwmon@vger.kernel.org
@@ -12131,12 +12134,6 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/smsc/smsc9420.*
 
-SMSC UFX6000 and UFX7000 USB to VGA DRIVER
-M:	Steve Glendinning <steve.glendinning@shawell.net>
-L:	linux-fbdev@vger.kernel.org
-S:	Maintained
-F:	drivers/video/fbdev/smscufx.c
-
 SOC-CAMERA V4L2 SUBSYSTEM
 M:	Guennadi Liakhovetski <g.liakhovetski@gmx.de>
 L:	linux-media@vger.kernel.org
@@ -12192,16 +12189,6 @@ S:	Maintained
 F:	drivers/ssb/
 F:	include/linux/ssb/
 
-SONY VAIO CONTROL DEVICE DRIVER
-M:	Mattia Dongili <malattia@linux.it>
-L:	platform-driver-x86@vger.kernel.org
-W:	http://www.linux.it/~malattia/wiki/index.php/Sony_drivers
-S:	Maintained
-F:	Documentation/laptops/sony-laptop.txt
-F:	drivers/char/sonypi.c
-F:	drivers/platform/x86/sony-laptop.c
-F:	include/linux/sony-laptop.h
-
 SONY MEMORYSTICK CARD SUPPORT
 M:	Alex Dubov <oakad@yahoo.com>
 W:	http://tifmxx.berlios.de/
@@ -12213,6 +12200,16 @@ M:	Maxim Levitsky <maximlevitsky@gmail.com>
 S:	Maintained
 F:	drivers/memstick/core/ms_block.*
 
+SONY VAIO CONTROL DEVICE DRIVER
+M:	Mattia Dongili <malattia@linux.it>
+L:	platform-driver-x86@vger.kernel.org
+W:	http://www.linux.it/~malattia/wiki/index.php/Sony_drivers
+S:	Maintained
+F:	Documentation/laptops/sony-laptop.txt
+F:	drivers/char/sonypi.c
+F:	drivers/platform/x86/sony-laptop.c
+F:	include/linux/sony-laptop.h
+
 SOUND
 M:	Jaroslav Kysela <perex@perex.cz>
 M:	Takashi Iwai <tiwai@suse.com>
@@ -12238,6 +12235,13 @@ F:	include/uapi/sound/compress_*
 F:	sound/core/compress_offload.c
 F:	sound/soc/soc-compress.c
 
+SOUND - DMAENGINE HELPERS
+M:	Lars-Peter Clausen <lars@metafoo.de>
+S:	Supported
+F:	include/sound/dmaengine_pcm.h
+F:	sound/core/pcm_dmaengine.c
+F:	sound/soc/soc-generic-dmaengine-pcm.c
+
 SOUND - SOC LAYER / DYNAMIC AUDIO POWER MANAGEMENT (ASoC)
 M:	Liam Girdwood <lgirdwood@gmail.com>
 M:	Mark Brown <broonie@kernel.org>
@@ -12247,15 +12251,8 @@ W:	http://alsa-project.org/main/index.php/ASoC
 S:	Supported
 F:	Documentation/devicetree/bindings/sound/
 F:	Documentation/sound/alsa/soc/
-F:	sound/soc/
-F:	include/sound/soc*
-
-SOUND - DMAENGINE HELPERS
-M:	Lars-Peter Clausen <lars@metafoo.de>
-S:	Supported
-F:	include/sound/dmaengine_pcm.h
-F:	sound/core/pcm_dmaengine.c
-F:	sound/soc/soc-generic-dmaengine-pcm.c
+F:	sound/soc/
+F:	include/sound/soc*
 
 SP2 MEDIA DRIVER
 M:	Olli Salonen <olli.salonen@iki.fi>
@@ -12299,21 +12296,21 @@ T:	git git://git.kernel.org/pub/scm/devel/sparse/chrisl/sparse.git
 S:	Maintained
 F:	include/linux/compiler.h
 
-SPEAR PLATFORM SUPPORT
+SPEAR CLOCK FRAMEWORK SUPPORT
 M:	Viresh Kumar <vireshk@kernel.org>
-M:	Shiraz Hashim <shiraz.linux.kernel@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.st.com/spear
 S:	Maintained
-F:	arch/arm/boot/dts/spear*
-F:	arch/arm/mach-spear/
+F:	drivers/clk/spear/
 
-SPEAR CLOCK FRAMEWORK SUPPORT
+SPEAR PLATFORM SUPPORT
 M:	Viresh Kumar <vireshk@kernel.org>
+M:	Shiraz Hashim <shiraz.linux.kernel@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.st.com/spear
 S:	Maintained
-F:	drivers/clk/spear/
+F:	arch/arm/boot/dts/spear*
+F:	arch/arm/mach-spear/
 
 SPI NOR SUBSYSTEM
 M:	Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
@@ -12384,13 +12381,6 @@ L:	stable@vger.kernel.org
 S:	Supported
 F:	Documentation/process/stable-kernel-rules.rst
 
-STAGING SUBSYSTEM
-M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
-L:	devel@driverdev.osuosl.org
-S:	Supported
-F:	drivers/staging/
-
 STAGING - COMEDI
 M:	Ian Abbott <abbotti@mev.co.uk>
 M:	H Hartley Sweeten <hsweeten@visionengravers.com>
@@ -12480,6 +12470,13 @@ M:	Arnaud Patard <arnaud.patard@rtp-net.org>
 S:	Odd Fixes
 F:	drivers/staging/xgifb/
 
+STAGING SUBSYSTEM
+M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
+L:	devel@driverdev.osuosl.org
+S:	Supported
+F:	drivers/staging/
+
 STARFIRE/DURALAN NETWORK DRIVER
 M:	Ion Badulescu <ionut@badula.org>
 S:	Odd Fixes
@@ -12988,39 +12985,6 @@ M:	Yehezkel Bernat <yehezkel.bernat@intel.com>
 S:	Maintained
 F:	drivers/thunderbolt/
 
-TI BQ27XXX POWER SUPPLY DRIVER
-R:	Andrew F. Davis <afd@ti.com>
-F:	include/linux/power/bq27xxx_battery.h
-F:	drivers/power/supply/bq27xxx_battery.c
-F:	drivers/power/supply/bq27xxx_battery_i2c.c
-
-TI DAVINCI MACHINE SUPPORT
-M:	Sekhar Nori <nsekhar@ti.com>
-M:	Kevin Hilman <khilman@kernel.org>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
-S:	Supported
-F:	arch/arm/mach-davinci/
-F:	drivers/i2c/busses/i2c-davinci.c
-F:	arch/arm/boot/dts/da850*
-
-TI DAVINCI SERIES MEDIA DRIVER
-M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
-L:	linux-media@vger.kernel.org
-W:	https://linuxtv.org
-Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
-S:	Maintained
-F:	drivers/media/platform/davinci/
-F:	include/media/davinci/
-
-TI DAVINCI SERIES GPIO DRIVER
-M:	Keerthy <j-keerthy@ti.com>
-L:	linux-gpio@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/gpio/gpio-davinci.txt
-F:	drivers/gpio/gpio-davinci.c
-
 TI AM437X VPFE DRIVER
 M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
 L:	linux-media@vger.kernel.org
@@ -13038,13 +13002,11 @@ L:	linux-omap@vger.kernel.org
 S:	Maintained
 F:	drivers/thermal/ti-soc-thermal/
 
-TI VPE/CAL DRIVERS
-M:	Benoit Parrot <bparrot@ti.com>
-L:	linux-media@vger.kernel.org
-W:	http://linuxtv.org/
-Q:	http://patchwork.linuxtv.org/project/linux-media/list/
-S:	Maintained
-F:	drivers/media/platform/ti-vpe/
+TI BQ27XXX POWER SUPPLY DRIVER
+R:	Andrew F. Davis <afd@ti.com>
+F:	include/linux/power/bq27xxx_battery.h
+F:	drivers/power/supply/bq27xxx_battery.c
+F:	drivers/power/supply/bq27xxx_battery_i2c.c
 
 TI CDCE706 CLOCK DRIVER
 M:	Max Filippov <jcmvbkbc@gmail.com>
@@ -13058,6 +13020,33 @@ S:	Maintained
 F:	drivers/clk/ti/
 F:	include/linux/clk/ti.h
 
+TI DAVINCI MACHINE SUPPORT
+M:	Sekhar Nori <nsekhar@ti.com>
+M:	Kevin Hilman <khilman@kernel.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
+S:	Supported
+F:	arch/arm/mach-davinci/
+F:	drivers/i2c/busses/i2c-davinci.c
+F:	arch/arm/boot/dts/da850*
+
+TI DAVINCI SERIES GPIO DRIVER
+M:	Keerthy <j-keerthy@ti.com>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/gpio/gpio-davinci.txt
+F:	drivers/gpio/gpio-davinci.c
+
+TI DAVINCI SERIES MEDIA DRIVER
+M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
+L:	linux-media@vger.kernel.org
+W:	https://linuxtv.org
+Q:	http://patchwork.linuxtv.org/project/linux-media/list/
+T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
+S:	Maintained
+F:	drivers/media/platform/davinci/
+F:	include/media/davinci/
+
 TI ETHERNET SWITCH DRIVER (CPSW)
 R:	Grygorii Strashko <grygorii.strashko@ti.com>
 L:	linux-omap@vger.kernel.org
@@ -13139,6 +13128,14 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Maintained
 F:	sound/soc/codecs/twl4030*
 
+TI VPE/CAL DRIVERS
+M:	Benoit Parrot <bparrot@ti.com>
+L:	linux-media@vger.kernel.org
+W:	http://linuxtv.org/
+Q:	http://patchwork.linuxtv.org/project/linux-media/list/
+S:	Maintained
+F:	drivers/media/platform/ti-vpe/
+
 TI WILINK WIRELESS DRIVERS
 L:	linux-wireless@vger.kernel.org
 W:	http://wireless.kernel.org/en/users/Drivers/wl12xx
@@ -13267,12 +13264,6 @@ L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	drivers/platform/x86/toshiba_haps.c
 
-TOSHIBA WMI HOTKEYS DRIVER
-M:	Azael Avalos <coproscefalo@gmail.com>
-L:	platform-driver-x86@vger.kernel.org
-S:	Maintained
-F:	drivers/platform/x86/toshiba-wmi.c
-
 TOSHIBA SMM DRIVER
 M:	Jonathan Buzzard <jonathan@buzzard.org.uk>
 W:	http://www.buzzard.org.uk/toshiba/
@@ -13288,6 +13279,12 @@ S:	Maintained
 F:	drivers/media/i2c/tc358743*
 F:	include/media/i2c/tc358743.h
 
+TOSHIBA WMI HOTKEYS DRIVER
+M:	Azael Avalos <coproscefalo@gmail.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	drivers/platform/x86/toshiba-wmi.c
+
 TPM DEVICE DRIVER
 M:	Peter Huewe <peterhuewe@gmx.de>
 M:	Marcel Selhorst <tpmdd@selhorst.net>
@@ -13529,6 +13526,14 @@ F:	drivers/mtd/ubi/
 F:	include/linux/mtd/ubi.h
 F:	include/uapi/mtd/ubi-user.h
 
+USB "USBNET" DRIVER FRAMEWORK
+M:	Oliver Neukum <oneukum@suse.com>
+L:	netdev@vger.kernel.org
+W:	http://www.linux-usb.org/usbnet
+S:	Maintained
+F:	drivers/net/usb/usbnet.c
+F:	include/linux/usb/usbnet.h
+
 USB ACM DRIVER
 M:	Oliver Neukum <oneukum@suse.com>
 L:	linux-usb@vger.kernel.org
@@ -13752,14 +13757,6 @@ L:	linux-usb@vger.kernel.org
 S:	Maintained
 F:	drivers/usb/host/uhci*
 
-USB "USBNET" DRIVER FRAMEWORK
-M:	Oliver Neukum <oneukum@suse.com>
-L:	netdev@vger.kernel.org
-W:	http://www.linux-usb.org/usbnet
-S:	Maintained
-F:	drivers/net/usb/usbnet.c
-F:	include/linux/usb/usbnet.h
-
 USB VIDEO CLASS
 M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:	linux-uvc-devel@lists.sourceforge.net (subscribers-only)
@@ -13932,6 +13929,12 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/via/via-velocity.*
 
+VIDEO MULTIPLEXER DRIVER
+M:	Philipp Zabel <p.zabel@pengutronix.de>
+L:	linux-media@vger.kernel.org
+S:	Maintained
+F:	drivers/media/platform/video-mux.c
+
 VIDEOBUF2 FRAMEWORK
 M:	Pawel Osciak <pawel@osciak.com>
 M:	Marek Szyprowski <m.szyprowski@samsung.com>
@@ -13941,12 +13944,6 @@ S:	Maintained
 F:	drivers/media/v4l2-core/videobuf2-*
 F:	include/media/videobuf2-*
 
-VIDEO MULTIPLEXER DRIVER
-M:	Philipp Zabel <p.zabel@pengutronix.de>
-L:	linux-media@vger.kernel.org
-S:	Maintained
-F:	drivers/media/platform/video-mux.c
-
 VIMC VIRTUAL MEDIA CONTROLLER DRIVER
 M:	Helen Koike <helen.koike@collabora.com>
 L:	linux-media@vger.kernel.org
@@ -14000,6 +13997,14 @@ F:	include/linux/virtio*.h
 F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
 
+VIRTIO CRYPTO DRIVER
+M:	Gonglei <arei.gonglei@huawei.com>
+L:	virtualization@lists.linux-foundation.org
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/virtio/
+F:	include/uapi/linux/virtio_crypto.h
+
 VIRTIO DRIVERS FOR S390
 M:	Cornelia Huck <cohuck@redhat.com>
 M:	Halil Pasic <pasic@linux.vnet.ibm.com>
@@ -14036,14 +14041,6 @@ S:	Maintained
 F:	drivers/virtio/virtio_input.c
 F:	include/uapi/linux/virtio_input.h
 
-VIRTIO CRYPTO DRIVER
-M:  Gonglei <arei.gonglei@huawei.com>
-L:  virtualization@lists.linux-foundation.org
-L:  linux-crypto@vger.kernel.org
-S:  Maintained
-F:  drivers/crypto/virtio/
-F:  include/uapi/linux/virtio_crypto.h
-
 VIRTUAL SERIO DEVICE DRIVER
 M:	Stephen Chandler Paul <thatslyude@gmail.com>
 S:	Maintained
@@ -14077,12 +14074,6 @@ F:	drivers/staging/vme/
 F:	drivers/vme/
 F:	include/linux/vme*
 
-VMWARE HYPERVISOR INTERFACE
-M:	Alok Kataria <akataria@vmware.com>
-L:	virtualization@lists.linux-foundation.org
-S:	Supported
-F:	arch/x86/kernel/cpu/vmware.c
-
 VMWARE BALLOON DRIVER
 M:	Xavier Deguillard <xdeguillard@vmware.com>
 M:	Philip Moltmann <moltmann@vmware.com>
@@ -14091,6 +14082,27 @@ L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	drivers/misc/vmw_balloon.c
 
+VMWARE HYPERVISOR INTERFACE
+M:	Alok Kataria <akataria@vmware.com>
+L:	virtualization@lists.linux-foundation.org
+S:	Supported
+F:	arch/x86/kernel/cpu/vmware.c
+
+VMWARE PVRDMA DRIVER
+M:	Adit Ranadive <aditr@vmware.com>
+M:	VMware PV-Drivers <pv-drivers@vmware.com>
+L:	linux-rdma@vger.kernel.org
+S:	Maintained
+F:	drivers/infiniband/hw/vmw_pvrdma/
+
+VMware PVSCSI driver
+M:	Jim Gill <jgill@vmware.com>
+M:	VMware PV-Drivers <pv-drivers@vmware.com>
+L:	linux-scsi@vger.kernel.org
+S:	Maintained
+F:	drivers/scsi/vmw_pvscsi.c
+F:	drivers/scsi/vmw_pvscsi.h
+
 VMWARE VMMOUSE SUBDRIVER
 M:	"VMware Graphics" <linux-graphics-maintainer@vmware.com>
 M:	"VMware, Inc." <pv-drivers@vmware.com>
@@ -14106,21 +14118,6 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/vmxnet3/
 
-VMware PVSCSI driver
-M:	Jim Gill <jgill@vmware.com>
-M:	VMware PV-Drivers <pv-drivers@vmware.com>
-L:	linux-scsi@vger.kernel.org
-S:	Maintained
-F:	drivers/scsi/vmw_pvscsi.c
-F:	drivers/scsi/vmw_pvscsi.h
-
-VMWARE PVRDMA DRIVER
-M:	Adit Ranadive <aditr@vmware.com>
-M:	VMware PV-Drivers <pv-drivers@vmware.com>
-L:	linux-rdma@vger.kernel.org
-S:	Maintained
-F:	drivers/infiniband/hw/vmw_pvrdma/
-
 VOLTAGE AND CURRENT REGULATOR FRAMEWORK
 M:	Liam Girdwood <lgirdwood@gmail.com>
 M:	Mark Brown <broonie@kernel.org>
@@ -14341,15 +14338,6 @@ S:	Maintained
 F:	Documentation/x86/
 F:	arch/x86/
 
-X86 PLATFORM DRIVERS
-M:	Darren Hart <dvhart@infradead.org>
-M:	Andy Shevchenko <andy@infradead.org>
-L:	platform-driver-x86@vger.kernel.org
-T:	git git://git.infradead.org/users/dvhart/linux-platform-drivers-x86.git
-S:	Maintained
-F:	drivers/platform/x86/
-F:	drivers/platform/olpc/
-
 X86 MCE INFRASTRUCTURE
 M:	Tony Luck <tony.luck@intel.com>
 M:	Borislav Petkov <bp@alien8.de>
@@ -14362,6 +14350,15 @@ M:	Borislav Petkov <bp@alien8.de>
 S:	Maintained
 F:	arch/x86/kernel/cpu/microcode/*
 
+X86 PLATFORM DRIVERS
+M:	Darren Hart <dvhart@infradead.org>
+M:	Andy Shevchenko <andy@infradead.org>
+L:	platform-driver-x86@vger.kernel.org
+T:	git git://git.infradead.org/users/dvhart/linux-platform-drivers-x86.git
+S:	Maintained
+F:	drivers/platform/x86/
+F:	drivers/platform/olpc/
+
 X86 VDSO
 M:	Andy Lutomirski <luto@amacapital.net>
 L:	linux-kernel@vger.kernel.org
@@ -14378,20 +14375,13 @@ T:	git git://linuxtv.org/media_tree.git
 S:	Maintained
 F:	drivers/media/tuners/tuner-xc2028.*
 
-XEN HYPERVISOR INTERFACE
-M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
-M:	Juergen Gross <jgross@suse.com>
+XEN BLOCK SUBSYSTEM
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+M:	Roger Pau Monné <roger.pau@citrix.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git
 S:	Supported
-F:	arch/x86/xen/
-F:	drivers/*/xen-*front.c
-F:	drivers/xen/
-F:	arch/x86/include/asm/xen/
-F:	include/xen/
-F:	include/uapi/xen/
-F:	Documentation/ABI/stable/sysfs-hypervisor-xen
-F:	Documentation/ABI/testing/sysfs-hypervisor-xen
+F:	drivers/block/xen-blkback/*
+F:	drivers/block/xen*
 
 XEN HYPERVISOR ARM
 M:	Stefano Stabellini <sstabellini@kernel.org>
@@ -14407,6 +14397,21 @@ S:	Maintained
 F:	arch/arm64/xen/
 F:	arch/arm64/include/asm/xen/
 
+XEN HYPERVISOR INTERFACE
+M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
+M:	Juergen Gross <jgross@suse.com>
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git
+S:	Supported
+F:	arch/x86/xen/
+F:	drivers/*/xen-*front.c
+F:	drivers/xen/
+F:	arch/x86/include/asm/xen/
+F:	include/xen/
+F:	include/uapi/xen/
+F:	Documentation/ABI/stable/sysfs-hypervisor-xen
+F:	Documentation/ABI/testing/sysfs-hypervisor-xen
+
 XEN NETWORK BACKEND DRIVER
 M:	Wei Liu <wei.liu2@citrix.com>
 M:	Paul Durrant <paul.durrant@citrix.com>
@@ -14422,14 +14427,6 @@ S:	Supported
 F:	arch/x86/pci/*xen*
 F:	drivers/pci/*xen*
 
-XEN BLOCK SUBSYSTEM
-M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-M:	Roger Pau Monné <roger.pau@citrix.com>
-L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
-S:	Supported
-F:	drivers/block/xen-blkback/*
-F:	drivers/block/xen*
-
 XEN PVSCSI DRIVERS
 M:	Juergen Gross <jgross@suse.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl
new file mode 100644
index 0000000000000..a0fe34349b24f
--- /dev/null
+++ b/scripts/parse-maintainers.pl
@@ -0,0 +1,77 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my %map;
+
+# sort comparison function
+sub by_category($$) {
+    my ($a, $b) = @_;
+
+    $a = uc $a;
+    $b = uc $b;
+
+    # This always sorts last
+    $a =~ s/THE REST/ZZZZZZ/g;
+    $b =~ s/THE REST/ZZZZZZ/g;
+
+    $a cmp $b;
+}
+
+sub alpha_output {
+    my $key;
+    my $sort_method = \&by_category;
+    my $sep = "";
+
+    foreach $key (sort $sort_method keys %map) {
+        if ($key ne " ") {
+            print $sep . $key . "\n";
+            $sep = "\n";
+        }
+        print $map{$key};
+    }
+}
+
+sub trim {
+    my $s = shift;
+    $s =~ s/\s+$//;
+    $s =~ s/^\s+//;
+    return $s;
+}
+
+sub file_input {
+    my $lastline = "";
+    my $case = " ";
+    $map{$case} = "";
+
+    while (<>) {
+        my $line = $_;
+
+        # Pattern line?
+        if ($line =~ m/^([A-Z]):\s*(.*)/) {
+            $line = $1 . ":\t" . trim($2) . "\n";
+            if ($lastline eq "") {
+                $map{$case} = $map{$case} . $line;
+                next;
+            }
+            $case = trim($lastline);
+            exists $map{$case} and die "Header '$case' already exists";
+            $map{$case} = $line;
+            $lastline = "";
+            next;
+        }
+
+        if ($case eq " ") {
+            $map{$case} = $map{$case} . $lastline;
+            $lastline = $line;
+            next;
+        }
+        trim($lastline) eq "" or die ("Odd non-pattern line '$lastline' for '$case'");
+        $lastline = $line;
+    }
+    $map{$case} = $map{$case} . $lastline;
+}
+
+&file_input;
+&alpha_output;
+exit(0);
-- 
GitLab


From 520eccdfe187591a51ea9ab4c1a024ae4d0f68d9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 23 Jul 2017 16:15:17 -0700
Subject: [PATCH 1955/1958] Linux 4.13-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b4fb9a1d1594e..0662b5201d3ed 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
GitLab


From 581c4484769e692eade761c17c22549aaefe6749 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 1 Aug 2017 15:38:01 -0700
Subject: [PATCH 1956/1958] HID: input: map digitizer battery usage

We already mapped battery strength reports from the generic device
control page, but we did not update capacity from input reports, nor we
mapped the battery strength report from the digitizer page, so let's
implement this now.

Batteries driven by the input reports will now start in "unknown" state,
and will get updated once we receive first report containing battery
strength from the device.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 184 +++++++++++++++++++++++++++-------------
 include/linux/hid.h     |   2 +
 2 files changed, 127 insertions(+), 59 deletions(-)

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index ccdff1ee1f0c8..2158ec766dd59 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -340,13 +340,45 @@ static unsigned find_battery_quirk(struct hid_device *hdev)
 	return quirks;
 }
 
+static int hidinput_scale_battery_capacity(struct hid_device *dev,
+					   int value)
+{
+	if (dev->battery_min < dev->battery_max &&
+	    value >= dev->battery_min && value <= dev->battery_max)
+		value = ((value - dev->battery_min) * 100) /
+			(dev->battery_max - dev->battery_min);
+
+	return value;
+}
+
+static int hidinput_query_battery_capacity(struct hid_device *dev)
+{
+	u8 *buf;
+	int ret;
+
+	buf = kmalloc(2, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = hid_hw_raw_request(dev, dev->battery_report_id, buf, 2,
+				 dev->battery_report_type, HID_REQ_GET_REPORT);
+	if (ret != 2) {
+		kfree(buf);
+		return -ENODATA;
+	}
+
+	ret = hidinput_scale_battery_capacity(dev, buf[1]);
+	kfree(buf);
+	return ret;
+}
+
 static int hidinput_get_battery_property(struct power_supply *psy,
 					 enum power_supply_property prop,
 					 union power_supply_propval *val)
 {
 	struct hid_device *dev = power_supply_get_drvdata(psy);
+	int value;
 	int ret = 0;
-	__u8 *buf;
 
 	switch (prop) {
 	case POWER_SUPPLY_PROP_PRESENT:
@@ -355,29 +387,15 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 		break;
 
 	case POWER_SUPPLY_PROP_CAPACITY:
-
-		buf = kmalloc(2 * sizeof(__u8), GFP_KERNEL);
-		if (!buf) {
-			ret = -ENOMEM;
-			break;
-		}
-		ret = hid_hw_raw_request(dev, dev->battery_report_id, buf, 2,
-					 dev->battery_report_type,
-					 HID_REQ_GET_REPORT);
-
-		if (ret != 2) {
-			ret = -ENODATA;
-			kfree(buf);
-			break;
+		if (dev->battery_report_type == HID_FEATURE_REPORT) {
+			value = hidinput_query_battery_capacity(dev);
+			if (value < 0)
+				return value;
+		} else  {
+			value = dev->battery_capacity;
 		}
-		ret = 0;
 
-		if (dev->battery_min < dev->battery_max &&
-		    buf[1] >= dev->battery_min &&
-		    buf[1] <= dev->battery_max)
-			val->intval = (100 * (buf[1] - dev->battery_min)) /
-				(dev->battery_max - dev->battery_min);
-		kfree(buf);
+		val->intval = value;
 		break;
 
 	case POWER_SUPPLY_PROP_MODEL_NAME:
@@ -385,7 +403,22 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 		break;
 
 	case POWER_SUPPLY_PROP_STATUS:
-		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		if (!dev->battery_reported &&
+		    dev->battery_report_type == HID_FEATURE_REPORT) {
+			value = hidinput_query_battery_capacity(dev);
+			if (value < 0)
+				return value;
+
+			dev->battery_capacity = value;
+			dev->battery_reported = true;
+		}
+
+		if (!dev->battery_reported)
+			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+		else if (dev->battery_capacity == 100)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
 		break;
 
 	case POWER_SUPPLY_PROP_SCOPE:
@@ -400,18 +433,16 @@ static int hidinput_get_battery_property(struct power_supply *psy,
 	return ret;
 }
 
-static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field)
+static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field)
 {
-	struct power_supply_desc *psy_desc = NULL;
+	struct power_supply_desc *psy_desc;
 	struct power_supply_config psy_cfg = { .drv_data = dev, };
 	unsigned quirks;
 	s32 min, max;
+	int error;
 
-	if (field->usage->hid != HID_DC_BATTERYSTRENGTH)
-		return false;	/* no match */
-
-	if (dev->battery != NULL)
-		goto out;	/* already initialized? */
+	if (dev->battery)
+		return 0;	/* already initialized? */
 
 	quirks = find_battery_quirk(dev);
 
@@ -419,16 +450,16 @@ static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 		dev->bus, dev->vendor, dev->product, dev->version, quirks);
 
 	if (quirks & HID_BATTERY_QUIRK_IGNORE)
-		goto out;
+		return 0;
 
 	psy_desc = kzalloc(sizeof(*psy_desc), GFP_KERNEL);
-	if (psy_desc == NULL)
-		goto out;
+	if (!psy_desc)
+		return -ENOMEM;
 
 	psy_desc->name = kasprintf(GFP_KERNEL, "hid-%s-battery", dev->uniq);
-	if (psy_desc->name == NULL) {
-		kfree(psy_desc);
-		goto out;
+	if (!psy_desc->name) {
+		error = -ENOMEM;
+		goto err_free_mem;
 	}
 
 	psy_desc->type = POWER_SUPPLY_TYPE_BATTERY;
@@ -455,17 +486,20 @@ static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 
 	dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg);
 	if (IS_ERR(dev->battery)) {
-		hid_warn(dev, "can't register power supply: %ld\n",
-				PTR_ERR(dev->battery));
-		kfree(psy_desc->name);
-		kfree(psy_desc);
-		dev->battery = NULL;
-	} else {
-		power_supply_powers(dev->battery, &dev->dev);
+		error = PTR_ERR(dev->battery);
+		hid_warn(dev, "can't register power supply: %d\n", error);
+		goto err_free_name;
 	}
 
-out:
-	return true;
+	power_supply_powers(dev->battery, &dev->dev);
+	return 0;
+
+err_free_name:
+	kfree(psy_desc->name);
+err_free_mem:
+	kfree(psy_desc);
+	dev->battery = NULL;
+	return error;
 }
 
 static void hidinput_cleanup_battery(struct hid_device *dev)
@@ -481,16 +515,33 @@ static void hidinput_cleanup_battery(struct hid_device *dev)
 	kfree(psy_desc);
 	dev->battery = NULL;
 }
+
+static void hidinput_update_battery(struct hid_device *dev, int value)
+{
+	if (!dev->battery)
+		return;
+
+	if (value == 0 || value < dev->battery_min || value > dev->battery_max)
+		return;
+
+	dev->battery_capacity = hidinput_scale_battery_capacity(dev, value);
+	dev->battery_reported = true;
+	power_supply_changed(dev->battery);
+}
 #else  /* !CONFIG_HID_BATTERY_STRENGTH */
-static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
-				   struct hid_field *field)
+static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
+				  struct hid_field *field)
 {
-	return false;
+	return 0;
 }
 
 static void hidinput_cleanup_battery(struct hid_device *dev)
 {
 }
+
+static void hidinput_update_battery(struct hid_device *dev, int value)
+{
+}
 #endif	/* CONFIG_HID_BATTERY_STRENGTH */
 
 static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_field *field,
@@ -710,6 +761,11 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 			}
 			break;
 
+		case 0x3b: /* Battery Strength */
+			hidinput_setup_battery(device, HID_INPUT_REPORT, field);
+			usage->type = EV_PWR;
+			goto ignore;
+
 		case 0x3c: /* Invert */
 			map_key_clear(BTN_TOOL_RUBBER);
 			break;
@@ -944,11 +1000,13 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		break;
 
 	case HID_UP_GENDEVCTRLS:
-		if (hidinput_setup_battery(device, HID_INPUT_REPORT, field))
+		switch (usage->hid) {
+		case HID_DC_BATTERYSTRENGTH:
+			hidinput_setup_battery(device, HID_INPUT_REPORT, field);
+			usage->type = EV_PWR;
 			goto ignore;
-		else
-			goto unknown;
-		break;
+		}
+		goto unknown;
 
 	case HID_UP_HPVENDOR:	/* Reported on a Dutch layout HP5308 */
 		set_bit(EV_REP, input->evbit);
@@ -1031,7 +1089,6 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 	if (usage->code > max)
 		goto ignore;
 
-
 	if (usage->type == EV_ABS) {
 
 		int a = field->logical_minimum;
@@ -1090,14 +1147,19 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 	struct input_dev *input;
 	unsigned *quirks = &hid->quirks;
 
-	if (!field->hidinput)
+	if (!usage->type)
 		return;
 
-	input = field->hidinput->input;
+	if (usage->type == EV_PWR) {
+		hidinput_update_battery(hid, value);
+		return;
+	}
 
-	if (!usage->type)
+	if (!field->hidinput)
 		return;
 
+	input = field->hidinput->input;
+
 	if (usage->hat_min < usage->hat_max || usage->hat_dir) {
 		int hat_dir = usage->hat_dir;
 		if (!hat_dir)
@@ -1373,6 +1435,7 @@ static void report_features(struct hid_device *hid)
 	struct hid_driver *drv = hid->driver;
 	struct hid_report_enum *rep_enum;
 	struct hid_report *rep;
+	struct hid_usage *usage;
 	int i, j;
 
 	rep_enum = &hid->report_enum[HID_FEATURE_REPORT];
@@ -1383,12 +1446,15 @@ static void report_features(struct hid_device *hid)
 				continue;
 
 			for (j = 0; j < rep->field[i]->maxusage; j++) {
+				usage = &rep->field[i]->usage[j];
+
 				/* Verify if Battery Strength feature is available */
-				hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i]);
+				if (usage->hid == HID_DC_BATTERYSTRENGTH)
+					hidinput_setup_battery(hid, HID_FEATURE_REPORT,
+							       rep->field[i]);
 
 				if (drv->feature_mapping)
-					drv->feature_mapping(hid, rep->field[i],
-							     rep->field[i]->usage + j);
+					drv->feature_mapping(hid, rep->field[i], usage);
 			}
 		}
 }
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 5006f9b5d8370..281d1ffcbe02c 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -542,10 +542,12 @@ struct hid_device {							/* device report descriptor */
 	 * battery is non-NULL.
 	 */
 	struct power_supply *battery;
+	__s32 battery_capacity;
 	__s32 battery_min;
 	__s32 battery_max;
 	__s32 battery_report_type;
 	__s32 battery_report_id;
+	bool battery_reported;
 #endif
 
 	unsigned int status;						/* see STAT flags above */
-- 
GitLab


From 0922386538f9da8f242b1d52b5538bf9b8ddded5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 1 Aug 2017 15:38:02 -0700
Subject: [PATCH 1957/1958] HID: input: optionally use device id in battery
 name

Manufacturers do not always populate serial number in their devices, so
let's fall back to device ID when forming the battery device name. As a
result, batteries in devices without serial number will be named like
this:

	hid-0018:2D1F:510E.0001-battery

(as opposed to hid--battery for the first one, and failing to create
batteries for the subsequent ones).

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 2158ec766dd59..27d8442b017dc 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -456,7 +456,9 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
 	if (!psy_desc)
 		return -ENOMEM;
 
-	psy_desc->name = kasprintf(GFP_KERNEL, "hid-%s-battery", dev->uniq);
+	psy_desc->name = kasprintf(GFP_KERNEL, "hid-%s-battery",
+				   strlen(dev->uniq) ?
+					dev->uniq : dev_name(&dev->dev));
 	if (!psy_desc->name) {
 		error = -ENOMEM;
 		goto err_free_mem;
-- 
GitLab


From 0152b29c89650654abf4f0e96bbf2566b85ae55d Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 10 Aug 2017 12:47:56 -0700
Subject: [PATCH 1958/1958] HID: input: throttle battery uevents

The power_supply subsystem tends to emit uevent every time
power_supply_changed() is called, so we should call this API only when battery
strength reported by the device is actually different from the previous
readings, otherwise we'll drown the system in uevents.

Fixes: 581c4484769e ("HID: input: map digitizer battery usage")
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 27d8442b017dc..199f6a01fc629 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -520,15 +520,21 @@ static void hidinput_cleanup_battery(struct hid_device *dev)
 
 static void hidinput_update_battery(struct hid_device *dev, int value)
 {
+	int capacity;
+
 	if (!dev->battery)
 		return;
 
 	if (value == 0 || value < dev->battery_min || value > dev->battery_max)
 		return;
 
-	dev->battery_capacity = hidinput_scale_battery_capacity(dev, value);
-	dev->battery_reported = true;
-	power_supply_changed(dev->battery);
+	capacity = hidinput_scale_battery_capacity(dev, value);
+
+	if (!dev->battery_reported || capacity != dev->battery_capacity) {
+		dev->battery_capacity = capacity;
+		dev->battery_reported = true;
+		power_supply_changed(dev->battery);
+	}
 }
 #else  /* !CONFIG_HID_BATTERY_STRENGTH */
 static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type,
-- 
GitLab